5 files changed, 30 insertions, 13 deletions
diff --git a/src/generate_kmers b/src/python/generate_kmers
index 38fdf0a..38fdf0a 100755
--- a/src/generate_kmers
+++ b/src/python/generate_kmers
diff --git a/src/multifasta_to_otu.py b/src/python/multifasta_to_otu.py
index c6fb562..c6fb562 100755
--- a/src/multifasta_to_otu.py
+++ b/src/python/multifasta_to_otu.py
diff --git a/src/quikr b/src/python/quikr
index bac01ca..bac01ca 100755
--- a/src/quikr
+++ b/src/python/quikr
diff --git a/src/quikr.py b/src/python/quikr.py
index 3f8221e..368ab31 100755
--- a/src/quikr.py
+++ b/src/python/quikr.py
@@ -1,6 +1,7 @@
 #!/usr/bin/python
 import os
 import sys
+from StringIO import StringIO
 import scipy.optimize.nnls
 import scipy.sparse
 import numpy as np
@@ -11,12 +12,32 @@ import gzip
 import itertools
 
 def generate_kmers(kmer):
-  """ This will return a list of kmers seperated by newlines """
+  """ Generate all possible kmer permutations, separated by newlines
+
+ >>> kmers =  generate_kmers(1)
+ >>> generate_kmers(2)
+
+ param kmer: the desired Mer size
+ type  kmer: int
+ return: Returns a string of kmers separated by newlines
+ rtype: string
+ """
+
   return '\n'.join(''.join(x) for x in itertools.product('acgt', repeat=kmer))
 
 def isCompressed(filename):
-  """ This function checks to see if the file is gzipped """ 
+  """ This function checks to see if the file is gzipped
+  
+  >>> boolean_value = isCompressed("/path/to/compressed/gzip/file")
+  >>> print boolean_value
+  True
+
+  param filename: the filename to check
+  type  filename: string
+  return: Returns whether the file is gzipped
+  rtype: boolean
 
+  """
   f = open(filename, "rb")
 
   # The first two bytes of a gzipped file are always '1f 8b'
@@ -35,16 +56,8 @@ def train_matrix(input_file_location, kmer):
 
   kmer_file_name = str(kmer) + "mers.txt"
 
-  if not os.path.isfile(kmer_file_name):
-    print "could not find kmer file"
-    exit()
-  
-  uname = platform.uname()[0]
-
-  if uname == "Linux": 
-    input_file = Popen(["./probabilities-by-read-linux", str(kmer), input_file_location, kmer_file_name], stdout=PIPE) 
-  elif uname == "Darwin":
-    input_file = Popen(["./probabilities-by-read-osx", str(kmer), input_file_location, kmer_file_name]) 
+  kmer_output = Popen(["generate_kmers", str(kmer)], stdout=PIPE)
+  input_file = Popen(["probabilities-by-read", str(kmer), input_file_location] , stdout=PIPE) 
 
   # load and  normalize the matrix by dividing each element by the sum of it's column.
   # also do some fancy rotations so that it works properly with quikr
@@ -104,7 +117,6 @@ def calculate_estimated_frequencies(input_fasta_location, trained_matrix, kmer,
   trained_matrix = trained_matrix * default_lambda;
   trained_matrix = np.vstack((np.ones(trained_matrix.shape[1]), trained_matrix))
 
-
   xstar, rnorm = scipy.optimize.nnls(trained_matrix, counts) 
   xstar = xstar / xstar.sum(0) 
 
diff --git a/src/quikr_train b/src/python/quikr_train
index 6e599c9..bf74e12 100755
--- a/src/quikr_train
+++ b/src/python/quikr_train
@@ -35,6 +35,11 @@ def main():
   # call the quikr train function, save the output with np.save
   matrix = quikr.train_matrix(args.input, args.kmer)
 
+  if args.kmer is None: 
+    kmer = 6
+  else:
+    kmer = args.kmer
+
   if args.compress: 
     output_file = gzip.open(args.output, "wb")
   else: