diff options
| -rwxr-xr-x | src/python/generate_kmers (renamed from src/generate_kmers) | 0 | ||||
| -rwxr-xr-x | src/python/multifasta_to_otu.py (renamed from src/multifasta_to_otu.py) | 0 | ||||
| -rwxr-xr-x | src/python/quikr (renamed from src/quikr) | 0 | ||||
| -rwxr-xr-x | src/python/quikr.py (renamed from src/quikr.py) | 38 | ||||
| -rwxr-xr-x | src/python/quikr_train (renamed from src/quikr_train) | 5 | 
5 files changed, 30 insertions, 13 deletions
| diff --git a/src/generate_kmers b/src/python/generate_kmers index 38fdf0a..38fdf0a 100755 --- a/src/generate_kmers +++ b/src/python/generate_kmers diff --git a/src/multifasta_to_otu.py b/src/python/multifasta_to_otu.py index c6fb562..c6fb562 100755 --- a/src/multifasta_to_otu.py +++ b/src/python/multifasta_to_otu.py diff --git a/src/quikr b/src/python/quikr index bac01ca..bac01ca 100755 --- a/src/quikr +++ b/src/python/quikr diff --git a/src/quikr.py b/src/python/quikr.py index 3f8221e..368ab31 100755 --- a/src/quikr.py +++ b/src/python/quikr.py @@ -1,6 +1,7 @@  #!/usr/bin/python  import os  import sys +from StringIO import StringIO  import scipy.optimize.nnls  import scipy.sparse  import numpy as np @@ -11,12 +12,32 @@ import gzip  import itertools  def generate_kmers(kmer): -  """ This will return a list of kmers seperated by newlines """ +  """ generate all possible kmers permutations seperated by newlines  + + >>> kmers =  generate_kmers(1) + >>> generate_kmers(2) + + param kmer: the desired Mer size + type  kmer: int + return: Returns a string of kmers seperated by newlines + rtype: string + """ +    return '\n'.join(''.join(x) for x in itertools.product('acgt', repeat=kmer))  def isCompressed(filename): -  """ This function checks to see if the file is gzipped """  +  """ This function checks to see if the file is gzipped +   +  >>> boolean_value = isCompressed("/path/to/compressed/gzip/file") +  >>> print boolean_value +  True + +  param filename: the filename to check +  type  filename: string +  return: Returns whether the file is gzipped +  rtype: boolean +  """    f = open(filename, "rb")    # The first two bytes of a gzipped file are always '1f 8b' @@ -35,16 +56,8 @@ def train_matrix(input_file_location, kmer):    kmer_file_name = str(kmer) + "mers.txt" -  if not os.path.isfile(kmer_file_name): -    print "could not find kmer file" -    exit() -   -  uname = platform.uname()[0] - -  if uname == "Linux":  -    input_file = Popen(["./probabilities-by-read-linux", str(kmer), input_file_location, kmer_file_name], stdout=PIPE)  -  elif uname == "Darwin": -    input_file = Popen(["./probabilities-by-read-osx", str(kmer), input_file_location, kmer_file_name])  +  kmer_output = Popen(["generate_kmers", str(kmer)], stdout=PIPE) +  input_file = Popen(["probabilities-by-read", str(kmer), input_file_location] , stdout=PIPE)     # load and  normalize the matrix by dividing each element by the sum of it's column.    # also do some fancy rotations so that it works properly with quikr @@ -104,7 +117,6 @@ def calculate_estimated_frequencies(input_fasta_location, trained_matrix, kmer,    trained_matrix = trained_matrix * default_lambda;    trained_matrix = np.vstack((np.ones(trained_matrix.shape[1]), trained_matrix)) -    xstar, rnorm = scipy.optimize.nnls(trained_matrix, counts)     xstar = xstar / xstar.sum(0)  diff --git a/src/quikr_train b/src/python/quikr_train index 6e599c9..bf74e12 100755 --- a/src/quikr_train +++ b/src/python/quikr_train @@ -35,6 +35,11 @@ def main():    # call the quikr train function, save the output with np.save    matrix = quikr.train_matrix(args.input, args.kmer) +  if args.kmer is None:  +    kmer = 6 +  else: +    kmer = args.kmer +    if args.compress:       output_file = gzip.open(args.output, "wb")    else: | 
