diff options
Diffstat (limited to 'src')
-rwxr-xr-x | src/python/generate_kmers (renamed from src/generate_kmers) | 0 | ||||
-rwxr-xr-x | src/python/multifasta_to_otu.py (renamed from src/multifasta_to_otu.py) | 0 | ||||
-rwxr-xr-x | src/python/quikr (renamed from src/quikr) | 0 | ||||
-rwxr-xr-x | src/python/quikr.py (renamed from src/quikr.py) | 38 | ||||
-rwxr-xr-x | src/python/quikr_train (renamed from src/quikr_train) | 5 |
5 files changed, 30 insertions, 13 deletions
diff --git a/src/generate_kmers b/src/python/generate_kmers index 38fdf0a..38fdf0a 100755 --- a/src/generate_kmers +++ b/src/python/generate_kmers diff --git a/src/multifasta_to_otu.py b/src/python/multifasta_to_otu.py index c6fb562..c6fb562 100755 --- a/src/multifasta_to_otu.py +++ b/src/python/multifasta_to_otu.py diff --git a/src/quikr b/src/python/quikr index bac01ca..bac01ca 100755 --- a/src/quikr +++ b/src/python/quikr diff --git a/src/quikr.py b/src/python/quikr.py index 3f8221e..368ab31 100755 --- a/src/quikr.py +++ b/src/python/quikr.py @@ -1,6 +1,7 @@ #!/usr/bin/python import os import sys +from StringIO import StringIO import scipy.optimize.nnls import scipy.sparse import numpy as np @@ -11,12 +12,32 @@ import gzip import itertools def generate_kmers(kmer): - """ This will return a list of kmers seperated by newlines """ + """ generate all possible kmers permutations seperated by newlines + + >>> kmers = generate_kmers(1) + >>> generate_kmers(2) + + param kmer: the desired Mer size + type kmer: int + return: Returns a string of kmers seperated by newlines + rtype: string + """ + return '\n'.join(''.join(x) for x in itertools.product('acgt', repeat=kmer)) def isCompressed(filename): - """ This function checks to see if the file is gzipped """ + """ This function checks to see if the file is gzipped + + >>> boolean_value = isCompressed("/path/to/compressed/gzip/file") + >>> print boolean_value + True + + param filename: the filename to check + type filename: string + return: Returns whether the file is gzipped + rtype: boolean + """ f = open(filename, "rb") # The first two bytes of a gzipped file are always '1f 8b' @@ -35,16 +56,8 @@ def train_matrix(input_file_location, kmer): kmer_file_name = str(kmer) + "mers.txt" - if not os.path.isfile(kmer_file_name): - print "could not find kmer file" - exit() - - uname = platform.uname()[0] - - if uname == "Linux": - input_file = Popen(["./probabilities-by-read-linux", str(kmer), input_file_location, kmer_file_name], stdout=PIPE) - elif uname == "Darwin": - input_file = Popen(["./probabilities-by-read-osx", str(kmer), input_file_location, kmer_file_name]) + kmer_output = Popen(["generate_kmers", str(kmer)], stdout=PIPE) + input_file = Popen(["probabilities-by-read", str(kmer), input_file_location] , stdout=PIPE) # load and normalize the matrix by dividing each element by the sum of it's column. # also do some fancy rotations so that it works properly with quikr @@ -104,7 +117,6 @@ def calculate_estimated_frequencies(input_fasta_location, trained_matrix, kmer, trained_matrix = trained_matrix * default_lambda; trained_matrix = np.vstack((np.ones(trained_matrix.shape[1]), trained_matrix)) - xstar, rnorm = scipy.optimize.nnls(trained_matrix, counts) xstar = xstar / xstar.sum(0) diff --git a/src/quikr_train b/src/python/quikr_train index 6e599c9..bf74e12 100755 --- a/src/quikr_train +++ b/src/python/quikr_train @@ -35,6 +35,11 @@ def main(): # call the quikr train function, save the output with np.save matrix = quikr.train_matrix(args.input, args.kmer) + if args.kmer is None: + kmer = 6 + else: + kmer = args.kmer + if args.compress: output_file = gzip.open(args.output, "wb") else: |