about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rwxr-xr-x src/python/generate_kmers (renamed from src/generate_kmers) 0
-rwxr-xr-x src/python/multifasta_to_otu.py (renamed from src/multifasta_to_otu.py) 0
-rwxr-xr-x src/python/quikr (renamed from src/quikr) 0
-rwxr-xr-x src/python/quikr.py (renamed from src/quikr.py) 38
-rwxr-xr-x src/python/quikr_train (renamed from src/quikr_train) 5
5 files changed, 30 insertions, 13 deletions
diff --git a/src/generate_kmers b/src/python/generate_kmers
index 38fdf0a..38fdf0a 100755
--- a/src/generate_kmers
+++ b/src/python/generate_kmers
diff --git a/src/multifasta_to_otu.py b/src/python/multifasta_to_otu.py
index c6fb562..c6fb562 100755
--- a/src/multifasta_to_otu.py
+++ b/src/python/multifasta_to_otu.py
diff --git a/src/quikr b/src/python/quikr
index bac01ca..bac01ca 100755
--- a/src/quikr
+++ b/src/python/quikr
diff --git a/src/quikr.py b/src/python/quikr.py
index 3f8221e..368ab31 100755
--- a/src/quikr.py
+++ b/src/python/quikr.py
@@ -1,6 +1,7 @@
#!/usr/bin/python
import os
import sys
+from StringIO import StringIO
import scipy.optimize.nnls
import scipy.sparse
import numpy as np
@@ -11,12 +12,32 @@ import gzip
import itertools
def generate_kmers(kmer):
- """ This will return a list of kmers seperated by newlines """
+ """ generate all possible kmer permutations separated by newlines
+
+ >>> kmers = generate_kmers(1)
+ >>> generate_kmers(2)
+
+ param kmer: the desired Mer size
+ type kmer: int
+ return: Returns a string of kmers separated by newlines
+ rtype: string
+ """
+
return '\n'.join(''.join(x) for x in itertools.product('acgt', repeat=kmer))
def isCompressed(filename):
- """ This function checks to see if the file is gzipped """
+ """ This function checks to see if the file is gzipped
+
+ >>> boolean_value = isCompressed("/path/to/compressed/gzip/file")
+ >>> print boolean_value
+ True
+
+ param filename: the filename to check
+ type filename: string
+ return: Returns whether the file is gzipped
+ rtype: boolean
+ """
f = open(filename, "rb")
# The first two bytes of a gzipped file are always '1f 8b'
@@ -35,16 +56,8 @@ def train_matrix(input_file_location, kmer):
kmer_file_name = str(kmer) + "mers.txt"
- if not os.path.isfile(kmer_file_name):
- print "could not find kmer file"
- exit()
-
- uname = platform.uname()[0]
-
- if uname == "Linux":
- input_file = Popen(["./probabilities-by-read-linux", str(kmer), input_file_location, kmer_file_name], stdout=PIPE)
- elif uname == "Darwin":
- input_file = Popen(["./probabilities-by-read-osx", str(kmer), input_file_location, kmer_file_name])
+ kmer_output = Popen(["generate_kmers", str(kmer)], stdout=PIPE)
+ input_file = Popen(["probabilities-by-read", str(kmer), input_file_location] , stdout=PIPE)
# load and normalize the matrix by dividing each element by the sum of it's column.
# also do some fancy rotations so that it works properly with quikr
@@ -104,7 +117,6 @@ def calculate_estimated_frequencies(input_fasta_location, trained_matrix, kmer,
trained_matrix = trained_matrix * default_lambda;
trained_matrix = np.vstack((np.ones(trained_matrix.shape[1]), trained_matrix))
-
xstar, rnorm = scipy.optimize.nnls(trained_matrix, counts)
xstar = xstar / xstar.sum(0)
diff --git a/src/quikr_train b/src/python/quikr_train
index 6e599c9..bf74e12 100755
--- a/src/quikr_train
+++ b/src/python/quikr_train
@@ -35,6 +35,11 @@ def main():
# call the quikr train function, save the output with np.save
matrix = quikr.train_matrix(args.input, args.kmer)
+ if args.kmer is None:
+ kmer = 6
+ else:
+ kmer = args.kmer
+
if args.compress:
output_file = gzip.open(args.output, "wb")
else: