aboutsummaryrefslogtreecommitdiff
path: root/multifasta_to_otu.py
diff options
context:
space:
mode:
author mutantturkey <mutantturke@gmail.com> 2013-02-25 14:27:39 -0500
committer mutantturkey <mutantturke@gmail.com> 2013-02-25 14:27:39 -0500
commit 8279f9aef5ab992d478956d4e94b6e2ec1ec690f (patch)
tree afe0fe3e2a77746231072a20eb53a2f657f4f916 /multifasta_to_otu.py
parent e7c65094a01a8ec2a12fea89dedd30147d60bc75 (diff)
initial commit, playing around with threading
Diffstat (limited to 'multifasta_to_otu.py')
-rw-r--r--multifasta_to_otu.py68
1 files changed, 68 insertions, 0 deletions
diff --git a/multifasta_to_otu.py b/multifasta_to_otu.py
new file mode 100644
index 0000000..110328e
--- /dev/null
+++ b/multifasta_to_otu.py
@@ -0,0 +1,68 @@
+#!/usr/bin/python
+from multiprocessing import Pool
+import multiprocessing
+import os
+import quikr_train as qt
+import quikr as q
+import sys
+import numpy as np
+import argparse
+import platform
+
+# our defaults
+# Module-level settings. quikr_call() reads kmer, lamb and output_directory
+# from module scope when it processes a file.
+# Default k-mer setting; the -k/--kmer help text documents default=6.
+kmer = 6
+# Default lambda; the -l/--lamb help text documents 10,000.
+lamb = 10000
+# Placeholder for the trained matrix path (empty by default).
+trained_matrix = ""
+# Placeholder for the quikr output directory (empty by default).
+output_directory = ""
+
+
+def main():
+
+
+ #do: write up the description
+ parser = argparse.ArgumentParser(description="MultifastaOTU"
+
+ parser.add_argument("-i", "--input", help="directory containing fasta files", required=True)
+ parser.add_argument("-o", "--otu-table", help="otu_table", required=True)
+ parser.add_argument("-t", "--trained-matrix", help="otu_table", required=True)
+ parser.add_argument("-d", "--output-directory", help="quikr output directory", required=True)
+ parser.add_argument("-l", "--lamb", type=int, help="the default lambda value is 10,000")
+ parser.add_argument("-k", "--kmer", type=int, help="specifies which kmer to use, default=6")
+ parser.add_argument("-j", "--jobs", type=int, help="specifies how many jobs to run at once, default=number of CPUs")
+ args = parser.parse_args()
+
+ # our defaults
+ trained_matrix = args.trained_matrix
+
+ # Make sure our input exist
+ if not os.path.isdir(args.input):
+ parser.error( "Input directory not found")
+
+ if not os.path.isdir(args.output_directory):
+ os.path.mkdir(args,output_directory)
+
+ if not os.path.isfile(args.trained_matrix):
+ parser.error("custom trained matrix not found")
+
+ # use alternative lambda
+ if args.lamb is not None:
+ lamb = args.lamb
+
+ if args.jobs is not None:
+ jobs = args.jobs
+
+ if args.kmer is not None:
+ kmer = args.kmer
+ fasta_list = os.listdir(args.
+ pool = Pool(processes=jobs)
+ result = pool.map(quikr_call, fasta_list)
+ return 0
+
+def quikr_call(fasta_file):
+ xstar = q.quikr(fasta_file, training_matrix, kmer, lamb)
+ np.savetxt(output_directory + os.path.basename(fasta_file), xstar, delimiter=",", fmt="%f")
+ return 0
+
+ if __name__ == "__main__":
+ sys.exit(main())
+