summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x multifasta_to_otu.py 78
1 file changed, 46 insertions, 32 deletions
diff --git a/multifasta_to_otu.py b/multifasta_to_otu.py
index 18ddc35..7bec8cb 100755
--- a/multifasta_to_otu.py
+++ b/multifasta_to_otu.py
@@ -14,55 +14,69 @@ kmer = 6
lamb = 10000
trained_matrix = ""
output_directory = ""
+input_directory = ""
def main():
+ global kmer
+ global input_directory
+ global output_directory
+ global lamb
+ global trained_matrix
+ #do: write up the description
+ parser = argparse.ArgumentParser(description="MultifastaOTU")
- #do: write up the description
- parser = argparse.ArgumentParser(description="MultifastaOTU")
-
- parser.add_argument("-i", "--input", help="directory containing fasta files", required=True)
- parser.add_argument("-o", "--otu-table", help="otu_table", required=True)
- parser.add_argument("-t", "--trained-matrix", help="otu_table", required=True)
- parser.add_argument("-d", "--output-directory", help="quikr output directory", required=True)
- parser.add_argument("-l", "--lamb", type=int, help="the default lambda value is 10,000")
- parser.add_argument("-k", "--kmer", type=int, help="specifies which kmer to use, default=6")
- parser.add_argument("-j", "--jobs", type=int, help="specifies how many jobs to run at once, default=number of CPUs")
- args = parser.parse_args()
+ parser.add_argument("-i", "--input-directory", help="directory containing fasta files", required=True)
+ parser.add_argument("-o", "--otu-table", help="otu_table", required=True)
+ parser.add_argument("-t", "--trained-matrix", help="otu_table", required=True)
+ parser.add_argument("-d", "--output-directory", help="quikr output directory", required=True)
+ parser.add_argument("-l", "--lamb", type=int, help="the default lambda value is 10,000")
+ parser.add_argument("-k", "--kmer", type=int, help="specifies which kmer to use, default=6")
+ parser.add_argument("-j", "--jobs", type=int, help="specifies how many jobs to run at once, default=number of CPUs")
+ args = parser.parse_args()
- # our defaults
- trained_matrix = args.trained_matrix
+ # our defaults
+ jobs=multiprocessing.cpu_count()
+ trained_matrix = args.trained_matrix
+ input_directory = args.input_directory
+ output_directory = args.output_directory
+
+ # Make sure our input exist
+ if not os.path.isdir(args.input_directory):
+ parser.error( "Input directory not found")
- # Make sure our input exist
- if not os.path.isdir(args.input):
- parser.error( "Input directory not found")
+ if not os.path.isdir(args.output_directory):
+ parser.error( "Input directory not found")
- if not os.path.isdir(args.output_directory):
- os.path.mkdir(args,output_directory)
+ if not os.path.isdir(args.output_directory):
+ os.path.mkdir(args,output_directory)
- if not os.path.isfile(args.trained_matrix):
- parser.error("custom trained matrix not found")
+ if not os.path.isfile(args.trained_matrix):
+ parser.error("custom trained matrix not found")
# use alternative lambda
- if args.lamb is not None:
- lamb = args.lamb
+ if args.lamb is not None:
+ lamb = args.lamb
- if args.jobs is not None:
- jobs = args.jobs
+ if args.jobs is not None:
+ jobs = args.jobs
- if args.kmer is not None:
- kmer = args.kmer
- fasta_list = os.listdir(args.input_directory)
+ if args.kmer is not None:
+ kmer = args.kmer
- for fasta in fasta_list:
- quikr_call(fasta)
+ fasta_list = os.listdir(args.input_directory)
- return 0
+ for fasta in fasta_list:
+ quikr_call(fasta)
+
+ return 0
def quikr_call(fasta_file):
- xstar = q.quikr(fasta_file, training_matrix, kmer, lamb)
- np.savetxt(output_directory + os.path.basename(fasta_file), xstar, delimiter=",", fmt="%f")
+ inp = input_directory + fasta_file
+ output = output_directory + os.path.basename(fasta_file)
+ xstar = q.quikr(inp, trained_matrix, kmer, lamb)
+ np.savetxt(output, xstar, delimiter=",", fmt="%f")
return 0
if __name__ == "__main__":