From 8fe20384c10cff03f4c9a4613499bf2b711c9bab Mon Sep 17 00:00:00 2001 From: Calvin Date: Tue, 26 Feb 2013 10:48:18 -0500 Subject: some updates --- multifasta_to_otu.py | 83 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 34 deletions(-) mode change 100644 => 100755 multifasta_to_otu.py (limited to 'multifasta_to_otu.py') diff --git a/multifasta_to_otu.py b/multifasta_to_otu.py old mode 100644 new mode 100755 index 34af060..cca2e89 --- a/multifasta_to_otu.py +++ b/multifasta_to_otu.py @@ -14,55 +14,70 @@ kmer = 6 lamb = 10000 trained_matrix = "" output_directory = "" +input_directory = "" def main(): + global kmer + global input_directory + global output_directory + global lamb + global trained_matrix + #do: write up the description + parser = argparse.ArgumentParser(description="MultifastaOTU") - #do: write up the description - parser = argparse.ArgumentParser(description="MultifastaOTU") - - parser.add_argument("-i", "--input", help="directory containing fasta files", required=True) - parser.add_argument("-o", "--otu-table", help="otu_table", required=True) - parser.add_argument("-t", "--trained-matrix", help="otu_table", required=True) - parser.add_argument("-d", "--output-directory", help="quikr output directory", required=True) - parser.add_argument("-l", "--lamb", type=int, help="the default lambda value is 10,000") - parser.add_argument("-k", "--kmer", type=int, help="specifies which kmer to use, default=6") - parser.add_argument("-j", "--jobs", type=int, help="specifies how many jobs to run at once, default=number of CPUs") - args = parser.parse_args() + parser.add_argument("-i", "--input-directory", help="directory containing fasta files", required=True) + parser.add_argument("-o", "--otu-table", help="otu_table", required=True) + parser.add_argument("-t", "--trained-matrix", help="otu_table", required=True) + parser.add_argument("-d", "--output-directory", help="quikr output directory", required=True) + 
parser.add_argument("-l", "--lamb", type=int, help="the default lambda value is 10,000") + parser.add_argument("-k", "--kmer", type=int, help="specifies which kmer to use, default=6") + parser.add_argument("-j", "--jobs", type=int, help="specifies how many jobs to run at once, default=number of CPUs") + args = parser.parse_args() - # our defaults - trained_matrix = args.trained_matrix + # our defaults + jobs=multiprocessing.cpu_count() + trained_matrix = args.trained_matrix + input_directory = args.input_directory + output_directory = args.output_directory + + # Make sure our input exist + if not os.path.isdir(args.input_directory): + parser.error( "Input directory not found") - # Make sure our input exist - if not os.path.isdir(args.input): - parser.error( "Input directory not found") + if not os.path.isdir(args.output_directory): + parser.error( "Input directory not found") - if not os.path.isdir(args.output_directory): - os.path.mkdir(args,output_directory) + if not os.path.isdir(args.output_directory): + os.path.mkdir(args,output_directory) - if not os.path.isfile(args.trained_matrix): - parser.error("custom trained matrix not found") + if not os.path.isfile(args.trained_matrix): + parser.error("custom trained matrix not found") # use alternative lambda - if args.lamb is not None: - lamb = args.lamb + if args.lamb is not None: + lamb = args.lamb - if args.jobs is not None: - jobs = args.jobs + if args.jobs is not None: + jobs = args.jobs + + if args.kmer is not None: + kmer = args.kmer - if args.kmer is not None: - kmer = args.kmer - fasta_list = os.listdir(args. 
- pool = Pool(processes=jobs) - result = pool.map(quikr_call, fasta_list) - return 0 + fasta_list = os.listdir(args.input_directory) + pool = Pool(processes=jobs) + result = pool.map(quikr_call, fasta_list) + + return 0 def quikr_call(fasta_file): - xstar = q.quikr(fasta_file, training_matrix, kmer, lamb) - np.savetxt(output_directory + os.path.basename(fasta_file), xstar, delimiter=",", fmt="%f") + inp = input_directory + fasta_file + output = output_directory + os.path.basename(fasta_file) + xstar = q.quikr(inp, trained_matrix, kmer, lamb) + np.savetxt(output, xstar, delimiter=",", fmt="%f") return 0 - if __name__ == "__main__": - sys.exit(main()) +if __name__ == "__main__": + sys.exit(main()) -- cgit v1.2.3