From 17ece99b4bd7bd3371adf35221f0594a2549e3a8 Mon Sep 17 00:00:00 2001 From: Calvin Date: Wed, 6 Mar 2013 13:34:22 -0500 Subject: Cleanups and refactoring * Only do one check for output_directory by combining statements * rename output to number_of_reads * get rid of useless comment --- multifasta_to_otu.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'multifasta_to_otu.py') diff --git a/multifasta_to_otu.py b/multifasta_to_otu.py index ad364a5..548633e 100755 --- a/multifasta_to_otu.py +++ b/multifasta_to_otu.py @@ -26,7 +26,7 @@ def main(): global output_directory global lamb global trained_matrix - #do: write up the description + parser = argparse.ArgumentParser(description="MultifastaOTU") parser.add_argument("-i", "--input-directory", help="directory containing fasta files", required=True) @@ -50,14 +50,15 @@ def main(): parser.error("Input directory not found") if not os.path.isdir(args.output_directory): - parser.error("Output directory not found") - - if not os.path.isdir(args.output_directory): + print "Output directory not found, creating directory" os.path.mkdir(args, output_directory) if not os.path.isfile(args.trained_matrix): - parser.error("custom trained matrix not found") + parser.error("Custom trained matrix not found") + if not os.path.isfile(args.trained_fasta): + parser.error("Fasta file of trained matrix not found") + # use alternative lambda if args.lamb is not None: lamb = args.lamb @@ -85,8 +86,8 @@ def main(): headers.append(header.id) trained_matrix_headers.close() - - output = np.zeros((len(headers), len(fasta_list))) + # create our number of reads matrix + number_of_reads = np.zeros((len(headers), len(fasta_list))) # load the keys with values from each fasta result for fasta, fasta_it in map(None, fasta_list, range(len(fasta_list))): @@ -97,12 +98,12 @@ def main(): proportions = np.loadtxt(output_directory + fasta); for proportion, proportion_it in map(None, proportions, range(len(proportions))): - output[proportion_it, fasta_it] = round(proportion * number_of_sequences) + number_of_reads[proportion_it, fasta_it] = round(proportion * number_of_sequences) # remove empty rows from our matrix final_headers = list() final_data = list() - for row, header in map(None, output, headers): + for row, header in map(None, number_of_reads, headers): if row.sum() != 0: final_headers.append(header) final_data.append(row) @@ -110,8 +111,8 @@ def main(): # convert from a list back into a numpy array final_data = np.array(final_data, dtype=int) - # stack our final header and our output matrix - output = np.column_stack((final_headers, final_data)) + # stack our final header and our number_of_reads matrix + number_of_reads = np.column_stack((final_headers, final_data)) # write our OTU table output_file = open(args.otu_table, "wb") @@ -119,14 +120,15 @@ def main(): #write out our fasta file row writer.writerow(['# QIIME vGail OTU table']) + fasta_row = ['#OTU_ID'] fasta_row.append(' '.join(fasta_list)) fasta_row = [' '.join(fasta_row)] writer.writerow(fasta_row) # write out our results - for i in range(0, np.shape(output)[0]): - writer.writerow(list(output[i])) + for i in range(0, np.shape(number_of_reads)[0]): + writer.writerow(list(number_of_reads[i])) output_file.close() -- cgit v1.2.3