Cleanups and refactoring

* Only do one check for output_directory by combining statements
* Rename output to number_of_reads
* Get rid of useless comment
author: Calvin <calvin@EESI> 2013-03-06 13:34:22 -0500
committer: Calvin <calvin@EESI> 2013-03-06 13:34:22 -0500
commit: 17ece99b4bd7bd3371adf35221f0594a2549e3a8 (patch)
tree: 52951540c9a601e5e841c74960c05a627548e12d
parent: cbbda6b01a68bbcfcf99b1112735a4b3451e4d42 (diff)
1 files changed, 15 insertions, 13 deletions
diff --git a/multifasta_to_otu.py b/multifasta_to_otu.py
index ad364a5..548633e 100755
--- a/multifasta_to_otu.py
+++ b/multifasta_to_otu.py
@@ -26,7 +26,7 @@ def main():
   global output_directory 
   global lamb
   global trained_matrix 
-  #do: write up the description
+
   parser = argparse.ArgumentParser(description="MultifastaOTU")
 
   parser.add_argument("-i", "--input-directory", help="directory containing fasta files", required=True)
@@ -50,14 +50,15 @@ def main():
     parser.error("Input directory not found")
 
   if not os.path.isdir(args.output_directory):
-    parser.error("Output directory not found")
-
-  if not os.path.isdir(args.output_directory):
+    print "Output directory not found, creating directory"
     os.path.mkdir(args, output_directory)
 
   if not os.path.isfile(args.trained_matrix):
-    parser.error("custom trained matrix not found")
+    parser.error("Custom trained matrix not found")
     
+  if not os.path.isfile(args.trained_fasta):
+    parser.error("Fasta file of trained matrix not found")
+
   # use alternative lambda
   if args.lamb is not None:
     lamb = args.lamb
@@ -85,8 +86,8 @@ def main():
     headers.append(header.id)
   trained_matrix_headers.close()
 
-
-  output = np.zeros((len(headers), len(fasta_list)))
+  # create our number of reads matrix
+  number_of_reads = np.zeros((len(headers), len(fasta_list)))
 
   # load the keys with values from each fasta result
   for fasta, fasta_it in map(None, fasta_list, range(len(fasta_list))):
@@ -97,12 +98,12 @@ def main():
     proportions = np.loadtxt(output_directory + fasta);
     
     for proportion, proportion_it in map(None, proportions, range(len(proportions))):
-      output[proportion_it, fasta_it] = round(proportion * number_of_sequences)
+      number_of_reads[proportion_it, fasta_it] = round(proportion * number_of_sequences)
 
   # remove empty rows from our matrix
   final_headers = list()
   final_data = list()
-  for row, header in map(None, output, headers):
+  for row, header in map(None, number_of_reads, headers):
     if row.sum() != 0:
       final_headers.append(header)
       final_data.append(row)
@@ -110,8 +111,8 @@ def main():
   # convert from a list back into a numpy array
   final_data = np.array(final_data, dtype=int)
 
-  # stack our final header and our output matrix
-  output = np.column_stack((final_headers, final_data))
+  # stack our final header and our number_of_reads matrix
+  number_of_reads = np.column_stack((final_headers, final_data))
 
   # write our OTU table
   output_file = open(args.otu_table, "wb") 
@@ -119,14 +120,15 @@ def main():
 
   #write out our fasta file row
   writer.writerow(['# QIIME vGail OTU table'])
+
   fasta_row = ['#OTU_ID']
   fasta_row.append(' '.join(fasta_list))
   fasta_row = [' '.join(fasta_row)]
   writer.writerow(fasta_row)
 
   # write out our results
-  for i in range(0, np.shape(output)[0]):
-      writer.writerow(list(output[i]))
+  for i in range(0, np.shape(number_of_reads)[0]):
+      writer.writerow(list(number_of_reads[i]))
 
   output_file.close()
author	Calvin <calvin@EESI>	2013-03-06 13:34:22 -0500
committer	Calvin <calvin@EESI>	2013-03-06 13:34:22 -0500
commit	17ece99b4bd7bd3371adf35221f0594a2549e3a8 (patch)
tree	52951540c9a601e5e841c74960c05a627548e12d
parent	cbbda6b01a68bbcfcf99b1112735a4b3451e4d42 (diff)