diff options
| -rwxr-xr-x | multifasta_to_otu.py | 28 | 
1 files changed, 15 insertions, 13 deletions
diff --git a/multifasta_to_otu.py b/multifasta_to_otu.py index ad364a5..548633e 100755 --- a/multifasta_to_otu.py +++ b/multifasta_to_otu.py @@ -26,7 +26,7 @@ def main():    global output_directory     global lamb    global trained_matrix  -  #do: write up the description +    parser = argparse.ArgumentParser(description="MultifastaOTU")    parser.add_argument("-i", "--input-directory", help="directory containing fasta files", required=True) @@ -50,14 +50,15 @@ def main():      parser.error("Input directory not found")    if not os.path.isdir(args.output_directory): -    parser.error("Output directory not found") - -  if not os.path.isdir(args.output_directory): +    print "Output directory not found, creating directory"      os.path.mkdir(args, output_directory)    if not os.path.isfile(args.trained_matrix): -    parser.error("custom trained matrix not found") +    parser.error("Custom trained matrix not found") +  if not os.path.isfile(args.trained_fasta): +    parser.error("Fasta file of trained matrix not found") +    # use alternative lambda    if args.lamb is not None:      lamb = args.lamb @@ -85,8 +86,8 @@ def main():      headers.append(header.id)    trained_matrix_headers.close() - -  output = np.zeros((len(headers), len(fasta_list))) +  # create our number of reads matrix +  number_of_reads = np.zeros((len(headers), len(fasta_list)))    # load the keys with values from each fasta result    for fasta, fasta_it in map(None, fasta_list, range(len(fasta_list))): @@ -97,12 +98,12 @@ def main():      proportions = np.loadtxt(output_directory + fasta);      for proportion, proportion_it in map(None, proportions, range(len(proportions))): -      output[proportion_it, fasta_it] = round(proportion * number_of_sequences) +      number_of_reads[proportion_it, fasta_it] = round(proportion * number_of_sequences)    # remove empty rows from our matrix    final_headers = list()    final_data = list() -  for row, header in map(None, output, headers): +  for row, header in map(None, number_of_reads, headers):      if row.sum() != 0:        final_headers.append(header)        final_data.append(row) @@ -110,8 +111,8 @@ def main():    # convert from a list back into a numpy array    final_data = np.array(final_data, dtype=int) -  # stack our final header and our output matrix -  output = np.column_stack((final_headers, final_data)) +  # stack our final header and our number_of_reads matrix +  number_of_reads = np.column_stack((final_headers, final_data))    # write our OTU table    output_file = open(args.otu_table, "wb")  @@ -119,14 +120,15 @@ def main():    #write out our fasta file row    writer.writerow(['# QIIME vGail OTU table']) +    fasta_row = ['#OTU_ID']    fasta_row.append(' '.join(fasta_list))    fasta_row = [' '.join(fasta_row)]    writer.writerow(fasta_row)    # write out our results -  for i in range(0, np.shape(output)[0]): -      writer.writerow(list(output[i])) +  for i in range(0, np.shape(number_of_reads)[0]): +      writer.writerow(list(number_of_reads[i]))    output_file.close()  | 
