diff options
| -rwxr-xr-x | multifasta_to_otu.py | 35 | 
1 files changed, 26 insertions, 9 deletions
| diff --git a/multifasta_to_otu.py b/multifasta_to_otu.py index ef7f5f6..ac883ca 100755 --- a/multifasta_to_otu.py +++ b/multifasta_to_otu.py @@ -29,7 +29,8 @@ def main():    parser.add_argument("-i", "--input-directory", help="directory containing fasta files", required=True)    parser.add_argument("-o", "--otu-table", help="otu_table", required=True) -  parser.add_argument("-t", "--trained-matrix", help="otu_table", required=True) +  parser.add_argument("-t", "--trained-matrix", help="your trained matrix ", required=True) +  parser.add_argument("-f", "--trained-fasta", help="the fasta file used to train your matrix", required=True)    parser.add_argument("-d", "--output-directory", help="quikr output directory", required=True)    parser.add_argument("-l", "--lamb", type=int, help="the default lambda value is 10,000")    parser.add_argument("-k", "--kmer", type=int, help="specifies which kmer to use, default=6") @@ -72,22 +73,38 @@ def main():    fasta_list = os.listdir(args.input_directory)    # Queue up and run our quikr functions. -  pool = Pool(processes=jobs) -  results = pool.map(quikr_call, fasta_list) +#  pool = Pool(processes=jobs) +#  results = pool.map(quikr_call, fasta_list) -  # Create a dictionary and load up our keys +  # Create an array of headers    records = [] - -  trained_matrix_headers = open(args.trained_matrix, "rU") +  trained_matrix_headers = open(args.trained_fasta, "rU")    for record in SeqIO.parse(trained_matrix_headers, "fasta"): -    records.append((record.id, 0)) +    records.append(record.id) +  trained_matrix_headers.close() -  records = dict(records) +  final_output = np.zeros((len(records), len(fasta_list))) +  print len(fasta_list) +  # load the keys with values from each fasta result +  for fasta, fasta_it in map(None, fasta_list, range(len(fasta_list))): +    fasta_file = open(input_directory + fasta, "rU") +    sequences = list(SeqIO.parse(fasta_file, "fasta")) +    number_of_sequences = len(sequences) +    fasta_file.close() +    print number_of_sequences +     +    proportions = np.loadtxt(output_directory + fasta); +    for proportion, proportion_it in map(None, proportions, range(len(proportions))): +     if(round(proportion * number_of_sequences) is not 0): +        print str(fasta_it) + " " + str(proportion_it) +        final_output[fasta_it, proportion_it] = proportion * number_of_sequences +   +  np.savetxt(args.otu_table, final_output, delimiter=",", fmt="%d") +       -  # load the keys with values from each fasta result    # Write the otu table    return 0 | 
