about | summary | refs | log | tree | commit | diff
path: root/multifasta_to_otu.py
diff options
context:
space:
mode:
Diffstat (limited to 'multifasta_to_otu.py')
-rwxr-xr-x multifasta_to_otu.py 28
1 files changed, 15 insertions, 13 deletions
diff --git a/multifasta_to_otu.py b/multifasta_to_otu.py
index ad364a5..548633e 100755
--- a/multifasta_to_otu.py
+++ b/multifasta_to_otu.py
@@ -26,7 +26,7 @@ def main():
global output_directory
global lamb
global trained_matrix
- #do: write up the description
+
parser = argparse.ArgumentParser(description="MultifastaOTU")
parser.add_argument("-i", "--input-directory", help="directory containing fasta files", required=True)
@@ -50,14 +50,15 @@ def main():
parser.error("Input directory not found")
if not os.path.isdir(args.output_directory):
- parser.error("Output directory not found")
-
- if not os.path.isdir(args.output_directory):
+ print "Output directory not found, creating directory"
os.path.mkdir(args, output_directory)
if not os.path.isfile(args.trained_matrix):
- parser.error("custom trained matrix not found")
+ parser.error("Custom trained matrix not found")
+ if not os.path.isfile(args.trained_fasta):
+ parser.error("Fasta file of trained matrix not found")
+
# use alternative lambda
if args.lamb is not None:
lamb = args.lamb
@@ -85,8 +86,8 @@ def main():
headers.append(header.id)
trained_matrix_headers.close()
-
- output = np.zeros((len(headers), len(fasta_list)))
+ # create our number of reads matrix
+ number_of_reads = np.zeros((len(headers), len(fasta_list)))
# load the keys with values from each fasta result
for fasta, fasta_it in map(None, fasta_list, range(len(fasta_list))):
@@ -97,12 +98,12 @@ def main():
proportions = np.loadtxt(output_directory + fasta);
for proportion, proportion_it in map(None, proportions, range(len(proportions))):
- output[proportion_it, fasta_it] = round(proportion * number_of_sequences)
+ number_of_reads[proportion_it, fasta_it] = round(proportion * number_of_sequences)
# remove empty rows from our matrix
final_headers = list()
final_data = list()
- for row, header in map(None, output, headers):
+ for row, header in map(None, number_of_reads, headers):
if row.sum() != 0:
final_headers.append(header)
final_data.append(row)
@@ -110,8 +111,8 @@ def main():
# convert from a list back into a numpy array
final_data = np.array(final_data, dtype=int)
- # stack our final header and our output matrix
- output = np.column_stack((final_headers, final_data))
+ # stack our final header and our number_of_reads matrix
+ number_of_reads = np.column_stack((final_headers, final_data))
# write our OTU table
output_file = open(args.otu_table, "wb")
@@ -119,14 +120,15 @@ def main():
#write out our fasta file row
writer.writerow(['# QIIME vGail OTU table'])
+
fasta_row = ['#OTU_ID']
fasta_row.append(' '.join(fasta_list))
fasta_row = [' '.join(fasta_row)]
writer.writerow(fasta_row)
# write out our results
- for i in range(0, np.shape(output)[0]):
- writer.writerow(list(output[i]))
+ for i in range(0, np.shape(number_of_reads)[0]):
+ writer.writerow(list(number_of_reads[i]))
output_file.close()