summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Calvin <calvin@EESI> 2013-05-03 18:35:54 -0400
committer: Calvin <calvin@EESI> 2013-05-03 18:35:54 -0400
commit: cb668cbf756c2d5454877d8f90c4b9dc89043c1d (patch)
tree: e7da21dfb3d037d452fd07cdc297388763301ebb
parent: 734d4a56a854bf152fee42f05ed02c5ffbaf4035 (diff)
multifasta now reads .fa and .fasta files only
-rwxr-xr-x src/python/multifasta_to_otu | 16
1 files changed, 11 insertions, 5 deletions
diff --git a/src/python/multifasta_to_otu b/src/python/multifasta_to_otu
index ec1cc7a..9039709 100755
--- a/src/python/multifasta_to_otu
+++ b/src/python/multifasta_to_otu
@@ -4,6 +4,7 @@ from Bio import SeqIO
import multiprocessing
from subprocess import *
import os
+import glob
import gzip
import quikr as q
import sys
@@ -77,10 +78,15 @@ def main():
trained_matrix = np.load(trained_matrix_file)
+ fasta_list = []
+
# Return a list of the input directory
- fasta_list = os.listdir(args.input_directory)
+ fasta_list = glob.glob(args.input_directory + "/*.fa");
+ fasta_list = fasta_list + glob.glob(args.input_directory + "/*.fasta")
+ print fasta_list
# Sort the list
+
fasta_list.sort()
# Queue up and run our quikr functions.
@@ -100,10 +106,10 @@ def main():
# load the keys with values from each fasta result
for fasta, fasta_it in map(None, fasta_list, range(len(fasta_list))):
- count_sequences = Popen(["grep", "-c" , "^>", args.input_directory + fasta], stdout=PIPE)
+ count_sequences = Popen(["grep", "-c" , "^>", fasta], stdout=PIPE)
number_of_sequences = int(count_sequences.stdout.readline())
- proportions = np.loadtxt(output_directory + "/" + fasta);
+ proportions = np.loadtxt(output_directory + "/" + os.path.basename(fasta));
for proportion, proportion_it in map(None, proportions, range(len(proportions))):
number_of_reads[proportion_it, fasta_it] = round(proportion * number_of_sequences)
@@ -143,10 +149,10 @@ def main():
return 0
def quikr_call(fasta_file):
- input_location = input_directory + fasta_file
+ print os.path.basename(fasta_file)
output_location = output_directory + "/" + os.path.basename(fasta_file)
- xstar = q.calculate_estimated_frequencies(input_location, trained_matrix, kmer, lamb)
+ xstar = q.calculate_estimated_frequencies(fasta_file, trained_matrix, kmer, lamb)
np.savetxt(output_location, xstar, delimiter=",", fmt="%f")
return xstar