diff options
Diffstat (limited to 'src/python/multifasta_to_otu')
-rwxr-xr-x | src/python/multifasta_to_otu | 16 |
1 files changed, 11 insertions, 5 deletions
diff --git a/src/python/multifasta_to_otu b/src/python/multifasta_to_otu index ec1cc7a..9039709 100755 --- a/src/python/multifasta_to_otu +++ b/src/python/multifasta_to_otu @@ -4,6 +4,7 @@ from Bio import SeqIO import multiprocessing from subprocess import * import os +import glob import gzip import quikr as q import sys @@ -77,10 +78,15 @@ def main(): trained_matrix = np.load(trained_matrix_file) + fasta_list = [] + # Return a list of the input directory - fasta_list = os.listdir(args.input_directory) + fasta_list = glob.glob(args.input_directory + "/*.fa"); + fasta_list = fasta_list + glob.glob(args.input_directory + "/*.fasta") + print fasta_list # Sort the list + fasta_list.sort() # Queue up and run our quikr functions. @@ -100,10 +106,10 @@ def main(): # load the keys with values from each fasta result for fasta, fasta_it in map(None, fasta_list, range(len(fasta_list))): - count_sequences = Popen(["grep", "-c" , "^>", args.input_directory + fasta], stdout=PIPE) + count_sequences = Popen(["grep", "-c" , "^>", fasta], stdout=PIPE) number_of_sequences = int(count_sequences.stdout.readline()) - proportions = np.loadtxt(output_directory + "/" + fasta); + proportions = np.loadtxt(output_directory + "/" + os.path.basename(fasta)); for proportion, proportion_it in map(None, proportions, range(len(proportions))): number_of_reads[proportion_it, fasta_it] = round(proportion * number_of_sequences) @@ -143,10 +149,10 @@ def main(): return 0 def quikr_call(fasta_file): - input_location = input_directory + fasta_file + print os.path.basename(fasta_file) output_location = output_directory + "/" + os.path.basename(fasta_file) - xstar = q.calculate_estimated_frequencies(input_location, trained_matrix, kmer, lamb) + xstar = q.calculate_estimated_frequencies(fasta_file, trained_matrix, kmer, lamb) np.savetxt(output_location, xstar, delimiter=",", fmt="%f") return xstar |