summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCalvin <calvin@EESI>2013-05-14 19:50:28 -0400
committerCalvin <calvin@EESI>2013-05-14 19:50:28 -0400
commit3631a0c9d47d8ff72085bcc534bd24bfad4f73da (patch)
treea259116534e6b5522d065e7dd46a414d54d57075
parent7ab43937c81ad5af1b7d6b5b1d3c317b58881e84 (diff)
use sensing matrix
-rwxr-xr-xsrc/python/multifasta_to_otu34
-rwxr-xr-xsrc/python/quikr16
-rwxr-xr-xsrc/python/quikr.py26
3 files changed, 36 insertions, 40 deletions
diff --git a/src/python/multifasta_to_otu b/src/python/multifasta_to_otu
index 3cc8f3e..431cc74 100755
--- a/src/python/multifasta_to_otu
+++ b/src/python/multifasta_to_otu
@@ -26,14 +26,14 @@ def main():
global input_directory
global output_directory
global lamb
- global trained_matrix
+ global sensing_matrix
parser = argparse.ArgumentParser(description="MultifastaOTU")
parser.add_argument("-i", "--input-directory", help="directory containing fasta files", required=True)
parser.add_argument("-o", "--otu-table", help="otu_table", required=True)
- parser.add_argument("-t", "--trained-matrix", help="your trained matrix ", required=True)
- parser.add_argument("-f", "--trained-fasta", help="the fasta file used to train your matrix", required=True)
+ parser.add_argument("-s", "--sensing-matrix", help="your sensing matrix ", required=True)
+ parser.add_argument("-f", "--sensing-fasta", help="the fasta file used to train your matrix", required=True)
parser.add_argument("-l", "--lamb", type=int, help="the default lambda value is 10,000")
parser.add_argument("-k", "--kmer", type=int, help="specifies which kmer to use, default=6")
parser.add_argument("-j", "--jobs", type=int, help="specifies how many jobs to run at once, default=number of CPUs")
@@ -41,18 +41,18 @@ def main():
# our defaults
jobs = multiprocessing.cpu_count()
- trained_matrix = args.trained_matrix
+ sensing_matrix = args.sensing_matrix
input_directory = args.input_directory
# Make sure our input exist
if not os.path.isdir(args.input_directory):
parser.error("Input directory not found")
- if not os.path.isfile(args.trained_matrix):
- parser.error("Custom trained matrix not found")
+ if not os.path.isfile(args.sensing_matrix):
+ parser.error("Custom sensing matrix not found")
- if not os.path.isfile(args.trained_fasta):
- parser.error("Fasta file of trained matrix not found")
+ if not os.path.isfile(args.sensing_fasta):
+ parser.error("Fasta file of sensing matrix not found")
# use alternative lambda
if args.lamb is not None:
@@ -64,13 +64,13 @@ def main():
if args.kmer is not None:
kmer = args.kmer
- # Load trained matrix
- if q.is_compressed(args.trained_matrix):
- trained_matrix_file = gzip.open(args.trained_matrix, "rb")
+ # Load sensing matrix
+ if q.is_compressed(args.sensing_matrix):
+ sensing_matrix_file = gzip.open(args.sensing_matrix, "rb")
else:
- trained_matrix_file = open(args.trained_matrix, "rb")
+ sensing_matrix_file = open(args.sensing_matrix, "rb")
- trained_matrix = np.load(trained_matrix_file)
+ sensing_matrix = np.load(sensing_matrix_file)
fasta_list = []
@@ -89,10 +89,10 @@ def main():
# Create an array of headers
headers = []
- trained_matrix_headers = open(args.trained_fasta, "rU")
- for header in SeqIO.parse(trained_matrix_headers, "fasta"):
+ sensing_matrix_headers = open(args.sensing_fasta, "rU")
+ for header in SeqIO.parse(sensing_matrix_headers, "fasta"):
headers.append(header.id)
- trained_matrix_headers.close()
+ sensing_matrix_headers.close()
# create our number of reads matrix
number_of_reads = np.zeros((len(headers), len(fasta_list)))
@@ -148,7 +148,7 @@ def main():
def quikr_call(fasta_file):
print os.path.basename(fasta_file)
- xstar = q.calculate_estimated_frequencies(fasta_file, trained_matrix, kmer, lamb)
+ xstar = q.calculate_estimated_frequencies(fasta_file, sensing_matrix, kmer, lamb)
return xstar
if __name__ == "__main__":
diff --git a/src/python/quikr b/src/python/quikr
index bae2b9f..7c9ce25 100755
--- a/src/python/quikr
+++ b/src/python/quikr
@@ -9,15 +9,11 @@ def main():
parser = argparse.ArgumentParser(description=
"Quikr returns the estimated frequencies of batcteria present when given a \
- input FASTA file. \n \
- A default trained matrix will be used if none is supplied \n \
- You must supply a kmer and default lambda if using a custom trained \
- matrix.")
-
+ input FASTA file. \n")
parser.add_argument("-f", "--fasta", help="the sample's fasta file of NGS READS", required=True)
parser.add_argument("-o", "--output", help="OTU_FRACTION_PRESENT, a vector \
representing the percentage of database sequence's presence in a sequence. (csv output)", required=True)
- parser.add_argument("-t", "--trained-matrix", help="the trained matrix", required=True)
+ parser.add_argument("-s", "--sensing-matrix", help="the sensing matrix", required=True)
parser.add_argument("-l", "--lamb", type=int, help="the lambda size. (default is 10,000")
parser.add_argument("-k", "--kmer", type=int, required=True,
help="specifies the size of the kmer to use (the default is 6)")
@@ -33,8 +29,8 @@ def main():
if not os.path.isfile(args.fasta):
parser.error( "Input fasta file not found")
- if not os.path.isfile(args.trained_matrix):
- parser.error("Custom trained matrix not found")
+ if not os.path.isfile(args.sensing_matrix):
+ parser.error("Custom sensing matrix not found")
# use alternative lambda
if args.lamb is not None:
@@ -44,8 +40,8 @@ def main():
if args.kmer is not None:
kmer = args.kmer
- trained_matrix = quikr.load_trained_matrix_from_file(args.trained_matrix)
- xstar = quikr.calculate_estimated_frequencies(args.fasta, trained_matrix, kmer, lamb)
+ sensing_matrix = quikr.load_sensing_matrix_from_file(args.sensing_matrix)
+ xstar = quikr.calculate_estimated_frequencies(args.fasta, sensing_matrix, kmer, lamb)
np.savetxt(args.output, xstar, delimiter=",", fmt="%f")
return 0
diff --git a/src/python/quikr.py b/src/python/quikr.py
index 98cc953..3c2b23a 100755
--- a/src/python/quikr.py
+++ b/src/python/quikr.py
@@ -39,7 +39,7 @@ def is_compressed(filename):
def train_matrix(input_file_location, kmer):
"""
- Takes a input fasta file, and kmer, returns a custom trained matrix
+ Takes a input fasta file, and kmer, returns a custom sensing matrix
"""
input_file = Popen(["bash", "-c", "probabilities-by-read " + str(kmer) + " " + input_file_location + " <(generate_kmers 6)"], stdout=PIPE)
@@ -54,23 +54,23 @@ def train_matrix(input_file_location, kmer):
return matrix
-def load_trained_matrix_from_file(trained_matrix_location):
- """ This is a helper function to load our trained matrix and run quikr """
+def load_sensing_matrix_from_file(sensing_matrix_location):
+ """ This is a helper function to load our sensing matrix and run quikr """
- if is_compressed(trained_matrix_location):
- trained_matrix_file = gzip.open(trained_matrix_location, "rb")
+ if is_compressed(sensing_matrix_location):
+ sensing_matrix_file = gzip.open(sensing_matrix_location, "rb")
else:
- trained_matrix_file = open(trained_matrix_location, "rb")
+ sensing_matrix_file = open(sensing_matrix_location, "rb")
- trained_matrix = np.load(trained_matrix_file)
+ sensing_matrix = np.load(sensing_matrix_file)
- return trained_matrix
+ return sensing_matrix
-def calculate_estimated_frequencies(input_fasta_location, trained_matrix, kmer, default_lambda):
+def calculate_estimated_frequencies(input_fasta_location, sensing_matrix, kmer, default_lambda):
"""
input_fasta is the input fasta file to find the estimated frequencies of
- trained_matrix is the trained matrix we are using to estimate the species
+ sensing_matrix is the sensing matrix we are using to estimate the species
kmer is the desired k-mer to use
default_lambda is inp
@@ -93,10 +93,10 @@ def calculate_estimated_frequencies(input_fasta_location, trained_matrix, kmer,
counts = np.concatenate([np.zeros(1), counts])
#form the k-mer sensing matrix
- trained_matrix = trained_matrix * default_lambda;
- trained_matrix = np.vstack((np.ones(trained_matrix.shape[1]), trained_matrix))
+ sensing_matrix = sensing_matrix * default_lambda;
+ sensing_matrix = np.vstack((np.ones(sensing_matrix.shape[1]), sensing_matrix))
- xstar, rnorm = scipy.optimize.nnls(trained_matrix, counts)
+ xstar, rnorm = scipy.optimize.nnls(sensing_matrix, counts)
xstar = xstar / xstar.sum(0)
return xstar