diff options
| -rwxr-xr-x | src/python/multifasta_to_otu | 34 | ||||
| -rwxr-xr-x | src/python/quikr | 16 | ||||
| -rwxr-xr-x | src/python/quikr.py | 26 | 
3 files changed, 36 insertions, 40 deletions
| diff --git a/src/python/multifasta_to_otu b/src/python/multifasta_to_otu index 3cc8f3e..431cc74 100755 --- a/src/python/multifasta_to_otu +++ b/src/python/multifasta_to_otu @@ -26,14 +26,14 @@ def main():    global input_directory     global output_directory     global lamb -  global trained_matrix  +  global sensing_matrix     parser = argparse.ArgumentParser(description="MultifastaOTU")    parser.add_argument("-i", "--input-directory", help="directory containing fasta files", required=True)    parser.add_argument("-o", "--otu-table", help="otu_table", required=True) -  parser.add_argument("-t", "--trained-matrix", help="your trained matrix ", required=True) -  parser.add_argument("-f", "--trained-fasta", help="the fasta file used to train your matrix", required=True) +  parser.add_argument("-s", "--sensing-matrix", help="your sensing matrix ", required=True) +  parser.add_argument("-f", "--sensing-fasta", help="the fasta file used to train your matrix", required=True)    parser.add_argument("-l", "--lamb", type=int, help="the default lambda value is 10,000")    parser.add_argument("-k", "--kmer", type=int, help="specifies which kmer to use, default=6")    parser.add_argument("-j", "--jobs", type=int, help="specifies how many jobs to run at once, default=number of CPUs") @@ -41,18 +41,18 @@ def main():    # our defaults    jobs = multiprocessing.cpu_count() -  trained_matrix = args.trained_matrix +  sensing_matrix = args.sensing_matrix    input_directory = args.input_directory    # Make sure our input exist    if not os.path.isdir(args.input_directory):      parser.error("Input directory not found") -  if not os.path.isfile(args.trained_matrix): -    parser.error("Custom trained matrix not found") +  if not os.path.isfile(args.sensing_matrix): +    parser.error("Custom sensing matrix not found") -  if not os.path.isfile(args.trained_fasta): -    parser.error("Fasta file of trained matrix not found") +  if not os.path.isfile(args.sensing_fasta): +    parser.error("Fasta file of sensing matrix not found")    # use alternative lambda    if args.lamb is not None: @@ -64,13 +64,13 @@ def main():    if args.kmer is not None:      kmer = args.kmer -  # Load trained matrix -  if q.is_compressed(args.trained_matrix): -    trained_matrix_file = gzip.open(args.trained_matrix, "rb") +  # Load sensing matrix +  if q.is_compressed(args.sensing_matrix): +    sensing_matrix_file = gzip.open(args.sensing_matrix, "rb")    else: -    trained_matrix_file = open(args.trained_matrix, "rb") +    sensing_matrix_file = open(args.sensing_matrix, "rb") -  trained_matrix = np.load(trained_matrix_file) +  sensing_matrix = np.load(sensing_matrix_file)    fasta_list = [] @@ -89,10 +89,10 @@ def main():    # Create an array of headers    headers = [] -  trained_matrix_headers = open(args.trained_fasta, "rU") -  for header in SeqIO.parse(trained_matrix_headers, "fasta"): +  sensing_matrix_headers = open(args.sensing_fasta, "rU") +  for header in SeqIO.parse(sensing_matrix_headers, "fasta"):      headers.append(header.id) -  trained_matrix_headers.close() +  sensing_matrix_headers.close()    # create our number of reads matrix    number_of_reads = np.zeros((len(headers), len(fasta_list))) @@ -148,7 +148,7 @@ def main():  def quikr_call(fasta_file):    print os.path.basename(fasta_file) -  xstar = q.calculate_estimated_frequencies(fasta_file, trained_matrix, kmer, lamb) +  xstar = q.calculate_estimated_frequencies(fasta_file, sensing_matrix, kmer, lamb)    return xstar  if __name__ == "__main__": diff --git a/src/python/quikr b/src/python/quikr index bae2b9f..7c9ce25 100755 --- a/src/python/quikr +++ b/src/python/quikr @@ -9,15 +9,11 @@ def main():      parser = argparse.ArgumentParser(description=      "Quikr returns the estimated frequencies of batcteria present when given a \ -    input FASTA file. \n \ -    A default trained matrix will be used if none is supplied \n \ -    You must supply a kmer and default lambda if using a custom trained \ -    matrix.") - +    input FASTA file. \n")      parser.add_argument("-f", "--fasta", help="the sample's fasta file of NGS READS", required=True)      parser.add_argument("-o", "--output", help="OTU_FRACTION_PRESENT, a vector \       representing the percentage of database sequence's presence in a sequence. (csv output)", required=True) -    parser.add_argument("-t", "--trained-matrix", help="the trained matrix", required=True) +    parser.add_argument("-s", "--sensing-matrix", help="the sensing matrix", required=True)      parser.add_argument("-l", "--lamb", type=int, help="the lambda size. (default is 10,000")      parser.add_argument("-k", "--kmer", type=int, required=True,          help="specifies the size of the kmer to use (the default is 6)") @@ -33,8 +29,8 @@ def main():      if not os.path.isfile(args.fasta):          parser.error( "Input fasta file not found") -    if not os.path.isfile(args.trained_matrix): -        parser.error("Custom trained matrix not found") +    if not os.path.isfile(args.sensing_matrix): +        parser.error("Custom sensing matrix not found")      # use alternative lambda      if args.lamb is not None: @@ -44,8 +40,8 @@ def main():      if args.kmer is not None:          kmer = args.kmer -    trained_matrix = quikr.load_trained_matrix_from_file(args.trained_matrix) -    xstar = quikr.calculate_estimated_frequencies(args.fasta, trained_matrix, kmer, lamb) +    sensing_matrix = quikr.load_sensing_matrix_from_file(args.sensing_matrix) +    xstar = quikr.calculate_estimated_frequencies(args.fasta, sensing_matrix, kmer, lamb)      np.savetxt(args.output, xstar, delimiter=",", fmt="%f")      return 0 diff --git a/src/python/quikr.py b/src/python/quikr.py index 98cc953..3c2b23a 100755 --- a/src/python/quikr.py +++ b/src/python/quikr.py @@ -39,7 +39,7 @@ def is_compressed(filename):  def train_matrix(input_file_location, kmer):    """ -  Takes a input fasta file, and kmer, returns a custom trained matrix +  Takes a input fasta file, and kmer, returns a custom sensing matrix    """    input_file = Popen(["bash", "-c", "probabilities-by-read " + str(kmer) + " " + input_file_location + " <(generate_kmers 6)"], stdout=PIPE)  @@ -54,23 +54,23 @@ def train_matrix(input_file_location, kmer):    return matrix -def load_trained_matrix_from_file(trained_matrix_location): -  """ This is a helper function to load our trained matrix and run quikr """ +def load_sensing_matrix_from_file(sensing_matrix_location): +  """ This is a helper function to load our sensing matrix and run quikr """ -  if is_compressed(trained_matrix_location): -    trained_matrix_file = gzip.open(trained_matrix_location, "rb") +  if is_compressed(sensing_matrix_location): +    sensing_matrix_file = gzip.open(sensing_matrix_location, "rb")    else: -    trained_matrix_file = open(trained_matrix_location, "rb") +    sensing_matrix_file = open(sensing_matrix_location, "rb") -  trained_matrix = np.load(trained_matrix_file) +  sensing_matrix = np.load(sensing_matrix_file) -  return trained_matrix +  return sensing_matrix -def calculate_estimated_frequencies(input_fasta_location, trained_matrix, kmer, default_lambda): +def calculate_estimated_frequencies(input_fasta_location, sensing_matrix, kmer, default_lambda):    """    input_fasta is the input fasta file to find the estimated frequencies of -  trained_matrix is the trained matrix we are using to estimate the species +  sensing_matrix is the sensing matrix we are using to estimate the species    kmer is the desired k-mer to use    default_lambda is inp  @@ -93,10 +93,10 @@ def calculate_estimated_frequencies(input_fasta_location, trained_matrix, kmer,    counts = np.concatenate([np.zeros(1), counts])    #form the k-mer sensing matrix -  trained_matrix = trained_matrix * default_lambda; -  trained_matrix = np.vstack((np.ones(trained_matrix.shape[1]), trained_matrix)) +  sensing_matrix = sensing_matrix * default_lambda; +  sensing_matrix = np.vstack((np.ones(sensing_matrix.shape[1]), sensing_matrix)) -  xstar, rnorm = scipy.optimize.nnls(trained_matrix, counts)  +  xstar, rnorm = scipy.optimize.nnls(sensing_matrix, counts)     xstar = xstar / xstar.sum(0)     return xstar | 
