diff options
| -rwxr-xr-x[-rw-r--r--] | quikr.py | 24 | ||||
| -rwxr-xr-x | quikr_train.py | 16 | 
2 files changed, 20 insertions, 20 deletions
| @@ -1,3 +1,4 @@ +#!/usr/bin/python  import os  import sys  import scipy.optimize.nnls @@ -29,9 +30,12 @@ def main():      if not os.path.isfile(args.fasta):          parser.error( "Input fasta file not found") - +          # If we are using a custom trained matrix, we need to do some basic checks -    if args.trained is not None:   +    if args.trained_matrix is not None:   +          +        if not os.path.isfile(args.trained_matrix): +            parser.error("custom trained matrix not be found")          if args.kmer is None:              parser.error("A kmer is required when using a custom matrix") @@ -43,14 +47,10 @@ def main():              input_lambda = 10000      # If we aren't using a custom trained matrix, load in the defaults      else: -        trained_matrix_location = "trainset7_112011N6Aaux.mat" +        trained_matrix_location = "output.npy"          input_lambda = 10000          kmer = 6 - -    if not os.path.isfile(args.trained): -        parser.error("custom trained matrix not be found") - -    xstar = quikr(args.fasta, trained_matrix_location, kmer, input_lambda) +        xstar = quikr(args.fasta, trained_matrix_location, kmer, input_lambda)      return 0 @@ -74,10 +74,10 @@ def quikr(input_fasta_location, trained_matrix_location, kmer, default_lambda):    # We use the count program to count ____    if uname == "Linux" and os.path.isfile("./count-linux"):      print "Detected Linux" -    count_input = Popen(["count-linux", "-r " + kmer, "-1", "-u", input_fasta_location], stdout=PIPE)  +    count_input = Popen(["./count-linux", "-r", str(kmer), "-1", "-u", input_fasta_location], stdout=PIPE)     elif uname == "Darwin" and os.path.isfile("./count-osx"):      print "Detected Mac OS X"  -    count_input = Popen(["count-osx", "-r 6", "-1", "-u", input_fasta_location], stdout=PIPE)  +    count_input = Popen(["count-osx", "-r", str(kmer), "-1", "-u", input_fasta_location], stdout=PIPE)     # load the output of our count program and form a probability vector from the counts   @@ -86,9 +86,11 @@ def quikr(input_fasta_location, trained_matrix_location, kmer, default_lambda):    counts = default_lambda * counts -  trained_matrix  = np.loadtxt(trained_matrix_location) +  trained_matrix  = np.load(trained_matrix_location)    # perform the non-negative least squares +  # import pdb; pdb.set_trace() +  counts = np.rot90(counts)    xstar = scipy.optimize.nnls(trained_matrix, counts)     xstar = xstar / sum(xstar)  diff --git a/quikr_train.py b/quikr_train.py index 94a8c4e..b14ddcd 100755 --- a/quikr_train.py +++ b/quikr_train.py @@ -1,3 +1,4 @@ +#!/usr/bin/python  import numpy as np  import os  import sys @@ -36,22 +37,21 @@ def quikr_train(input_file_location, kmer):    Takes a input fasta file, and kmer, returns a custom trained matrix    """ -   -  print "input fasta training file: " + input_file_location -  print "kmer: " + kmer +  kmer_file_name = str(kmer) + "mers.txt" -  kmer_file_name = kmer + "mers.txt" -  print kmer_file_name +  if not os.path.isfile(kmer_file_name): +    print "could not find kmer file"  +    exit()    uname = platform.uname()[0]    if uname == "Linux":       print "Detected Linux" -    input_file = Popen(["./probabilities-by-read-linux", kmer, input_file_location, kmer_file_name], stdout=PIPE)  +    input_file = Popen(["./probabilities-by-read-linux", str(kmer), input_file_location, kmer_file_name], stdout=PIPE)     elif uname == "Darwin":      print "Detected Mac OS X"  -    input_file = Popen(["./probabilities-by-read-osx", kmer, input_file_location, kmer_file_name])  +    input_file = Popen(["./probabilities-by-read-osx", str(kmer), input_file_location, kmer_file_name])     # load and  normalize the matrix by dividing each element by the sum of it's column.    matrix  = np.loadtxt(input_file.stdout) @@ -59,7 +59,5 @@ def quikr_train(input_file_location, kmer):    return normalized - -  if __name__ == "__main__":      sys.exit(main()) | 
