quikr and quikr_train

author: Calvin <calvin@EESI> 2013-02-14 16:21:47 -0500
committer: Calvin <calvin@EESI> 2013-02-14 16:21:47 -0500
commit: dd6ba075ea5ec86936900adaf71d69ae374d1794 (patch)
tree: 0ad18c8d488be046336f5d1585e73132dd38c94d
2 files changed, 117 insertions, 0 deletions
diff --git a/quikr.py b/quikr.py
new file mode 100644
index 0000000..1e2d671
--- /dev/null
+++ b/quikr.py
@@ -0,0 +1,67 @@
+import os
+import sys
+import scipy.optimize.nnls
+import scipy.sparse
+import numpy as np
+import scipy as sp
+from scipy.sparse import lil_matrix
+from scipy.sparse.linalg import spsolve
+from subprocess import *
+import argparse
+import platform
+
+def main(args):
+
+    parser = argparse.ArgumentParser(description="An argparse example")
+    parser.add_argument("--trained", help="The trained matrix")
+    parser.add_argument("--fasta", help="input fasta file")
+
+    args = parser.parse_args()
+
+    if args.trained== "install":
+        print "You asked for installation" 
+    else:
+        print "You asked for something other than installation"
+
+  
+    return 0
+
+def quikr(input_fasta_location, trained_matrix_location, kmer, default_lambda):
+  """
+  input_fasta is the input fasta file to find the estimated frequencies of
+  trained_matrix is the trained matrix we are using to estimate the species
+  kmer is the desired k-mer to use
+  default_lambda is inp 
+  
+  returns the estimated requencies of bacteria present when given an input
+  FASTA file of amplicon (454) reads. A k-mer based, L1 regularized, sparsity
+  promoting algorthim is utilized. 
+
+  In practice reconstruction is accurate only down to the genus level (not 
+  species or strain).
+  """
+
+  uname = platform.uname()[0]
+
+  # We use the count program to count ____
+  if uname == "Linux" and os.path.isfile("./count-linux"):
+    print "Detected Linux"
+    count_input = Popen(["count-linux", "-r " + kmer, "-1", "-u", input_fasta], stdout=PIPE) 
+  elif uname == "Darwin" and os.path.isfile("./count-osx"):
+    print "Detected Mac OS X" 
+    count_input = Popen(["count-osx", "-r 6", "-1", "-u", input_fasta], stdout=PIPE) 
+
+  
+  counts = np.loadtxt(count_input.stdout) # create a ndarray
+  counts = counts / np.sum(counts) # form a probablility vector from our counts
+ 
+  counts = default_lambda * counts
+
+  trained_matrix  = np.loadtxt(trained_matrix_location)
+   
+  xstar = nnls(trained_matrix, counts)
+  return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
diff --git a/quikr_train.py b/quikr_train.py
new file mode 100644
index 0000000..2076f5a
--- /dev/null
+++ b/quikr_train.py
@@ -0,0 +1,50 @@
+#from scipy.sparse import * 
+import numpy as np
+import sys
+from subprocess import *
+import platform
+
+# You can call this script independently, and will save the 
+# trained matrix as a numpy file.
+# example: python quikr-train.py input.fasta 6 trained_matrix.npy
+
+def main(argv):
+  input_file_location = argv[1]
+  kmer = argv[2]
+  output_file_location = argv[3]
+
+  # call the quikr train function, save the output with np.save
+  matrix = quikr_train(argv[1], argv[2])
+  np.save(output_file_location, matrix)
+
+  return 0
+
+def quikr_train(input_file_location, kmer):
+
+  
+  print "input fasta training file: " + input_file_location
+  print "kmer: " + kmer
+
+  kmer_file_name = kmer + "mers.txt"
+  print kmer_file_name
+
+  
+  uname = platform.uname()[0]
+
+  if uname == "Linux": 
+    print "Detected Linux"
+    input_file = Popen(["./probabilities-by-read-linux", kmer, input_file_location, kmer_file_name], stdout=PIPE) 
+  elif uname == "Darwin":
+    print "Detected Mac OS X" 
+    input_file = Popen(["./probabilities-by-read-osx", kmer, input_file_location, kmer_file_name]) 
+
+  # load and  normalize the matrix by dividing each element by the sum of it's column.
+  matrix  = np.loadtxt(input_file.stdout)
+  normalized = matrix / matrix.sum(0)
+
+  return normalized
+  
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
author	Calvin <calvin@EESI>	2013-02-14 16:21:47 -0500
committer	Calvin <calvin@EESI>	2013-02-14 16:21:47 -0500
commit	dd6ba075ea5ec86936900adaf71d69ae374d1794 (patch)
tree	0ad18c8d488be046336f5d1585e73132dd38c94d