summaryrefslogtreecommitdiff
path: root/quikr.py
blob: 1e2d67129dbdda8158b9f183144248521e774e95 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import sys
import scipy.optimize.nnls
import scipy.sparse
import numpy as np
import scipy as sp
from scipy.sparse import lil_matrix
from scipy.sparse.linalg import spsolve
from subprocess import *
import argparse
import platform

def main(args):
    """Parse the command-line options and report which mode was requested.

    args is the full argument vector in sys.argv form: the program name
    followed by the options. Passing None makes argparse read sys.argv
    itself.

    Recognised options:
      --trained  the trained matrix
      --fasta    input fasta file

    Prints one of two messages depending on whether --trained is the literal
    string "install" and returns 0, which the caller uses as the process exit
    status. argparse raises SystemExit by itself for --help and for
    unrecognised options.
    """
    parser = argparse.ArgumentParser(description="An argparse example")
    parser.add_argument("--trained", help="The trained matrix")
    parser.add_argument("--fasta", help="input fasta file")

    # Parse the vector we were handed (minus the program name) instead of
    # silently re-reading sys.argv, so the function honours its parameter.
    argv = None if args is None else list(args[1:])
    options = parser.parse_args(argv)

    # The parenthesised single-argument form prints the same text whether
    # print is a statement (Python 2) or a function (Python 3).
    if options.trained == "install":
        print("You asked for installation")
    else:
        print("You asked for something other than installation")

    return 0

def quikr(input_fasta_location, trained_matrix_location, kmer, default_lambda):
    """
    input_fasta_location is the path of the input FASTA file to find the
      estimated frequencies of
    trained_matrix_location is the path of the trained matrix we are using to
      estimate the species, in a text format numpy.loadtxt can read
    kmer is the desired k-mer size, handed to the external count program
    default_lambda is the factor the normalised k-mer counts are multiplied
      by before the least squares problem is solved

    returns the estimated frequencies of bacteria present when given an input
    FASTA file of amplicon (454) reads, as the solution vector of the
    non-negative least squares problem
        minimise || trained_matrix * x - default_lambda * counts ||,  x >= 0
    with one entry per column of the trained matrix. The vector is returned
    as computed; it is not rescaled to sum to one. A k-mer based, L1
    regularized, sparsity promoting algorithm is utilized.

    In practice reconstruction is accurate only down to the genus level (not
    species or strain).

    raises RuntimeError when no count program is available for this platform
    in the current directory, or when the count program exits with a non-zero
    status.
    raises ValueError when the counts sum to zero and therefore cannot be
    normalised.
    """
    # Imported here because the file-level "import scipy.optimize.nnls" only
    # binds the name "scipy", never "nnls".
    from scipy.optimize import nnls

    # Platform name as reported by platform.uname() -> (count program shipped
    # next to this script, message announcing the detected platform).
    count_programs = {
        "Linux": ("./count-linux", "Detected Linux"),
        "Darwin": ("./count-osx", "Detected Mac OS X"),
    }

    uname = platform.uname()[0]
    if uname not in count_programs:
        raise RuntimeError("no count program is available for platform %r" % uname)

    count_program, detected_message = count_programs[uname]
    if not os.path.isfile(count_program):
        raise RuntimeError("count program %r was not found" % count_program)
    print(detected_message)

    # We use the count program to count the k-mers in the input FASTA file.
    # Run exactly the path that was checked above rather than relying on a
    # PATH lookup of the bare program name.
    # NOTE(review): the size is passed as one "-r <k>" argv token; confirm the
    # count program accepts that form rather than two separate tokens.
    command = [count_program, "-r " + str(kmer), "-1", "-u", input_fasta_location]
    count_input = Popen(command, stdout=PIPE)
    try:
        counts = np.loadtxt(count_input.stdout)  # create a ndarray
    finally:
        # Release the pipe and reap the child so it does not linger.
        count_input.stdout.close()
        count_input.wait()

    if count_input.returncode != 0:
        raise RuntimeError(
            "count program %r exited with status %d"
            % (count_program, count_input.returncode))

    total = np.sum(counts)
    if total == 0:
        raise ValueError("the k-mer counts sum to zero; cannot normalise them")

    # Form a probability vector from our counts, then scale it by lambda.
    counts = default_lambda * (counts / total)

    trained_matrix = np.loadtxt(trained_matrix_location)

    # nnls returns (solution, residual norm); only the solution is the estimate.
    xstar, _residual_norm = nnls(trained_matrix, counts)
    return xstar


if __name__ == "__main__":
    # Script entry point: hand the full argument vector to main() and use
    # its return value as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)