import os
import sys
import scipy.optimize.nnls
import scipy.sparse
import numpy as np
from subprocess import *
import argparse
import platform

def main():
    """Parse the command line, validate the inputs and run ``quikr``.

    Options:
        -f/--fasta           path to the input FASTA file (required).
        -t/--trained-matrix  path to a custom trained matrix (optional).
        -k/--kmer            k-mer size; required with a custom matrix.
        -l/--lamb            lambda; only consulted with a custom matrix,
                             where it defaults to 10,000.

    Without a custom matrix the bundled defaults are used (default matrix
    file, kmer 6, lambda 10,000).

    Returns:
        0 on success. Invalid arguments are reported through
        ``parser.error``, which prints a usage message and exits the
        process with status 2.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Quikr returns the estimated frequencies of bacteria present "
            "when given an input FASTA file. "
            "A default trained matrix will be used if none is supplied. "
            "You must supply a kmer and default lambda if using a custom "
            "trained matrix."))

    parser.add_argument("-f", "--fasta", help="path to a fasta file",
                        required=True)
    parser.add_argument("-t", "--trained-matrix",
                        help="path to a custom trained matrix")
    parser.add_argument("-l", "--lamb", type=int,
                        help="the default lambda value is 10,000")
    parser.add_argument("-k", "--kmer", type=int,
                        help="specifies which kmer to use, must be used "
                             "with a custom trained database")

    args = parser.parse_args()

    # Do some basic sanity checks
    if not os.path.isfile(args.fasta):
        parser.error("Input fasta file not found")

    # argparse stores "--trained-matrix" under the attribute "trained_matrix".
    if args.trained_matrix is not None:
        # A custom matrix was trained for one specific k-mer size, so the
        # caller has to tell us which one it was.
        if args.kmer is None:
            parser.error("A kmer is required when using a custom matrix")

        trained_matrix_location = args.trained_matrix
        kmer = args.kmer
        # use 10,000 as default Lambda
        input_lambda = 10000 if args.lamb is None else args.lamb
    else:
        # If we aren't using a custom trained matrix, load in the defaults
        trained_matrix_location = "trainset7_112011N6Aaux.mat"
        input_lambda = 10000
        kmer = 6

    # Check the path that will actually be loaded (custom or default).
    if not os.path.isfile(trained_matrix_location):
        parser.error("Trained matrix file not found: %s"
                     % trained_matrix_location)

    # NOTE(review): the estimated frequencies are computed but not written
    # anywhere yet; decide on an output destination/format.
    xstar = quikr(args.fasta, trained_matrix_location, kmer, input_lambda)

    return 0

def quikr(input_fasta_location, trained_matrix_location, kmer, default_lambda):
    """Estimate the frequencies of bacteria present in an input FASTA file.

    The input is expected to be a FASTA file of amplicon (454) reads. A k-mer
    based, L1 regularized, sparsity promoting algorithm is utilized.

    In practice reconstruction is accurate only down to the genus level (not
    species or strain).

    Steps performed here: the platform-specific ``count`` helper
    (``./count-linux`` or ``./count-osx`` in the current directory) is run on
    the FASTA file, its output is normalized to sum to 1 and multiplied by
    ``default_lambda``, and the result is fitted against the trained matrix
    with non-negative least squares.

    Args:
        input_fasta_location: path of the FASTA file to analyse.
        trained_matrix_location: path of the trained matrix, read with
            ``np.loadtxt``.
        kmer: k-mer size handed to the count program through ``-r``.
        default_lambda: factor the normalized counts are multiplied by
            before the fit.

    Returns:
        The non-negative least squares solution divided by its own sum.

    Raises:
        RuntimeError: if no count program for the current platform is
            present in the working directory.
        ValueError: if the count program reports no counts at all.
    """
    # platform name -> (helper binary, label printed to the user)
    count_programs = {
        "Linux": ("./count-linux", "Linux"),
        "Darwin": ("./count-osx", "Mac OS X"),
    }

    uname = platform.uname()[0]
    count_program, label = count_programs.get(uname, (None, None))
    if count_program is None or not os.path.isfile(count_program):
        raise RuntimeError(
            "No count program available for platform %r in the current "
            "directory" % uname)
    print("Detected " + label)

    # Launch the very file that was just checked ("./..."); a bare name would
    # be searched on PATH instead. kmer is an int, so convert it explicitly.
    count_input = Popen(
        [count_program, "-r " + str(kmer), "-1", "-u", input_fasta_location],
        stdout=PIPE)

    # load the output of our count program
    try:
        counts = np.loadtxt(count_input.stdout)
    finally:
        # Always release the pipe and reap the child process.
        count_input.stdout.close()
        count_input.wait()

    # form a probability vector from the counts
    total = np.sum(counts)
    if total == 0:
        raise ValueError(
            "count program produced no counts for " + input_fasta_location)
    counts = default_lambda * (counts / total)

    # NOTE(review): np.loadtxt only parses plain-text matrices; if the trained
    # matrix is stored in another format (e.g. a MATLAB .mat file) it needs a
    # different loader - confirm against the shipped matrix.
    trained_matrix = np.loadtxt(trained_matrix_location)

    # perform the non-negative least squares; nnls returns the pair
    # (solution, residual norm) and only the solution is needed.
    xstar, _residual = scipy.optimize.nnls(trained_matrix, counts)

    return xstar / np.sum(xstar)


# Script entry point: run main() and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)