summaryrefslogtreecommitdiff
path: root/quikr_train.py
diff options
context:
space:
mode:
Diffstat (limited to 'quikr_train.py')
-rw-r--r--quikr_train.py50
1 files changed, 50 insertions, 0 deletions
diff --git a/quikr_train.py b/quikr_train.py
new file mode 100644
index 0000000..2076f5a
--- /dev/null
+++ b/quikr_train.py
@@ -0,0 +1,50 @@
+#from scipy.sparse import *
+import numpy as np
+import sys
+from subprocess import *
+import platform
+
+# You can call this script independently; it will save the
+# trained matrix as a numpy file.
+# example: python quikr_train.py input.fasta 6 trained_matrix.npy
+
+def main(argv):
+ input_file_location = argv[1]
+ kmer = argv[2]
+ output_file_location = argv[3]
+
+ # call the quikr train function, save the output with np.save
+ matrix = quikr_train(argv[1], argv[2])
+ np.save(output_file_location, matrix)
+
+ return 0
+
+def quikr_train(input_file_location, kmer):
+
+
+ print "input fasta training file: " + input_file_location
+ print "kmer: " + kmer
+
+ kmer_file_name = kmer + "mers.txt"
+ print kmer_file_name
+
+
+ uname = platform.uname()[0]
+
+ if uname == "Linux":
+ print "Detected Linux"
+ input_file = Popen(["./probabilities-by-read-linux", kmer, input_file_location, kmer_file_name], stdout=PIPE)
+ elif uname == "Darwin":
+ print "Detected Mac OS X"
+ input_file = Popen(["./probabilities-by-read-osx", kmer, input_file_location, kmer_file_name])
+
+ # load and normalize the matrix by dividing each element by the sum of it's column.
+ matrix = np.loadtxt(input_file.stdout)
+ normalized = matrix / matrix.sum(0)
+
+ return normalized
+
+
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv))  # main()'s return value becomes the exit status