#!/usr/bin/python
"""Train a custom sensing matrix for quikr from a FASTA sequence database.

Provenance: this is the content of ``quikr_train.py`` as removed by commit
e8dfa85cd7e0428e53aac532c31f1ac1cc3cf1c1 ("starting to modularize",
Calvin, Thu, 7 Mar 2013 17:17:20 -0500), restored to readable form with
the defects noted in the comments below fixed.

The module can be imported for ``quikr_train`` or run as a script:

    python quikr_train.py -i input.fasta -k 6 -o trained_matrix.npy
"""
import argparse
import gzip
import os
import platform
import sys
from subprocess import PIPE, Popen

import numpy as np

# Helper executable to run, keyed by the system name that
# ``platform.uname()[0]`` reports. Paths are relative to the current
# working directory, exactly as in the original script.
_PROBABILITY_BINARIES = {
    "Linux": "./probabilities-by-read-linux",
    "Darwin": "./probabilities-by-read-osx",
}


def main():
    """Parse the command line, train a matrix and save it with ``np.save``.

    The matrix is written in numpy's binary ``.npy`` format, optionally
    through gzip when ``-z`` is given.

    Returns:
        0 on success, for use as the process exit status.

    Raises:
        SystemExit: from argparse on bad arguments or a missing input
            file, or from ``quikr_train`` when training cannot start.
    """
    parser = argparse.ArgumentParser(
        description="quikr_train returns a custom trained matrix that can "
                    "be used with the quikr function. You must supply a kmer.")

    parser.add_argument("-i", "--input", required=True,
                        help="training database of sequences (fasta format)")
    parser.add_argument("-o", "--output", required=True,
                        help="sensing matrix output file (numpy .npy format)")
    # The kmer size is mandatory: without it the script used to go looking
    # for a file literally named "Nonemers.txt".
    parser.add_argument("-k", "--kmer", type=int, required=True,
                        help="kmer size (integer)")
    parser.add_argument("-z", "--compress", action="store_true",
                        help="compress output with gzip")

    args = parser.parse_args()

    if not os.path.isfile(args.input):
        parser.error("Input database not found")

    matrix = quikr_train(args.input, args.kmer)

    # The context manager guarantees the file is flushed and closed, which
    # matters most for gzip, whose trailer is only written on close.
    opener = gzip.open if args.compress else open
    with opener(args.output, "wb") as output_file:
        np.save(output_file, matrix)

    return 0


def quikr_train(input_file_location, kmer):
    """Build a trained matrix from a FASTA file for the given kmer size.

    Runs the platform-specific ``probabilities-by-read`` helper on the
    input and post-processes the numeric table it prints on stdout.

    Args:
        input_file_location: path of the FASTA training database.
        kmer: kmer size; the file ``"<kmer>mers.txt"`` must exist in the
            current working directory.

    Returns:
        A 2-D numpy array: the transpose of the helper's table with every
        column scaled so that it sums to 1.

    Raises:
        SystemExit: if the kmer file is missing or the platform has no
            helper binary.
    """
    kmer_file_name = str(kmer) + "mers.txt"

    if not os.path.isfile(kmer_file_name):
        # sys.exit with a message writes it to stderr and exits non-zero;
        # the original printed to stdout and exited with status 0.
        sys.exit("could not find kmer file")

    uname = platform.uname()[0]
    binary = _PROBABILITY_BINARIES.get(uname)
    if binary is None:
        # Previously this fell through to a NameError further down.
        sys.exit("unsupported platform: " + uname)

    # stdout must be a pipe on every platform; the Darwin branch used to
    # omit it, leaving nothing for np.loadtxt to read.
    process = Popen(
        [binary, str(kmer), input_file_location, kmer_file_name],
        stdout=PIPE)
    try:
        # ndmin=2 keeps a single-row result two-dimensional so the
        # rotation below still works.
        matrix = np.loadtxt(process.stdout, ndmin=2)
    finally:
        process.stdout.close()
        process.wait()  # reap the child so it does not linger as a zombie

    # Normalize the matrix by dividing each element by the sum of its
    # column. The rot90 / flipud pair amounts to a transpose, so the net
    # result is the transposed table with each column summing to 1. The
    # original sequence of operations is kept so results stay identical.
    matrix = np.rot90(matrix)
    matrix = matrix / matrix.sum(0)
    matrix = np.flipud(matrix)
    return matrix


if __name__ == "__main__":
    sys.exit(main())