From 06fa848b90982ddcd4308bb88d70a0d5f11f785b Mon Sep 17 00:00:00 2001
From: Calvin Morrison
Date: Tue, 25 Mar 2014 16:33:36 -0400
Subject: add average binding filter

---
 src/score_mers.py | 53 +++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 39 insertions(+), 14 deletions(-)

(limited to 'src/score_mers.py')

diff --git a/src/score_mers.py b/src/score_mers.py
index 4679c45..db7f838 100755
--- a/src/score_mers.py
+++ b/src/score_mers.py
@@ -18,22 +18,14 @@ bg_mers = {}
 
 seq_ends = []
 
-if len(sys.argv) == 5:
-	selectivity_fn = sys.argv[1]
-	fg_fasta_fn = sys.argv[2]
-	bg_fasta_fn = sys.argv[3]
-	output_file = sys.argv[4]
-
-	fg_genome_length = os.path.getsize(fg_fasta_fn)
-	bg_genome_length = os.path.getsize(bg_fasta_fn)
-else:
-	print "please specify your inputs"
-	print "ex: score_mers.py selectivity_file fg_fasta bg_fasta output_file"
-	exit()
+fg_genome_length = 0
+bg_genome_length = 0
+
+output_file = ""
 
 # import our variables
 cpus = int(os.environ.get("cpus", cpu_count()))
-debug = int(os.environ.get("debug", False))
+debug = os.environ.get("debug", False)
 min_mer_range = int(os.environ.get("min_mer_range", 6))
 max_mer_range = int(os.environ.get("max_mer_range", 12))
 min_mer_count = int(os.environ.get("min_mer_count", 0))
@@ -42,7 +34,6 @@ max_check = int(os.environ.get("max_check", 35))
 max_mer_distance = int(os.environ.get("max_mer_distance", 5000))
 max_consecutive_binding = int(os.environ.get("max_consecutive_binding", 4))
 
-
 def get_max_consecutive_binding(mer1, mer2):
 	'''
 	Return the maximum number of consecutively binding mers
@@ -237,6 +228,7 @@ def score(combination):
 	return [combination, mer_score, fg_mean_dist, fg_std_dist, bg_ratio]
 
 def load_end_points(fn):
+
 	''' get all the points of the end of each sequence in a sample '''
 
 	end_points = [0]
@@ -253,6 +245,22 @@ def load_end_points(fn):
 
 	return end_points
 
+def get_length(fn):
+	''' get length of a genome ( number of base pairs )'''
+
+	cmd = 'grep "^>" ' + fn + " -v | tr -d '\\n' | wc -c"
+
+	if debug:
+		print "loading sequence end points"
+		print "executing: " + cmd
+	points_fh = Popen(cmd, stdout=PIPE, shell=True)
+
+	length = points_fh.stdout.readline()
+
+	length = int(length)
+
+	return length
+
+
 def load_heterodimer_dic(selected_mers):
 	'''
 	Generate a heterodimer dict which contains every possible combination of
@@ -277,6 +285,23 @@ def main():
 	Score Combinations For All Sizes
 	'''
 
+	global fg_genome_length
+	global bg_genome_length
+	global output_file
+
+	if len(sys.argv) == 5:
+		selectivity_fn = sys.argv[1]
+		fg_fasta_fn = sys.argv[2]
+		bg_fasta_fn = sys.argv[3]
+		output_file = sys.argv[4]
+	else:
+		print "please specify your inputs"
+		print "ex: score_mers.py selectivity_file fg_fasta bg_fasta output_file"
+		exit()
+
+	fg_genome_length = get_length(fg_fasta_fn)
+	bg_genome_length = get_length(bg_fasta_fn)
+
 	selectivity_fh = open(selectivity_fn, "r")
 
 	# load our mer list into python
-- 
cgit v1.2.3