From e7b18504069aab40d68c38be195692fbaa50ce87 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Tue, 1 Apr 2014 13:25:09 -0400 Subject: add feature to read from previously scored all-scores file, clean up code as well --- src/score_mers.py | 108 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 35 deletions(-) (limited to 'src/score_mers.py') diff --git a/src/score_mers.py b/src/score_mers.py index 71cf5d5..3ff62a8 100755 --- a/src/score_mers.py +++ b/src/score_mers.py @@ -217,7 +217,7 @@ def percentage(part, whole, precision=2): def write_header(fh): fh.write("# variables used: max_select=" + str(max_select) + " max_check=" + str(max_check) + " max_mer_distance=" + str(max_mer_distance) + " max_consecutive_binding=" + str(max_consecutive_binding) + " primer_weight=" + str(primer_weight) + "\n") fh.write("# scoring function: " + str(score_str) + "\n") - fh.write("nb_primers\tCombination\tScore\tFG_mean_dist\tFG_stdev_dist\tBG_ratio\n") + fh.write("#nb_primers\tCombination\tScore\tFG_mean_dist\tFG_stdev_dist\tBG_ratio\n") def write_result(fh, score_res): combination, score_val, fg_mean_dist, fg_stddev_dist, bg_ratio = score_res @@ -363,6 +363,22 @@ def score(combination): return [combination, mer_score, fg_mean_dist, fg_std_dist, bg_ratio] + +def initialize_mers(foreground, background, load_background=True): + print "Calculating heterodimer distances" + load_heterodimer_dic(fg_mers.keys()) + + print "Populating foreground locations" + populate_locations(fg_mers.keys(), fg_mers, foreground) + + if load_background: + print "Populating background locations" + populate_locations(fg_mers.keys(), bg_mers, background) + + for mer in bg_mers: + bg_mers[mer] = len(bg_mers[mer]) + + def main(): ''' Basic worflow: @@ -385,17 +401,23 @@ def main(): parser.add_argument("-s", "--selectivity-file", help="mer selectivity file generated by select_mers.py", required=False) parser.add_argument("-c", "--combination-file", help="a set of combinations you want to score", required=False) parser.add_argument("-m", "--mer-file", help="a set of you want to score all combinations of", required=False) + parser.add_argument("-r", "--rescore-file", help="rescore an already scored output file", required=False) args = parser.parse_args() - nb_flags = len(filter(lambda x: x is None, [args.combination_file, args.selectivity_file,args.mer_file])) - if nb_flags != 2: - if nb_flags == 3: - print "you must either have a selectivity, combination, or mer file to score from" + nb_flags = len(filter(lambda x: x is None, [args.combination_file, args.selectivity_file,args.mer_file, args.rescore_file])) + if nb_flags != 3: + if nb_flags == 4: + parser.error("you must have at least one input file to score from [-s -c -m -r]") else: - print "you can only select either a selectivity, combination, or mer file to score from" + parser.error("you can only have one input file to score from" ) exit() + if not os.path.isfile(args.foreground): + parser.error(args.foreground + " not found") + if not os.path.isfile(args.background): + parser.error(args.background + " not found") + output_file = args.output print "Getting genome length" @@ -426,16 +448,18 @@ def main(): selected_mers = [x.split()[0] for x in selected_mers] - print "Populating foreground locations" - populate_locations(selected_mers, fg_mers, args.foreground) + if len(selected_mers) is 0: + print "no merss found." + exit() - print "Calculating heterodimer distances" - load_heterodimer_dic(selected_mers) + # we already have our background counts + initialize_mers(args.foreground, args.background, load_background=False) print "Scoring mer combinations" score_all_combinations(selected_mers) - + elif args.combination_file is not None: + print "Scoring specific mer combinations" combinations = [] @@ -448,45 +472,59 @@ def main(): fg_mers[mer] = [] bg_mers[mer] = [] - print "Calculating heterodimer distances" - load_heterodimer_dic(fg_mers.keys()) - - print "Populating foreground locations" - populate_locations(fg_mers.keys(), fg_mers, args.foreground) - - print "Populating background locations" - populate_locations(fg_mers.keys(), bg_mers, args.background) - - for mer in bg_mers: - bg_mers[mer] = len(bg_mers[mer]) + if len(combinations) is 0: + print "no combinations found." + exit() + initialize_mers(args.foreground, args.background) score_specific_combinations(combinations) - elif args.mer_file is not None: - print "Scoring all mer combinations from ", args.mer_file - - combinations = [] + print "Scoring all possible mer combinations from ", args.mer_file mer_fh = open(args.mer_file, "r") for mer in mer_fh: mer = mer.strip() + if(len(mer.split()) > 1): + print "skipping line:", mer, "each line should contain only one mer" + continue + fg_mers[mer] = [] bg_mers[mer] = [] - print "calculating heterodimer distances" - load_heterodimer_dic(fg_mers.keys()) + if len(fg_mers.keys()) is 0: + print "no mers found." + exit() - print "Populating foreground locations" - populate_locations(fg_mers.keys(), fg_mers, args.foreground) + initialize_mers(args.foreground, args.background) + score_all_combinations(fg_mers.keys()) - print "Populating background locations" - populate_locations(fg_mers.keys(), bg_mers, args.background) + elif args.rescore_file is not None: + print "Scoring all mer combinations from ", args.rescore_file + + combinations = [] + + score_fh = open(args.rescore_file, "r") + for line in score_fh: + if line.startswith("#"): + continue + split_line = line.split('\t') + combination = split_line[1].split() + combinations.append(combination) + for mer in combination: + fg_mers[mer] = [] + bg_mers[mer] = [] - for mer in bg_mers: - bg_mers[mer] = len(bg_mers[mer]) + if len(combinations) is 0: + print "no combinations found." + exit() + + initialize_mers(args.foreground, args.background) + + print "re-scoring scores file" + + score_specific_combinations(combinations) - score_all_combinations(fg_mers.keys()) print "output file:", output_file -- cgit v1.2.3