about summary refs log tree commit diff
path: root/src/score_mers.py
diff options
context:
space:
mode:
author Calvin Morrison <mutantturkey@gmail.com> 2014-04-01 13:25:09 -0400
committer Calvin Morrison <mutantturkey@gmail.com> 2014-04-01 13:25:09 -0400
commit e7b18504069aab40d68c38be195692fbaa50ce87 (patch)
tree 7aae7bf42cefc0041ebcb2226ef0aececc54f97d /src/score_mers.py
parent 2a1793992786fbc584d654d4fff6a6a698d32c33 (diff)
add feature to read from previously scored all-scores file, clean up code as well
Diffstat (limited to 'src/score_mers.py')
-rwxr-xr-x src/score_mers.py 108
1 files changed, 73 insertions, 35 deletions
diff --git a/src/score_mers.py b/src/score_mers.py
index 71cf5d5..3ff62a8 100755
--- a/src/score_mers.py
+++ b/src/score_mers.py
@@ -217,7 +217,7 @@ def percentage(part, whole, precision=2):
def write_header(fh):
fh.write("# variables used: max_select=" + str(max_select) + " max_check=" + str(max_check) + " max_mer_distance=" + str(max_mer_distance) + " max_consecutive_binding=" + str(max_consecutive_binding) + " primer_weight=" + str(primer_weight) + "\n")
fh.write("# scoring function: " + str(score_str) + "\n")
- fh.write("nb_primers\tCombination\tScore\tFG_mean_dist\tFG_stdev_dist\tBG_ratio\n")
+ fh.write("#nb_primers\tCombination\tScore\tFG_mean_dist\tFG_stdev_dist\tBG_ratio\n")
def write_result(fh, score_res):
combination, score_val, fg_mean_dist, fg_stddev_dist, bg_ratio = score_res
@@ -363,6 +363,22 @@ def score(combination):
return [combination, mer_score, fg_mean_dist, fg_std_dist, bg_ratio]
+
+def initialize_mers(foreground, background, load_background=True):
+ print "Calculating heterodimer distances"
+ load_heterodimer_dic(fg_mers.keys())
+
+ print "Populating foreground locations"
+ populate_locations(fg_mers.keys(), fg_mers, foreground)
+
+ if load_background:
+ print "Populating background locations"
+ populate_locations(fg_mers.keys(), bg_mers, background)
+
+ for mer in bg_mers:
+ bg_mers[mer] = len(bg_mers[mer])
+
+
def main():
'''
Basic worflow:
@@ -385,17 +401,23 @@ def main():
parser.add_argument("-s", "--selectivity-file", help="mer selectivity file generated by select_mers.py", required=False)
parser.add_argument("-c", "--combination-file", help="a set of combinations you want to score", required=False)
parser.add_argument("-m", "--mer-file", help="a set of you want to score all combinations of", required=False)
+ parser.add_argument("-r", "--rescore-file", help="rescore an already scored output file", required=False)
args = parser.parse_args()
- nb_flags = len(filter(lambda x: x is None, [args.combination_file, args.selectivity_file,args.mer_file]))
- if nb_flags != 2:
- if nb_flags == 3:
- print "you must either have a selectivity, combination, or mer file to score from"
+ nb_flags = len(filter(lambda x: x is None, [args.combination_file, args.selectivity_file,args.mer_file, args.rescore_file]))
+ if nb_flags != 3:
+ if nb_flags == 4:
+ parser.error("you must have at least one input file to score from [-s -c -m -r]")
else:
- print "you can only select either a selectivity, combination, or mer file to score from"
+ parser.error("you can only have one input file to score from" )
exit()
+ if not os.path.isfile(args.foreground):
+ parser.error(args.foreground + " not found")
+ if not os.path.isfile(args.background):
+ parser.error(args.background + " not found")
+
output_file = args.output
print "Getting genome length"
@@ -426,16 +448,18 @@ def main():
selected_mers = [x.split()[0] for x in selected_mers]
- print "Populating foreground locations"
- populate_locations(selected_mers, fg_mers, args.foreground)
+ if len(selected_mers) is 0:
+ print "no merss found."
+ exit()
- print "Calculating heterodimer distances"
- load_heterodimer_dic(selected_mers)
+ # we already have our background counts
+ initialize_mers(args.foreground, args.background, load_background=False)
print "Scoring mer combinations"
score_all_combinations(selected_mers)
-
+
elif args.combination_file is not None:
+
print "Scoring specific mer combinations"
combinations = []
@@ -448,45 +472,59 @@ def main():
fg_mers[mer] = []
bg_mers[mer] = []
- print "Calculating heterodimer distances"
- load_heterodimer_dic(fg_mers.keys())
-
- print "Populating foreground locations"
- populate_locations(fg_mers.keys(), fg_mers, args.foreground)
-
- print "Populating background locations"
- populate_locations(fg_mers.keys(), bg_mers, args.background)
-
- for mer in bg_mers:
- bg_mers[mer] = len(bg_mers[mer])
+ if len(combinations) is 0:
+ print "no combinations found."
+ exit()
+ initialize_mers(args.foreground, args.background)
score_specific_combinations(combinations)
-
elif args.mer_file is not None:
- print "Scoring all mer combinations from ", args.mer_file
-
- combinations = []
+ print "Scoring all possible mer combinations from ", args.mer_file
mer_fh = open(args.mer_file, "r")
for mer in mer_fh:
mer = mer.strip()
+ if(len(mer.split()) > 1):
+ print "skipping line:", mer, "each line should contain only one mer"
+ continue
+
fg_mers[mer] = []
bg_mers[mer] = []
- print "calculating heterodimer distances"
- load_heterodimer_dic(fg_mers.keys())
+ if len(fg_mers.keys()) is 0:
+ print "no mers found."
+ exit()
- print "Populating foreground locations"
- populate_locations(fg_mers.keys(), fg_mers, args.foreground)
+ initialize_mers(args.foreground, args.background)
+ score_all_combinations(fg_mers.keys())
- print "Populating background locations"
- populate_locations(fg_mers.keys(), bg_mers, args.background)
+ elif args.rescore_file is not None:
+ print "Scoring all mer combinations from ", args.rescore_file
+
+ combinations = []
+
+ score_fh = open(args.rescore_file, "r")
+ for line in score_fh:
+ if line.startswith("#"):
+ continue
+ split_line = line.split('\t')
+ combination = split_line[1].split()
+ combinations.append(combination)
+ for mer in combination:
+ fg_mers[mer] = []
+ bg_mers[mer] = []
- for mer in bg_mers:
- bg_mers[mer] = len(bg_mers[mer])
+ if len(combinations) is 0:
+ print "no combinations found."
+ exit()
+
+ initialize_mers(args.foreground, args.background)
+
+ print "re-scoring scores file"
+
+ score_specific_combinations(combinations)
- score_all_combinations(fg_mers.keys())
print "output file:", output_file