From 7d16533ca9ad31db771e74c65e15f9f421d3a47e Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Tue, 25 Mar 2014 13:21:38 -0400 Subject: Update scoring function. Before, we were incorrectly scoring our background. Using the new equation: score = (nb_primers * fg_mean_dist * fg_variance_dist) / (bg_ratio) where bg_ratio is bg_genome_length / count_of_bg_mers. This will be faster, and will simplify our code (no wacky appends or anything). It can be made even faster, if we want, by not storing the background points at all, just their hit counts. Should be relatively easy to do that. --- src/score_mers.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/score_mers.py b/src/score_mers.py index ba63f71..1d3456b 100755 --- a/src/score_mers.py +++ b/src/score_mers.py @@ -204,35 +204,27 @@ def score(combination): #return [combination, "max", max(fg_dist)] return None - min_mer_distance = max(len(i) for i in combination) - # return without calculating scores if any mers are closer than the length of - # our longest mer in the combination - if any(dist < min_mer_distance for dist in fg_dist): - #return [combintaion, 'max'] - return None - - - # bg points - bg_pts = [] - bg_dist = [] + # bg counts + bg_counts = 0 for mer in combination: - bg_pts = bg_pts + bg_mers[mer] + bg_counts += len(bg_mers[mer]) - if len(bg_pts()) <= 1: - bg_pts.append(0, 1, fg_genome_length) + if bg_counts <= 1: + bg_counts = 1 - bg_sum = sum(bg_pts) - bg_ratio = (bg_genome_length / bg_sum) + bg_sum = len(bg_counts) + bg_ratio = (bg_genome_length / bg_sum) nb_primers = len(combination) fg_mean_dist = np.mean(fg_dist) fg_std_dist = np.std(fg_dist) + # this is our equation - score = (nb_primers * fg_mean_dist * fg_std_dist) / bg_ratio + mer_score = (nb_primers * fg_mean_dist * fg_std_dist) / bg_ratio - return [combination, score, fg_mean_dist, fg_std_dist, bg_ratio] + return [combination, mer_score, fg_mean_dist, fg_std_dist, bg_ratio] def
load_end_points(fn): -- cgit v1.2.1