aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2014-03-25 13:21:38 -0400
committerCalvin Morrison <mutantturkey@gmail.com>2014-03-25 13:21:38 -0400
commit7d16533ca9ad31db771e74c65e15f9f421d3a47e (patch)
treebba3631be5e896494980825bf675d5db603fcd44 /src
parentab1bf5c1e90b351f50b5a8413135f854f2a5ab38 (diff)
Update scoring function.
Before, we were incorrectly scoring our background. We now use the new equation: score = (nb_primers * fg_mean_dist * fg_variance_dist) / (bg_ratio), where bg_ratio is bg_genome_length / count_of_bg_mers. This will be faster, and it will simplify our code (no wacky appends or anything). It can be even faster, if we want, by not even storing the background points, just their hits. It should be relatively easy to do that.
Diffstat (limited to 'src')
-rwxr-xr-xsrc/score_mers.py28
1 files changed, 10 insertions, 18 deletions
diff --git a/src/score_mers.py b/src/score_mers.py
index ba63f71..1d3456b 100755
--- a/src/score_mers.py
+++ b/src/score_mers.py
@@ -204,35 +204,27 @@ def score(combination):
#return [combination, "max", max(fg_dist)]
return None
- min_mer_distance = max(len(i) for i in combination)
- # return without calculating scores if any mers are closer than the length of
- # our longest mer in the combination
- if any(dist < min_mer_distance for dist in fg_dist):
- #return [combintaion, 'max']
- return None
-
-
- # bg points
- bg_pts = []
- bg_dist = []
+ # bg counts
+ bg_counts = 0
for mer in combination:
- bg_pts = bg_pts + bg_mers[mer]
+ bg_counts += len(bg_mers[mer])
- if len(bg_pts()) <= 1:
- bg_pts.append(0, 1, fg_genome_length)
+ if bg_counts <= 1:
+ bg_counts = 1
- bg_sum = sum(bg_pts)
- bg_ratio = (bg_genome_length / bg_sum)
+ bg_sum = len(bg_counts)
+ bg_ratio = (bg_genome_length / bg_sum)
nb_primers = len(combination)
fg_mean_dist = np.mean(fg_dist)
fg_std_dist = np.std(fg_dist)
+
# this is our equation
- score = (nb_primers * fg_mean_dist * fg_std_dist) / bg_ratio
+ mer_score = (nb_primers * fg_mean_dist * fg_std_dist) / bg_ratio
- return [combination, score, fg_mean_dist, fg_std_dist, bg_ratio]
+ return [combination, mer_score, fg_mean_dist, fg_std_dist, bg_ratio]
def load_end_points(fn):