diff options
author | Calvin Morrison <mutantturkey@gmail.com> | 2014-03-25 13:21:38 -0400 |
---|---|---|
committer | Calvin Morrison <mutantturkey@gmail.com> | 2014-03-25 13:21:38 -0400 |
commit | 7d16533ca9ad31db771e74c65e15f9f421d3a47e (patch) | |
tree | bba3631be5e896494980825bf675d5db603fcd44 /src | |
parent | ab1bf5c1e90b351f50b5a8413135f854f2a5ab38 (diff) |
Update scoring function.
Previously we were scoring the background incorrectly. We now use the new
equation:
score = (nb_primers * fg_mean_dist * fg_std_dist) / (bg_ratio)
where bg_ratio is bg_genome_length / count_of_bg_mers
This will be faster and will simplify our code (no wacky appends or
anything).
It could be made even faster by not storing the background points at all,
only their hit counts. That should be relatively easy to do.
Diffstat (limited to 'src')
-rwxr-xr-x | src/score_mers.py | 28 |
1 files changed, 10 insertions, 18 deletions
diff --git a/src/score_mers.py b/src/score_mers.py index ba63f71..1d3456b 100755 --- a/src/score_mers.py +++ b/src/score_mers.py @@ -204,35 +204,27 @@ def score(combination): #return [combination, "max", max(fg_dist)] return None - min_mer_distance = max(len(i) for i in combination) - # return without calculating scores if any mers are closer than the length of - # our longest mer in the combination - if any(dist < min_mer_distance for dist in fg_dist): - #return [combintaion, 'max'] - return None - - - # bg points - bg_pts = [] - bg_dist = [] + # bg counts + bg_counts = 0 for mer in combination: - bg_pts = bg_pts + bg_mers[mer] + bg_counts += len(bg_mers[mer]) - if len(bg_pts()) <= 1: - bg_pts.append(0, 1, fg_genome_length) + if bg_counts <= 1: + bg_counts = 1 - bg_sum = sum(bg_pts) - bg_ratio = (bg_genome_length / bg_sum) + bg_sum = len(bg_counts) + bg_ratio = (bg_genome_length / bg_sum) nb_primers = len(combination) fg_mean_dist = np.mean(fg_dist) fg_std_dist = np.std(fg_dist) + # this is our equation - score = (nb_primers * fg_mean_dist * fg_std_dist) / bg_ratio + mer_score = (nb_primers * fg_mean_dist * fg_std_dist) / bg_ratio - return [combination, score, fg_mean_dist, fg_std_dist, bg_ratio] + return [combination, mer_score, fg_mean_dist, fg_std_dist, bg_ratio] def load_end_points(fn): |