about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Calvin Morrison <mutantturkey@gmail.com> 2014-03-13 17:15:08 -0400
committer Calvin Morrison <mutantturkey@gmail.com> 2014-03-13 17:15:08 -0400
commit 8200fba820749f34052d851fbe17f56d56f65976 (patch)
tree b78398bc5ab09b25e0d3d4dedca74649868425cf /src
parent 39a653d1b49c0e4fa5de1783e7aba1743fd66e32 (diff)
Refactoring, features, rage!
- Rename apply_filters to filter_mers - filter_mers now returns True (instead of False) when a combination should be rejected - Use filter_mers in score() - Add a total_scored check that reports when no results were found - Use np.diff() instead of manually computing the fg distances - Wrap long comment lines - Check if len(bg_pts) <= 0
Diffstat (limited to 'src')
-rwxr-xr-x src/score_mers.py 61
1 files changed, 32 insertions, 29 deletions
diff --git a/src/score_mers.py b/src/score_mers.py
index c10b04a..a049340 100755
--- a/src/score_mers.py
+++ b/src/score_mers.py
@@ -110,28 +110,21 @@ def populate_locations(input_fn, mers, mer):
for line in strstream.stdout:
mers[mer].pts.append(int(line))
-def apply_filters(combination):
- for mer in combination:
- for other_mer in combination:
- if not mer == other_mer:
- if mer in other_mer:
- return False
+def filter_mers(combination):
for combo in combinations(combination, 2):
if heterodimer_dic[combo]:
- return False
-
- return True
-
-
-def score_mers(selected):
- import time
- # import gmpy
+ return True
- p = Pool(cpus)
+ for mer in combination:
+ for other_mer in combination:
+ if not mer == other_mer:
+ if mer in other_mer:
+ return True
- fh = open(output_file, 'wb');
+ return False
+def check_feasible(selected):
total = 0;
for mer in selected:
total += len(fg_mers[mer].pts)
@@ -141,7 +134,17 @@ def score_mers(selected):
print "still not meet the right max mer distance < ", max_mer_distance, "requirement."
print total, " / ", fg_genome_length, " = ", total / fg_genome_length
+ exit()
+def score_mers(selected):
+ import time
+ total_scored = 0;
+
+ check_feasible(selected)
+
+ p = Pool(cpus)
+
+ fh = open(output_file, 'wb');
fh.write("Combination\tScore\tFG_mean_dist\tFG_var_dist\tBG_mean_dist\tBG_var_dist\n");
for select_n in range(1, max_select+1):
print "scoring size ", select_n,
@@ -149,6 +152,7 @@ def score_mers(selected):
scores_it = p.imap_unordered(score, combinations(selected, select_n), chunksize=8192)
for score_res in scores_it:
if score_res is not None:
+ total_scored += 1;
combination, scores, fg_mean_dist, fg_variance_dist, bg_mean_dist, bg_variance_dist = score_res
fh.write(str(combination) + "\t");
fh.write(str(scores) + "\t");
@@ -158,23 +162,17 @@ def score_mers(selected):
fh.write(str(bg_variance_dist) + "\n");
print "size ", select_n, "took:", time.time() - t
+ if(total_scored == 0):
+ print "NO RESULTS FOUND"
heterodimer_dic = {}
def score(combination):
# input is a string of mers like
# ['ACCAA', 'ACCCGA', 'ACGTATA']
- for combo in combinations(combination, 2):
- if [combo] is True:
- #return [combination, 'het']
- return None
-
- for mer in combination:
- for other_mer in combination:
- if not mer == other_mer:
- if mer in other_mer:
- #return [combination, 'dup']
- return None
+ # check if the combination passes our filters
+ if filter_mers(combination):
+ return None
# fg points
fg_pts = []
@@ -186,7 +184,7 @@ def score(combination):
fg_pts.sort()
# fg distances
- fg_dist = np.array([abs(fg_pts[i] - fg_pts[i-1]) for i in range(1, len(fg_pts))])
+ fg_dist = np.diff(fg_pts)
# return without calculating scores if any objects are higher than our max distance
if any(dist > max_mer_distance for dist in fg_dist):
@@ -194,7 +192,8 @@ def score(combination):
return None
min_mer_distance = max(len(i) for i in combination)
- # return without calculating scores if any mers are closer than the length of our longest mer in the combination
+ # return without calculating scores if any mers are closer than the length of
+ # our longest mer in the combination
if any(dist < min_mer_distance for dist in fg_dist):
#return [combintaion, 'max']
return None
@@ -207,6 +206,9 @@ def score(combination):
for mer in combination:
bg_pts = bg_pts + bg_mers[mer].pts
+ if len(bg_pts()) <= 0:
+ bg_pts.append(0, 1, fg_genome_length)
+
bg_pts.sort()
# bg distances
@@ -270,6 +272,7 @@ def main():
print "Populating foreground locations"
map(pop_fg, selected_mers)
+
print "Populating background locations"
map(pop_bg, selected_mers)