about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Calvin Morrison <mutantturkey@gmail.com> 2014-03-03 14:33:07 -0500
committer: Calvin Morrison <mutantturkey@gmail.com> 2014-03-03 14:33:07 -0500
commit: 86fdc850a7852d56a1402d16575fbc456400c2b6 (patch)
tree: a5b856b341d8e0077dcaa12d026e8aa2c6b0cdac
parent: 40b612fa50531bb166d635a9a9965d97afb14be1 (diff)
add check to see if results are even possible, and use gzip out
-rwxr-xr-x src/score_mers.py | 17
1 files changed, 14 insertions, 3 deletions
diff --git a/src/score_mers.py b/src/score_mers.py
index 7fa2b53..46963df 100755
--- a/src/score_mers.py
+++ b/src/score_mers.py
@@ -97,16 +97,27 @@ def apply_filters(combination):
def score_mers(selected):
import time
+ import gzip
# import gmpy
p = Pool(cpus)
- fh = open(output_file, 'w');
- fh.write("scores:\n");
+ fh = gzip.open(output_file + ".gz", 'wb');
+
+ total = 0;
+ for mer in selected:
+ total += len(fg_mers[mer].pts)
+ if (fg_genome_length / total) > max_mer_distance:
+ print "even if we select all top ", max_select,
+ print "mers disregarding any critera, and they were perfectly evenly spaced we would ",
+ print "still not meet the right max mer distance < ", max_mer_distance, "requirement."
+
+ print total, " / ", fg_genome_length, " = ", total / fg_genome_length
+
for select_n in range(1, max_select+1):
print "scoring size ", select_n,
t = time.time()
- scores_it = p.imap_unordered(score, combinations(selected, select_n), chunksize=128000)
+ scores_it = p.imap_unordered(score, combinations(selected, select_n), chunksize=8192)
for score_res in scores_it:
if score_res is not None:
fh.write(str(score_res) + "\n");