aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2014-01-21 11:26:00 -0500
committerCalvin Morrison <mutantturkey@gmail.com>2014-01-21 11:26:00 -0500
commit16cc6fc3a71a346860f05a80c9b15de5a62b43f4 (patch)
treef86c4d3c7469597a8bba18be24cfd4d7b9be186f
parentf3da7b2df829633f353cdea17d1af6965ac7f01b (diff)
add output file
-rwxr-xr-xSelectiveGenomeAmplification2
-rwxr-xr-xselect_mers.py61
2 files changed, 30 insertions, 33 deletions
diff --git a/SelectiveGenomeAmplification b/SelectiveGenomeAmplification
index f833347..1f495eb 100755
--- a/SelectiveGenomeAmplification
+++ b/SelectiveGenomeAmplification
@@ -130,4 +130,4 @@ melting_range $min_melting_temp $max_melting_temp < $bg_counts > $bg_counts-bg-n
echo ""
echo "scoring mers"
-select_mers.py $fg_counts-fg-non-melting $fg_tmp $bg_counts-bg-non-melting $bg_tmp # > $(basename $foreground)_$(basename $background)_final_mers
+select_mers.py $fg_counts-fg-non-melting $fg_tmp $bg_counts-bg-non-melting $bg_tmp $output_directory/output_`date +%s`
diff --git a/select_mers.py b/select_mers.py
index a3fce06..b8aee7b 100755
--- a/select_mers.py
+++ b/select_mers.py
@@ -33,6 +33,10 @@ max_select = int(os.environ.get("max_select", 15));
max_mer_distance = int(os.environ.get("max_mer_distance", 5000));
+def populate_locations(input_fn, mers, mer):
+ ''' Run the strstreamone command, and parse in the integers that are output
+ by the command, and add it to mers[mer].pts
+ '''
cmd = 'strstreamone ' + mer + " < " + input_fn
@@ -41,9 +45,6 @@ max_mer_distance = int(os.environ.get("max_mer_distance", 5000));
mers[mer].pts.append(int(line))
-
- return None
-
def score_mers(selected):
from itertools import combinations
import time
@@ -52,32 +53,28 @@ def score_mers(selected):
p = Pool()
+ fh = open(output_file, 'w');
+ fh.write("scores:\n");
for select_n in range(1, max_select+1):
- t = time.time()
print "scoring size ", select_n
- scores_it = []
- scores_it = p.imap_unordered(score, combinations(selected, select_n))
- scores_it = filter(lambda x: x is not None, scores_it)
- scores = scores + scores_it
- print time.time() - t
-
-
-
-
-
+ scores_it = p.imap_unordered(score, combinations(selected, select_n))
+ for score_res in scores_it:
+ fh.write(str(score_res) + "\n");
return scores
+
def score(combination):
# input is a string of mers like
# ['ACCAA', 'ACCCGA', 'ACGTATA']
+ ret = [combination]
for mer in combination:
for other_mer in combination:
if not mer == other_mer:
if mer in other_mer:
- return None
-
+ ret.append("duplicates")
+ return ret
fg_pts = []
fg_dist = []
@@ -101,7 +98,8 @@ def score(combination):
bg_dist = np.array([abs(bg_pts[i] - bg_pts[i-1]) for i in range(1, len(bg_pts))])
if any(dist > max_mer_distance for dist in fg_dist):
- return None
+ ret.append("dist")
+ return ret
nb_primers = len(combination)
fg_mean_dist = np.mean(fg_dist)
@@ -112,14 +110,19 @@ def score(combination):
# this is our equation
score = (nb_primers * fg_mean_dist * fg_variance_dist) / ((bg_mean_dist * bg_variance_dist) + .000001)
- return (combination, score)
-
+ ret.append(score)
+ ret.append(fg_mean_dist)
+ ret.append(fg_variance_dist)
+ ret.append(bg_mean_dist)
+ ret.append(bg_variance_dist)
+
+ return ret
# select mers based on our 'selectivity' measure. (count in fg) / (count in bg)
def select_mers(fg_mers, bg_mers, select_nb):
- mers = [] # contains mer strings
+ mers = [] # contains mer strings
fg_arr = [] # contains fg counts
bg_arr = [] # contains bg counts
@@ -132,7 +135,7 @@ def select_mers(fg_mers, bg_mers, select_nb):
fg_arr = np.array(fg_arr, dtype='f');
bg_arr = np.array(bg_arr, dtype='f');
- selectivity = fg_arr - bg_arr
+ selectivity = fg_arr / bg_arr
arr = [(mers[i], fg_arr[i], bg_arr[i], selectivity[i]) for i in range(len(mers))]
@@ -147,9 +150,11 @@ def select_mers(fg_mers, bg_mers, select_nb):
return arr
def pop_fg(mer):
+ ''' helper for map function '''
populate_locations(fg_fasta_fn, fg_mers, mer)
def pop_bg(mer):
+ ''' helper for map function '''
populate_locations(bg_fasta_fn, bg_mers, mer)
def main():
@@ -199,10 +204,11 @@ def main():
selected = fg_mers.keys()
else:
selected = select_mers(fg_mers, bg_mers, max_select)
- selected = selected[-50:]
+ selected = selected[-25:]
+ print "searching through combinations of"
print selected
- pdb.set_trace()
+# pdb.set_trace()
# print "selected the top ", max_select, " mers"
# print "selected:", ", ".join(selected)
@@ -213,22 +219,13 @@ def main():
map(pop_fg, selected)
map(pop_bg, selected)
- pdb.set_trace()
scores = score_mers(selected)
- pdb.set_trace()
print "fg_genome_length", fg_genome_length
print "bg_genome_length", bg_genome_length
- sys.stdout.write("scores:\n");
- for score in scores:
- if score is not "Distance":
- sys.stdout.write(", ".join(list(score[0])) + "\t")
- sys.stdout.write(str(score[1]) + "\t")
- # sys.stdsys.stdout.write("\t".join(score.dist_arr));
- sys.stdout.write("\n");
if __name__ == "__main__":