about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Calvin Morrison <mutantturkey@gmail.com> 2014-03-27 23:34:56 -0400
committer Calvin Morrison <mutantturkey@gmail.com> 2014-03-27 23:34:56 -0400
commit 2d923ad2aed6701dbeb33d8700f52e8d0028db15 (patch)
tree 5a36e938557020671820630b9da422f151a99b1f /src
parent de1f88095a623ebe1d83cbfa05868760343ad323 (diff)
Scoring all mer combinations from a file with a -m
Diffstat (limited to 'src')
-rwxr-xr-x src/score_mers.py 61
1 files changed, 48 insertions, 13 deletions
diff --git a/src/score_mers.py b/src/score_mers.py
index a7c27c9..e3d2c58 100755
--- a/src/score_mers.py
+++ b/src/score_mers.py
@@ -169,6 +169,7 @@ def load_heterodimer_dic(selected_mers):
def check_feasible(selected):
total = 0
for mer in selected:
+ print mer, len(fg_mers[mer]), len(selected)
total += len(fg_mers[mer])
if (fg_genome_length / (total + 1 )) > max_mer_distance:
print "even if we select all top ", max_select,
@@ -235,24 +236,28 @@ def score_specific_combinations(mers):
total_reject = len(mers) - total_scored
print_rejected(total_reject, len(mers), total_scored, excluded)
-def score_all_combinations(selected):
+def score_all_combinations(mers):
import time
total_scored = 0
total_checked = 0
excluded = [0, 0, 0]
- check_feasible(selected)
+ check_feasible(mers)
p = Pool(cpus)
fh = open(output_file, 'wb')
write_header(fh)
- for select_n in range(1, max_select+1):
+ max_size = max_select+1
+ if len(mers) < max_select + 1:
+ max_size = len(mers) + 1
+
+ for select_n in range(1, max_size ):
print "scoring size ", select_n,
t = time.time()
- scores_it = p.imap_unordered(score, combinations(selected, select_n), chunksize=8192)
+ scores_it = p.imap_unordered(score, combinations(mers, select_n), chunksize=8192)
for score_res in scores_it:
total_checked += 1
if type(score_res) is list:
@@ -350,14 +355,16 @@ def main():
parser.add_argument("-o", "--output", help="output fasta with UIDs in the file", required=True)
parser.add_argument("-s", "--selectivity-file", help="mer selectivity file generated by select_mers.py", required=False)
parser.add_argument("-c", "--combination-file", help="a set of combinations you want to score", required=False)
+ parser.add_argument("-m", "--mer-file", help="a set of you want to score all combinations of", required=False)
args = parser.parse_args()
- if args.selectivity_file is None and args.combination_file is None:
- print "you must either have a selectivity file or a combination file to score from"
- exit()
- if args.selectivity_file is not None and args.combination_file is not None:
- print "you can only select either a selectivity file or a combination file to score from"
+ nb_flags = len(filter(lambda x: x is None, [args.combination_file, args.selectivity_file,args.mer_file]))
+ if nb_flags != 2:
+ if nb_flags == 3:
+ print "you must either have a selectivity, combination, or mer file to score from"
+ else:
+ print "you can only select either a selectivity, combination, or mer file to score from"
exit()
output_file = args.output
@@ -370,7 +377,7 @@ def main():
seq_ends = load_end_points(args.foreground)
- if(args.selectivity_file is not None):
+ if args.selectivity_file is not None:
print "Scoring all mer combinations"
@@ -399,7 +406,7 @@ def main():
print "scoring mer combinations"
score_all_combinations(selected_mers)
- else:
+ elif args.combination_file is not None:
print "Scoring specific mer combinations"
combinations = []
@@ -407,7 +414,6 @@ def main():
combination_fh = open(args.combination_file, "r")
for line in combination_fh:
mers = line.split()
- combinations.append(mers)
for mer in mers:
fg_mers[mer] = []
bg_mers[mer] = []
@@ -424,7 +430,36 @@ def main():
for mer in bg_mers:
bg_mers[mer] = len(bg_mers[mer])
- score_specific_combinations(combinations)
+ score_specific_combinations(fg_mers.keys())
+
+
+ elif args.mer_file is not None:
+ print "Scoring all mer combinations from ", args.mer_file
+
+ combinations = []
+
+ mer_fh = open(args.mer_file, "r")
+ for mer in mer_fh:
+ mer = mer.strip()
+ fg_mers[mer] = []
+ bg_mers[mer] = []
+
+ print fg_mers.keys()
+ print "calculating heterodimer distances"
+ load_heterodimer_dic(fg_mers.keys())
+
+ print "Populating foreground locations"
+ populate_locations(fg_mers.keys(), fg_mers, args.foreground)
+
+ print "Populating background locations"
+ populate_locations(fg_mers.keys(), bg_mers, args.background)
+
+ print fg_mers
+
+ for mer in bg_mers:
+ bg_mers[mer] = len(bg_mers[mer])
+
+ score_all_combinations(fg_mers.keys())
print "output_file:", output_file