From 2d923ad2aed6701dbeb33d8700f52e8d0028db15 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Thu, 27 Mar 2014 23:34:56 -0400 Subject: Scoring all mer combinations from a file with a -m --- src/score_mers.py | 61 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/score_mers.py b/src/score_mers.py index a7c27c9..e3d2c58 100755 --- a/src/score_mers.py +++ b/src/score_mers.py @@ -169,6 +169,7 @@ def load_heterodimer_dic(selected_mers): def check_feasible(selected): total = 0 for mer in selected: + print mer, len(fg_mers[mer]), len(selected) total += len(fg_mers[mer]) if (fg_genome_length / (total + 1 )) > max_mer_distance: print "even if we select all top ", max_select, @@ -235,24 +236,28 @@ def score_specific_combinations(mers): total_reject = len(mers) - total_scored print_rejected(total_reject, len(mers), total_scored, excluded) -def score_all_combinations(selected): +def score_all_combinations(mers): import time total_scored = 0 total_checked = 0 excluded = [0, 0, 0] - check_feasible(selected) + check_feasible(mers) p = Pool(cpus) fh = open(output_file, 'wb') write_header(fh) - for select_n in range(1, max_select+1): + max_size = max_select+1 + if len(mers) < max_select + 1: + max_size = len(mers) + 1 + + for select_n in range(1, max_size ): print "scoring size ", select_n, t = time.time() - scores_it = p.imap_unordered(score, combinations(selected, select_n), chunksize=8192) + scores_it = p.imap_unordered(score, combinations(mers, select_n), chunksize=8192) for score_res in scores_it: total_checked += 1 if type(score_res) is list: @@ -350,14 +355,16 @@ def main(): parser.add_argument("-o", "--output", help="output fasta with UIDs in the file", required=True) parser.add_argument("-s", "--selectivity-file", help="mer selectivity file generated by select_mers.py", required=False) parser.add_argument("-c", "--combination-file", help="a set of combinations you want to score", required=False) + parser.add_argument("-m", "--mer-file", help="a set of you want to score all combinations of", required=False) args = parser.parse_args() - if args.selectivity_file is None and args.combination_file is None: - print "you must either have a selectivity file or a combination file to score from" - exit() - if args.selectivity_file is not None and args.combination_file is not None: - print "you can only select either a selectivity file or a combination file to score from" + nb_flags = len(filter(lambda x: x is None, [args.combination_file, args.selectivity_file,args.mer_file])) + if nb_flags != 2: + if nb_flags == 3: + print "you must either have a selectivity, combination, or mer file to score from" + else: + print "you can only select either a selectivity, combination, or mer file to score from" exit() output_file = args.output @@ -370,7 +377,7 @@ def main(): seq_ends = load_end_points(args.foreground) - if(args.selectivity_file is not None): + if args.selectivity_file is not None: print "Scoring all mer combinations" @@ -399,7 +406,7 @@ def main(): print "scoring mer combinations" score_all_combinations(selected_mers) - else: + elif args.combination_file is not None: print "Scoring specific mer combinations" combinations = [] @@ -407,7 +414,6 @@ def main(): combination_fh = open(args.combination_file, "r") for line in combination_fh: mers = line.split() - combinations.append(mers) for mer in mers: fg_mers[mer] = [] bg_mers[mer] = [] @@ -424,7 +430,36 @@ def main(): for mer in bg_mers: bg_mers[mer] = len(bg_mers[mer]) - score_specific_combinations(combinations) + score_specific_combinations(fg_mers.keys()) + + + elif args.mer_file is not None: + print "Scoring all mer combinations from ", args.mer_file + + combinations = [] + + mer_fh = open(args.mer_file, "r") + for mer in mer_fh: + mer = mer.strip() + fg_mers[mer] = [] + bg_mers[mer] = [] + + print fg_mers.keys() + print "calculating heterodimer distances" + load_heterodimer_dic(fg_mers.keys()) + + print "Populating foreground locations" + populate_locations(fg_mers.keys(), fg_mers, args.foreground) + + print "Populating background locations" + populate_locations(fg_mers.keys(), bg_mers, args.background) + + print fg_mers + + for mer in bg_mers: + bg_mers[mer] = len(bg_mers[mer]) + + score_all_combinations(fg_mers.keys()) print "output_file:", output_file -- cgit v1.2.1