From c827f42ff9af35117c0732439d6d495235469cf9 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Wed, 23 Apr 2014 15:19:08 -0400 Subject: simplify select_mers.py --- src/select_mers.py | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) (limited to 'src/select_mers.py') diff --git a/src/select_mers.py b/src/select_mers.py index b3cc09e..5f42717 100755 --- a/src/select_mers.py +++ b/src/select_mers.py @@ -6,6 +6,7 @@ fg_mers = {} bg_mers = {} fg_weight = float(os.environ.get("fg_weight", 0)) +max_check = int(os.environ.get("max_check", 0)) if(len(sys.argv) == 3): fg_count_fn = sys.argv[1] @@ -19,33 +20,18 @@ else: # select mers based on our 'selectivity' measure. (count in fg) / (count in bg) def select_mers(fg_mers, bg_mers): - import numpy as np - mers = [] # contains mer strings - fg_arr = [] # contains fg counts - bg_arr = [] # contains bg counts - # populate our bg_arr and fg_arr as well as our mer arr. - for mer in fg_mers.keys(): - mers.append(mer); - bg_arr.append(bg_mers.get(mer, 1)); - fg_arr.append(fg_mers[mer]); - - fg_arr = np.array(fg_arr, dtype='f'); - bg_arr = np.array(bg_arr, dtype='f'); - - selectivity = (fg_arr / bg_arr) * (fg_arr**fg_weight) - arr = [(mers[i], fg_arr[i], bg_arr[i], selectivity[i]) for i in range(len(mers))] + score = {} - # filter results less than 1 ( indicates that the bg is more present than the fg) - # arr = filter(lambda i: i[3] > 1, arr) + for mer in fg_mers.keys(): + score[mer] = (fg_mers[mer] / bg_mers[mer]) * (fg_mers[mer]**fg_weight) - # sort by the selectivity - arr = sorted(arr, key = lambda row: row[3]) + sorted_scored_mers = sorted(score, key=score.get) - # return only our mers, without our selectivity scores - return arr + for mer in sorted_scored_mers: + print mer, int(fg_mers[mer]), int(bg_mers[mer]), (fg_mers[mer] / bg_mers[mer]) * (fg_mers[mer]**fg_weight) def main(): @@ -57,15 +43,17 @@ def main(): for mers,fh in [(fg_mers, fg_count_fh), (bg_mers, bg_count_fh)]: for line in fh: (mer, count) = line.split() - mers[mer] = int(count) + mers[mer] = float(count) + for mer in fg_mers.keys(): + if mer not in bg_mers: + bg_mers[mer] = 1 + for mer in bg_mers.keys(): if mer not in fg_mers: del bg_mers[mer] selected = select_mers(fg_mers, bg_mers) - for row in selected: - print row[0] +"\t"+str("%d" % row[1]) + "\t" + str("%d" % row[2]) + "\t" + str("%.5f" % row[3]) if __name__ == "__main__": sys.exit(main()) -- cgit v1.2.3