about summary refs log tree commit diff
diff options
context:
space:
mode:
author Calvin Morrison <mutantturkey@gmail.com> 2014-04-23 15:19:08 -0400
committer Calvin Morrison <mutantturkey@gmail.com> 2014-04-23 15:19:08 -0400
commit c827f42ff9af35117c0732439d6d495235469cf9 (patch)
tree d690a49742b3fbc88cc1034fe81bc913c4fb0d12
parent f4570d2ba0af5c5806e9e588233459993b021c22 (diff)
simplify select_mers.py
-rwxr-xr-x src/select_mers.py 36
1 files changed, 12 insertions, 24 deletions
diff --git a/src/select_mers.py b/src/select_mers.py
index b3cc09e..5f42717 100755
--- a/src/select_mers.py
+++ b/src/select_mers.py
@@ -6,6 +6,7 @@ fg_mers = {}
bg_mers = {}
fg_weight = float(os.environ.get("fg_weight", 0))
+max_check = int(os.environ.get("max_check", 0))
if(len(sys.argv) == 3):
fg_count_fn = sys.argv[1]
@@ -19,33 +20,18 @@ else:
# select mers based on our 'selectivity' measure. (count in fg) / (count in bg)
def select_mers(fg_mers, bg_mers):
- import numpy as np
- mers = [] # contains mer strings
- fg_arr = [] # contains fg counts
- bg_arr = [] # contains bg counts
-
# populate our bg_arr and fg_arr as well as our mer arr.
- for mer in fg_mers.keys():
- mers.append(mer);
- bg_arr.append(bg_mers.get(mer, 1));
- fg_arr.append(fg_mers[mer]);
-
- fg_arr = np.array(fg_arr, dtype='f');
- bg_arr = np.array(bg_arr, dtype='f');
-
- selectivity = (fg_arr / bg_arr) * (fg_arr**fg_weight)
- arr = [(mers[i], fg_arr[i], bg_arr[i], selectivity[i]) for i in range(len(mers))]
+ score = {}
- # filter results less than 1 ( indicates that the bg is more present than the fg)
- # arr = filter(lambda i: i[3] > 1, arr)
+ for mer in fg_mers.keys():
+ score[mer] = (fg_mers[mer] / bg_mers[mer]) * (fg_mers[mer]**fg_weight)
- # sort by the selectivity
- arr = sorted(arr, key = lambda row: row[3])
+ sorted_scored_mers = sorted(score, key=score.get)
- # return only our mers, without our selectivity scores
- return arr
+ for mer in sorted_scored_mers:
+ print mer, int(fg_mers[mer]), int(bg_mers[mer]), (fg_mers[mer] / bg_mers[mer]) * (fg_mers[mer]**fg_weight)
def main():
@@ -57,15 +43,17 @@ def main():
for mers,fh in [(fg_mers, fg_count_fh), (bg_mers, bg_count_fh)]:
for line in fh:
(mer, count) = line.split()
- mers[mer] = int(count)
+ mers[mer] = float(count)
+ for mer in fg_mers.keys():
+ if mer not in bg_mers:
+ bg_mers[mer] = 1
+
for mer in bg_mers.keys():
if mer not in fg_mers:
del bg_mers[mer]
selected = select_mers(fg_mers, bg_mers)
- for row in selected:
- print row[0] +"\t"+str("%d" % row[1]) + "\t" + str("%d" % row[2]) + "\t" + str("%.5f" % row[3])
if __name__ == "__main__":
sys.exit(main())