aboutsummaryrefslogtreecommitdiff
path: root/select_mers.py
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2014-01-29 11:53:30 -0500
committerCalvin Morrison <mutantturkey@gmail.com>2014-01-29 11:53:30 -0500
commit94d04a1e503121a98b403f882c18a4f0799267d7 (patch)
tree0d2cf5586b31bddc9bca99b4b07ebb4b993f1130 /select_mers.py
parent73531da5cdf33f9bde7d4db0e4ce96f1e41f581b (diff)
add filtering based on consecutive mer lengths
Diffstat (limited to 'select_mers.py')
-rwxr-xr-xselect_mers.py83
1 files changed, 0 insertions, 83 deletions
diff --git a/select_mers.py b/select_mers.py
deleted file mode 100755
index 21306cc..0000000
--- a/select_mers.py
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/usr/bin/env python
-import sys
-import os
-
-fg_mers = {}
-bg_mers = {}
-
-min_mer_count = int(os.environ.get("min_mer_count", 0));
-
-if(len(sys.argv) == 5):
- fg_count_fn = sys.argv[1]
- fg_fasta_fn = sys.argv[2]
- bg_count_fn = sys.argv[3]
- bg_fasta_fn = sys.argv[4]
-
- fg_genome_length = os.path.getsize(fg_fasta_fn)
- bg_genome_length = os.path.getsize(bg_fasta_fn)
-else:
- print len(sys.argv)
- print "please specify your inputs"
- print "ex: select_mers.py fg_counts fg_fasta bg_counts bg_fasta"
- exit()
-
-
-# select mers based on our 'selectivity' measure. (count in fg) / (count in bg)
-def select_mers(fg_mers, bg_mers):
- import numpy as np
-
- mers = [] # contains mer strings
- fg_arr = [] # contains fg counts
- bg_arr = [] # contains bg counts
-
- # populate our bg_arr and fg_arr as well as our mer arr.
- for mer in fg_mers.keys():
- mers.append(mer);
- bg_arr.append(bg_mers.get(mer, 1));
- fg_arr.append(fg_mers[mer]);
-
- fg_arr = np.array(fg_arr, dtype='f');
- bg_arr = np.array(bg_arr, dtype='f');
-
- selectivity = (fg_arr / bg_arr)
-
- arr = [(mers[i], fg_arr[i], bg_arr[i], selectivity[i]) for i in range(len(mers))]
-
- # filter results less than 1 ( indicates that the bg is more present than the fg)
- # arr = filter(lambda i: i[3] > 1, arr)
-
- # sort by the selectivity
- arr = sorted(arr, key = lambda row: row[3])
-
- # return only our mers, without our selectivity scores
- return arr
-
-
-def main():
-
- fg_count_fh = open(fg_count_fn, "r")
- bg_count_fh = open(bg_count_fn, "r")
-
- # copy in our fg_mers and counts
- for mers,fh in [(fg_mers, fg_count_fh), (bg_mers, bg_count_fh)]:
- for line in fh:
- (mer, count) = line.split()
- mers[mer] = int(count)
-
- if min_mer_count >= 1:
- for mer in fg_mers.keys():
- if(fg_mers[mer] < min_mer_count):
- del fg_mers[mer]
- if mer in bg_mers:
- del bg_mers[mer]
-
- for mer in bg_mers.keys():
- if mer not in fg_mers:
- del bg_mers[mer]
-
- selected = select_mers(fg_mers, bg_mers)
- for row in selected:
- print row[0] +"\t"+str("%d" % row[1]) + "\t" + str("%d" % row[2]) + "\t" + str("%.5f" % row[3])
-
-if __name__ == "__main__":
- sys.exit(main())