aboutsummaryrefslogtreecommitdiff
path: root/src/select_mers.py
blob: 5f42717c8bb9bbd513905eaf113ff6476effd89a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python2.7
import sys
import os

# Per-mer count tables, filled in by main(): mer string -> count (float).
fg_mers = {}
bg_mers = {}

# Tuning values come from the environment; each defaults to 0 when unset.
# fg_weight is the exponent applied to the foreground count in select_mers().
fg_weight = float(os.environ.get("fg_weight", 0))
# NOTE(review): max_check is parsed here but never read in the visible code.
max_check = int(os.environ.get("max_check", 0))

# Exactly two positional arguments are required: the foreground count file
# followed by the background count file.
if len(sys.argv) == 3:
	fg_count_fn = sys.argv[1]
	bg_count_fn = sys.argv[2]
else:
	# All diagnostics go to stderr so that stdout only ever carries the mer
	# table; the argument count used to be printed on stdout.
	sys.stderr.write("%d\n" % len(sys.argv))
	sys.stderr.write("please specify your inputs\n")
	sys.stderr.write("ex: select_mers.py fg_counts bg_count\n")
	# sys.exit() instead of the bare exit(): the latter is injected by the
	# site module for interactive use and is missing when run with -S.
	sys.exit(1)


# select mers based on our 'selectivity' measure. (count in fg) / (count in bg)
def select_mers(fg_mers, bg_mers):
	"""Print every foreground mer ranked by its selectivity score.

	The score of a mer is

		(fg count / bg count) * (fg count ** fg_weight)

	where fg_weight is the module-level setting.  One line per mer is
	written to stdout, lowest score first, in the form

		<mer> <fg count> <bg count> <score>

	with both counts truncated to integers.

	fg_mers -- dict mapping each mer to its foreground count
	bg_mers -- dict mapping each mer to its background count; it must hold
	           an entry for every key of fg_mers

	Returns None.  Raises KeyError when a foreground mer is missing from
	bg_mers and ZeroDivisionError when a background count is zero.
	"""
	score = {}
	for mer, fg_count in fg_mers.items():
		score[mer] = (fg_count / bg_mers[mer]) * (fg_count ** fg_weight)

	# Ascending order: the most selective mers come out last.
	for mer in sorted(score, key=score.get):
		# Reuse the stored score instead of evaluating the formula again.
		fields = (mer, int(fg_mers[mer]), int(bg_mers[mer]), score[mer])
		# str() on each field and single-space joining reproduces what the
		# print statement emitted, while also being valid Python 3.
		sys.stdout.write(" ".join(str(field) for field in fields) + "\n")


def main():
	"""Load both count files, align the background table, print the ranking.

	Reads fg_count_fn into fg_mers and bg_count_fn into bg_mers (all four
	are module-level names).  Each non-blank line must consist of a mer and
	a count separated by whitespace.  Afterwards bg_mers is adjusted so that
	it has exactly the same keys as fg_mers, and the result is handed to
	select_mers(), which writes the ranking to stdout.

	Returns None, so sys.exit(main()) ends the process with status 0.
	Raises IOError/OSError if a file cannot be opened, and ValueError if a
	non-blank line does not have exactly two fields or its count is not a
	number.
	"""
	# Both files are opened up front, so a missing file fails before any
	# parsing starts; the with-statement closes both handles on every path.
	with open(fg_count_fn, "r") as fg_count_fh, open(bg_count_fn, "r") as bg_count_fh:
		for mers, fh in ((fg_mers, fg_count_fh), (bg_mers, bg_count_fh)):
			for line in fh:
				fields = line.split()
				if not fields:
					# Tolerate blank lines, e.g. a trailing newline.
					continue
				mer, count = fields
				# Stored as float, which makes the ratio computed in
				# select_mers() a true division under Python 2.
				mers[mer] = float(count)

	# A foreground mer that never shows up in the background gets a count
	# of 1, which keeps the fg/bg ratio defined.
	for mer in fg_mers:
		bg_mers.setdefault(mer, 1)

	# Drop background-only mers.  Iterate over a snapshot of the keys so the
	# dict can be mutated safely on both Python 2 and Python 3.
	for mer in list(bg_mers):
		if mer not in fg_mers:
			del bg_mers[mer]

	select_mers(fg_mers, bg_mers)

# Run main() only when executed as a script and hand its return value to
# sys.exit() as the process exit status.
if __name__ == "__main__":
	exit_status = main()
	sys.exit(exit_status)