diff options
Diffstat (limited to 'src')
| -rwxr-xr-x | src/score_mers.py | 8 | ||||
| -rwxr-xr-x | src/select_mers.py | 70 | 
2 files changed, 38 insertions, 40 deletions
| diff --git a/src/score_mers.py b/src/score_mers.py index 9d90ae2..0ddeb91 100755 --- a/src/score_mers.py +++ b/src/score_mers.py @@ -487,6 +487,8 @@ def main():  		# load it into our fg and bg counts into their dictionaries  		for mer in selected_mers: +			if mer.startswith("#"): +				continue  			split_mer = mer.split()  			fg_mers[split_mer[0]] = []  			bg_mers[split_mer[0]] = int(split_mer[2]) @@ -494,7 +496,7 @@ def main():  		selected_mers = [x.split()[0] for x in selected_mers]  		if len(selected_mers) is 0: -			print "no merss found." +			print "no mers found."  			exit(1)  		# we already have our background counts @@ -511,6 +513,8 @@ def main():  		combination_fh = open(args.combination_file, "r")  		for line in combination_fh: +			if line.startswith("#"): +				continue  			mers = line.split()  			combinations.append(mers)  			for mer in mers: @@ -529,6 +533,8 @@ def main():  		mer_fh = open(args.mer_file, "r")  		for mer in mer_fh: +			if mer.startswith("#"): +				continue  			mer = mer.strip()  			if(len(mer.split()) > 1):  				print "skipping line:", mer, "each line should contain only one mer" diff --git a/src/select_mers.py b/src/select_mers.py index 5f42717..ceec4a0 100755 --- a/src/select_mers.py +++ b/src/select_mers.py @@ -2,58 +2,50 @@  import sys  import os -fg_mers = {} -bg_mers = {} -  fg_weight = float(os.environ.get("fg_weight", 0))  max_check = int(os.environ.get("max_check", 0)) -if(len(sys.argv) == 3): -	fg_count_fn =  sys.argv[1] -	bg_count_fn =  sys.argv[2] -else: -	print len(sys.argv) -	sys.stderr.write("please specify your inputs\n") -	sys.stderr.write("ex: select_mers.py fg_counts bg_count\n") -	exit(1) - - -# select mers based on our 'selectivity' measure. (count in fg) / (count in bg) -def select_mers(fg_mers, bg_mers): - -	# populate our bg_arr and fg_arr as well as our mer arr. - -	score = {} - -	for mer in fg_mers.keys(): -		score[mer] = (fg_mers[mer] / bg_mers[mer]) * (fg_mers[mer]**fg_weight) -	sorted_scored_mers =	sorted(score, key=score.get) - -	for mer in sorted_scored_mers:  -		print mer, int(fg_mers[mer]), int(bg_mers[mer]), (fg_mers[mer] / bg_mers[mer]) * (fg_mers[mer]**fg_weight) +def main(): +	if(len(sys.argv) == 3): +		fg_count_fn =  sys.argv[1] +		bg_count_fn =  sys.argv[2] +	else: +		sys.stderr.write("please specify your inputs\n") +		sys.stderr.write("ex: select_mers.py fg_counts bg_count\n") +		exit(1) -def main(): +	# mers dictionary: +	#  +	# Key: mer name, eg AAAACT  +	# Value: fg_mer_count, bg_mer_count +	mers = {}  	fg_count_fh = open(fg_count_fn, "r")  	bg_count_fh = open(bg_count_fn, "r") -	# copy in our fg_mers and counts -	for mers,fh in [(fg_mers, fg_count_fh), (bg_mers, bg_count_fh)]: -		for line in fh: -			(mer, count) = line.split() -			mers[mer] = float(count) +	# copy in our foreground mers and counts into mers dictionary +	for line in fg_count_fh: +		(mer, count) = line.split() +		mers[mer] = [float(count), 1] -	for mer in fg_mers.keys(): -		if mer not in bg_mers: -			bg_mers[mer] = 1 +	 +	for line in bg_count_fh: +		(mer, count) = line.split() +		if mer in mers: +			mers[mer][1] = float(count) + +	score = [] + +	for mer in mers: +		score.append([mer, (mers[mer][0] / mers[mer][1]) * (mers[mer][0]**fg_weight)]) -	for mer in bg_mers.keys(): -		if mer not in fg_mers: -			del bg_mers[mer] +	sorted_scored_mers = sorted(score, key=lambda x: x[1]) -	selected = select_mers(fg_mers, bg_mers) +	sys.stdout.write('#MERS\tFG_COUNT\tBG_COUNT\tSCORE\n') +	for scores in sorted_scored_mers:  +		sys.stdout.write(scores[0] + '\t' + str(int(mers[scores[0]][0])) + '\t' +  str(int(mers[scores[0]][1])) + '\t' +  str(scores[1])+ '\n')  if __name__ == "__main__":  	sys.exit(main()) | 
