From acc1534c45a33b37368cf82dec3d6eb6fedb9442 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Wed, 22 Jan 2014 13:31:04 -0500 Subject: split --- score_mers.py | 21 ++++++++++++--------- select_mers.py | 17 +++-------------- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/score_mers.py b/score_mers.py index 3409fc0..0a73cfb 100755 --- a/score_mers.py +++ b/score_mers.py @@ -10,11 +10,11 @@ import pdb fg_mers = {} bg_mers = {} -if(len(sys.argv) == 4): +if(len(sys.argv) == 5): selectivity_fn = sys.argv[1] - fg_fasta_fn = sys.argv[2] - bg_fasta_fn = sys.argv[3] - output_file = sys.argv[4] + fg_fasta_fn = sys.argv[2] + bg_fasta_fn = sys.argv[3] + output_file = sys.argv[4] else: print "please specify your inputs" print "ex: select_mers.py fg_counts_file fg_fasta_file bg_counts_file bg_fasta_file output_file" @@ -139,17 +139,19 @@ def pop_bg(mer): def main(): import time selected = [] - selectivty_fh = open(selectivity_fn, "r") + selectivity_fh = open(selectivity_fn, "r") # get our genome length fg_genome_length = os.path.getsize(fg_fasta_fn) bg_genome_length = os.path.getsize(bg_fasta_fn) - for row in selectivity_fn: + for row in selectivity_fh: (mer, fg_count, bg_count, selectivity) = row.split() fg_mers[mer] = Mer() + fg_mers[mer].pts = [] fg_mers[mer].count = fg_count bg_mers[mer] = Mer() + bg_mers[mer].pts = [] bg_mers[mer].count = bg_count selected.append([mer, selectivity]) @@ -160,6 +162,7 @@ def main(): # else: # selected = select_mers(fg_mers, bg_mers, max_select) selected = selected[-100:] + selected_mers = [row[0] for row in selected] pdb.set_trace() # print "searching through combinations of" # print selected @@ -167,10 +170,10 @@ def main(): print "Populating foreground locations" - map(pop_fg, selected) - map(pop_bg, selected) + map(pop_fg, selected_mers) + map(pop_bg, selected_mers) - scores = score_mers(selected) + scores = score_mers(selected_mers) print "fg_genome_length", fg_genome_length print "bg_genome_length", bg_genome_length diff --git a/select_mers.py b/select_mers.py index a4657d4..21306cc 100755 --- a/select_mers.py +++ b/select_mers.py @@ -33,13 +33,13 @@ def select_mers(fg_mers, bg_mers): # populate our bg_arr and fg_arr as well as our mer arr. for mer in fg_mers.keys(): mers.append(mer); - bg_arr.append(bg_mers[mer] + 1); + bg_arr.append(bg_mers.get(mer, 1)); fg_arr.append(fg_mers[mer]); fg_arr = np.array(fg_arr, dtype='f'); bg_arr = np.array(bg_arr, dtype='f'); - selectivity = (fg_arr/fg_genome_length) / (bg_arr/bg_genome_length) + selectivity = (fg_arr / bg_arr) arr = [(mers[i], fg_arr[i], bg_arr[i], selectivity[i]) for i in range(len(mers))] @@ -55,40 +55,29 @@ def select_mers(fg_mers, bg_mers): def main(): - fg_count_fh = open(fg_count_fn, "r") bg_count_fh = open(bg_count_fn, "r") # copy in our fg_mers and counts - print "populating our mers dictionary" for mers,fh in [(fg_mers, fg_count_fh), (bg_mers, bg_count_fh)]: for line in fh: (mer, count) = line.split() mers[mer] = int(count) if min_mer_count >= 1: - print "removing that are less frequent than: ", min_mer_count for mer in fg_mers.keys(): if(fg_mers[mer] < min_mer_count): del fg_mers[mer] if mer in bg_mers: del bg_mers[mer] - print "removing useless mers from the background" for mer in bg_mers.keys(): if mer not in fg_mers: del bg_mers[mer] - print "adding empty mers to the background" - for mer in fg_mers: - if mer not in bg_mers: - bg_mers[mer] = 0 - - print fg_genome_length - print bg_genome_length selected = select_mers(fg_mers, bg_mers) for row in selected: - print row[0] +"\t"+str(row[1]) + "\t" + str(row[2]) + str(row[3]) + print row[0] +"\t"+str("%d" % row[1]) + "\t" + str("%d" % row[2]) + "\t" + str("%.5f" % row[3]) if __name__ == "__main__": sys.exit(main()) -- cgit v1.2.3