about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Calvin Morrison <mutantturkey@gmail.com> 2014-01-22 13:31:04 -0500
committer: Calvin Morrison <mutantturkey@gmail.com> 2014-01-22 13:31:04 -0500
commit: acc1534c45a33b37368cf82dec3d6eb6fedb9442 (patch)
tree: 1ba49527b4be4602a738b3f4fd08073cf239894a
parent: 502de9c77d4a3ff2718f734a90562cc2c5d0d150 (diff)
split
-rwxr-xr-x  score_mers.py   21
-rwxr-xr-x  select_mers.py  17
2 files changed, 15 insertions, 23 deletions
diff --git a/score_mers.py b/score_mers.py
index 3409fc0..0a73cfb 100755
--- a/score_mers.py
+++ b/score_mers.py
@@ -10,11 +10,11 @@ import pdb
fg_mers = {}
bg_mers = {}
-if(len(sys.argv) == 4):
+if(len(sys.argv) == 5):
selectivity_fn = sys.argv[1]
- fg_fasta_fn = sys.argv[2]
- bg_fasta_fn = sys.argv[3]
- output_file = sys.argv[4]
+ fg_fasta_fn = sys.argv[2]
+ bg_fasta_fn = sys.argv[3]
+ output_file = sys.argv[4]
else:
print "please specify your inputs"
print "ex: select_mers.py fg_counts_file fg_fasta_file bg_counts_file bg_fasta_file output_file"
@@ -139,17 +139,19 @@ def pop_bg(mer):
def main():
import time
selected = []
- selectivty_fh = open(selectivity_fn, "r")
+ selectivity_fh = open(selectivity_fn, "r")
# get our genome length
fg_genome_length = os.path.getsize(fg_fasta_fn)
bg_genome_length = os.path.getsize(bg_fasta_fn)
- for row in selectivity_fn:
+ for row in selectivity_fh:
(mer, fg_count, bg_count, selectivity) = row.split()
fg_mers[mer] = Mer()
+ fg_mers[mer].pts = []
fg_mers[mer].count = fg_count
bg_mers[mer] = Mer()
+ bg_mers[mer].pts = []
bg_mers[mer].count = bg_count
selected.append([mer, selectivity])
@@ -160,6 +162,7 @@ def main():
# else:
# selected = select_mers(fg_mers, bg_mers, max_select)
selected = selected[-100:]
+ selected_mers = [row[0] for row in selected]
pdb.set_trace()
# print "searching through combinations of"
# print selected
@@ -167,10 +170,10 @@ def main():
print "Populating foreground locations"
- map(pop_fg, selected)
- map(pop_bg, selected)
+ map(pop_fg, selected_mers)
+ map(pop_bg, selected_mers)
- scores = score_mers(selected)
+ scores = score_mers(selected_mers)
print "fg_genome_length", fg_genome_length
print "bg_genome_length", bg_genome_length
diff --git a/select_mers.py b/select_mers.py
index a4657d4..21306cc 100755
--- a/select_mers.py
+++ b/select_mers.py
@@ -33,13 +33,13 @@ def select_mers(fg_mers, bg_mers):
# populate our bg_arr and fg_arr as well as our mer arr.
for mer in fg_mers.keys():
mers.append(mer);
- bg_arr.append(bg_mers[mer] + 1);
+ bg_arr.append(bg_mers.get(mer, 1));
fg_arr.append(fg_mers[mer]);
fg_arr = np.array(fg_arr, dtype='f');
bg_arr = np.array(bg_arr, dtype='f');
- selectivity = (fg_arr/fg_genome_length) / (bg_arr/bg_genome_length)
+ selectivity = (fg_arr / bg_arr)
arr = [(mers[i], fg_arr[i], bg_arr[i], selectivity[i]) for i in range(len(mers))]
@@ -55,40 +55,29 @@ def select_mers(fg_mers, bg_mers):
def main():
-
fg_count_fh = open(fg_count_fn, "r")
bg_count_fh = open(bg_count_fn, "r")
# copy in our fg_mers and counts
- print "populating our mers dictionary"
for mers,fh in [(fg_mers, fg_count_fh), (bg_mers, bg_count_fh)]:
for line in fh:
(mer, count) = line.split()
mers[mer] = int(count)
if min_mer_count >= 1:
- print "removing that are less frequent than: ", min_mer_count
for mer in fg_mers.keys():
if(fg_mers[mer] < min_mer_count):
del fg_mers[mer]
if mer in bg_mers:
del bg_mers[mer]
- print "removing useless mers from the background"
for mer in bg_mers.keys():
if mer not in fg_mers:
del bg_mers[mer]
- print "adding empty mers to the background"
- for mer in fg_mers:
- if mer not in bg_mers:
- bg_mers[mer] = 0
-
- print fg_genome_length
- print bg_genome_length
selected = select_mers(fg_mers, bg_mers)
for row in selected:
- print row[0] +"\t"+str(row[1]) + "\t" + str(row[2]) + str(row[3])
+ print row[0] +"\t"+str("%d" % row[1]) + "\t" + str("%d" % row[2]) + "\t" + str("%.5f" % row[3])
if __name__ == "__main__":
sys.exit(main())