aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2014-04-14 15:17:12 -0400
committerCalvin Morrison <mutantturkey@gmail.com>2014-04-14 15:17:12 -0400
commitc5ddb534d0f06542c3818f9b17e2ab93ee981698 (patch)
tree3c42f5920837f066f6eeb9b6cf4d1677fdda4e97
parentfa62d2eff6905842a0dd84804f46a92f00228d77 (diff)
don't use strstream, use kmer_locations
-rwxr-xr-xsrc/output_full_genome.py65
-rwxr-xr-xsrc/score_mers.py61
2 files changed, 55 insertions, 71 deletions
diff --git a/src/output_full_genome.py b/src/output_full_genome.py
index eb12a82..5a0e3d5 100755
--- a/src/output_full_genome.py
+++ b/src/output_full_genome.py
@@ -58,58 +58,49 @@ def get_length(fn):
return length
-def populate_locations(selected_mers, mer_dic, input_fn, length):
+
+def size_iterator(mer_arr):
+ for mer_len in set([ len(it) for it in mer_arr]):
+ mers = filter(lambda x: len(x) == mer_len, mer_arr)
+ yield mers
+
+def populate_locations(mers, mer_dic, input_fn, length):
'''
- Run the strstreamone command, and parse in the integers that are output
+ Run the kmer_locations command, and parse in the integers that are output
by the command, and add it to mers[mer]
- strstreamone just prints the location of a string argv[1] in stdout.
-
- We also do the reverse compliment, using tac and tr piped together.
+ We also do the reverse compliment, so we subtract the length of the genome for it
'''
import tempfile
-
- cmds = []
- # strip file of header and delete newlines
- cmds.append(["grep -v '^>' " + input_fn + " | tr -d '\\n' | strstream ", False, 5])
- # reverse file, strip and delete newlines
- cmds.append(["tac " + input_fn + \
- "| rev " \
- "| grep -v '>$' " \
- "| tr -d '\\n' " \
- "| tr [ACGT] [TGCA] | strstream ", True, 3])
-
- for (cmd, reverse, strand) in cmds:
- if(debug):
- print(cmd)
+ # write our mers out to a fifi
+ for n_mers in size_iterator(mers):
_, merlist_fn = tempfile.mkstemp()
-
- # write our mers out to a fifi
merlist_fh = open(merlist_fn, 'w')
- for mer in selected_mers:
+ for mer in n_mers:
+ print mer
merlist_fh.write(mer + '\n')
merlist_fh.flush()
# add our merlist fn to our command
- cmd = cmd + " " + merlist_fn
-
- strstream = Popen(cmd, stdout=PIPE, shell=True)
- if reverse:
- for line in strstream.stdout:
- (mer, pos) = line.split(" ")
- mer_dic[selected_mers[int(mer)]].append([pos, strand])
- else:
- for line in strstream.stdout:
- (mer, pos) = line.split(" ")
- mer_dic[selected_mers[int(mer)]].append([int(pos), strand])
+ cmd = "kmer_locations -k " + str(len(n_mers[0])) + " -c -m \"" + merlist_fn + "\" -l -i \"" + input_fn + "\""
- if strstream.wait() is not 0:
- print "executing", cmd, "failed"
+ if(debug):
+ print "Executing", cmd
+
+ cmd_pipe = Popen(cmd, stdout=PIPE, shell=True)
+ for line in cmd_pipe.stdout:
+ line_s = line[:-1].split(" ")
+ if len(line_s) == 2:
+ mer_dic[line_s[0]].append([int(line_s[1]), 3])
+ elif len(line_s) == 3:
+ mer_dic[line_s[0]].append([length - int(line_s[1]), 5])
+
+ if cmd_pipe.wait() is not 0:
+ print "Executing", cmd, "failed"
+ exit()
merlist_fh.close()
-
-
def main():
parser = argparse.ArgumentParser(description="take a top-scores file, and \
diff --git a/src/score_mers.py b/src/score_mers.py
index e245f39..7538ed4 100755
--- a/src/score_mers.py
+++ b/src/score_mers.py
@@ -80,54 +80,46 @@ def get_max_consecutive_binding(mer1, mer2):
return max_bind
+def size_iterator(mer_arr):
+ for mer_len in set([ len(it) for it in mer_arr]):
+ mers = filter(lambda x: len(x) == mer_len, mer_arr)
+ yield mers
-def populate_locations(selected_mers, mer_dic, input_fn, length):
+def populate_locations(mers, mer_dic, input_fn, length):
'''
- Run the strstreamone command, and parse in the integers that are output
+ Run the kmer_locations command, and parse in the integers that are output
by the command, and add it to mers[mer]
- strstreamone just prints the location of a string argv[1] in stdout.
-
- We also do the reverse compliment, using tac and tr piped together.
+ We also do the reverse compliment, so we subtract the length of the genome for it
'''
import tempfile
-
- cmds = []
- # strip file of header and delete newlines
- cmds.append(["grep -v '^>' " + input_fn + " | tr -d '\\n' | strstream ", False])
- # reverse file, strip and delete newlines
- cmds.append(["tac " + input_fn + \
- "| rev " \
- "| grep -v '>$' " \
- "| tr -d '\\n' " \
- "| tr [ACGT] [TGCA] | strstream ", True])
-
- for (cmd, reverse) in cmds:
+ # write our mers out to a fifi
+ for n_mers in size_iterator(mers):
_, merlist_fn = tempfile.mkstemp()
-
- # write our mers out to a fifi
merlist_fh = open(merlist_fn, 'w')
- for mer in selected_mers:
+ for mer in n_mers:
+ print mer
merlist_fh.write(mer + '\n')
merlist_fh.flush()
# add our merlist fn to our command
- cmd = cmd + " " + merlist_fn
-
- strstream = Popen(cmd, stdout=PIPE, shell=True)
- if reverse:
- for line in strstream.stdout:
- (mer, pos) = line.split(" ")
- pos = length - int(pos)
- mer_dic[selected_mers[int(mer)]].append(pos)
- else:
- for line in strstream.stdout:
- (mer, pos) = line.split(" ")
- mer_dic[selected_mers[int(mer)]].append(int(pos))
+ cmd = "kmer_locations -k " + str(len(n_mers[0])) + " -c -m \"" + merlist_fn + "\" -l -i \"" + input_fn + "\""
+
+ if(debug):
+ print "Executing", cmd
+
+ cmd_pipe = Popen(cmd, stdout=PIPE, shell=True)
+ for line in cmd_pipe.stdout:
+ line_s = line[:-1].split(" ")
+ if len(line_s) == 2:
+ mer_dic[line_s[0]].append(int(line_s[1]))
+ elif len(line_s) == 3:
+ mer_dic[line_s[0]].append(length - int(line_s[1]))
- if strstream.wait() is not 0:
- print "executing", cmd, "failed"
+ if cmd_pipe.wait() is not 0:
+ print "Executing", cmd, "failed"
+ exit()
merlist_fh.close()
@@ -170,6 +162,7 @@ def get_length(fn):
if grep.wait() is not 0:
print "executing", cmd, "failed"
+ exit()
return length