aboutsummaryrefslogtreecommitdiff
path: root/src/output_full_genome.py
diff options
context:
space:
mode:
author Calvin Morrison <mutantturkey@gmail.com> 2014-04-08 13:43:45 -0400
committer Calvin Morrison <mutantturkey@gmail.com> 2014-04-08 13:43:45 -0400
commit 1b49a2d276a62546e4e9522e26228265142066a3 (patch)
tree 96abe4dfb6ae106ebd5bdaae1cb46b017b7a5118 /src/output_full_genome.py
parent 927b681691b293afbb798a65d4340446e7f6fb6c (diff)
more docs and add strand
Diffstat (limited to 'src/output_full_genome.py')
-rwxr-xr-x src/output_full_genome.py 19
1 files changed, 10 insertions, 9 deletions
diff --git a/src/output_full_genome.py b/src/output_full_genome.py
index e55136a..eb12a82 100755
--- a/src/output_full_genome.py
+++ b/src/output_full_genome.py
@@ -72,15 +72,15 @@ def populate_locations(selected_mers, mer_dic, input_fn, length):
cmds = []
# strip file of header and delete newlines
- cmds.append(["grep -v '^>' " + input_fn + " | tr -d '\\n' | strstream ", False])
+ cmds.append(["grep -v '^>' " + input_fn + " | tr -d '\\n' | strstream ", False, 5])
# reverse file, strip and delete newlines
cmds.append(["tac " + input_fn + \
"| rev " \
"| grep -v '>$' " \
"| tr -d '\\n' " \
- "| tr [ACGT] [TGCA] | strstream ", True])
+ "| tr [ACGT] [TGCA] | strstream ", True, 3])
- for (cmd, reverse) in cmds:
+ for (cmd, reverse, strand) in cmds:
if(debug):
print(cmd)
_, merlist_fn = tempfile.mkstemp()
@@ -98,12 +98,11 @@ def populate_locations(selected_mers, mer_dic, input_fn, length):
if reverse:
for line in strstream.stdout:
(mer, pos) = line.split(" ")
- pos = length - int(pos)
- mer_dic[selected_mers[int(mer)]].append(pos)
+ mer_dic[selected_mers[int(mer)]].append([pos, strand])
else:
for line in strstream.stdout:
(mer, pos) = line.split(" ")
- mer_dic[selected_mers[int(mer)]].append(int(pos))
+ mer_dic[selected_mers[int(mer)]].append([int(pos), strand])
if strstream.wait() is not 0:
print "executing", cmd, "failed"
@@ -127,12 +126,13 @@ def main():
parser.error(args.output_directory + "must point to a directory")
elif not os.path.isdir(args.output_directory):
os.mkdir(args.output_directory)
-
score_fh = open(args.scores, "r")
global seq_ends
seq_ends = load_end_points(args.fasta)
+ length = get_length(args.fasta)
+
nb_done = 0;
for line in score_fh:
# skip headers
@@ -153,15 +153,16 @@ def main():
new_populate.append(mer)
if len(new_populate) is not 0:
- populate_locations(new_populate, mers, args.fasta, get_length(args.fasta))
+ populate_locations(new_populate, mers, args.fasta, length)
pts = []
for mer in combination:
for pt in mers[mer]:
- pts.append([pt, mer, get_sequence(pt)])
+ pts.append([pt[0], pt[1], mer, get_sequence(pt[0])])
pts = sorted(pts, key = lambda row: row[0])
+ fh.write("pt\tstrand\tmer\tsequence\n")
for pt in pts:
fh.write('\t'.join(str(x) for x in pt) + '\n')