aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2014-04-08 13:43:45 -0400
committerCalvin Morrison <mutantturkey@gmail.com>2014-04-08 13:43:45 -0400
commit1b49a2d276a62546e4e9522e26228265142066a3 (patch)
tree96abe4dfb6ae106ebd5bdaae1cb46b017b7a5118
parent927b681691b293afbb798a65d4340446e7f6fb6c (diff)
more docs and add strand
-rw-r--r--README.md24
-rwxr-xr-xsrc/output_full_genome.py19
2 files changed, 31 insertions, 12 deletions
diff --git a/README.md b/README.md
index 7642341..caebe96 100644
--- a/README.md
+++ b/README.md
@@ -230,11 +230,17 @@ You need to use **valid** python code.
## Filters
-There are several filters that our mers go through, to eliminate ones that won't fit our needs. They are all configurable via the tunable parameters. If you look in a output directory, you'll see a folder called "passes-filter". This contains a file for each of the different steps in the pipeline, and the contents of each file is what 'passes' that filter.
+There are several filters that our mers go through, to eliminate ones that
+won't fit our needs. They are all configurable via the tunable parameters. If
+you look in an output directory, you'll see a folder called "passes-filter".
+This contains a file for each of the different steps in the pipeline, and the
+contents of each file is what 'passes' that filter.
-For example, if you ignored the mer 'AAAAA', then in passes-filter/1-$foreground-ignore-mers there would be no line containing that.
+For example, if you ignored the mer 'AAAAA', then in
+passes-filter/1-$foreground-ignore-mers there would be no line containing that.
-The filter system works like a big pipe, whatever gets filtered out won't make it to the next step. the order is like this
+The filter system works like a big pipe: whatever gets filtered out won't make
+it to the next step. The order is like this:
All mers -> ignore_mers -> ignore_all_mers -> average_binding -> non_melting -> consecutive_binding
@@ -272,3 +278,15 @@ background count, and the mer selectivity value. (higher is better)
score mers outputs a tab delimited file with 6 columns:
nb_primers Combination Score FG_mean_dist FG_stdev_dist BG_ratio
+
+## Post Processing
+
+To get a more detailed look at each scored combination we provide the
+output\_full\_genome.py script. This script will output all of the points in a
+selected set along with some metadata, including position, what sequence it is in,
+what strand and what mer it is.
+
+ output_full_genome.py -f fg.fa -s fg.fa_bg.fa/run_12/top-scores -n 15 -o sets
+
+This will output one file for each of the top 15 sets in top-scores, in the
+folder sets.
diff --git a/src/output_full_genome.py b/src/output_full_genome.py
index e55136a..eb12a82 100755
--- a/src/output_full_genome.py
+++ b/src/output_full_genome.py
@@ -72,15 +72,15 @@ def populate_locations(selected_mers, mer_dic, input_fn, length):
cmds = []
# strip file of header and delete newlines
- cmds.append(["grep -v '^>' " + input_fn + " | tr -d '\\n' | strstream ", False])
+ cmds.append(["grep -v '^>' " + input_fn + " | tr -d '\\n' | strstream ", False, 5])
# reverse file, strip and delete newlines
cmds.append(["tac " + input_fn + \
"| rev " \
"| grep -v '>$' " \
"| tr -d '\\n' " \
- "| tr [ACGT] [TGCA] | strstream ", True])
+ "| tr [ACGT] [TGCA] | strstream ", True, 3])
- for (cmd, reverse) in cmds:
+ for (cmd, reverse, strand) in cmds:
if(debug):
print(cmd)
_, merlist_fn = tempfile.mkstemp()
@@ -98,12 +98,11 @@ def populate_locations(selected_mers, mer_dic, input_fn, length):
if reverse:
for line in strstream.stdout:
(mer, pos) = line.split(" ")
- pos = length - int(pos)
- mer_dic[selected_mers[int(mer)]].append(pos)
+ mer_dic[selected_mers[int(mer)]].append([pos, strand])
else:
for line in strstream.stdout:
(mer, pos) = line.split(" ")
- mer_dic[selected_mers[int(mer)]].append(int(pos))
+ mer_dic[selected_mers[int(mer)]].append([int(pos), strand])
if strstream.wait() is not 0:
print "executing", cmd, "failed"
@@ -127,12 +126,13 @@ def main():
parser.error(args.output_directory + "must point to a directory")
elif not os.path.isdir(args.output_directory):
os.mkdir(args.output_directory)
-
score_fh = open(args.scores, "r")
global seq_ends
seq_ends = load_end_points(args.fasta)
+ length = get_length(args.fasta)
+
nb_done = 0;
for line in score_fh:
# skip headers
@@ -153,15 +153,16 @@ def main():
new_populate.append(mer)
if len(new_populate) is not 0:
- populate_locations(new_populate, mers, args.fasta, get_length(args.fasta))
+ populate_locations(new_populate, mers, args.fasta, length)
pts = []
for mer in combination:
for pt in mers[mer]:
- pts.append([pt, mer, get_sequence(pt)])
+ pts.append([pt[0], pt[1], mer, get_sequence(pt[0])])
pts = sorted(pts, key = lambda row: row[0])
+ fh.write("pt\tstrand\tmer\tsequence\n")
for pt in pts:
fh.write('\t'.join(str(x) for x in pt) + '\n')