more docs and add strand

author: Calvin Morrison <mutantturkey@gmail.com> 2014-04-08 13:43:45 -0400
committer: Calvin Morrison <mutantturkey@gmail.com> 2014-04-08 13:43:45 -0400
commit: 1b49a2d276a62546e4e9522e26228265142066a3 (patch)
tree: 96abe4dfb6ae106ebd5bdaae1cb46b017b7a5118
parent: 927b681691b293afbb798a65d4340446e7f6fb6c (diff)
2 files changed, 31 insertions, 12 deletions
diff --git a/README.md b/README.md
index 7642341..caebe96 100644
--- a/README.md
+++ b/README.md
@@ -230,11 +230,17 @@ You need to use **valid** python code.
 
 ## Filters
 
-There are several filters that our mers go through, to eliminate ones that won't fit our needs. They are all configurable via the tunable parameters. If you look in a output directory, you'll see a folder called "passes-filter". This contains a file for each of the different steps in the pipeline, and the contents of each file is what 'passes' that filter.
+There are several filters that our mers go through, to eliminate ones that
+won't fit our needs. They are all configurable via the tunable parameters. If
+you look in an output directory, you'll see a folder called "passes-filter".
+This contains a file for each of the different steps in the pipeline, and the
+contents of each file are what 'passes' that filter.
 
-For example, if you ignored the mer 'AAAAA', then in passes-filter/1-$foreground-ignore-mers there would be no line containing that.
+For example, if you ignored the mer 'AAAAA', then in
+passes-filter/1-$foreground-ignore-mers there would be no line containing that.
 
-The filter system works like a big pipe, whatever gets filtered out won't make it to the next step. the order is like this
+The filter system works like a big pipe: whatever gets filtered out won't make
+it to the next step. The order is like this:
 
 
     All mers -> ignore_mers -> ignore_all_mers -> average_binding -> non_melting -> consecutive_binding
@@ -272,3 +278,15 @@ background count, and the mer selectivity value. (higher is better)
 score mers outputs a tab delimited file with 6 columns:
 
     nb_primers  Combination  Score  FG_mean_dist  FG_stdev_dist  BG_ratio
+
+## Post Processing
+
+To get a more detailed look at each scored combination we provide the
+output\_full\_genome.py script. This script will output all of the points in a
+selected set along with some metadata, including position, what sequence it is in,
+what strand it is on, and what mer it is.
+
+    output_full_genome.py -f fg.fa -s fg.fa_bg.fa/run_12/top-scores -n 15 -o sets
+
+This will output one file for each of the top 15 sets in top-scores, in the
+folder sets.
diff --git a/src/output_full_genome.py b/src/output_full_genome.py
index e55136a..eb12a82 100755
--- a/src/output_full_genome.py
+++ b/src/output_full_genome.py
@@ -72,15 +72,15 @@ def populate_locations(selected_mers, mer_dic, input_fn, length):
 
 	cmds = []
 	# strip file of header and delete newlines
-	cmds.append(["grep -v '^>' " + input_fn  +  " | tr -d '\\n' | strstream ", False])
+	cmds.append(["grep -v '^>' " + input_fn  +  " | tr -d '\\n' | strstream ", False, 5])
 	# reverse file, strip and delete newlines
 	cmds.append(["tac " + input_fn + \
 							"| rev " \
 							"| grep -v '>$' " \
 							"| tr -d '\\n' " \
-							"| tr [ACGT] [TGCA] | strstream ", True])
+							"| tr [ACGT] [TGCA] | strstream ", True, 3])
 	
-	for (cmd, reverse) in cmds:
+	for (cmd, reverse, strand) in cmds:
 		if(debug):
 			print(cmd)
 		_, merlist_fn = tempfile.mkstemp()
@@ -98,12 +98,11 @@ def populate_locations(selected_mers, mer_dic, input_fn, length):
 		if reverse:
 			for line in strstream.stdout:
 				(mer, pos) = line.split(" ")
-				pos = length - int(pos)
-				mer_dic[selected_mers[int(mer)]].append(pos)
+				mer_dic[selected_mers[int(mer)]].append([pos, strand])
 		else:
 			for line in strstream.stdout:
 				(mer, pos) = line.split(" ")
-				mer_dic[selected_mers[int(mer)]].append(int(pos))
+				mer_dic[selected_mers[int(mer)]].append([int(pos), strand])
 
 		if strstream.wait() is not 0:
 			print "executing", cmd, "failed"
@@ -127,12 +126,13 @@ def main():
 		parser.error(args.output_directory + "must point to a directory")
 	elif not os.path.isdir(args.output_directory):
 		os.mkdir(args.output_directory)
-
 	score_fh = open(args.scores, "r")
 
 	global seq_ends
 	seq_ends = load_end_points(args.fasta)
 
+	length = get_length(args.fasta)
+
 	nb_done = 0;
 	for line in score_fh:
 		# skip headers
@@ -153,15 +153,16 @@ def main():
 				new_populate.append(mer)
 		
 		if len(new_populate) is not 0:
-			populate_locations(new_populate, mers, args.fasta, get_length(args.fasta))
+			populate_locations(new_populate, mers, args.fasta, length)
 
 		pts = []
 		for mer in combination:
 			for pt in mers[mer]:
-				pts.append([pt, mer, get_sequence(pt)])
+				pts.append([pt[0], pt[1], mer, get_sequence(pt[0])])
 		
 		pts = sorted(pts, key = lambda row: row[0])
 			
+		fh.write("pt\tstrand\tmer\tsequence\n")
 		for pt in pts:
 			fh.write('\t'.join(str(x) for x in pt) + '\n')
author	Calvin Morrison <mutantturkey@gmail.com>	2014-04-08 13:43:45 -0400
committer	Calvin Morrison <mutantturkey@gmail.com>	2014-04-08 13:43:45 -0400
commit	1b49a2d276a62546e4e9522e26228265142066a3 (patch)
tree	96abe4dfb6ae106ebd5bdaae1cb46b017b7a5118
parent	927b681691b293afbb798a65d4340446e7f6fb6c (diff)