aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Calvin Morrison <mutantturkey@gmail.com> 2014-08-13 16:59:37 -0400
committer Calvin Morrison <mutantturkey@gmail.com> 2014-08-13 16:59:37 -0400
commit d4ec5459d0fc141d20a4bbbf0a7dc40742e0372f (patch)
tree 17d8797427b2534b538b00ed0660ac1781cd3c58
parent 972add88040cac2f0a969e99c533051be7220ccc (diff)
add filter max bg mers
-rwxr-xr-x SelectiveWholeGenomeAmplification 10
-rwxr-xr-x src/filter_max_bg_mers.py 39
2 files changed, 48 insertions, 1 deletions
diff --git a/SelectiveWholeGenomeAmplification b/SelectiveWholeGenomeAmplification
index 7a3d46b..8304387 100755
--- a/SelectiveWholeGenomeAmplification
+++ b/SelectiveWholeGenomeAmplification
@@ -182,6 +182,9 @@ done
# bg_ratio
: ${min_bg_ratio=0}
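+# max_bg_mers: highest background count a mer may have and still pass; a negative value (the default) disables the filter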
+: ${max_bg_mers=-1}
+
export ignore_mers
export min_mer_range
export max_mer_range
@@ -246,6 +249,7 @@ ignore_all_mers_counts="$output_directory/$current_run/passes-filter/2-$fg_basen
average_binding="$output_directory/$current_run/passes-filter/3-$fg_basename-average-binding"
non_melting="$output_directory/$current_run/passes-filter/4-$fg_basename-non-melting"
consecutive_binding="$output_directory/$current_run/passes-filter/5-$fg_basename-consecutive-binding"
+bg_filtered="$output_directory/$current_run/passes-filter/6-$fg_basename-bg-filtered"
# Make our output directory
if [[ ! -d "$output_directory" ]]; then
@@ -342,7 +346,11 @@ if [[ -n "$step_filters" ]] || [[ -n "$all" ]]; then
filter_max_consecutive_binding.py "$max_consecutive_binding" < "$non_melting" > "$consecutive_binding" || exit 1
check_non_empty "$consecutive_binding" "consecutive binding"
- cp $consecutive_binding $final_fg_counts
+ echo " filtering mers that have more bg mers than allowed ($max_bg_mers)"
+ filter_max_bg_mers.py "$max_bg_mers" "$bg_counts" < "$consecutive_binding" > "$bg_filtered" || exit 1
+ check_non_empty "$bg_filtered" "background filtered"
+ # the last filter's output becomes the final fg counts; abort if the copy fails
+ cp "$bg_filtered" "$final_fg_counts" || exit 1
fi
if [[ -n "$step_select" ]] || [[ -n "$all" ]]; then
diff --git a/src/filter_max_bg_mers.py b/src/filter_max_bg_mers.py
new file mode 100755
index 0000000..8ea38a4
--- /dev/null
+++ b/src/filter_max_bg_mers.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python2.7
+"""Drop foreground mers whose background count exceeds CUTOFF (< 0: keep all).
+usage: filter_max_bg_mers.py CUTOFF BG_COUNTS < fg_counts > filtered
+Lines are "mer count" in, "mer<TAB>count" out; exits 1 on bad usage."""
+import sys, os
+from collections import OrderedDict
+
+def main():
+    if len(sys.argv) < 3:
+        # stdout carries the filtered mers, so report on stderr; the non-zero
+        # status lets the caller's `|| exit 1` see the failure
+        sys.stderr.write("cutoff and bg_counts is expected as an argument\n")
+        return 1
+    cutoff, bg_count_fn = int(sys.argv[1]), sys.argv[2]
+
+    # a negative cutoff turns the filter off (so -1 can be the default);
+    # 0 can't mean "off" because allowing zero bg hits is a valid use case
+    if cutoff < 0:
+        sys.stdout.writelines(sys.stdin)
+        return 0
+
+    # mer -> [fg count, bg count]; bg count stays -1 if the mer is not in bg.
+    # OrderedDict keeps the input order so the output is reproducible
+    mers = OrderedDict()
+    for line in sys.stdin:
+        (mer, count) = line.split()
+        mers[mer] = [int(count), -1]
+    with open(bg_count_fn, "r") as bg_count_fh:
+        for line in bg_count_fh:
+            (mer, count) = line.split()
+            if mer in mers:
+                mers[mer][1] = int(count)
+    for mer in mers:
+        if mers[mer][1] == -1 or mers[mer][1] <= cutoff:
+            sys.stdout.write(mer + '\t' + str(mers[mer][0]) + '\n')
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())