diff options
| -rwxr-xr-x | SelectiveWholeGenomeAmplification | 10 | ||||
| -rwxr-xr-x | src/filter_max_bg_mers.py | 39 | 
2 files changed, 48 insertions, 1 deletions
diff --git a/SelectiveWholeGenomeAmplification b/SelectiveWholeGenomeAmplification
index 7a3d46b..8304387 100755
--- a/SelectiveWholeGenomeAmplification
+++ b/SelectiveWholeGenomeAmplification
@@ -182,6 +182,9 @@ done
 
 # bg_ratio
 : ${min_bg_ratio=0}
+# max_bg_mers
+: ${max_bg_mers=-1}
+
 export ignore_mers
 export min_mer_range
 export max_mer_range
@@ -246,6 +249,7 @@ ignore_all_mers_counts="$output_directory/$current_run/passes-filter/2-$fg_basen
 average_binding="$output_directory/$current_run/passes-filter/3-$fg_basename-average-binding"
 non_melting="$output_directory/$current_run/passes-filter/4-$fg_basename-non-melting"
 consecutive_binding="$output_directory/$current_run/passes-filter/5-$fg_basename-consecutive-binding"
+bg_filtered="$output_directory/$current_run/passes-filter/6-$fg_basename-bg-filtered"
 
 # Make our output directory
 if [[ ! -d "$output_directory" ]]; then
@@ -342,7 +346,11 @@ if [[ -n "$step_filters" ]] || [[ -n "$all" ]]; then
 
 	filter_max_consecutive_binding.py "$max_consecutive_binding" < "$non_melting" > "$consecutive_binding" || exit 1
 	check_non_empty "$consecutive_binding" "consecutive binding"
-	cp $consecutive_binding $final_fg_counts
+	echo "  filtering mers that have more bg mers than allowed ($max_bg_mers)"
+	filter_max_bg_mers.py "$max_bg_mers" "$bg_counts" < "$consecutive_binding" > "$bg_filtered" || exit 1
+	check_non_empty "$bg_filtered" "background filtered"
+
+	cp "$bg_filtered" "$final_fg_counts"
 fi
 
 if [[ -n "$step_select" ]] || [[ -n "$all" ]]; then
diff --git a/src/filter_max_bg_mers.py b/src/filter_max_bg_mers.py
new file mode 100755
index 0000000..8ea38a4
--- /dev/null
+++ b/src/filter_max_bg_mers.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python2.7
+"""Drop foreground mers that occur too often in the background.
+
+Usage: filter_max_bg_mers.py CUTOFF BG_COUNTS < fg_counts > filtered
+"""
+import sys, os
+
+
+def main():
+	"""Filter "mer count" lines read from stdin by background count.
+
+	sys.argv[1] is the integer cutoff and sys.argv[2] the path of the
+	background counts file, which uses the same "mer count" line format.
+	A mer is written to stdout as "mer<TAB>foreground count" when it is
+	missing from the background file or its background count is <= cutoff.
+	A negative cutoff turns the filter off and copies stdin to stdout.
+
+	Exits with status 1 if either argument is missing; raises ValueError
+	on a non-integer cutoff or a malformed counts line.
+	"""
+	# Both arguments are required. The check used to be "< 2", which let a
+	# missing bg_counts path through to an IndexError on sys.argv[2].
+	if len(sys.argv) < 3:
+		# stdout is redirected into the filtered counts file, so the message
+		# goes to stderr, and the non-zero status lets "|| exit 1" fire.
+		sys.stderr.write("cutoff and bg_counts is expected as an argument\n")
+		sys.exit(1)
+
+	cutoff = int(sys.argv[1])
+	bg_count_fn = sys.argv[2]
+
+	# A negative cutoff means "do not filter" (-1 is the default). Zero
+	# cannot be the off switch because "no background hits allowed" is a
+	# valid request.
+	if cutoff < 0:
+		for line in sys.stdin:
+			sys.stdout.write(line)
+		return
+
+	# mer -> [foreground count, background count]; a background count of -1
+	# marks a mer that never shows up in the background file.
+	mers = {}
+	# First-seen order of the mers, so output order is deterministic rather
+	# than following dict iteration order.
+	order = []
+	for line in sys.stdin:
+		(mer, count) = line.split()
+		if mer not in mers:
+			order.append(mer)
+		mers[mer] = [int(count), -1]
+
+	# "with" closes the background file even if a line fails to parse.
+	with open(bg_count_fn, "r") as bg_count_fh:
+		for line in bg_count_fh:
+			(mer, count) = line.split()
+			if mer in mers:
+				mers[mer][1] = int(count)
+
+	for mer in order:
+		(fg_count, bg_count) = mers[mer]
+		if bg_count == -1 or bg_count <= cutoff:
+			sys.stdout.write(mer + '\t' + str(fg_count) + '\n')
+
+
+if __name__ == "__main__":
+	sys.exit(main())
