#!/usr/bin/env bash
#
# Count k-mers in a foreground and a background genome, run the foreground
# counts through a chain of filters (average binding distance, melting
# temperature, consecutive binding), then score the surviving mers for
# selectivity against the background.
#
# Usage:
#   <script> FOREGROUND BACKGROUND
#   foreground=FOREGROUND background=BACKGROUND <script>
#
# Every tunable below is assigned with ${var=default}, so a value already
# present in the environment takes precedence over the default shown here.
#
# External commands expected on PATH: kmer_continuous_count,
# filter_average_binding.py, filter_melting_temperature.py,
# filter_max_consecutive_binding.py, select_mers.py, score_wrapper.sh.
# The in-place `sed -i` calls use the GNU sed form (no backup suffix).

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Genomes come from the environment if either variable is already set;
# positional arguments are consulted only when both are empty.
if [[ -z "$foreground" && -z "$background" ]]; then
  if (( $# < 2 )); then
    die "please supply two genomes, foreground and background"
  fi
  # ':=' (not '=') so that a set-but-empty variable is also filled in; the
  # guard above only established emptiness, not unset-ness.
  : "${foreground:=$1}"
  : "${background:=$2}"
fi

if [[ ! -f "$foreground" ]]; then
  die "Could not open $foreground"
fi

if [[ ! -f "$background" ]]; then
  die "Could not open $background"
fi

# Computed once; reused for every derived path below.
fg_name=$(basename -- "$foreground")
bg_name=$(basename -- "$background")

# output directory
: "${output_directory=${fg_name}_${bg_name}}"

# temp directory
: "${tmp_directory=$output_directory/.tmp}"

# directory to store our counts and sorted counts
: "${counts_directory=$tmp_directory}"

# range of mers, min and max
: "${min_mer_range=6}"
: "${max_mer_range=12}"

# max mer distance, the distance between two mers in our selected outputs
: "${max_mer_distance=5000}"

# min/maximum kmer melting point
: "${max_melting_temp=30}"
: "${min_melting_temp=0}"

# minimum average binding distance in the foreground
: "${min_foreground_binding_average=50000}"

# maximum mers to pick
: "${max_select=15}"

# maximum mers to check
: "${max_check=35}"

# mers to specifically IGNORE, space delimited
: "${ignore_mers=''}"

# maximum number of mers that are consecutively binding
: "${max_consecutive_binding=4}"

# NOTE(review): max_check and max_consecutive_binding are not exported.
# max_consecutive_binding is passed explicitly as an argument further down;
# max_check is not passed to anything in this script, so a child process only
# sees it if the caller exported it. Confirm whether score_wrapper.sh needs it.
export ignore_mers \
  min_mer_range \
  max_mer_range \
  max_select \
  min_foreground_binding_average \
  max_mer_distance \
  max_melting_temp \
  min_melting_temp

# Make our output, counts and temporary directories. -p tolerates directories
# that already exist and creates missing parents of user-supplied paths.
mkdir -p -- "$output_directory" "$counts_directory" "$tmp_directory" \
  || die "Could not create working directories under $output_directory"

# Each run gets its own sub-directory named after the epoch timestamp.
# (The original prefixed this with the never-defined variable $output_, which
# always expanded to nothing, so the resulting name is unchanged.)
current_run=$(date +%s)
run_directory=$output_directory/$current_run
mkdir -p -- "$run_directory" || die "Could not create $run_directory"

for fasta_file in "$foreground" "$background"; do

  counts=$counts_directory/$(basename -- "$fasta_file")

  echo "pre-processing $fasta_file"

  # The combined counts file is rebuilt from the per-k files on every run.
  rm -f -- "${counts}-counts"

  # run counts if they haven't been created
  for (( mer = min_mer_range; mer <= max_mer_range; mer++ )); do
    mer_counts=${counts}-counts-${mer}

    if [[ ! -e "$mer_counts" ]]; then
      echo "checking $mer mers for $fasta_file"
      # Write to a scratch name and rename on success, so that a failed or
      # interrupted count is never mistaken for a finished one on a later run.
      if ! kmer_continuous_count -c -i "$fasta_file" -k "$mer" -l -n \
        > "${mer_counts}.partial"; then
        rm -f -- "${mer_counts}.partial"
        die "kmer_continuous_count failed on $mer mers for $fasta_file"
      fi
      mv -- "${mer_counts}.partial" "$mer_counts" \
        || die "Could not write $mer_counts"
    else
      echo "$mer mers already done for $fasta_file (assuming $fasta_file didn't change)"
    fi

    cat -- "$mer_counts" >> "${counts}-counts"
  done

done

fg_counts=$counts_directory/${fg_name}-counts
bg_counts=$counts_directory/${bg_name}-counts

selected=$run_directory/selected-mers

# remove ignored mers
if [[ -n "$ignore_mers" ]]; then
  # ignore_mers is a space-delimited list, so word splitting is intentional
  # in the next two commands.
  # shellcheck disable=SC2086
  echo "removing ignored mers: " + $ignore_mers
  for mer in $ignore_mers; do
    # Delete every line whose first tab-separated field is exactly this mer.
    sed -i "/^${mer}\t/d" "$fg_counts"
    sed -i "/^${mer}\t/d" "$bg_counts"
  done
fi

# Record the parameters of this run as "name value" lines. cpus and
# min_mer_count are not assigned anywhere in this script; they are recorded
# as whatever the environment holds (blank when unset).
echo "outputing current run parameters"
for var in ignore_mers min_mer_range max_check cpus max_consecutive_binding \
  max_mer_range max_select min_mer_count max_mer_distance max_melting_temp \
  min_melting_temp foreground background; do
  printf '%s %s\n' "$var" "${!var}"
done >> "$run_directory/parameters"

# Intermediate files, one per filter stage, all kept in the run directory.
average_binding=$run_directory/${fg_name}-counts-average-binding
consecutive_binding=$run_directory/${fg_name}-counts-consecutive-binding
non_melting=$run_directory/${fg_name}-counts-non-melting

echo "checking if mers appear at least as often in the fg as the average binding site or more $min_foreground_binding_average"
filter_average_binding.py "$foreground" "$min_foreground_binding_average" \
  < "$fg_counts" > "$average_binding" || exit 1

echo "checking if mers are within the melting range $min_melting_temp $max_melting_temp"
filter_melting_temperature.py "$min_melting_temp" "$max_melting_temp" \
  < "$average_binding" > "$non_melting" || exit 1

echo "filtering out elements that have more consecutive binding mers than allowed by \$max_consecutive_binding $max_consecutive_binding"
filter_max_consecutive_binding.py "$max_consecutive_binding" \
  < "$non_melting" > "$consecutive_binding" || exit 1

echo "scoring mer selectivity"
select_mers.py "$consecutive_binding" "$bg_counts" > "$selected" || exit 1

echo "scoring top mers based on selectivity"
score_wrapper.sh "$selected" "$foreground" "$background" \
  "$run_directory/scores-output" || exit 1