#!/usr/bin/env bash
#
# Pipeline driver: counts k-mers in a foreground and a background genome file,
# filters those counts, and passes the results to select_mers.py and
# score_mers.py.
#
# Usage:
#   <script> <foreground> <background>
#   foreground=<file> background=<file> <script>
#
# Every tunable below can be pre-set in the environment: the
# `: "${name=default}"` idiom assigns only when the variable is unset.
#
# External programs invoked (must be on PATH): kmer_total_count,
# filter_melting_range, filter_max_consecutive_binding.py, select_mers.py,
# score_mers.py.

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# --- inputs -----------------------------------------------------------------

# Positional arguments are consulted only when neither genome came from the
# environment. `:=` (not `=`) so that an exported-but-empty variable, which
# passes the -z test, is also filled in.
if [[ -z "${foreground:-}" && -z "${background:-}" ]]; then
  if (( $# < 2 )); then
    die "please supply two genomes, foreground and background"
  fi
  : "${foreground:=$1}"
  : "${background:=$2}"
fi

[[ -f "$foreground" ]] || die "Could not open $foreground"
[[ -f "$background" ]] || die "Could not open $background"

# File names without any directory part; used for every derived path so that
# genomes given as relative/absolute paths do not leak slashes into outputs.
fg_name=$(basename -- "$foreground")
bg_name=$(basename -- "$background")

# --- tunables ---------------------------------------------------------------

# output directory
: "${output_directory=${fg_name}_${bg_name}}"

# temp directory
: "${tmp_directory=$output_directory/.tmp}"

# directory to store our counts and sorted counts
: "${counts_directory=$tmp_directory}"

# range of mers, min and max
: "${min_mer_range=6}"
: "${max_mer_range=12}"

# max mer distance, the distance between two mers in our selected outputs
: "${max_mer_distance=5000}"

# min/maximum kmer melting point
: "${max_melting_temp=30}"
: "${min_melting_temp=0}"

# minimum mer count
: "${min_mer_count=0}"

# maximum mers to pick
: "${max_select=15}"

# maximum mers to check
: "${max_check=35}"

# mers to specifically IGNORE, space delimited
: "${ignore_mers=}"

# maximum number of mers that are consecutively binding
: "${max_consecutive_binding=4}"

# NOTE(review): max_check and max_consecutive_binding are deliberately left
# un-exported to match the original behaviour; confirm whether the helper
# programs expect to read them from the environment.
export ignore_mers
export min_mer_range
export max_mer_range
export max_select
export min_mer_count
export max_mer_distance
export max_melting_temp
export min_melting_temp

# --- directories ------------------------------------------------------------

# -p: tolerate existing directories and create missing parents, so nested
# user-supplied counts/tmp directories work regardless of creation order.
mkdir -p -- "$output_directory" "$tmp_directory" "$counts_directory" \
  || die "Could not create working directories"

# Each invocation writes into its own sub-directory named by the epoch time.
# (The original prefixed this with "$output_", a variable that is never
# defined anywhere in this script, so the effective value was the timestamp.)
current_run=$(date +%s)
run_directory=$output_directory/$current_run
mkdir -p -- "$run_directory" || die "Could not create $run_directory"

# --- pre-processing and counting --------------------------------------------

for fasta_file in "$foreground" "$background"; do
  fasta_name=$(basename -- "$fasta_file")
  counts=$counts_directory/$fasta_name
  tmp=$tmp_directory/$fasta_name

  echo "pre-processing $fasta_file"

  # check if our preprocessed file exists; if not, build it as one header
  # line followed by every non-header line of the input joined together.
  if [[ ! -f "$tmp" ]]; then
    echo "> pre processed $fasta_file" >> "$tmp"
    grep -v '^>' -- "$fasta_file" | tr -d '\n' >> "$tmp"
  fi

  # The aggregate counts file is rebuilt from the per-k files on every run.
  # -f: it legitimately does not exist on the first run.
  rm -f -- "${counts}-counts"

  # run counts if they haven't been created
  for (( mer = min_mer_range; mer <= max_mer_range; mer++ )); do
    mer_counts=${counts}-counts-${mer}
    if [[ ! -e "$mer_counts" ]]; then
      echo "checking $mer mers for $fasta_file"
      kmer_total_count -i "$tmp" -k "$mer" -l -n >> "$mer_counts"
    else
      echo "$mer mers already done for $fasta_file"
    fi
    cat -- "$mer_counts" >> "${counts}-counts"
  done
done

fg_counts=$counts_directory/${fg_name}-counts
bg_counts=$counts_directory/${bg_name}-counts

fg_tmp=$tmp_directory/$fg_name
bg_tmp=$tmp_directory/$bg_name

selected=$run_directory/selected-mers

# --- remove ignored mers ----------------------------------------------------

if [[ -n "$ignore_mers" ]]; then
  # shellcheck disable=SC2086 # word-splitting of the space-delimited list is intended
  echo "removing ignored mers: " + $ignore_mers
  # shellcheck disable=SC2086
  for mer in $ignore_mers; do
    # Drop lines whose first tab-separated field is exactly this mer.
    sed -i "/^${mer}\t/d" "$fg_counts"
    sed -i "/^${mer}\t/d" "$bg_counts"
  done
fi

# --- record parameters ------------------------------------------------------

echo "outputing current run parameters"

# NOTE(review): cpus is never assigned in this script; it is logged with
# whatever value (possibly empty) was inherited from the environment.
for var in ignore_mers min_mer_range max_check cpus max_consecutive_binding \
  max_mer_range max_select min_mer_count max_mer_distance max_melting_temp \
  min_melting_temp foreground background; do
  printf '%s %s\n' "$var" "${!var}"
done >> "$run_directory/parameters"

# --- filtering --------------------------------------------------------------

fg_non_melting=$run_directory/${fg_name}-counts-non-melting
bg_non_melting=$run_directory/${bg_name}-counts-non-melting
fg_filtered=$run_directory/${fg_name}-counts-filtered-binding
bg_filtered=$run_directory/${bg_name}-counts-filtered-binding

echo "checking if mers are within the melting range $min_melting_temp $max_melting_temp"

filter_melting_range "$min_melting_temp" "$max_melting_temp" \
  < "$fg_counts" > "$fg_non_melting"
filter_melting_range "$min_melting_temp" "$max_melting_temp" \
  < "$bg_counts" > "$bg_non_melting"

echo "filtering out elements that have more consecutive binding mers than allowed by default $max_consecutive_binding"

filter_max_consecutive_binding.py "$max_consecutive_binding" \
  < "$fg_non_melting" > "$fg_filtered"
filter_max_consecutive_binding.py "$max_consecutive_binding" \
  < "$bg_non_melting" > "$bg_filtered"

# --- selection and scoring --------------------------------------------------

echo "scoring mer selectivity"
select_mers.py "$fg_filtered" "$fg_tmp" "$bg_filtered" "$bg_tmp" > "$selected"

echo "scoring top mers based on selectivity"
score_mers.py "$selected" "$fg_tmp" "$bg_tmp" "$run_directory/scores-output"