From 495228f7167a6df24a139022e7a0560a4dd07b56 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Wed, 26 Mar 2014 21:05:25 -0400 Subject: majorly rewrite and revamp the script to support running parts at a time --- SelectiveGenomeAmplification | 234 +++++++++++++++++++++++++++++++------------ 1 file changed, 168 insertions(+), 66 deletions(-) (limited to 'SelectiveGenomeAmplification') diff --git a/SelectiveGenomeAmplification b/SelectiveGenomeAmplification index ad8d4a5..85a166f 100755 --- a/SelectiveGenomeAmplification +++ b/SelectiveGenomeAmplification @@ -1,6 +1,9 @@ #!/usr/bin/env bash -if [ -z "$foreground" ] && [ -z "$background" ]; then + +all=run +# Parse in our arguments +if [[ -z "$foreground" ]] && [[ -z "$background" ]]; then if (( $# < 2 )); then echo "please supply two genomes, foreground and background" exit 1 @@ -8,27 +11,62 @@ if [ -z "$foreground" ] && [ -z "$background" ]; then : ${foreground=$1} : ${background=$2} + start=3 +else + start=1 fi +if (( $# > 2 )); then + unset all -if [[ ! -f $foreground ]]; then - echo "Could not open $foreground" - exit 1 -fi + for i in "${@:$start}"; do + if [[ "$i" = "1" ]] || [[ "$i" = "count" ]]; then + step=1; step_mers=1 + fi -if [[ ! -f $background ]]; then - echo "Could not open $background" - exit 1 + if [[ "$i" = "2" ]] || [[ "$i" = "filter" ]]; then + step=1; step_filters=1 + fi + + if [[ "$i" = "3" ]] || [[ "$i" = "select" ]]; then + step=1; step_select=1 + fi + + if [[ "$i" = "4" ]] || [[ "$i" = "score" ]]; then + step=1; step_score=1 + fi + + if [[ $step ]] && [[ ! "$current_run" ]] && [[ ! 
$step_mers ]]; then + echo "Error: If you are going to step through your program, and aren't starting" \ + "at the first step, you better specify what previous run you want to use" \ + "as your base" + exit + fi + + done; +fi; + +if [[ -n "$step" ]] && [[ -z "$step_mers" ]] && [[ -z "$step_filters" ]] && [[ -z $step_select ]] && [[ -z "$step_score" ]]; then + echo "Error: you need to select at least one step to run." + exit fi +echo +echo "Planning on running these steps:" +for var in step_mers step_filters step_select step_score all; do + if [[ -n "${!var}" ]]; then + echo ' '$var + fi +done + # output directory -: ${output_directory=`basename $foreground`_`basename $background`} +: ${output_directory=$(basename "$foreground")_$(basename "$background")} # temp directory -: ${tmp_directory=$output_directory/.tmp} +: ${tmp_directory="$output_directory"/.tmp} # directory to store our counts and sorted counts -: ${counts_directory=$tmp_directory} +: ${counts_directory="$tmp_directory"} # range of mers, min and max : ${min_mer_range=6} @@ -59,7 +97,7 @@ fi # fg_weight, how much to weight to give the higher bindnig primers : ${fg_weight=0} -# primer_weight, how much weight to give to sets with a higher number of priemrs. (between 0 and 1) +# primer_weight, how much weight to give to sets with a higher number of primers. (between 0 and 1) : ${primer_weight=0} export ignore_mers @@ -78,82 +116,146 @@ export fg_weight export primer_weight +echo +# check foreground and background +if [[ ! -f "$foreground" ]]; then + echo "Error: could not open $foreground" + exit 1 +fi + +if [[ ! -f "$background" ]]; then + echo "Error: could not open $background" + exit 1 +fi + + +if [[ -n "$current_run" ]] && [[ ! 
-d "$output_directory/$current_run" ]]; then + echo -n "run $current_run was not found, it should be a folder here: " + echo "$output_directory/$current_run" + exit +fi + +num=1 +if [[ -z "$current_run" ]]; then + while [[ -d $output_directory/run_$num ]] ; do + let num++ + done + current_run=run_$num +fi + +fg_basename=$(basename "$foreground") +bg_basename=$(basename "$background") + +fg_counts=$counts_directory/$fg_basename-counts +bg_counts=$counts_directory/$bg_basename-counts +selected=$output_directory/$current_run/selected-mers + + +average_binding=$output_directory/$current_run/$fg_basename-counts-average-binding +non_melting=$output_directory/$current_run/$fg_basename-counts-non-melting +consecutive_binding=$output_directory/$current_run/$fg_basename-counts-consecutive-binding + # Make our output directory -if [ ! -d $output_directory ]; then - mkdir $output_directory +if [[ ! -d "$output_directory" ]]; then + mkdir "$output_directory" fi # Make our counts directory -if [ ! -d $counts_directory ]; then - mkdir $counts_directory +if [[ ! -d "$counts_directory" ]]; then + mkdir "$counts_directory" fi # Make our temporary directory -if [ ! -d $tmp_directory ]; then - mkdir $tmp_directory +if [[ ! -d $tmp_directory ]]; then + mkdir "$tmp_directory" fi +# Make our current run directory +if [[ ! 
-d $output_directory/$current_run ]]; then + mkdir "$output_directory"/"$current_run" +fi -current_run=$output_`date +%s` -mkdir -p $output_directory/$current_run +echo "Outputting current run parameters" + for var in ignore_mers min_mer_range max_mer_range max_check cpus max_consecutive_binding max_select min_foreground_binding_average max_mer_distance min_melting_temp max_melting_temp foreground background; do + echo "$var" "${!var}" >> "$output_directory"/"$current_run"/parameters +done; -for fasta_file in $foreground $background; do +echo "current run is: $current_run" +echo - counts=$counts_directory/$(basename $fasta_file) +if [[ -n "$step_mers" ]] || [[ -n "$all" ]]; then + # to continue this project you need to use the current run. - echo pre-processing $fasta_file + echo "Step 1: counting primers in foreground and background" - # run counts if they haven't been created - if [ -e $counts-counts ]; then - rm $counts-counts - fi - for mer in `seq $min_mer_range $max_mer_range`; do - if [ ! -e $counts-counts-$mer ]; then - echo checking $mer mers for $fasta_file - kmer_continuous_count -c -i $fasta_file -k $mer -l -n > $counts-counts-$mer - else - echo "$mer mers already done for $fasta_file (assuming $fasta_file didn't change)" - fi - - cat $counts-counts-$mer >> $counts-counts - - done -done + for fasta_file in "$foreground" "$background"; do -fg_counts=$counts_directory/$(basename $foreground)-counts -bg_counts=$counts_directory/$(basename $background)-counts + counts="$counts_directory"/$(basename "$fasta_file") -selected=$output_directory/$current_run/selected-mers + echo "counting mers in $fasta_file" + + # check each mer size and process if not already run + for (( mer = min_mer_range; mer <= max_mer_range; mer++)) ; do + if [[ ! 
-e "$counts"-counts-"$mer" ]]; then + echo "checking $mer mers for $fasta_file (assuming $fasta_file didn't change)" + kmer_continuous_count -c -i "$fasta_file" -k "$mer" -l -n > "$counts"-counts-"$mer" || exit 1 + else + echo "$mer mers already done for $fasta_file" + fi -# remove ignored mers -if [ "$ignore_mers" ]; then - echo "removing ignored mers: " + $ignore_mers - for mer in $ignore_mers; do - sed -i '/^'$mer'\t/d' $fg_counts - sed -i '/^'$mer'\t/d' $bg_counts + # remove the counts file so we can concatenate + if [[ -e "$counts"-counts ]]; then + rm "$counts"-counts + fi + + # concatentate + cat "$counts"-counts-"$mer" >> "$counts"-counts + + done done fi -echo "outputing current run parameters" -for var in ignore_mers min_mer_range max_mer_range max_check cpus max_consecutive_binding max_select min_foreground_binding_average max_mer_distance min_melting_temp max_melting_temp foreground background; do - echo $var "${!var}" >> $output_directory/$current_run/parameters -done; - -average_binding=$output_directory/$current_run/`basename $foreground`-counts-average-binding -consecutive_binding=$output_directory/$current_run/`basename $foreground`-counts-consecutive-binding -non_melting=$output_directory/$current_run/`basename $foreground`-counts-non-melting +if [[ -n "$step_filters" ]] || [[ -n "$all" ]]; then + if [[ ! 
-f "$fg_counts" ]]; then + echo "Error: you need to run your count step before filtration" + exit + fi + echo "Step 2: Filtering mer combinations based on parameters" + + # remove ignored mers + if [[ "$ignore_mers" ]]; then + echo "removing ignored mers: " + "$ignore_mers" + for mer in $ignore_mers; do + sed -i '/^'"$mer"'\t/d' "$fg_counts" + sed -i '/^'"$mer"'\t/d' "$bg_counts" + done + fi -echo "checking if mers appear at least as often in the fg as the average binding site or more $min_foreground_binding_average" -cat $fg_counts | filter_average_binding.py $foreground $min_foreground_binding_average > $average_binding || exit 1 + echo "checking if mers appear at least as often in the fg as the average binding site or more $min_foreground_binding_average" + filter_average_binding.py "$foreground" "$min_foreground_binding_average" < "$fg_counts" > "$average_binding" || exit 1 -echo "checking if mers are within the melting range $min_melting_temp $max_melting_temp" -cat $average_binding | filter_melting_temperature.py $min_melting_temp $max_melting_temp > $non_melting || exit 1 + echo "checking if mers are within the melting range $min_melting_temp $max_melting_temp" + filter_melting_temperature.py "$min_melting_temp" "$max_melting_temp" < "$average_binding" > "$non_melting" || exit 1 -echo "filtering out elements that have more consecutive binding mers than allowed by \$max_consecutive_binding $max_consecutive_binding" -cat $non_melting | filter_max_consecutive_binding.py $max_consecutive_binding > $consecutive_binding || exit 1 + echo "filtering out elements that have more consecutive binding mers than allowed by \$max_consecutive_binding $max_consecutive_binding" + filter_max_consecutive_binding.py "$max_consecutive_binding" < "$non_melting" > "$consecutive_binding" || exit 1 +fi + +if [[ -n "$step_select" ]] || [[ -n "$all" ]]; then + if [[ ! 
-f "$consecutive_binding" ]]; then + echo "Error: you need to run your filtration step before selection" + exit + fi + echo "Step 3: Scoring mer selectivity" + select_mers.py "$consecutive_binding" "$bg_counts" > "$selected" || exit 1 +fi -echo "scoring mer selectivity" -select_mers.py $consecutive_binding $bg_counts > $selected || exit 1 +if [[ -n "$step_score" ]] || [[ -n "$all" ]]; then + if [[ ! -f "$selected" ]]; then + echo "Error: you need to run your selection step before you run your scoring" + exit + fi -echo "scoring top mers based on selectivity" -score_wrapper.sh $selected $foreground $background $output_directory/$current_run/scores-output || exit 1 + echo "Step 4: Scoring top mers based on selectivity" + score_wrapper.sh "$selected" "$foreground" "$background" "$output_directory"/"$current_run"/scores-output || exit 1 +fi -- cgit v1.2.3