aboutsummaryrefslogtreecommitdiff
path: root/SelectiveGenomeAmplification
diff options
context:
space:
mode:
author Calvin Morrison <mutantturkey@gmail.com> 2014-03-26 21:05:25 -0400
committer Calvin Morrison <mutantturkey@gmail.com> 2014-03-26 21:05:25 -0400
commit 495228f7167a6df24a139022e7a0560a4dd07b56 (patch)
tree ca163ce70968410ebbe30291c2c90aaec92da9b1 /SelectiveGenomeAmplification
parent f6fe059e37d8e8ee0cb6f7acf72c8918473ad3f7 (diff)
majorly rewrite and revamp the script to support running parts at a time
Diffstat (limited to 'SelectiveGenomeAmplification')
-rwxr-xr-x SelectiveGenomeAmplification 234
1 files changed, 168 insertions, 66 deletions
diff --git a/SelectiveGenomeAmplification b/SelectiveGenomeAmplification
index ad8d4a5..85a166f 100755
--- a/SelectiveGenomeAmplification
+++ b/SelectiveGenomeAmplification
@@ -1,6 +1,9 @@
#!/usr/bin/env bash
-if [ -z "$foreground" ] && [ -z "$background" ]; then
+
+all=run
+# Parse in our arguments
+if [[ -z "$foreground" ]] && [[ -z "$background" ]]; then
if (( $# < 2 )); then
echo "please supply two genomes, foreground and background"
exit 1
@@ -8,27 +11,62 @@ if [ -z "$foreground" ] && [ -z "$background" ]; then
: ${foreground=$1}
: ${background=$2}
+ start=3
+else
+ start=1
fi
+if (( $# > 2 )); then
+ unset all
-if [[ ! -f $foreground ]]; then
- echo "Could not open $foreground"
- exit 1
-fi
+ for i in "${@:$start}"; do
+ if [[ "$i" = "1" ]] || [[ "$i" = "count" ]]; then
+ step=1; step_mers=1
+ fi
-if [[ ! -f $background ]]; then
- echo "Could not open $background"
- exit 1
+ if [[ "$i" = "2" ]] || [[ "$i" = "filter" ]]; then
+ step=1; step_filters=1
+ fi
+
+ if [[ "$i" = "3" ]] || [[ "$i" = "select" ]]; then
+ step=1; step_select=1
+ fi
+
+ if [[ "$i" = "4" ]] || [[ "$i" = "score" ]]; then
+ step=1; step_score=1
+ fi
+
+ if [[ $step ]] && [[ ! "$current_run" ]] && [[ ! $step_mers ]]; then
+ echo "Error: If you are going to step through your program, and aren't starting" \
+ "at the first step, you better specify what previous run you want to use" \
+ "as your base"
+ exit
+ fi
+
+ done;
+fi;
+
+if [[ -n "$step" ]] && [[ -z "$step_mers" ]] && [[ -z "$step_filters" ]] && [[ -z $step_select ]] && [[ -z "$step_score" ]]; then
+ echo "Error: you need to select at least one step to run."
+ exit
fi
+echo
+echo "Planning on running these steps:"
+for var in step_mers step_filters step_select step_score all; do
+ if [[ -n "${!var}" ]]; then
+ echo ' '$var
+ fi
+done
+
# output directory
-: ${output_directory=`basename $foreground`_`basename $background`}
+: ${output_directory=$(basename "$foreground")_$(basename "$background")}
# temp directory
-: ${tmp_directory=$output_directory/.tmp}
+: ${tmp_directory="$output_directory"/.tmp}
# directory to store our counts and sorted counts
-: ${counts_directory=$tmp_directory}
+: ${counts_directory="$tmp_directory"}
# range of mers, min and max
: ${min_mer_range=6}
@@ -59,7 +97,7 @@ fi
# fg_weight, how much to weight to give the higher bindnig primers
: ${fg_weight=0}
-# primer_weight, how much weight to give to sets with a higher number of priemrs. (between 0 and 1)
+# primer_weight, how much weight to give to sets with a higher number of primers. (between 0 and 1)
: ${primer_weight=0}
export ignore_mers
@@ -78,82 +116,146 @@ export fg_weight
export primer_weight
+echo
+# check foreground and background
+if [[ ! -f "$foreground" ]]; then
+ echo "Error: could not open $foreground"
+ exit 1
+fi
+
+if [[ ! -f "$background" ]]; then
+ echo "Error: could not open $background"
+ exit 1
+fi
+
+
+if [[ -n "$current_run" ]] && [[ ! -d "$output_directory/$current_run" ]]; then
+ echo -n "run $current_run was not found, it should be a folder here: "
+ echo "$output_directory/$current_run"
+ exit
+fi
+
+num=1
+if [[ -z "$current_run" ]]; then
+ while [[ -d $output_directory/run_$num ]] ; do
+ let num++
+ done
+ current_run=run_$num
+fi
+
+fg_basename=$(basename "$foreground")
+bg_basename=$(basename "$background")
+
+fg_counts=$counts_directory/$fg_basename-counts
+bg_counts=$counts_directory/$bg_basename-counts
+selected=$output_directory/$current_run/selected-mers
+
+
+average_binding=$output_directory/$current_run/$fg_basename-counts-average-binding
+non_melting=$output_directory/$current_run/$fg_basename-counts-non-melting
+consecutive_binding=$output_directory/$current_run/$fg_basename-counts-consecutive-binding
+
# Make our output directory
-if [ ! -d $output_directory ]; then
- mkdir $output_directory
+if [[ ! -d "$output_directory" ]]; then
+ mkdir "$output_directory"
fi
# Make our counts directory
-if [ ! -d $counts_directory ]; then
- mkdir $counts_directory
+if [[ ! -d "$counts_directory" ]]; then
+ mkdir "$counts_directory"
fi
# Make our temporary directory
-if [ ! -d $tmp_directory ]; then
- mkdir $tmp_directory
+if [[ ! -d $tmp_directory ]]; then
+ mkdir "$tmp_directory"
fi
+# Make our current run directory
+if [[ ! -d $output_directory/$current_run ]]; then
+ mkdir "$output_directory"/"$current_run"
+fi
-current_run=$output_`date +%s`
-mkdir -p $output_directory/$current_run
+echo "Outputting current run parameters"
+ for var in ignore_mers min_mer_range max_mer_range max_check cpus max_consecutive_binding max_select min_foreground_binding_average max_mer_distance min_melting_temp max_melting_temp foreground background; do
+ echo "$var" "${!var}" >> "$output_directory"/"$current_run"/parameters
+done;
-for fasta_file in $foreground $background; do
+echo "current run is: $current_run"
+echo
- counts=$counts_directory/$(basename $fasta_file)
+if [[ -n "$step_mers" ]] || [[ -n "$all" ]]; then
+ # to continue this project you need to use the current run.
- echo pre-processing $fasta_file
+ echo "Step 1: counting primers in foreground and background"
- # run counts if they haven't been created
- if [ -e $counts-counts ]; then
- rm $counts-counts
- fi
- for mer in `seq $min_mer_range $max_mer_range`; do
- if [ ! -e $counts-counts-$mer ]; then
- echo checking $mer mers for $fasta_file
- kmer_continuous_count -c -i $fasta_file -k $mer -l -n > $counts-counts-$mer
- else
- echo "$mer mers already done for $fasta_file (assuming $fasta_file didn't change)"
- fi
-
- cat $counts-counts-$mer >> $counts-counts
-
- done
-done
+ for fasta_file in "$foreground" "$background"; do
-fg_counts=$counts_directory/$(basename $foreground)-counts
-bg_counts=$counts_directory/$(basename $background)-counts
+ counts="$counts_directory"/$(basename "$fasta_file")
-selected=$output_directory/$current_run/selected-mers
+ echo "counting mers in $fasta_file"
+
+ # check each mer size and process if not already run
+ for (( mer = min_mer_range; mer <= max_mer_range; mer++)) ; do
+ if [[ ! -e "$counts"-counts-"$mer" ]]; then
+ echo "checking $mer mers for $fasta_file (assuming $fasta_file didn't change)"
+ kmer_continuous_count -c -i "$fasta_file" -k "$mer" -l -n > "$counts"-counts-"$mer" || exit 1
+ else
+ echo "$mer mers already done for $fasta_file"
+ fi
-# remove ignored mers
-if [ "$ignore_mers" ]; then
- echo "removing ignored mers: " + $ignore_mers
- for mer in $ignore_mers; do
- sed -i '/^'$mer'\t/d' $fg_counts
- sed -i '/^'$mer'\t/d' $bg_counts
+ # remove the counts file so we can concatenate
+ if [[ -e "$counts"-counts ]]; then
+ rm "$counts"-counts
+ fi
+
+ # concatentate
+ cat "$counts"-counts-"$mer" >> "$counts"-counts
+
+ done
done
fi
-echo "outputing current run parameters"
-for var in ignore_mers min_mer_range max_mer_range max_check cpus max_consecutive_binding max_select min_foreground_binding_average max_mer_distance min_melting_temp max_melting_temp foreground background; do
- echo $var "${!var}" >> $output_directory/$current_run/parameters
-done;
-
-average_binding=$output_directory/$current_run/`basename $foreground`-counts-average-binding
-consecutive_binding=$output_directory/$current_run/`basename $foreground`-counts-consecutive-binding
-non_melting=$output_directory/$current_run/`basename $foreground`-counts-non-melting
+if [[ -n "$step_filters" ]] || [[ -n "$all" ]]; then
+ if [[ ! -f "$fg_counts" ]]; then
+ echo "Error: you need to run your count step before filtration"
+ exit
+ fi
+ echo "Step 2: Filtering mer combinations based on parameters"
+
+ # remove ignored mers
+ if [[ "$ignore_mers" ]]; then
+ echo "removing ignored mers: " + "$ignore_mers"
+ for mer in $ignore_mers; do
+ sed -i '/^'"$mer"'\t/d' "$fg_counts"
+ sed -i '/^'"$mer"'\t/d' "$bg_counts"
+ done
+ fi
-echo "checking if mers appear at least as often in the fg as the average binding site or more $min_foreground_binding_average"
-cat $fg_counts | filter_average_binding.py $foreground $min_foreground_binding_average > $average_binding || exit 1
+ echo "checking if mers appear at least as often in the fg as the average binding site or more $min_foreground_binding_average"
+ filter_average_binding.py "$foreground" "$min_foreground_binding_average" < "$fg_counts" > "$average_binding" || exit 1
-echo "checking if mers are within the melting range $min_melting_temp $max_melting_temp"
-cat $average_binding | filter_melting_temperature.py $min_melting_temp $max_melting_temp > $non_melting || exit 1
+ echo "checking if mers are within the melting range $min_melting_temp $max_melting_temp"
+ filter_melting_temperature.py "$min_melting_temp" "$max_melting_temp" < "$average_binding" > "$non_melting" || exit 1
-echo "filtering out elements that have more consecutive binding mers than allowed by \$max_consecutive_binding $max_consecutive_binding"
-cat $non_melting | filter_max_consecutive_binding.py $max_consecutive_binding > $consecutive_binding || exit 1
+ echo "filtering out elements that have more consecutive binding mers than allowed by \$max_consecutive_binding $max_consecutive_binding"
+ filter_max_consecutive_binding.py "$max_consecutive_binding" < "$non_melting" > "$consecutive_binding" || exit 1
+fi
+
+if [[ -n "$step_select" ]] || [[ -n "$all" ]]; then
+ if [[ ! -f "$consecutive_binding" ]]; then
+ echo "Error: you need to run your filtration step before selection"
+ exit
+ fi
+ echo "Step 3: Scoring mer selectivity"
+ select_mers.py "$consecutive_binding" "$bg_counts" > "$selected" || exit 1
+fi
-echo "scoring mer selectivity"
-select_mers.py $consecutive_binding $bg_counts > $selected || exit 1
+if [[ -n "$step_score" ]] || [[ -n "$all" ]]; then
+ if [[ ! -f "$selected" ]]; then
+ echo "Error: you need to run your selection step before you run your scoring"
+ exit
+ fi
-echo "scoring top mers based on selectivity"
-score_wrapper.sh $selected $foreground $background $output_directory/$current_run/scores-output || exit 1
+ echo "Step 4: Scoring top mers based on selectivity"
+ score_wrapper.sh "$selected" "$foreground" "$background" "$output_directory"/"$current_run"/scores-output || exit 1
+fi