aboutsummaryrefslogtreecommitdiff
path: root/SelectiveGenomeAmplification
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2014-04-07 12:52:49 -0400
committerCalvin Morrison <mutantturkey@gmail.com>2014-04-07 12:52:49 -0400
commitff15f36a46617effd1ecb1dafb289c3c18cc836a (patch)
tree749ccfb2880ab43ab6ee78ae6c85bf34bb78c4cd /SelectiveGenomeAmplification
parentad2831e63e47c2575d86319e5a90f746ba567f11 (diff)
Rename!
Diffstat (limited to 'SelectiveGenomeAmplification')
-rwxr-xr-xSelectiveGenomeAmplification343
1 files changed, 0 insertions, 343 deletions
diff --git a/SelectiveGenomeAmplification b/SelectiveGenomeAmplification
deleted file mode 100755
index e778cbd..0000000
--- a/SelectiveGenomeAmplification
+++ /dev/null
@@ -1,343 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-# arguments:
-
-# check_non_empty
-check_non_empty() {
- if [[ ! -s $1 ]]; then
- echo "Warning: no mers remain after the '$2' filter!"
- echo "Exiting..."
- exit 1
- fi
-}
-# check_mers filename
-check_mers() {
-
- local fasta_file="$1"
- local counts="$2"
- local mer=0
-
- echo " counting mers in $fasta_file"
-
- # remove the counts file so we can concatenate
- if [[ -e "$counts"-counts ]]; then
- echo " removing $counts-counts"
- rm "$counts"-counts
- fi
-
- # check each mer size and process if not already run
- for (( mer = min_mer_range; mer <= max_mer_range; mer++)) ; do
- if [[ ! -e "$counts"-counts-"$mer" ]]; then
- echo " checking $mer mers for $fasta_file"
- kmer_continuous_count -c -i "$fasta_file" -k "$mer" -l -n > "$counts"-counts-"$mer" || exit 1
- else
- echo " $mer-mers already done for $fasta_file (assuming no change)"
- fi
-
- # concatentate
- cat "$counts"-counts-"$mer" >> "$counts"-counts
-
- done
-
-}
-
-all=run
-# Parse in our arguments
-if [[ -z "$foreground" ]] && [[ -z "$background" ]]; then
- if (( $# < 2 )); then
- echo "please supply two genomes, foreground and background"
- exit 1
- fi;
-
- : ${foreground=$1}
- : ${background=$2}
- start=3
-else
- start=1
-fi
-
-if (( $# > 2 )); then
- unset all
-
- for i in "${@:$start}"; do
- if [[ "$i" = "1" ]] || [[ "$i" = "count" ]]; then
- step=1; step_mers=1
- fi
-
- if [[ "$i" = "2" ]] || [[ "$i" = "filter" ]]; then
- step=1; step_filters=1
- fi
-
- if [[ "$i" = "3" ]] || [[ "$i" = "select" ]]; then
- step=1; step_select=1
- fi
-
- if [[ "$i" = "4" ]] || [[ "$i" = "score" ]]; then
- step=1; step_score=1
- fi
-
- if [[ $step ]] && [[ ! "$current_run" ]] && [[ ! $step_mers ]]; then
- echo "Error: If you are going to step through your program, and aren't starting" \
- "at the first step, you better specify what previous run you want to use" \
- "as your base"
- exit
- fi
-
- done;
-fi;
-
-if [[ -n "$step" ]] && [[ -z "$step_mers" ]] && [[ -z "$step_filters" ]] && [[ -z $step_select ]] && [[ -z "$step_score" ]]; then
- echo "Error: you need to select at least one step to run."
- exit
-fi
-
-echo
-echo "Planning on running these steps:"
-for var in step_mers step_filters step_select step_score all; do
- if [[ -n "${!var}" ]]; then
- echo ' '$var
- fi
-done
-
-# output directory
-: ${output_directory=$(basename "$foreground")_$(basename "$background")}
-
-# temp directory
-: ${tmp_directory="$output_directory"/.tmp}
-
-# directory to store our counts and sorted counts
-: ${counts_directory="$tmp_directory"}
-
-# range of mers, min and max
-: ${min_mer_range=6}
-: ${max_mer_range=12}
-
-# max mer distance, the distance between two mers in our selected outputs
-: ${max_mer_distance=5000}
-
-# min/maximum kmer meling point
-: ${max_melting_temp=30}
-: ${min_melting_temp=0}
-
-# minimum average binding distance in the foreground
-: ${min_foreground_binding_average=50000}
-
-# maximum mers to pick
-: ${max_select=15}
-
-# maximum mers to check
-: ${max_check=35}
-
-# mers to specifically IGNORE, space delimited
-: ${ignore_mers=''}
-
-# IGNORE all mers that are in these files, space delimited
-: ${ignore_all_mers_from_files}
-
-# maximum number of mers that are consecutively binding
-: ${max_consecutive_binding=4}
-
-# fg_weight, how much to weight to give the higher bindnig primers
-: ${fg_weight=0}
-
-# primer_weight, how much weight to give to sets with a higher number of primers. (between 0 and 1)
-: ${primer_weight=0}
-
-# output_top_nb, How many scored sets would you like in the top_scored_sets output file (Default = 10000)?
-: ${output_top_nb=10000}
-
-# score_func: A custom scoring function. disable by default. See README.md
-: ${score_func="(nb_primers**primer_weight) * (fg_mean_dist * fg_std_dist) / bg_ratio"}
-
-# sort score by the minimum or maximum value. acceptable parameters are min or max.
-: ${sort_by="min"}
-
-export ignore_mers
-export min_mer_range
-export max_mer_range
-
-export max_select
-
-export min_foreground_binding_average
-export max_mer_distance
-
-export max_melting_temp
-export min_melting_temp
-
-export fg_weight
-export primer_weight
-
-
-echo
-# check foreground and background
-if [[ ! -f "$foreground" ]]; then
- echo "Error: could not open $foreground"
- exit 1
-fi
-
-if [[ ! -f "$background" ]]; then
- echo "Error: could not open $background"
- exit 1
-fi
-
-
-if [[ -n "$current_run" ]] && [[ ! -d "$output_directory/$current_run" ]]; then
- echo -n "run $current_run was not found, it should be a folder here: "
- echo "$output_directory/$current_run"
- exit
-fi
-
-if [[ "$sort_by" = "min" ]]; then
- sort=''
-elif [[ "$sort_by" = "max" ]]; then
- sort="-r"
-else
- echo "Error: \$sort_by must either be set to max or min"
- exit
-fi
-
-num=1
-if [[ -z "$current_run" ]]; then
- while [[ -d $output_directory/run_$num ]] ; do
- let num++
- done
- current_run=run_$num
-fi
-
-fg_basename=$(basename "$foreground")
-bg_basename=$(basename "$background")
-
-fg_counts=$counts_directory/$fg_basename-counts
-bg_counts=$counts_directory/$bg_basename-counts
-
-final_fg_counts=$output_directory/$current_run/$fg_basename-filtered-counts
-selected=$output_directory/$current_run/selected-mers
-
-ignore_mers_counts="$output_directory/$current_run/passes-filter/1-$fg_basename-ignore-mers"
-ignore_all_mers_counts="$output_directory/$current_run/passes-filter/2-$fg_basename-ignore-all-mers"
-average_binding="$output_directory/$current_run/passes-filter/3-$fg_basename-average-binding"
-non_melting="$output_directory/$current_run/passes-filter/4-$fg_basename-non-melting"
-consecutive_binding="$output_directory/$current_run/passes-filter/5-$fg_basename-consecutive-binding"
-
-# Make our output directory
-if [[ ! -d "$output_directory" ]]; then
- mkdir "$output_directory"
-fi
-
-# Make our counts directory
-if [[ ! -d "$counts_directory" ]]; then
- mkdir "$counts_directory"
-fi
-
-# Make our temporary directory
-if [[ ! -d $tmp_directory ]]; then
- mkdir "$tmp_directory"
-fi
-
-# Make our current run directory
-if [[ ! -d $output_directory/$current_run ]]; then
- mkdir "$output_directory"/"$current_run"
-fi
-
-# Make our filter directory
-if [[ ! -d "$output_directory/$current_run/passes-filter" ]]; then
- mkdir "$output_directory/$current_run/passes-filter"
-fi
-
-echo "Outputting current run parameters"
- for var in score_func ignore_mers ignore_all_mers_from_files min_mer_range max_mer_range max_check cpus max_consecutive_binding max_select min_foreground_binding_average max_mer_distance min_melting_temp max_melting_temp foreground background; do
- echo "$var" "${!var}" >> "$output_directory"/"$current_run"/parameters
-done;
-
-echo "current run is: $current_run"
-echo
-
-if [[ -n "$step_mers" ]] || [[ -n "$all" ]]; then
- # to continue this project you need to use the current run.
-
- echo "Step 1: counting primers in foreground and background"
- check_mers "$foreground" "$counts_directory/$(basename "$foreground")"
- check_mers "$background" "$counts_directory/$(basename "$background")"
-fi
-
-if [[ -n "$step_filters" ]] || [[ -n "$all" ]]; then
- if [[ ! -f "$fg_counts" ]]; then
- echo "Error: you need to run your count step before filtration"
- exit
- fi
- echo "Step 2: filtering mers"
-
-
- cp "$fg_counts" "$ignore_mers_counts"
- # remove ignored mers
- if [[ "$ignore_mers" ]]; then
- echo " filtering explicitly ignored mers: $ignore_mers"
- for mer in $ignore_mers; do
- sed -i '/^'"$mer"'\t/d' "$ignore_mers_counts"
- done
- fi
- check_non_empty "$ignore_mers_counts" "ignore mers"
-
- cp "$ignore_mers_counts" "$ignore_all_mers_counts"
- # remove ignored mers
- if [[ "$ignore_all_mers_from_files" ]]; then
- for ignore_file in $ignore_all_mers_from_files; do
-
- if [[ -f "$ignore_file" ]]; then
- echo " filtering ignored mers from: $ignore_file"
-
- counts="$counts_directory/ignore-"$(basename "$ignore_file")
- check_mers "$ignore_file" "$counts"
-
- while read mer_line; do
- mer=$(echo "$mer_line" | sed -e 's/\t.*//g')
- sed -i '/^'"$mer"'\t/d' "$ignore_all_mers_counts"
- done < "$counts-counts"
- else
- echo " $ignore_file not found, continuing..."
- fi
-
- done
- fi
- check_non_empty "$ignore_all_mers_counts" "ignore all mers from file "
-
- echo " filtering mers that appear less frequently than the average binding site distance ($min_foreground_binding_average)"
- filter_average_binding.py "$ignore_all_mers_counts" "$min_foreground_binding_average" < "$fg_counts" > "$average_binding" || exit 1
- check_non_empty "$average_binding" "average binding"
-
- echo " filtering mers that are not in the melting range ($min_melting_temp-$max_melting_temp)"
- filter_melting_temperature.py "$min_melting_temp" "$max_melting_temp" < "$average_binding" > "$non_melting" || exit 1
- check_non_empty "$non_melting" "melting temperature"
-
- echo " filtering mers that have more consecutive binding mers than allowed ($max_consecutive_binding)"
- filter_max_consecutive_binding.py "$max_consecutive_binding" < "$non_melting" > "$consecutive_binding" || exit 1
- check_non_empty "$consecutive_binding" "consecutive binding"
-
- cp $consecutive_binding $final_fg_counts
-fi
-
-if [[ -n "$step_select" ]] || [[ -n "$all" ]]; then
- if [[ ! -f "$final_fg_counts" ]]; then
- echo "Error: you need to run your filtration step before selection"
- exit
- fi
- echo "Step 3: Scoring mer selectivity"
- select_mers.py "$final_fg_counts" "$bg_counts" > "$selected" || exit 1
-fi
-
-if [[ -n "$step_score" ]] || [[ -n "$all" ]]; then
- if [[ ! -f "$selected" ]]; then
- echo "Error: you need to run your selection step before you run your scoring"
- exit
- fi
-
- echo "Step 4: Scoring top mers based on selectivity"
- score_wrapper.sh "$selected" "$foreground" "$background" "$output_directory/$current_run/all-scores" || exit 1
-
- # output our sorted scores
- echo "sorting and outputting top $output_top_nb scores"
- echo "top scores output file: $output_directory/$current_run/top-scores"
- head -n 3 $output_directory/$current_run/all-scores > $output_directory/$current_run/top-scores
- tail -n +4 $output_directory/$current_run/all-scores | sort $sort -t $'\t' -nk 3 | head -n $output_top_nb >> $output_directory/$current_run/top-scores
-fi