From ff15f36a46617effd1ecb1dafb289c3c18cc836a Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Mon, 7 Apr 2014 12:52:49 -0400 Subject: Rename! --- Makefile | 4 +- README.md | 24 +-- SelectiveGenomeAmplification | 343 ------------------------------------ SelectiveGenomeAmplificationUI | 126 ------------- SelectiveWholeGenomeAmplification | 343 ++++++++++++++++++++++++++++++++++++ SelectiveWholeGenomeAmplificationUI | 126 +++++++++++++ 6 files changed, 483 insertions(+), 483 deletions(-) delete mode 100755 SelectiveGenomeAmplification delete mode 100755 SelectiveGenomeAmplificationUI create mode 100755 SelectiveWholeGenomeAmplification create mode 100755 SelectiveWholeGenomeAmplificationUI diff --git a/Makefile b/Makefile index 01df8bc..02aeb46 100644 --- a/Makefile +++ b/Makefile @@ -26,8 +26,8 @@ install: all install -c bin/strstreamone $(DEST) install -c bin/sequence_end_points $(DEST) # bash scripts - install -c SelectiveGenomeAmplification $(DEST) - install -c SelectiveGenomeAmplificationUI $(DEST) + install -c SelectiveWholeGenomeAmplification $(DEST) + install -c SelectiveWholeGenomeAmplificationUI $(DEST) # python scripts install -c src/select_mers.py $(DEST) install -c src/score_mers.py $(DEST) diff --git a/README.md b/README.md index 9d52d27..18c0748 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -SelectiveGenomeAmplification +SelectiveWholeGenomeAmplification ============================ SWGA is a tool for choosing primers for the selective amplification of a @@ -42,21 +42,21 @@ To use this you'll need: ## Setup - git clone git@github.com:mutantturkey/SelectiveGenomeAmplification.git - cd SelectiveGenomeAmplification + git clone git@github.com:mutantturkey/SelectiveWholeGenomeAmplification".git + cd SelectiveWholeGenomeAmplification" make sudo make install ## Example Usage -Standard use of (SGA) SelectiveGenomeAmplification is easy. it takes two arguments, +Standard use of (SGA) SelectiveWholeGenomeAmplification" is easy. it takes two arguments, the foreground and background - SelectiveGenomeAmplification PfalciparumGenome.fasta HumanGenome.fasta; + SelectiveWholeGenomeAmplification" PfalciparumGenome.fasta HumanGenome.fasta; less PfalciparumGenome_HumanGenome/final_mers ### SGA User Interface -SGA also comes with a easy to use user prompt called SelectiveGenomeAmplificationUI. +SGA also comes with a easy to use user prompt called SelectiveWholeGenomeAmplification"UI. It allows for a less experienced user to use SGA without issue. to run this all you need to do is run SelectiveGenomeAmiplifcationUI and you'll see a series of prompts asking the user about tunables like below @@ -69,7 +69,7 @@ SGA without issue. to run this all you need to do is run SelectiveGenomeAmiplifc Input the path to your foreground file:target.fa Input the path to your background file:humangenome.fa Would you like to output your inserted variables to a string you can later paste? (Y/N/Default=y): n - Run SelectiveGenomeAmplification? (Y/N/Default=y): y + Run SelectiveWholeGenomeAmplification"? (Y/N/Default=y): y ### Setting Tunable Parameters @@ -78,19 +78,19 @@ below. For user customizable variables, they need to be passed in as environmental variables like so: max_mer_distance=5000 max_select=6 min_mer_range=6 max_mer_range=12 \ - SelectiveGenomeAmplification.sh PfalciparumGenome.fasta half.fasta + SelectiveWholeGenomeAmplification".sh PfalciparumGenome.fasta half.fasta ### Running individual steps -By default SelectiveGenomeAmplification runs all four steps, but you can +By default SelectiveWholeGenomeAmplification" runs all four steps, but you can specify the program to run other steps, like in these examples. - current_run=run_1 SelectiveGenomeAmplification target.fasta bg.fasta score + current_run=run_1 SelectiveWholeGenomeAmplification" target.fasta bg.fasta score - current_run=run_1 SelectiveGenomeAmplification target.fasta bg.fasta select score + current_run=run_1 SelectiveWholeGenomeAmplification" target.fasta bg.fasta select score - current_run=run_1 SelectiveGenomeAmplification target.fasta bg.fasta 3 4 + current_run=run_1 SelectiveWholeGenomeAmplification" target.fasta bg.fasta 3 4 valid steps are these: diff --git a/SelectiveGenomeAmplification b/SelectiveGenomeAmplification deleted file mode 100755 index e778cbd..0000000 --- a/SelectiveGenomeAmplification +++ /dev/null @@ -1,343 +0,0 @@ -#!/usr/bin/env bash - -set -e -# arguments: - -# check_non_empty -check_non_empty() { - if [[ ! -s $1 ]]; then - echo "Warning: no mers remain after the '$2' filter!" - echo "Exiting..." - exit 1 - fi -} -# check_mers filename -check_mers() { - - local fasta_file="$1" - local counts="$2" - local mer=0 - - echo " counting mers in $fasta_file" - - # remove the counts file so we can concatenate - if [[ -e "$counts"-counts ]]; then - echo " removing $counts-counts" - rm "$counts"-counts - fi - - # check each mer size and process if not already run - for (( mer = min_mer_range; mer <= max_mer_range; mer++)) ; do - if [[ ! -e "$counts"-counts-"$mer" ]]; then - echo " checking $mer mers for $fasta_file" - kmer_continuous_count -c -i "$fasta_file" -k "$mer" -l -n > "$counts"-counts-"$mer" || exit 1 - else - echo " $mer-mers already done for $fasta_file (assuming no change)" - fi - - # concatentate - cat "$counts"-counts-"$mer" >> "$counts"-counts - - done - -} - -all=run -# Parse in our arguments -if [[ -z "$foreground" ]] && [[ -z "$background" ]]; then - if (( $# < 2 )); then - echo "please supply two genomes, foreground and background" - exit 1 - fi; - - : ${foreground=$1} - : ${background=$2} - start=3 -else - start=1 -fi - -if (( $# > 2 )); then - unset all - - for i in "${@:$start}"; do - if [[ "$i" = "1" ]] || [[ "$i" = "count" ]]; then - step=1; step_mers=1 - fi - - if [[ "$i" = "2" ]] || [[ "$i" = "filter" ]]; then - step=1; step_filters=1 - fi - - if [[ "$i" = "3" ]] || [[ "$i" = "select" ]]; then - step=1; step_select=1 - fi - - if [[ "$i" = "4" ]] || [[ "$i" = "score" ]]; then - step=1; step_score=1 - fi - - if [[ $step ]] && [[ ! "$current_run" ]] && [[ ! $step_mers ]]; then - echo "Error: If you are going to step through your program, and aren't starting" \ - "at the first step, you better specify what previous run you want to use" \ - "as your base" - exit - fi - - done; -fi; - -if [[ -n "$step" ]] && [[ -z "$step_mers" ]] && [[ -z "$step_filters" ]] && [[ -z $step_select ]] && [[ -z "$step_score" ]]; then - echo "Error: you need to select at least one step to run." - exit -fi - -echo -echo "Planning on running these steps:" -for var in step_mers step_filters step_select step_score all; do - if [[ -n "${!var}" ]]; then - echo ' '$var - fi -done - -# output directory -: ${output_directory=$(basename "$foreground")_$(basename "$background")} - -# temp directory -: ${tmp_directory="$output_directory"/.tmp} - -# directory to store our counts and sorted counts -: ${counts_directory="$tmp_directory"} - -# range of mers, min and max -: ${min_mer_range=6} -: ${max_mer_range=12} - -# max mer distance, the distance between two mers in our selected outputs -: ${max_mer_distance=5000} - -# min/maximum kmer meling point -: ${max_melting_temp=30} -: ${min_melting_temp=0} - -# minimum average binding distance in the foreground -: ${min_foreground_binding_average=50000} - -# maximum mers to pick -: ${max_select=15} - -# maximum mers to check -: ${max_check=35} - -# mers to specifically IGNORE, space delimited -: ${ignore_mers=''} - -# IGNORE all mers that are in these files, space delimited -: ${ignore_all_mers_from_files} - -# maximum number of mers that are consecutively binding -: ${max_consecutive_binding=4} - -# fg_weight, how much to weight to give the higher bindnig primers -: ${fg_weight=0} - -# primer_weight, how much weight to give to sets with a higher number of primers. (between 0 and 1) -: ${primer_weight=0} - -# output_top_nb, How many scored sets would you like in the top_scored_sets output file (Default = 10000)? -: ${output_top_nb=10000} - -# score_func: A custom scoring function. disable by default. See README.md -: ${score_func="(nb_primers**primer_weight) * (fg_mean_dist * fg_std_dist) / bg_ratio"} - -# sort score by the minimum or maximum value. acceptable parameters are min or max. -: ${sort_by="min"} - -export ignore_mers -export min_mer_range -export max_mer_range - -export max_select - -export min_foreground_binding_average -export max_mer_distance - -export max_melting_temp -export min_melting_temp - -export fg_weight -export primer_weight - - -echo -# check foreground and background -if [[ ! -f "$foreground" ]]; then - echo "Error: could not open $foreground" - exit 1 -fi - -if [[ ! -f "$background" ]]; then - echo "Error: could not open $background" - exit 1 -fi - - -if [[ -n "$current_run" ]] && [[ ! -d "$output_directory/$current_run" ]]; then - echo -n "run $current_run was not found, it should be a folder here: " - echo "$output_directory/$current_run" - exit -fi - -if [[ "$sort_by" = "min" ]]; then - sort='' -elif [[ "$sort_by" = "max" ]]; then - sort="-r" -else - echo "Error: \$sort_by must either be set to max or min" - exit -fi - -num=1 -if [[ -z "$current_run" ]]; then - while [[ -d $output_directory/run_$num ]] ; do - let num++ - done - current_run=run_$num -fi - -fg_basename=$(basename "$foreground") -bg_basename=$(basename "$background") - -fg_counts=$counts_directory/$fg_basename-counts -bg_counts=$counts_directory/$bg_basename-counts - -final_fg_counts=$output_directory/$current_run/$fg_basename-filtered-counts -selected=$output_directory/$current_run/selected-mers - -ignore_mers_counts="$output_directory/$current_run/passes-filter/1-$fg_basename-ignore-mers" -ignore_all_mers_counts="$output_directory/$current_run/passes-filter/2-$fg_basename-ignore-all-mers" -average_binding="$output_directory/$current_run/passes-filter/3-$fg_basename-average-binding" -non_melting="$output_directory/$current_run/passes-filter/4-$fg_basename-non-melting" -consecutive_binding="$output_directory/$current_run/passes-filter/5-$fg_basename-consecutive-binding" - -# Make our output directory -if [[ ! -d "$output_directory" ]]; then - mkdir "$output_directory" -fi - -# Make our counts directory -if [[ ! -d "$counts_directory" ]]; then - mkdir "$counts_directory" -fi - -# Make our temporary directory -if [[ ! -d $tmp_directory ]]; then - mkdir "$tmp_directory" -fi - -# Make our current run directory -if [[ ! -d $output_directory/$current_run ]]; then - mkdir "$output_directory"/"$current_run" -fi - -# Make our filter directory -if [[ ! -d "$output_directory/$current_run/passes-filter" ]]; then - mkdir "$output_directory/$current_run/passes-filter" -fi - -echo "Outputting current run parameters" - for var in score_func ignore_mers ignore_all_mers_from_files min_mer_range max_mer_range max_check cpus max_consecutive_binding max_select min_foreground_binding_average max_mer_distance min_melting_temp max_melting_temp foreground background; do - echo "$var" "${!var}" >> "$output_directory"/"$current_run"/parameters -done; - -echo "current run is: $current_run" -echo - -if [[ -n "$step_mers" ]] || [[ -n "$all" ]]; then - # to continue this project you need to use the current run. - - echo "Step 1: counting primers in foreground and background" - check_mers "$foreground" "$counts_directory/$(basename "$foreground")" - check_mers "$background" "$counts_directory/$(basename "$background")" -fi - -if [[ -n "$step_filters" ]] || [[ -n "$all" ]]; then - if [[ ! -f "$fg_counts" ]]; then - echo "Error: you need to run your count step before filtration" - exit - fi - echo "Step 2: filtering mers" - - - cp "$fg_counts" "$ignore_mers_counts" - # remove ignored mers - if [[ "$ignore_mers" ]]; then - echo " filtering explicitly ignored mers: $ignore_mers" - for mer in $ignore_mers; do - sed -i '/^'"$mer"'\t/d' "$ignore_mers_counts" - done - fi - check_non_empty "$ignore_mers_counts" "ignore mers" - - cp "$ignore_mers_counts" "$ignore_all_mers_counts" - # remove ignored mers - if [[ "$ignore_all_mers_from_files" ]]; then - for ignore_file in $ignore_all_mers_from_files; do - - if [[ -f "$ignore_file" ]]; then - echo " filtering ignored mers from: $ignore_file" - - counts="$counts_directory/ignore-"$(basename "$ignore_file") - check_mers "$ignore_file" "$counts" - - while read mer_line; do - mer=$(echo "$mer_line" | sed -e 's/\t.*//g') - sed -i '/^'"$mer"'\t/d' "$ignore_all_mers_counts" - done < "$counts-counts" - else - echo " $ignore_file not found, continuing..." - fi - - done - fi - check_non_empty "$ignore_all_mers_counts" "ignore all mers from file " - - echo " filtering mers that appear less frequently than the average binding site distance ($min_foreground_binding_average)" - filter_average_binding.py "$ignore_all_mers_counts" "$min_foreground_binding_average" < "$fg_counts" > "$average_binding" || exit 1 - check_non_empty "$average_binding" "average binding" - - echo " filtering mers that are not in the melting range ($min_melting_temp-$max_melting_temp)" - filter_melting_temperature.py "$min_melting_temp" "$max_melting_temp" < "$average_binding" > "$non_melting" || exit 1 - check_non_empty "$non_melting" "melting temperature" - - echo " filtering mers that have more consecutive binding mers than allowed ($max_consecutive_binding)" - filter_max_consecutive_binding.py "$max_consecutive_binding" < "$non_melting" > "$consecutive_binding" || exit 1 - check_non_empty "$consecutive_binding" "consecutive binding" - - cp $consecutive_binding $final_fg_counts -fi - -if [[ -n "$step_select" ]] || [[ -n "$all" ]]; then - if [[ ! -f "$final_fg_counts" ]]; then - echo "Error: you need to run your filtration step before selection" - exit - fi - echo "Step 3: Scoring mer selectivity" - select_mers.py "$final_fg_counts" "$bg_counts" > "$selected" || exit 1 -fi - -if [[ -n "$step_score" ]] || [[ -n "$all" ]]; then - if [[ ! -f "$selected" ]]; then - echo "Error: you need to run your selection step before you run your scoring" - exit - fi - - echo "Step 4: Scoring top mers based on selectivity" - score_wrapper.sh "$selected" "$foreground" "$background" "$output_directory/$current_run/all-scores" || exit 1 - - # output our sorted scores - echo "sorting and outputting top $output_top_nb scores" - echo "top scores output file: $output_directory/$current_run/top-scores" - head -n 3 $output_directory/$current_run/all-scores > $output_directory/$current_run/top-scores - tail -n +4 $output_directory/$current_run/all-scores | sort $sort -t $'\t' -nk 3 | head -n $output_top_nb >> $output_directory/$current_run/top-scores -fi diff --git a/SelectiveGenomeAmplificationUI b/SelectiveGenomeAmplificationUI deleted file mode 100755 index 98021a9..0000000 --- a/SelectiveGenomeAmplificationUI +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env python -foreground = "" -background = "" - -yes_no = {'Y': True, 'y': True,'n': False, 'N': False, 'yes': True, 'no': False, '': '' } - -variables = {} - -questions = [ - { 'question' : "Where would you like your output directory to be?", - 'default_str': 'current directory/foreground_background/', - 'variable': 'output_directory' }, - - {'question': "Where would you like to temporary files to be stored?", - 'default_str': '$output_directory/.tmp', - 'variable': "temp_directory" }, - - {'question': "Where would you like to count files to be stored?", - 'default_str': '$output_directory/.tmp', - 'variable': "counts_directory" }, - - { 'question': 'maximum mer size you would like to pick?', - 'default_str': '12', - 'variable': 'max_mer_range' }, - - { 'question': 'minimum mer size you would like to pick?', - 'default_str': '6', - 'variable': 'min_mer_range' }, - - { 'question': 'eliminate mers that appear less frequently on average than this number ?', - 'default_str': '50000', - 'variable': 'min_foreground_binding_average' }, - - { 'question': 'maximum size of mer combinations you want to search and select?', - 'default_str': '15', - 'variable': 'max_select' }, - - { 'question': 'maximum number of mers you want to use as possible primers?', - 'default_str': '35', - 'variable': 'max_check' }, - - {'question': 'enter mers to ignore? (space seperated)', - 'default_str': "None", - 'variable': 'ignore_mers'}, - - {'question': 'enter files to ignore all mers from? (space seperated)', - 'default_str': "None", - 'variable': 'ignore_all_mers_from_files'}, - - { 'question': 'maximum distance between mers in the final selection?', - 'default_str': "5000 bases", - 'variable': 'max_mer_distance' }, - - { 'question': 'maximum melting temperature for mers?', 'default_str': '30c', 'variable': 'max_melting_temp' }, - { 'question': 'minimum melting temperature for mers?', 'default_str': '0c', 'variable': 'min_melting_temp' }, - { 'question': 'maximum number of consecutively binding mers in hetero and homodimers?', 'default_str': '4', 'variable': 'max_consecutive_binding' }, - { 'question': 'what extra weight do you want for highgly binding primers? (0-1)', 'default_str': '0', 'variable': 'fg_weight' }, - { 'question': 'what extra weight do you want for sets with a higher number of primers? (0-1)', 'default_str': '0', 'variable': 'primer_weight'}, - { 'question': 'how many scored sets would you like in the top_scored_sets output file?', 'default_str':'10000', 'variable': 'output_top_nb'}, - { 'question': 'would you like to use a custom scoring function? see README.md for details', 'default_str':'', 'variable': 'score_func'} -] - -def bool_ask(ask_string, default): - ans = "" - - ask_string = ask_string + " (Y/N/Default=" + str(default) + ")" + ": " - - ans = raw_input(ask_string) - while ans not in yes_no.keys(): - ans = raw_input(ask_string) - if ans is '': - ans = default - - return yes_no[ans] - -def ask(question_dict): - question = question_dict['question'] - default_str = question_dict['default_str'] - - ask_string = question + " (Default=" + str(default_str) + ")" + ": " - - ans = raw_input(ask_string) - return ans - -def variables_to_string(variables): - ret = "" - for variable in variables: - if variables[variable] is not '': - ret = ret + variable + "=\"" + variables[variable] + "\" " - - ret = ret + "foreground=\"" + foreground + "\" " - ret = ret + "background=\"" + background + "\" " - return ret - -def run(variables_as_a_string): - - import subprocess - try: - subprocess.check_call(variables_as_a_string + " " + "SelectiveGenomeAmplification " + foreground + " " + background, shell=True) - except: - pass - - -if(bool_ask("would you like to input all your variables at once?", "n")): - variables = raw_input("Please paste in your variables (space delimited) and Run:") - run(variables) - - raw_input("Press Enter to exit...") - -else: - for q in questions: - if 'bool' in q: - res = ask_bool(q['question'], q['default']) - else: - res = ask(q) - - variables[q['variable']] = res - - foreground = raw_input("Input the path to your foreground file:") - background = raw_input("Input the path to your background file:") - - if(bool_ask("Would you like to output your inserted variables to a string you can later paste?", "y")): - print variables_to_string(variables) - - if(bool_ask("Run SelectiveGenomeAmplification?", "y")): - run(variables_to_string(variables)) diff --git a/SelectiveWholeGenomeAmplification b/SelectiveWholeGenomeAmplification new file mode 100755 index 0000000..e778cbd --- /dev/null +++ b/SelectiveWholeGenomeAmplification @@ -0,0 +1,343 @@ +#!/usr/bin/env bash + +set -e +# arguments: + +# check_non_empty +check_non_empty() { + if [[ ! -s $1 ]]; then + echo "Warning: no mers remain after the '$2' filter!" + echo "Exiting..." + exit 1 + fi +} +# check_mers filename +check_mers() { + + local fasta_file="$1" + local counts="$2" + local mer=0 + + echo " counting mers in $fasta_file" + + # remove the counts file so we can concatenate + if [[ -e "$counts"-counts ]]; then + echo " removing $counts-counts" + rm "$counts"-counts + fi + + # check each mer size and process if not already run + for (( mer = min_mer_range; mer <= max_mer_range; mer++)) ; do + if [[ ! -e "$counts"-counts-"$mer" ]]; then + echo " checking $mer mers for $fasta_file" + kmer_continuous_count -c -i "$fasta_file" -k "$mer" -l -n > "$counts"-counts-"$mer" || exit 1 + else + echo " $mer-mers already done for $fasta_file (assuming no change)" + fi + + # concatentate + cat "$counts"-counts-"$mer" >> "$counts"-counts + + done + +} + +all=run +# Parse in our arguments +if [[ -z "$foreground" ]] && [[ -z "$background" ]]; then + if (( $# < 2 )); then + echo "please supply two genomes, foreground and background" + exit 1 + fi; + + : ${foreground=$1} + : ${background=$2} + start=3 +else + start=1 +fi + +if (( $# > 2 )); then + unset all + + for i in "${@:$start}"; do + if [[ "$i" = "1" ]] || [[ "$i" = "count" ]]; then + step=1; step_mers=1 + fi + + if [[ "$i" = "2" ]] || [[ "$i" = "filter" ]]; then + step=1; step_filters=1 + fi + + if [[ "$i" = "3" ]] || [[ "$i" = "select" ]]; then + step=1; step_select=1 + fi + + if [[ "$i" = "4" ]] || [[ "$i" = "score" ]]; then + step=1; step_score=1 + fi + + if [[ $step ]] && [[ ! "$current_run" ]] && [[ ! $step_mers ]]; then + echo "Error: If you are going to step through your program, and aren't starting" \ + "at the first step, you better specify what previous run you want to use" \ + "as your base" + exit + fi + + done; +fi; + +if [[ -n "$step" ]] && [[ -z "$step_mers" ]] && [[ -z "$step_filters" ]] && [[ -z $step_select ]] && [[ -z "$step_score" ]]; then + echo "Error: you need to select at least one step to run." + exit +fi + +echo +echo "Planning on running these steps:" +for var in step_mers step_filters step_select step_score all; do + if [[ -n "${!var}" ]]; then + echo ' '$var + fi +done + +# output directory +: ${output_directory=$(basename "$foreground")_$(basename "$background")} + +# temp directory +: ${tmp_directory="$output_directory"/.tmp} + +# directory to store our counts and sorted counts +: ${counts_directory="$tmp_directory"} + +# range of mers, min and max +: ${min_mer_range=6} +: ${max_mer_range=12} + +# max mer distance, the distance between two mers in our selected outputs +: ${max_mer_distance=5000} + +# min/maximum kmer meling point +: ${max_melting_temp=30} +: ${min_melting_temp=0} + +# minimum average binding distance in the foreground +: ${min_foreground_binding_average=50000} + +# maximum mers to pick +: ${max_select=15} + +# maximum mers to check +: ${max_check=35} + +# mers to specifically IGNORE, space delimited +: ${ignore_mers=''} + +# IGNORE all mers that are in these files, space delimited +: ${ignore_all_mers_from_files} + +# maximum number of mers that are consecutively binding +: ${max_consecutive_binding=4} + +# fg_weight, how much to weight to give the higher bindnig primers +: ${fg_weight=0} + +# primer_weight, how much weight to give to sets with a higher number of primers. (between 0 and 1) +: ${primer_weight=0} + +# output_top_nb, How many scored sets would you like in the top_scored_sets output file (Default = 10000)? +: ${output_top_nb=10000} + +# score_func: A custom scoring function. disable by default. See README.md +: ${score_func="(nb_primers**primer_weight) * (fg_mean_dist * fg_std_dist) / bg_ratio"} + +# sort score by the minimum or maximum value. acceptable parameters are min or max. +: ${sort_by="min"} + +export ignore_mers +export min_mer_range +export max_mer_range + +export max_select + +export min_foreground_binding_average +export max_mer_distance + +export max_melting_temp +export min_melting_temp + +export fg_weight +export primer_weight + + +echo +# check foreground and background +if [[ ! -f "$foreground" ]]; then + echo "Error: could not open $foreground" + exit 1 +fi + +if [[ ! -f "$background" ]]; then + echo "Error: could not open $background" + exit 1 +fi + + +if [[ -n "$current_run" ]] && [[ ! -d "$output_directory/$current_run" ]]; then + echo -n "run $current_run was not found, it should be a folder here: " + echo "$output_directory/$current_run" + exit +fi + +if [[ "$sort_by" = "min" ]]; then + sort='' +elif [[ "$sort_by" = "max" ]]; then + sort="-r" +else + echo "Error: \$sort_by must either be set to max or min" + exit +fi + +num=1 +if [[ -z "$current_run" ]]; then + while [[ -d $output_directory/run_$num ]] ; do + let num++ + done + current_run=run_$num +fi + +fg_basename=$(basename "$foreground") +bg_basename=$(basename "$background") + +fg_counts=$counts_directory/$fg_basename-counts +bg_counts=$counts_directory/$bg_basename-counts + +final_fg_counts=$output_directory/$current_run/$fg_basename-filtered-counts +selected=$output_directory/$current_run/selected-mers + +ignore_mers_counts="$output_directory/$current_run/passes-filter/1-$fg_basename-ignore-mers" +ignore_all_mers_counts="$output_directory/$current_run/passes-filter/2-$fg_basename-ignore-all-mers" +average_binding="$output_directory/$current_run/passes-filter/3-$fg_basename-average-binding" +non_melting="$output_directory/$current_run/passes-filter/4-$fg_basename-non-melting" +consecutive_binding="$output_directory/$current_run/passes-filter/5-$fg_basename-consecutive-binding" + +# Make our output directory +if [[ ! -d "$output_directory" ]]; then + mkdir "$output_directory" +fi + +# Make our counts directory +if [[ ! -d "$counts_directory" ]]; then + mkdir "$counts_directory" +fi + +# Make our temporary directory +if [[ ! -d $tmp_directory ]]; then + mkdir "$tmp_directory" +fi + +# Make our current run directory +if [[ ! -d $output_directory/$current_run ]]; then + mkdir "$output_directory"/"$current_run" +fi + +# Make our filter directory +if [[ ! -d "$output_directory/$current_run/passes-filter" ]]; then + mkdir "$output_directory/$current_run/passes-filter" +fi + +echo "Outputting current run parameters" + for var in score_func ignore_mers ignore_all_mers_from_files min_mer_range max_mer_range max_check cpus max_consecutive_binding max_select min_foreground_binding_average max_mer_distance min_melting_temp max_melting_temp foreground background; do + echo "$var" "${!var}" >> "$output_directory"/"$current_run"/parameters +done; + +echo "current run is: $current_run" +echo + +if [[ -n "$step_mers" ]] || [[ -n "$all" ]]; then + # to continue this project you need to use the current run. + + echo "Step 1: counting primers in foreground and background" + check_mers "$foreground" "$counts_directory/$(basename "$foreground")" + check_mers "$background" "$counts_directory/$(basename "$background")" +fi + +if [[ -n "$step_filters" ]] || [[ -n "$all" ]]; then + if [[ ! -f "$fg_counts" ]]; then + echo "Error: you need to run your count step before filtration" + exit + fi + echo "Step 2: filtering mers" + + + cp "$fg_counts" "$ignore_mers_counts" + # remove ignored mers + if [[ "$ignore_mers" ]]; then + echo " filtering explicitly ignored mers: $ignore_mers" + for mer in $ignore_mers; do + sed -i '/^'"$mer"'\t/d' "$ignore_mers_counts" + done + fi + check_non_empty "$ignore_mers_counts" "ignore mers" + + cp "$ignore_mers_counts" "$ignore_all_mers_counts" + # remove ignored mers + if [[ "$ignore_all_mers_from_files" ]]; then + for ignore_file in $ignore_all_mers_from_files; do + + if [[ -f "$ignore_file" ]]; then + echo " filtering ignored mers from: $ignore_file" + + counts="$counts_directory/ignore-"$(basename "$ignore_file") + check_mers "$ignore_file" "$counts" + + while read mer_line; do + mer=$(echo "$mer_line" | sed -e 's/\t.*//g') + sed -i '/^'"$mer"'\t/d' "$ignore_all_mers_counts" + done < "$counts-counts" + else + echo " $ignore_file not found, continuing..." + fi + + done + fi + check_non_empty "$ignore_all_mers_counts" "ignore all mers from file " + + echo " filtering mers that appear less frequently than the average binding site distance ($min_foreground_binding_average)" + filter_average_binding.py "$ignore_all_mers_counts" "$min_foreground_binding_average" < "$fg_counts" > "$average_binding" || exit 1 + check_non_empty "$average_binding" "average binding" + + echo " filtering mers that are not in the melting range ($min_melting_temp-$max_melting_temp)" + filter_melting_temperature.py "$min_melting_temp" "$max_melting_temp" < "$average_binding" > "$non_melting" || exit 1 + check_non_empty "$non_melting" "melting temperature" + + echo " filtering mers that have more consecutive binding mers than allowed ($max_consecutive_binding)" + filter_max_consecutive_binding.py "$max_consecutive_binding" < "$non_melting" > "$consecutive_binding" || exit 1 + check_non_empty "$consecutive_binding" "consecutive binding" + + cp $consecutive_binding $final_fg_counts +fi + +if [[ -n "$step_select" ]] || [[ -n "$all" ]]; then + if [[ ! -f "$final_fg_counts" ]]; then + echo "Error: you need to run your filtration step before selection" + exit + fi + echo "Step 3: Scoring mer selectivity" + select_mers.py "$final_fg_counts" "$bg_counts" > "$selected" || exit 1 +fi + +if [[ -n "$step_score" ]] || [[ -n "$all" ]]; then + if [[ ! -f "$selected" ]]; then + echo "Error: you need to run your selection step before you run your scoring" + exit + fi + + echo "Step 4: Scoring top mers based on selectivity" + score_wrapper.sh "$selected" "$foreground" "$background" "$output_directory/$current_run/all-scores" || exit 1 + + # output our sorted scores + echo "sorting and outputting top $output_top_nb scores" + echo "top scores output file: $output_directory/$current_run/top-scores" + head -n 3 $output_directory/$current_run/all-scores > $output_directory/$current_run/top-scores + tail -n +4 $output_directory/$current_run/all-scores | sort $sort -t $'\t' -nk 3 | head -n $output_top_nb >> $output_directory/$current_run/top-scores +fi diff --git a/SelectiveWholeGenomeAmplificationUI b/SelectiveWholeGenomeAmplificationUI new file mode 100755 index 0000000..decf5a1 --- /dev/null +++ b/SelectiveWholeGenomeAmplificationUI @@ -0,0 +1,126 @@ +#!/usr/bin/env python +foreground = "" +background = "" + +yes_no = {'Y': True, 'y': True,'n': False, 'N': False, 'yes': True, 'no': False, '': '' } + +variables = {} + +questions = [ + { 'question' : "Where would you like your output directory to be?", + 'default_str': 'current directory/foreground_background/', + 'variable': 'output_directory' }, + + {'question': "Where would you like to temporary files to be stored?", + 'default_str': '$output_directory/.tmp', + 'variable': "temp_directory" }, + + {'question': "Where would you like to count files to be stored?", + 'default_str': '$output_directory/.tmp', + 'variable': "counts_directory" }, + + { 'question': 'maximum mer size you would like to pick?', + 'default_str': '12', + 'variable': 'max_mer_range' }, + + { 'question': 'minimum mer size you would like to pick?', + 'default_str': '6', + 'variable': 'min_mer_range' }, + + { 'question': 'eliminate mers that appear less frequently on average than this number ?', + 'default_str': '50000', + 'variable': 'min_foreground_binding_average' }, + + { 'question': 'maximum size of mer combinations you want to search and select?', + 'default_str': '15', + 'variable': 'max_select' }, + + { 'question': 'maximum number of mers you want to use as possible primers?', + 'default_str': '35', + 'variable': 'max_check' }, + + {'question': 'enter mers to ignore? (space seperated)', + 'default_str': "None", + 'variable': 'ignore_mers'}, + + {'question': 'enter files to ignore all mers from? (space seperated)', + 'default_str': "None", + 'variable': 'ignore_all_mers_from_files'}, + + { 'question': 'maximum distance between mers in the final selection?', + 'default_str': "5000 bases", + 'variable': 'max_mer_distance' }, + + { 'question': 'maximum melting temperature for mers?', 'default_str': '30c', 'variable': 'max_melting_temp' }, + { 'question': 'minimum melting temperature for mers?', 'default_str': '0c', 'variable': 'min_melting_temp' }, + { 'question': 'maximum number of consecutively binding mers in hetero and homodimers?', 'default_str': '4', 'variable': 'max_consecutive_binding' }, + { 'question': 'what extra weight do you want for highgly binding primers? (0-1)', 'default_str': '0', 'variable': 'fg_weight' }, + { 'question': 'what extra weight do you want for sets with a higher number of primers? (0-1)', 'default_str': '0', 'variable': 'primer_weight'}, + { 'question': 'how many scored sets would you like in the top_scored_sets output file?', 'default_str':'10000', 'variable': 'output_top_nb'}, + { 'question': 'would you like to use a custom scoring function? see README.md for details', 'default_str':'', 'variable': 'score_func'} +] + +def bool_ask(ask_string, default): + ans = "" + + ask_string = ask_string + " (Y/N/Default=" + str(default) + ")" + ": " + + ans = raw_input(ask_string) + while ans not in yes_no.keys(): + ans = raw_input(ask_string) + if ans is '': + ans = default + + return yes_no[ans] + +def ask(question_dict): + question = question_dict['question'] + default_str = question_dict['default_str'] + + ask_string = question + " (Default=" + str(default_str) + ")" + ": " + + ans = raw_input(ask_string) + return ans + +def variables_to_string(variables): + ret = "" + for variable in variables: + if variables[variable] is not '': + ret = ret + variable + "=\"" + variables[variable] + "\" " + + ret = ret + "foreground=\"" + foreground + "\" " + ret = ret + "background=\"" + background + "\" " + return ret + +def run(variables_as_a_string): + + import subprocess + try: + subprocess.check_call(variables_as_a_string + " " + "SelectiveWholeGenomeAmplification " + foreground + " " + background, shell=True) + except: + pass + + +if(bool_ask("would you like to input all your variables at once?", "n")): + variables = raw_input("Please paste in your variables (space delimited) and Run:") + run(variables) + + raw_input("Press Enter to exit...") + +else: + for q in questions: + if 'bool' in q: + res = ask_bool(q['question'], q['default']) + else: + res = ask(q) + + variables[q['variable']] = res + + foreground = raw_input("Input the path to your foreground file:") + background = raw_input("Input the path to your background file:") + + if(bool_ask("Would you like to output your inserted variables to a string you can later paste?", "y")): + print variables_to_string(variables) + + if(bool_ask("Run SelectiveWholeGenomeAmplification?", "y")): + run(variables_to_string(variables)) -- cgit v1.2.3