From 5a5d89d3881cb0d9014168845d96e5eebeb2bd42 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Mon, 31 Mar 2014 13:27:06 -0400 Subject: Add feature to have a top-scores file. closes issue #21 --- README.md | 5 ++++- SelectiveGenomeAmplification | 13 ++++++++++++- SelectiveGenomeAmplificationUI | 7 ++++--- src/score_mers.py | 6 +----- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 665e8b7..9ec3d47 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ background | Not Enabled | path of background file max\_consecutive\_binding | 4 | The maxium number of consecutive binding nucleotides in homodimer and heterodimers fg\_weight | 0 | How much extra weight to give higher frequency mers in fg. see "equations" (between 0 and 1) primer\_weight | 0 | How much extra weight to give to sets with a higher number of priemrs. (between 0 and 1) +output\_top\_nb | 10000 | How many scores do you want to output in your sorted output file? ## Equations @@ -169,7 +170,9 @@ The file structure outputted by default is this:    ├── $foreground-filtered-counts # final filtered mers used for select_mers.py    ├── parameters # parameters used in the run    ├── selected-mers # final filtered mers used for select_mers.py -    └── scores-output # file outputted by score_mers.py +    ├── selected-mers # final filtered mers used for select_mers.py +    ├── all-scores #file outputted by score_mers.py (all the scores generated) +    └── top-scores # the sorted top $output_top_nb scores from all-scores ### select\_mers.py output diff --git a/SelectiveGenomeAmplification b/SelectiveGenomeAmplification index 44fdb90..9793293 100755 --- a/SelectiveGenomeAmplification +++ b/SelectiveGenomeAmplification @@ -144,6 +144,9 @@ done # primer_weight, how much weight to give to sets with a higher number of primers. (between 0 and 1) : ${primer_weight=0} +# output_top_nb, How many scored sets would you like in the top_scored_sets output file (Default = 10000)? +: ${output_top_nb=10000} + export ignore_mers export min_mer_range export max_mer_range @@ -315,5 +318,13 @@ if [[ -n "$step_score" ]] || [[ -n "$all" ]]; then fi echo "Step 4: Scoring top mers based on selectivity" - score_wrapper.sh "$selected" "$foreground" "$background" "$output_directory/$current_run/scores-output" || exit 1 + score_wrapper.sh "$selected" "$foreground" "$background" "$output_directory/$current_run/all-scores" || exit 1 + + # output our sorted scores + echo "sorting and outputting top $output_top_nb scores" + echo "top scores output file: $output_directory/$current_run/top-scores" + head -n 1 $output_directory/$current_run/all-scores > $output_directory/$current_run/top-scores + tail -n +2 $output_directory/$current_run/all-scores | sort -r $output_directory/$current_run/all-scores -t $'\t' -nk 3 | head -n $output_top_nb >> $output_directory/$current_run/top-scores fi + + diff --git a/SelectiveGenomeAmplificationUI b/SelectiveGenomeAmplificationUI index edd56c8..297471b 100755 --- a/SelectiveGenomeAmplificationUI +++ b/SelectiveGenomeAmplificationUI @@ -39,11 +39,11 @@ questions = [ 'default_str': '35', 'variable': 'max_check' }, - {'question': 'Enter mers to ignore? (space seperated)', + {'question': 'enter mers to ignore? (space seperated)', 'default_str': "None", 'variable': 'ignore_mers'}, - {'question': 'Enter files to ignore all mers from? (space seperated)', + {'question': 'enter files to ignore all mers from? (space seperated)', 'default_str': "None", 'variable': 'ignore_all_mers_from_files'}, @@ -55,7 +55,8 @@ questions = [ { 'question': 'minimum melting temperature for mers?', 'default_str': '0c', 'variable': 'min_melting_temp' }, { 'question': 'maximum number of consecutively binding mers in hetero and homodimers?', 'default_str': '4', 'variable': 'max_consecutive_binding' }, { 'question': 'what extra weight do you want for highgly binding primers? (0-1)', 'default_str': '0', 'variable': 'fg_weight' }, - { 'question': 'what extra weight do you want for sets with a higher number of primers? (0-1)', 'default_str': '0', 'variable': 'primer_weight'} + { 'question': 'what extra weight do you want for sets with a higher number of primers? (0-1)', 'default_str': '0', 'variable': 'primer_weight'}, + { 'question': 'How many scored sets would you like in the top_scored_sets output file?', 'default_str':'10000', 'variable': 'output_top_nb'} ] def bool_ask(ask_string, default): diff --git a/src/score_mers.py b/src/score_mers.py index e3d2c58..13050e7 100755 --- a/src/score_mers.py +++ b/src/score_mers.py @@ -169,7 +169,6 @@ def load_heterodimer_dic(selected_mers): def check_feasible(selected): total = 0 for mer in selected: - print mer, len(fg_mers[mer]), len(selected) total += len(fg_mers[mer]) if (fg_genome_length / (total + 1 )) > max_mer_distance: print "even if we select all top ", max_select, @@ -444,7 +443,6 @@ def main(): fg_mers[mer] = [] bg_mers[mer] = [] - print fg_mers.keys() print "calculating heterodimer distances" load_heterodimer_dic(fg_mers.keys()) @@ -454,14 +452,12 @@ def main(): print "Populating background locations" populate_locations(fg_mers.keys(), bg_mers, args.background) - print fg_mers - for mer in bg_mers: bg_mers[mer] = len(bg_mers[mer]) score_all_combinations(fg_mers.keys()) - print "output_file:", output_file + print "output file:", output_file if __name__ == "__main__": sys.exit(main()) -- cgit v1.2.3