aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md5
-rwxr-xr-xSelectiveGenomeAmplification13
-rwxr-xr-xSelectiveGenomeAmplificationUI7
-rwxr-xr-xsrc/score_mers.py6
4 files changed, 21 insertions, 10 deletions
diff --git a/README.md b/README.md
index 665e8b7..9ec3d47 100644
--- a/README.md
+++ b/README.md
@@ -120,6 +120,7 @@ background | Not Enabled | path of background file
max\_consecutive\_binding | 4 | The maximum number of consecutive binding nucleotides in homodimers and heterodimers
fg\_weight | 0 | How much extra weight to give higher frequency mers in fg. see "equations" (between 0 and 1)
primer\_weight | 0 | How much extra weight to give to sets with a higher number of primers. (between 0 and 1)
+output\_top\_nb | 10000 | How many scores do you want to output in your sorted output file?
## Equations
@@ -169,7 +170,9 @@ The file structure outputted by default is this:
   ├── $foreground-filtered-counts # final filtered mers used for select_mers.py
   ├── parameters # parameters used in the run
   ├── selected-mers # final filtered mers used for select_mers.py
-    └── scores-output # file outputted by score_mers.py
+    ├── selected-mers # final filtered mers used for select_mers.py
+    ├── all-scores # file outputted by score_mers.py (all the scores generated)
+    └── top-scores # the sorted top $output_top_nb scores from all-scores
### select\_mers.py output
diff --git a/SelectiveGenomeAmplification b/SelectiveGenomeAmplification
index 44fdb90..9793293 100755
--- a/SelectiveGenomeAmplification
+++ b/SelectiveGenomeAmplification
@@ -144,6 +144,9 @@ done
# primer_weight, how much weight to give to sets with a higher number of primers. (between 0 and 1)
: ${primer_weight=0}
+# output_top_nb, how many scored sets to write to the top-scores output file (default = 10000)
+: ${output_top_nb=10000}
+
export ignore_mers
export min_mer_range
export max_mer_range
@@ -315,5 +318,13 @@ if [[ -n "$step_score" ]] || [[ -n "$all" ]]; then
fi
echo "Step 4: Scoring top mers based on selectivity"
- score_wrapper.sh "$selected" "$foreground" "$background" "$output_directory/$current_run/scores-output" || exit 1
+ score_wrapper.sh "$selected" "$foreground" "$background" "$output_directory/$current_run/all-scores" || exit 1
+
+ # output our sorted scores: copy the header row, then append the top $output_top_nb data rows sorted numerically (descending) on tab-separated column 3; sort reads stdin only, so the header is never sorted into the data
+ echo "sorting and outputting top $output_top_nb scores"
+ echo "top scores output file: $output_directory/$current_run/top-scores"
+ head -n 1 "$output_directory/$current_run/all-scores" > "$output_directory/$current_run/top-scores"
+ tail -n +2 "$output_directory/$current_run/all-scores" | sort -r -n -t $'\t' -k 3 | head -n "$output_top_nb" >> "$output_directory/$current_run/top-scores"
fi
+
+
diff --git a/SelectiveGenomeAmplificationUI b/SelectiveGenomeAmplificationUI
index edd56c8..297471b 100755
--- a/SelectiveGenomeAmplificationUI
+++ b/SelectiveGenomeAmplificationUI
@@ -39,11 +39,11 @@ questions = [
'default_str': '35',
'variable': 'max_check' },
- {'question': 'Enter mers to ignore? (space seperated)',
+ {'question': 'enter mers to ignore? (space seperated)',
'default_str': "None",
'variable': 'ignore_mers'},
- {'question': 'Enter files to ignore all mers from? (space seperated)',
+ {'question': 'enter files to ignore all mers from? (space seperated)',
'default_str': "None",
'variable': 'ignore_all_mers_from_files'},
@@ -55,7 +55,8 @@ questions = [
{ 'question': 'minimum melting temperature for mers?', 'default_str': '0c', 'variable': 'min_melting_temp' },
{ 'question': 'maximum number of consecutively binding mers in hetero and homodimers?', 'default_str': '4', 'variable': 'max_consecutive_binding' },
{ 'question': 'what extra weight do you want for highgly binding primers? (0-1)', 'default_str': '0', 'variable': 'fg_weight' },
- { 'question': 'what extra weight do you want for sets with a higher number of primers? (0-1)', 'default_str': '0', 'variable': 'primer_weight'}
+ { 'question': 'what extra weight do you want for sets with a higher number of primers? (0-1)', 'default_str': '0', 'variable': 'primer_weight'},
+ { 'question': 'How many scored sets would you like in the top_scored_sets output file?', 'default_str':'10000', 'variable': 'output_top_nb'}
]
def bool_ask(ask_string, default):
diff --git a/src/score_mers.py b/src/score_mers.py
index e3d2c58..13050e7 100755
--- a/src/score_mers.py
+++ b/src/score_mers.py
@@ -169,7 +169,6 @@ def load_heterodimer_dic(selected_mers):
def check_feasible(selected):
total = 0
for mer in selected:
- print mer, len(fg_mers[mer]), len(selected)
total += len(fg_mers[mer])
if (fg_genome_length / (total + 1 )) > max_mer_distance:
print "even if we select all top ", max_select,
@@ -444,7 +443,6 @@ def main():
fg_mers[mer] = []
bg_mers[mer] = []
- print fg_mers.keys()
print "calculating heterodimer distances"
load_heterodimer_dic(fg_mers.keys())
@@ -454,14 +452,12 @@ def main():
print "Populating background locations"
populate_locations(fg_mers.keys(), bg_mers, args.background)
- print fg_mers
-
for mer in bg_mers:
bg_mers[mer] = len(bg_mers[mer])
score_all_combinations(fg_mers.keys())
- print "output_file:", output_file
+ print "output file:", output_file
if __name__ == "__main__":
sys.exit(main())