From a62572ad5ccbc1e968a5164daaaf690e2881062d Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Tue, 25 Mar 2014 17:48:31 -0400 Subject: update selectivity equation, and update readme --- README.md | 51 +++++++++++++++++++++++++++++--------------- SelectiveGenomeAmplification | 5 +++++ src/select_mers.py | 4 +++- 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 5163087..4f4e370 100644 --- a/README.md +++ b/README.md @@ -31,21 +31,38 @@ For user customizable variables: range of mers, min and max -C | variable | default | notes -:---- | :---- | :---- | ---- | :---- -Y | min\_mer\_range | 6 | minimum mer size to use -Y | max\_mer\_range | 12 | maximum mer size to use -Y | max\_mer\_distance | 5000 | maximum distance between mers in foreground -N | output\_directory | $PWD/$foreground\_$background/ | ex. if fg is Bacillus.fasta and bg is HumanGenome.fasta then folder would be $PWD/Bacillus.fasta\_HumanGenome\_output.fasta/ -Y | counts\_directory | $output\_directory/.tmp | directory for counts directory -Y | tmp\_directory=$output\_directory/.tmp | temporary files directory -Y | max\_melting\_temp | 30° | maximum melting temp of mers -Y | min\_melting\_temp | 0° | minimum melting temp of mers -Y | min\_foreground\_binding\_average | 50000 | elminate mers that appear less frequently than the average (length of foreground / # of occurances) -Y | max\_select | 15 | maximum number of mers to pick -Y | max\_check | 35 | maximum number of mers to select (check the top #) -Y | ignore\_mers | Not Enabled | mers to explicitly ignore, space seperated ex. 
ignore\_mers="ACAGTA ACCATAA ATATATAT" -Y | foreground | Not Enabled | path of foreground file -Y | background | Not Enabled | path of background file -Y | max\_consecutive\_binding | 4 | The maxium number of consecutive binding nucleotides in homodimer and heterodimers +variable | default | notes +:---- | :---- | :---- +min\_mer\_range | 6 | minimum mer size to use +max\_mer\_range | 12 | maximum mer size to use +max\_mer\_distance | 5000 | maximum distance between mers in foreground +output\_directory | $PWD/$foreground\_$background/ | ex. if fg is Bacillus.fasta and bg is HumanGenome.fasta then folder would be $PWD/Bacillus.fasta\_HumanGenome\_output.fasta/ +counts\_directory | $output\_directory/.tmp | directory for counts directory +tmp\_directory | $output\_directory/.tmp | temporary files directory +max\_melting\_temp | 30° | maximum melting temp of mers +min\_melting\_temp | 0° | minimum melting temp of mers +min\_foreground\_binding\_average | 50000 | eliminate mers that appear less frequently than the average (length of foreground / # of occurrences) +max\_select | 15 | maximum number of mers to pick +max\_check | 35 | maximum number of mers to select (check the top #) +ignore\_mers | Not Enabled | mers to explicitly ignore, space separated ex. ignore\_mers="ACAGTA ACCATAA ATATATAT" +foreground | Not Enabled | path of foreground file +background | Not Enabled | path of background file +max\_consecutive\_binding | 4 | The maximum number of consecutive binding nucleotides in homodimer and heterodimers +fg\_weight | 0 | How much extra weight to give higher frequency mers in fg. See "Equations" +## Equations + +Here's what we are using to determine our scoring and selectivity. + +### Selectivity + +Our selectivity is what we use to determine which top $max\_check mers are checked later +on in our scoring function. Currently we use this formula: + +By default our fg\_weight is zero. 
This gives no extra weight to more +frequently occurring mers, but can be set higher with the fg\_weight +environment variable if you wish to do so. + + hit = abundance of primer X (ex. 'ATGTA') in the foreground or background + + (foreground hit / background hit) * (foreground hit ^ fg_weight) diff --git a/SelectiveGenomeAmplification b/SelectiveGenomeAmplification index b66d06c..b875fe0 100755 --- a/SelectiveGenomeAmplification +++ b/SelectiveGenomeAmplification @@ -56,6 +56,9 @@ fi # maximum number of mers that are consecutively binding : ${max_consecutive_binding=4} +# fg_weight, how much weight to give the higher binding primers +: ${fg_weight=0} + export ignore_mers export min_mer_range export max_mer_range @@ -68,6 +71,8 @@ export max_mer_distance export max_melting_temp export min_melting_temp +export fg_weight + # Make our output directory if [ ! -d $output_directory ]; then diff --git a/src/select_mers.py b/src/select_mers.py index 5bd6877..3a24ad4 100755 --- a/src/select_mers.py +++ b/src/select_mers.py @@ -5,6 +5,8 @@ import os fg_mers = {} bg_mers = {} +fg_weight = int(os.environ.get("fg_weight", 0)) + if(len(sys.argv) == 3): fg_count_fn = sys.argv[1] bg_count_fn = sys.argv[2] @@ -32,7 +34,7 @@ def select_mers(fg_mers, bg_mers): fg_arr = np.array(fg_arr, dtype='f'); bg_arr = np.array(bg_arr, dtype='f'); - selectivity = (fg_arr / bg_arr) + selectivity = (fg_arr / bg_arr) * (fg_arr**fg_weight) arr = [(mers[i], fg_arr[i], bg_arr[i], selectivity[i]) for i in range(len(mers))] -- cgit v1.2.1