about | summary | refs | log | tree | commit | diff
path: root/src/score_mers.py
diff options
context:
space:
mode:
author Calvin Morrison <mutantturkey@gmail.com> 2014-03-27 15:12:45 -0400
committer Calvin Morrison <mutantturkey@gmail.com> 2014-03-27 15:12:45 -0400
commit 550b0e91a1c39dfec223f1959ec08d785927b2fc (patch)
tree 0790a5689eb001f3f03986639ea8e7131f228b83 /src/score_mers.py
parent f5ac0df9d42de657a64e15d3f0cfa2198a1921c6 (diff)
move stuff around
Diffstat (limited to 'src/score_mers.py')
-rwxr-xr-x src/score_mers.py 104
1 files changed, 52 insertions, 52 deletions
diff --git a/src/score_mers.py b/src/score_mers.py
index 5c5416a..438a8e9 100755
--- a/src/score_mers.py
+++ b/src/score_mers.py
@@ -18,6 +18,8 @@ import pdb
fg_mers = {}
bg_mers = {}
+heterodimer_dic = {}
+
seq_ends = []
fg_genome_length = 0
@@ -32,8 +34,6 @@ max_select = int(os.environ.get("max_select", 15))
max_check = int(os.environ.get("max_check", 35))
max_mer_distance = int(os.environ.get("max_mer_distance", 5000))
max_consecutive_binding = int(os.environ.get("max_consecutive_binding", 4))
-
-
primer_weight = float(os.environ.get("primer_weight", 0))
def get_max_consecutive_binding(mer1, mer2):
@@ -116,6 +116,56 @@ def populate_locations(selected_mers, mer_dic, input_fn):
merlist_fh.close()
+def load_end_points(fn):
+ ''' get all the points of the end of each sequence in a sample '''
+
+ end_points = [0]
+
+ cmd = "sequence_end_points < " + fn
+
+ if debug:
+ print "loading sequence end points"
+ print "executing: " + cmd
+
+ points_fh = Popen(cmd, stdout=PIPE, shell=True)
+
+ for line in points_fh.stdout:
+ end_points.append(int(line))
+
+ return end_points
+
+def get_length(fn):
+ ''' get length of a genome ( number of base pairs )'''
+
+ cmd = 'grep "^>" ' + fn + " -v | tr -d '\\n' | wc -c"
+
+ if debug:
+ print "loading sequence end points"
+ print "executing: " + cmd
+ points_fh = Popen(cmd, stdout=PIPE, shell=True)
+
+ length = points_fh.stdout.readline()
+
+ length = int(length)
+
+ return length
+
+def load_heterodimer_dic(selected_mers):
+ '''
+ Generate a heterodimer dict which contains every possible combination of
+ selected mers, so later we can check each combination without re-running the
+ max_consecutive_binding function.
+
+ The stored values are Booleans, True if the result is larger than acceptable.
+
+ '''
+ for (mer1, mer2) in combinations(selected_mers, 2):
+ res = get_max_consecutive_binding(mer1, mer2)
+ heterodimer_dic[(mer1, mer2)] = res > max_consecutive_binding
+ heterodimer_dic[(mer2, mer1)] = res > max_consecutive_binding
+ # print res, heterodimer_dic[(mer1, mer2)]
+
+
def check_feasible(selected):
total = 0
for mer in selected:
@@ -221,7 +271,6 @@ def score_all_combinations(selected):
fh.write("NO RESULTS FOUND\n")
-heterodimer_dic = {}
def score(combination):
# input is a string of mers like
# ['ACCAA', 'ACCCGA', 'ACGTATA']
@@ -280,55 +329,6 @@ def score(combination):
return [combination, mer_score, fg_mean_dist, fg_std_dist, bg_ratio]
-def load_end_points(fn):
- ''' get all the points of the end of each sequence in a sample '''
-
- end_points = [0]
-
- cmd = "sequence_end_points < " + fn
-
- if debug:
- print "loading sequence end points"
- print "executing: " + cmd
-
- points_fh = Popen(cmd, stdout=PIPE, shell=True)
-
- for line in points_fh.stdout:
- end_points.append(int(line))
-
- return end_points
-
-def get_length(fn):
- ''' get length of a genome ( number of base pairs )'''
-
- cmd = 'grep "^>" ' + fn + " -v | tr -d '\\n' | wc -c"
-
- if debug:
- print "loading sequence end points"
- print "executing: " + cmd
- points_fh = Popen(cmd, stdout=PIPE, shell=True)
-
- length = points_fh.stdout.readline()
-
- length = int(length)
-
- return length
-
-def load_heterodimer_dic(selected_mers):
- '''
- Generate a heterodimer dict which contains every possible combination of
- selected mers, so later we can check each combination without re-running the
- max_consecutive_binding function.
-
- The stored values are Booleans, True if the result is larger than acceptable.
-
- '''
- for (mer1, mer2) in combinations(selected_mers, 2):
- res = get_max_consecutive_binding(mer1, mer2)
- heterodimer_dic[(mer1, mer2)] = res > max_consecutive_binding
- heterodimer_dic[(mer2, mer1)] = res > max_consecutive_binding
- # print res, heterodimer_dic[(mer1, mer2)]
-
def main():
'''
Basic worflow: