aboutsummaryrefslogtreecommitdiff
path: root/src/filter_max_consecutive_binding.py
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2014-01-29 11:53:30 -0500
committerCalvin Morrison <mutantturkey@gmail.com>2014-01-29 11:53:30 -0500
commit94d04a1e503121a98b403f882c18a4f0799267d7 (patch)
tree0d2cf5586b31bddc9bca99b4b07ebb4b993f1130 /src/filter_max_consecutive_binding.py
parent73531da5cdf33f9bde7d4db0e4ce96f1e41f581b (diff)
add filtering based on consecutive mer lengths
Diffstat (limited to 'src/filter_max_consecutive_binding.py')
-rwxr-xr-xsrc/filter_max_consecutive_binding.py72
1 files changed, 72 insertions, 0 deletions
diff --git a/src/filter_max_consecutive_binding.py b/src/filter_max_consecutive_binding.py
new file mode 100755
index 0000000..daebee4
--- /dev/null
+++ b/src/filter_max_consecutive_binding.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+import sys, os
+
# Pairing partner for each base.  '_' is a padding placeholder that maps to
# False so it can never equal (and therefore never pair with) a base.
binding = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C', '_': False}


def max_consecutive_binding(mer1, mer2):
    """Return the longest run of consecutive base pairs between two mers.

    ``mer2`` is reversed (the strands pair antiparallel) and slid along
    ``mer1`` through every possible overlap, including the positions where
    either mer overhangs the other's end.  For each alignment the longest
    unbroken run of positions where the base of ``mer1`` pairs with the
    facing base of ``mer2`` (per the ``binding`` table) is measured, and the
    overall maximum is returned.

    The result does not depend on argument order.  Characters that are not
    keys of ``binding`` never pair with anything.  Either argument being
    empty yields 0.

    mer1, mer2 -- strings of bases ('A', 'C', 'G', 'T').
    """
    # What each position of mer1 needs to see across from it in order to bind
    # (None for characters the table does not know).
    wanted = [binding.get(base) for base in mer1]
    # Reverse mer2 so that index x of `partner` faces index shift + x of mer1.
    partner = mer2[::-1]

    max_bind = 0
    # A negative shift hangs `partner` off the left end of mer1; a positive
    # shift hangs it off the right end.  Covering the whole range is what
    # makes the answer independent of argument order and of relative length.
    for shift in range(1 - len(partner), len(wanted)):
        first = max(0, -shift)
        last = min(len(partner), len(wanted) - shift)
        consecutive = 0
        for x in range(first, last):
            if wanted[shift + x] == partner[x]:
                consecutive += 1
                if consecutive > max_bind:
                    max_bind = consecutive
            else:
                consecutive = 0

    return max_bind
+
def test():
    """Print a pass/fail table for max_consecutive_binding on known pairs.

    Writes a tab-separated header followed by one row per case to stdout:
    whether the result matched, the two mers, the computed result and the
    expected result.  Returns nothing.
    """
    # (mer 1, mer 2, correct answer)
    arr = [
        ("ATATAT", "TATATA", 5),
        ("ACAGGGAT", "ATATATAT", 2),
        ("CATATATAT", "ATATATATATAT", 8),
        ("ATATATATATAT", "ATATATATAT", 10),
        ("ATATAT", "TATAT", 5),
        ("AACGATACCATG", "GGATCATACGTA", 3),
        ("CGT", "ACG", 3),
        ("ACG", "CGT", 3),
        ("CACC", "GGTGT", 4),
        ("GGTGT", "CACC", 4),
    ]

    # Single-argument print(...) emits the same text under Python 2 and 3.
    print('pass\tmer1\tmer2\tres\tcorr')
    for mer1, mer2, expected in arr:
        ans = max_consecutive_binding(mer1, mer2)
        response = [str(ans == expected), mer1, mer2, str(ans), str(expected)]
        print('\t'.join(response))
+
def main():
    """Filter mers on stdin by how strongly each one binds to itself.

    The first command-line argument is an integer cutoff.  Each input line
    whose first whitespace-separated field is a mer with
    max_consecutive_binding(mer, mer) below the cutoff is copied to stdout
    unchanged; all other lines are dropped.  Blank lines are skipped.

    Exits with an error message on stderr and a non-zero status when the
    cutoff argument is missing.  Raises ValueError if the cutoff is not an
    integer.
    """
    if len(sys.argv) < 2:
        # sys.exit(str) writes the message to stderr and exits with status 1,
        # keeping the diagnostic out of the filtered output stream.
        sys.exit("cutoff is expected as an argument")
    cutoff = int(sys.argv[1])

    for line in sys.stdin:
        fields = line.split()
        if not fields:
            # Blank line: there is no mer to score.
            continue
        mer = fields[0]
        if max_consecutive_binding(mer, mer) < cutoff:
            sys.stdout.write(line)


if __name__ == "__main__":
    sys.exit(main())