def load_end_points(fn):
    '''
    Return the sequence end points reported for the fasta file `fn`.

    Runs the external `sequence_end_points` program with the contents of `fn`
    on its standard input and parses every whitespace-separated token of its
    output as an integer (the program prints one number per line).

    fn -- path of the fasta file to scan.

    Returns a list of ints in the order the program printed them.

    Raises IOError/OSError if `fn` cannot be opened or the program cannot be
    started, RuntimeError if the program exits with a non-zero status and
    ValueError if its output contains a non-integer token.
    '''
    # Hand the file to the child as stdin instead of building a shell
    # redirection string: paths containing spaces or shell metacharacters
    # then work and cannot be interpreted as shell syntax.
    with open(fn, 'rb') as fasta_fh:
        proc = Popen(["sequence_end_points"], stdin=fasta_fh, stdout=PIPE)
        # communicate() drains stdout, closes the pipe and reaps the child.
        output, _ = proc.communicate()

    # The tool signals "not a fasta file" / "could not read" via its exit
    # status; surface that instead of silently returning no end points.
    if proc.returncode != 0:
        raise RuntimeError("sequence_end_points failed on " + fn +
                           " (exit status " + str(proc.returncode) + ")")

    # split() with no argument ignores blank lines and trailing newlines.
    return [int(token) for token in output.split()]
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Read a fasta stream from stdin and print the running end point of every
 * sequence, one number per line.
 *
 * seq_length is never reset, so each printed value is the cumulative count
 * of sequence characters seen up to the end of that record; the last record's
 * end point is printed after the input is exhausted.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if stdin cannot be read or
 * the input does not start with '>'.
 */
int main() {

	size_t len = 0;
	size_t i = 0;

	char buffer[4096];
	bool header = false;

	unsigned long long seq_length = 0;

	len = fread(buffer, 1, 1, stdin);

	/* errno is not a reliable fread() status: ask the stream itself */
	if(ferror(stdin)) {
		fprintf(stderr, "could not read file\n");
		return EXIT_FAILURE;
	}

	/* len == 0 means empty input; buffer[0] would be uninitialised */
	if(len != 1 || buffer[0] != '>') {
		fprintf(stderr, "this does not look like a fasta file\n");
		return EXIT_FAILURE;
	}

	/* the leading '>' has been consumed, so we start inside a header */
	header = true;

	while((len = fread(buffer, 1, sizeof buffer, stdin)) != 0) {
		for(i = 0; i < len; i++) {
			char c = buffer[i];

			if(header) {
				/* skip the whole header line, including any '>' in it */
				if(c == '\n')
					header = false;
				continue;
			}

			if(c == '>') {
				/* a new record begins: the previous one ends here */
				printf("%llu\n", seq_length);
				header = true;
				continue;
			}

			/* line terminators (LF or CRLF) are not sequence data */
			if(c != '\n' && c != '\r')
				seq_length++;
		}
	}

	/* fread() returning 0 is either EOF or an error; tell them apart */
	if(ferror(stdin)) {
		fprintf(stderr, "could not read file\n");
		return EXIT_FAILURE;
	}

	/* end point of the final record */
	printf("%llu\n", seq_length);

	return EXIT_SUCCESS;
}