From 4d00f90fd8b1b9e38eea297336ee83a5e5c9e764 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Sat, 28 Sep 2013 11:52:45 -0700 Subject: idea --- kmer_frequency_per_sequence.c | 4 +++- kmer_total_count.c | 4 +++- kmer_utils.c | 26 +++++++++++++++++++------- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/kmer_frequency_per_sequence.c b/kmer_frequency_per_sequence.c index 5191b86..8f0d6b0 100644 --- a/kmer_frequency_per_sequence.c +++ b/kmer_frequency_per_sequence.c @@ -37,8 +37,10 @@ int main(int argc, char **argv) { if(counts == NULL) exit(EXIT_FAILURE); + convert_kmer_to_num(line, read); + for(i = 0; i < read - kmer; i++) { - counts[convert_kmer_to_index(&line[i], kmer, width)]++; + counts[num_to_index(&line[i],kmer, width)]++; } unsigned long total = 0; diff --git a/kmer_total_count.c b/kmer_total_count.c index 6d07e13..2416bfa 100644 --- a/kmer_total_count.c +++ b/kmer_total_count.c @@ -43,8 +43,10 @@ int main(int argc, char **argv) { while ((read = getline(&line, &len, fh)) != -1) { if(line[0] != '>') { + convert_kmer_to_num(line, read); + for(i = 0; i < read - kmer; i++) { - counts[convert_kmer_to_index(&line[i],kmer, width)]++; + counts[num_to_index(&line[i],kmer, width)]++; } } } diff --git a/kmer_utils.c b/kmer_utils.c index c0441e2..7b483ea 100644 --- a/kmer_utils.c +++ b/kmer_utils.c @@ -16,31 +16,43 @@ // long kmer - how long of a index value you want to return // long error_pos - what index to return for a non ACGT character // -inline long convert_kmer_to_index(const char *str, int kmer, long error_pos) { +inline long num_to_index(const char *str, int kmer, long error_pos) { int i = 0; unsigned long out = 0; unsigned long multiply = 1; for(i = kmer - 1; i >= 0; i--){ + + out += str[i] * multiply; + multiply = multiply << 2; + } + + return out; +} + +void convert_kmer_to_num(char *str, long length) { + + long i = 0; + + for(i = 0; i < length; i++) { switch(str[i] | 0x20 ) { case 'a': + str[i] = 0; break; case 'c': - out += 1 * multiply; + str[i] = 1; break; case 'g': - out += 2 * multiply; + str[i] = 2; break; case 't': - out += 3 * multiply; + str[i] = 3; break; default: - return error_pos; + str[i] = 5; } - multiply = multiply << 2; } - return out; } -- cgit v1.2.3