From 2a831c405c8372f86c22c5cf3684f63209877b49 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Fri, 4 Oct 2013 18:25:21 -0400 Subject: no more branching --- kmer_frequency_per_sequence.c | 20 +++++++++++++++++--- kmer_total_count.c | 34 ++++++++++++++++++++++++---------- kmer_total_count.h | 2 +- kmer_utils.c | 7 ++----- 4 files changed, 44 insertions(+), 19 deletions(-) diff --git a/kmer_frequency_per_sequence.c b/kmer_frequency_per_sequence.c index aa18dd6..02a221b 100644 --- a/kmer_frequency_per_sequence.c +++ b/kmer_frequency_per_sequence.c @@ -7,7 +7,19 @@ #include "kmer_utils.h" -long position = 0; +unsigned long position = 0; + +const unsigned char alpha[256] = +{5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}; int main(int argc, char **argv) { @@ -31,14 +43,16 @@ int main(int argc, char **argv) { width = (int)pow(4, kmer); while ((read = getline(&line, &len, fh)) != -1) { - if(line[0] != '>' && read > kmer) { + if(line[0] != '>' && (read > kmer)) { unsigned int i = 0; unsigned long long *counts = malloc((width+ 1) * sizeof(unsigned long long)); if(counts == NULL) exit(EXIT_FAILURE); - convert_kmer_to_num(line, read); + for(i = 0; i < read - kmer; i++) { + line[i] = alpha[line[i]]; + } for(i = 0; i < read - kmer; i++) { counts[num_to_index(&line[i],kmer, width)]++; diff --git a/kmer_total_count.c b/kmer_total_count.c index 4fadf03..113c979 100644 --- a/kmer_total_count.c +++ b/kmer_total_count.c @@ -7,7 +7,19 @@ #include "kmer_utils.h" -long position = 0; +unsigned long position = 0; + +const unsigned char alpha[256] = +{5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}; int main(int argc, char **argv) { @@ -16,14 +28,12 @@ int main(int argc, char **argv) { ssize_t read; long i = 0; - unsigned long long *counts; - if(argc != 3) { printf("Please supply a filename and a kmer\n"); exit(EXIT_FAILURE); } - FILE *fh = fopen(argv[1], "r"); + FILE * const fh = fopen(argv[1], "r"); if(fh == NULL) { fprintf(stderr, "Error opening %s - %s\n", argv[1], strerror(errno)); exit(EXIT_FAILURE); @@ -33,16 +43,20 @@ int main(int argc, char **argv) { const unsigned int kmer = atoi(argv[2]); // width is 4^kmer - const unsigned long width = (int)pow(4, kmer); + const unsigned long width = pow(4, kmer); // malloc our counts matrix - counts = malloc((width+ 1) * sizeof(unsigned long long)); + unsigned long long * const counts = malloc((width+ 1) * sizeof(unsigned long long)); + if(counts == NULL) exit(EXIT_FAILURE); - + while ((read = getline(&line, &len, fh)) != -1) { if(line[0] != '>' && read > kmer) { - convert_kmer_to_num(line, read); + + for(i = 0; i < read; i++) { + line[i] = alpha[line[i]]; + } for(position = 0; position < (read - kmer); position++) { counts[num_to_index(&line[position],kmer, width)]++; @@ -50,8 +64,8 @@ int main(int argc, char **argv) { } } - for(i = 0; i < (unsigned)width; i++) - printf("%llu\n", counts[i]); + for(i = 0; i < (unsigned)width; i=i+4) + printf("%llu\n%llu\n%llu\n%llu\n", counts[i], counts[i+1], counts[i+2], counts[i+3]); return EXIT_SUCCESS; diff --git a/kmer_total_count.h b/kmer_total_count.h index e782906..0c3d12d 100644 --- a/kmer_total_count.h +++ b/kmer_total_count.h @@ -1 +1 @@ -extern long position; +extern unsigned long position; diff --git a/kmer_utils.c b/kmer_utils.c index f4f8d2f..a1ff9b2 100644 --- a/kmer_utils.c +++ b/kmer_utils.c @@ -2,7 +2,7 @@ // convert a string of k-mer size base-4 values into a // base-10 index -unsigned long num_to_index(const char *str, const int kmer, const long error_pos) { +inline unsigned long num_to_index(const char *str, const int kmer, const long error_pos) { int i = 0; unsigned long out = 0; @@ -17,7 +17,6 @@ unsigned long num_to_index(const char *str, const int kmer, const long error_pos return error_pos; } - out += str[i] * multiply; multiply = multiply << 2; } @@ -25,9 +24,6 @@ unsigned long num_to_index(const char *str, const int kmer, const long error_pos return out; } -// replaces values in a char array of ACGT's and others with -// values that correspond to their base 4 value to be used in -// num_to_index. void convert_kmer_to_num(char *str, const unsigned long length) { unsigned long i = 0; @@ -56,3 +52,4 @@ void convert_kmer_to_num(char *str, const unsigned long length) { } } + -- cgit v1.2.1