From 1d09547b5bc38d1f2d1a5a173738e86dc2ac07f1 Mon Sep 17 00:00:00 2001
From: Calvin Morrison
Date: Wed, 16 Oct 2013 12:43:27 -0400
Subject: works with multiline sequences, abstracted most code to kmer_utils.c

---
 kmer_total_count.c | 72 ++++++------------------------------------------------
 1 file changed, 7 insertions(+), 65 deletions(-)

(limited to 'kmer_total_count.c')

diff --git a/kmer_total_count.c b/kmer_total_count.c
index 795b5dc..34abfa0 100644
--- a/kmer_total_count.c
+++ b/kmer_total_count.c
@@ -5,80 +5,22 @@
 #include
 #include "kmer_utils.h"
-const unsigned char alpha[256] =
-{5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5};
 int main(int argc, char **argv) {
-	char *line = NULL;
-	size_t len = 0;
-	ssize_t read;
-	long i = 0;
-	long posistion = 0;
+	unsigned long long i = 0;
-	if(argc != 3) {
+	if(argc != 3) {
 		printf("Please supply a filename and a kmer\n");
 		exit(EXIT_FAILURE);
 	}
-	FILE * const fh = fopen(argv[1], "r");
-	if(fh == NULL) {
-		fprintf(stderr, "Error opening %s - %s\n", argv[1], strerror(errno));
-		exit(EXIT_FAILURE);
-	}
-
-	// second argument is the kmer
-	const unsigned int kmer = atoi(argv[2]);
-
-	// width is 4^kmer
-	const unsigned long width = (unsigned long)1 << (kmer * 2);
-
-	// malloc our counts matrix
-	unsigned long long * const counts = malloc((width+ 1) * sizeof(unsigned long long));
-
-	if(counts == NULL)
-		exit(EXIT_FAILURE);
-
-	while ((read = getline(&line, &len, fh)) != -1) {
-		if(line[0] != '>' && read > kmer) {
-
-			for(i = 0; i < read; i++) {
-				line[i] = alpha[line[i]];
-			}
-
-			for(posistion = 0; posistion < (read - kmer); posistion++) {
-				unsigned long out = 0;
-				unsigned long multiply = 1;
-
-
-				for(i = posistion + kmer - 1; i >= posistion; i--){
-					if(line[i] >> 2) {
-						out = width;
-						posistion = i;
-						goto next;
-					}
-
-					out += line[i] * multiply;
-					multiply = multiply << 2;
-				}
-				next:
-				counts[out]++;
-			}
-		}
-	}
-
-	for(i = 0; i < (unsigned)width; i=i+4)
-		printf("%llu\n%llu\n%llu\n%llu\n", counts[i], counts[i+1], counts[i+2], counts[i+3]);
+	unsigned long long *counts = get_kmer_counts_from_file(argv[1], atoi(argv[2]));
+	// print out our counts arrray
+	// manually unrolled 4 loops to reduce fprintf calls
+	for(i = 0; i < pow_four(atoi(argv[2])); i=i+4)
+		fprintf(stdout, "%llu\n%llu\n%llu\n%llu\n", counts[i], counts[i+1], counts[i+2], counts[i+3]);
 	return EXIT_SUCCESS;
 }
-- 
cgit v1.2.1