diff options
author | Calvin Morrison <mutantturkey@gmail.com> | 2013-10-16 12:43:27 -0400 |
---|---|---|
committer | Calvin Morrison <mutantturkey@gmail.com> | 2013-10-16 12:43:27 -0400 |
commit | 1d09547b5bc38d1f2d1a5a173738e86dc2ac07f1 (patch) | |
tree | 6fd328dd755ed3c9fa67330ce3f7f7460311c63d | |
parent | e0a1e2605920dcfbc500b1cff4b7282210b49744 (diff) |
Works with multiline sequences; abstracted most code into kmer_utils.c
-rw-r--r-- | kmer_total_count.c | 72 |
1 file changed, 7 insertions, 65 deletions
diff --git a/kmer_total_count.c b/kmer_total_count.c index 795b5dc..34abfa0 100644 --- a/kmer_total_count.c +++ b/kmer_total_count.c @@ -5,80 +5,22 @@ #include <string.h> #include "kmer_utils.h" -const unsigned char alpha[256] = -{5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, -5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2, -5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}; int main(int argc, char **argv) { - char *line = NULL; - size_t len = 0; - ssize_t read; - long i = 0; - long posistion = 0; + unsigned long long i = 0; - if(argc != 3) { + if(argc != 3) { printf("Please supply a filename and a kmer\n"); exit(EXIT_FAILURE); } - FILE * const fh = fopen(argv[1], "r"); - if(fh == NULL) { - fprintf(stderr, "Error opening %s - %s\n", argv[1], strerror(errno)); - exit(EXIT_FAILURE); - } - - // second argument is the kmer - const unsigned int kmer = atoi(argv[2]); - - // width is 4^kmer - const unsigned long width = (unsigned long)1 << (kmer * 2); - - // malloc our counts matrix - unsigned long long * const counts = malloc((width+ 1) * sizeof(unsigned long long)); - - if(counts == NULL) - exit(EXIT_FAILURE); - - while ((read = getline(&line, &len, fh)) != -1) { - if(line[0] != '>' && read > kmer) { - - for(i = 0; i < read; i++) { - line[i] = alpha[line[i]]; - } - - for(posistion = 0; posistion < (read - kmer); posistion++) { - unsigned long out = 0; 
- unsigned long multiply = 1; - - - for(i = posistion + kmer - 1; i >= posistion; i--){ - if(line[i] >> 2) { - out = width; - posistion = i; - goto next; - } - - out += line[i] * multiply; - multiply = multiply << 2; - } - next: - counts[out]++; - } - } - } - - for(i = 0; i < (unsigned)width; i=i+4) - printf("%llu\n%llu\n%llu\n%llu\n", counts[i], counts[i+1], counts[i+2], counts[i+3]); + unsigned long long *counts = get_kmer_counts_from_file(argv[1], atoi(argv[2])); + // print out our counts arrray + // manually unrolled 4 loops to reduce fprintf calls + for(i = 0; i < pow_four(atoi(argv[2])); i=i+4) + fprintf(stdout, "%llu\n%llu\n%llu\n%llu\n", counts[i], counts[i+1], counts[i+2], counts[i+3]); return EXIT_SUCCESS; } |