aboutsummaryrefslogtreecommitdiff
path: root/kmer_total_count.c
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2013-10-16 12:43:27 -0400
committerCalvin Morrison <mutantturkey@gmail.com>2013-10-16 12:43:27 -0400
commit1d09547b5bc38d1f2d1a5a173738e86dc2ac07f1 (patch)
tree6fd328dd755ed3c9fa67330ce3f7f7460311c63d /kmer_total_count.c
parente0a1e2605920dcfbc500b1cff4b7282210b49744 (diff)
works with multiline sequences, abstracted most code to kmer_utils.c
Diffstat (limited to 'kmer_total_count.c')
-rw-r--r--kmer_total_count.c72
1 files changed, 7 insertions, 65 deletions
diff --git a/kmer_total_count.c b/kmer_total_count.c
index 795b5dc..34abfa0 100644
--- a/kmer_total_count.c
+++ b/kmer_total_count.c
@@ -5,80 +5,22 @@
#include <string.h>
#include "kmer_utils.h"
-const unsigned char alpha[256] =
-{5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5};
int main(int argc, char **argv) {
- char *line = NULL;
- size_t len = 0;
- ssize_t read;
- long i = 0;
- long posistion = 0;
+ unsigned long long i = 0;
- if(argc != 3) {
+ if(argc != 3) {
printf("Please supply a filename and a kmer\n");
exit(EXIT_FAILURE);
}
- FILE * const fh = fopen(argv[1], "r");
- if(fh == NULL) {
- fprintf(stderr, "Error opening %s - %s\n", argv[1], strerror(errno));
- exit(EXIT_FAILURE);
- }
-
- // second argument is the kmer
- const unsigned int kmer = atoi(argv[2]);
-
- // width is 4^kmer
- const unsigned long width = (unsigned long)1 << (kmer * 2);
-
- // malloc our counts matrix
- unsigned long long * const counts = malloc((width+ 1) * sizeof(unsigned long long));
-
- if(counts == NULL)
- exit(EXIT_FAILURE);
-
- while ((read = getline(&line, &len, fh)) != -1) {
- if(line[0] != '>' && read > kmer) {
-
- for(i = 0; i < read; i++) {
- line[i] = alpha[line[i]];
- }
-
- for(posistion = 0; posistion < (read - kmer); posistion++) {
- unsigned long out = 0;
- unsigned long multiply = 1;
-
-
- for(i = posistion + kmer - 1; i >= posistion; i--){
- if(line[i] >> 2) {
- out = width;
- posistion = i;
- goto next;
- }
-
- out += line[i] * multiply;
- multiply = multiply << 2;
- }
- next:
- counts[out]++;
- }
- }
- }
-
- for(i = 0; i < (unsigned)width; i=i+4)
- printf("%llu\n%llu\n%llu\n%llu\n", counts[i], counts[i+1], counts[i+2], counts[i+3]);
+ unsigned long long *counts = get_kmer_counts_from_file(argv[1], atoi(argv[2]));
+ // print out our counts array
+ // loop manually unrolled four times to reduce the number of fprintf calls
+ for(i = 0; i < pow_four(atoi(argv[2])); i=i+4)
+ fprintf(stdout, "%llu\n%llu\n%llu\n%llu\n", counts[i], counts[i+1], counts[i+2], counts[i+3]);
return EXIT_SUCCESS;
}