aboutsummaryrefslogtreecommitdiff
path: root/kmer_total_count.c
diff options
context:
space:
mode:
author Calvin Morrison <mutantturkey@gmail.com> 2013-09-10 23:43:56 -0400
committer Calvin Morrison <mutantturkey@gmail.com> 2013-09-10 23:43:56 -0400
commit 5b53a787e4d1cefa5660c891791cd0df4a8fd89c (patch)
tree 005983f5d068409c9be44e614b4c1722cde37dd5 /kmer_total_count.c
Initial commit of some kmer utilities.
There are two utilities included. One is kmer_frequency_per_sequence, which outputs an (m x n) matrix where m is the sequence and n is the frequency with which that nmer occurs in the given sequence. The other tool is kmer_total_count, which counts kmers across the whole file, not just one sequence.
Diffstat (limited to 'kmer_total_count.c')
-rw-r--r--kmer_total_count.c49
1 files changed, 49 insertions, 0 deletions
diff --git a/kmer_total_count.c b/kmer_total_count.c
new file mode 100644
index 0000000..4d1ab87
--- /dev/null
+++ b/kmer_total_count.c
@@ -0,0 +1,49 @@
+// Copyright 2013 Calvin Morrison
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+
+#include "kmer_utils.h"
+
+// Count k-mers over every sequence line of a file and print the totals.
+//
+// Usage: kmer_total_count <filename> <kmer-size>
+//   argv[1]  path of the input file; lines beginning with '>' are skipped
+//            (presumably FASTA header lines -- confirm against the input format)
+//   argv[2]  k-mer length, a decimal integer from 1 to 15
+//
+// Prints 4^k lines to stdout; line i holds the count accumulated at index i
+// as returned by convert_kmer_to_index(). Returns EXIT_SUCCESS, or exits with
+// EXIT_FAILURE on bad arguments, an unopenable file, or allocation failure.
+int main(int argc, char **argv) {
+	// 4^15 == 2^30 is the largest power of four that fits in a 32-bit int.
+	enum { MAX_KMER = 15 };
+
+	char *line = NULL;
+	size_t cap = 0;
+	ssize_t nread;
+
+	if(argc != 3) {
+		fprintf(stderr, "Usage: kmer_total_count <filename> <kmer-size>\n");
+		exit(EXIT_FAILURE);
+	}
+
+	FILE *fh = fopen(argv[1], "r");
+	if(fh == NULL) {
+		fprintf(stderr, "Couldn't open: %s\n", argv[1]);
+		exit(EXIT_FAILURE);
+	}
+
+	// strtol instead of atoi so that garbage and out-of-range sizes are rejected
+	// before they are turned into an array width.
+	char *end = NULL;
+	long kmer_arg = strtol(argv[2], &end, 10);
+	if(end == argv[2] || *end != '\0' || kmer_arg < 1 || kmer_arg > MAX_KMER) {
+		fprintf(stderr, "kmer size must be an integer between 1 and %d\n", MAX_KMER);
+		fclose(fh);
+		exit(EXIT_FAILURE);
+	}
+	int kmer = (int)kmer_arg;
+	int width = 1 << (2 * kmer); // 4^kmer, computed without floating point
+
+	// calloc so every counter starts at zero. The extra slot at index `width`
+	// is kept from the original allocation; presumably convert_kmer_to_index()
+	// returns `width` for k-mers it cannot encode -- TODO confirm in kmer_utils.
+	unsigned long long *counts = calloc((size_t)width + 1, sizeof *counts);
+	if(counts == NULL) {
+		fprintf(stderr, "Couldn't allocate %d counters\n", width);
+		fclose(fh);
+		exit(EXIT_FAILURE);
+	}
+
+	while((nread = getline(&line, &cap, fh)) != -1) {
+		if(line[0] == '>')
+			continue;
+
+		// Drop the line terminator so the window count does not depend on
+		// whether the line ends in "\n", "\r\n", or nothing at all.
+		size_t seq_len = (size_t)nread;
+		while(seq_len > 0 && (line[seq_len - 1] == '\n' || line[seq_len - 1] == '\r'))
+			seq_len--;
+
+		// `pos + kmer <= seq_len` cannot wrap around, unlike `len - kmer`,
+		// so lines shorter than one k-mer simply contribute nothing.
+		for(size_t pos = 0; pos + (size_t)kmer <= seq_len; pos++)
+			counts[convert_kmer_to_index(&line[pos], kmer, width)]++;
+	}
+
+	for(int idx = 0; idx < width; idx++)
+		printf("%llu\n", counts[idx]);
+
+	free(line);
+	free(counts);
+	fclose(fh);
+
+	return EXIT_SUCCESS;
+}