aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile13
-rw-r--r--kmer_frequency_per_sequence.c58
-rw-r--r--kmer_total_count.c49
-rw-r--r--kmer_utils.c54
-rw-r--r--kmer_utils.h1
5 files changed, 175 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..df96037
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,13 @@
+VERSION=\"v0.0.1\"
+CC = gcc
+CFLAGS = -O3 -s -mtune=native -Wall -lm -DVERSION=$(VERSION)
+
+all: kmer_total_count kmer_frequency_per_sequence
+
+kmer_total_count:
+ $(CC) kmer_total_count.c kmer_utils.c -o kmer_count_total $(CFLAGS)
+kmer_frequency_per_sequence:
+ $(CC) kmer_frequency_per_sequence.c kmer_utils.c -o kmer_frequency_per_sequence $(CFLAGS)
+
+clean:
+ rm -vf kmer_total_count kmer_frequency_per_sequence
diff --git a/kmer_frequency_per_sequence.c b/kmer_frequency_per_sequence.c
new file mode 100644
index 0000000..e0995f9
--- /dev/null
+++ b/kmer_frequency_per_sequence.c
@@ -0,0 +1,58 @@
+// Copyright 2013 Calvin Morrison
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+
+#include "kmer_utils.h"
+
+int main(int argc, char **argv) {
+
+ char *line = NULL;
+ long kmer = 6;
+ size_t len = 0;
+ ssize_t read;
+
+ if(argc != 3) {
+ printf("Please supply a filename, and only a filename\n");
+ exit(EXIT_FAILURE);
+ }
+
+ FILE *fh = fopen(argv[1], "r" );
+ if(fh == NULL) {
+ fprintf(stderr, "Couldn't open: %s\n", argv[1]);
+ exit(EXIT_FAILURE);
+ }
+
+
+ int width = (int)pow(4, kmer);
+ while ((read = getline(&line, &len, fh)) != -1) {
+ if(line[0] != '>') {
+
+ unsigned long long *counts = malloc((width+ 1) * sizeof(unsigned long long));
+ if(counts == NULL)
+ exit(EXIT_FAILURE);
+
+ unsigned int i = 0;
+ for(i = 0; i < strlen(line) - kmer; i++) {
+ counts[convert_kmer_to_index(&line[i], kmer, width)]++;
+ }
+
+ unsigned long total = 0;
+ for(i = 0; i < width; i++)
+ total += counts[i];
+
+ for(i = 0; i < width - 1; i++)
+ printf("%.12f\t", (double)counts[i] / total);
+ printf("%.12f\n", (double)counts[width - 1] / total);
+
+ free(counts);
+ }
+ }
+
+ free(line);
+
+
+ return EXIT_SUCCESS;
+}
diff --git a/kmer_total_count.c b/kmer_total_count.c
new file mode 100644
index 0000000..4d1ab87
--- /dev/null
+++ b/kmer_total_count.c
@@ -0,0 +1,49 @@
+// Copyright 2013 Calvin Morrison
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+
+#include "kmer_utils.h"
+
+int main(int argc, char **argv) {
+
+ char *line = NULL;
+ size_t len = 0;
+ ssize_t read;
+ unsigned int i = 0;
+
+ if(argc != 3) {
+ printf("Please supply a filename, and only a filename\n");
+ exit(EXIT_FAILURE);
+ }
+
+ FILE *fh = fopen(argv[1], "r" );
+ if(fh == NULL) {
+ fprintf(stderr, "Couldn't open: %s\n", argv[1]);
+ exit(EXIT_FAILURE);
+ }
+
+ int kmer = atoi(argv[2]);
+ int width = (int)pow(4, kmer);
+
+ unsigned long long *counts = malloc((width+ 1) * sizeof(unsigned long long));
+ if(counts == NULL)
+ exit(EXIT_FAILURE);
+
+ while ((read = getline(&line, &len, fh)) != -1) {
+ if(line[0] != '>') {
+
+ for(i = 0; i < strlen(line) - kmer; i++) {
+ counts[convert_kmer_to_index(&line[i],kmer, width)]++;
+ }
+ }
+ }
+
+ for(i = 0; i < width; i++)
+ printf("%llu\n", counts[i]);
+
+
+ return EXIT_SUCCESS;
+}
diff --git a/kmer_utils.c b/kmer_utils.c
new file mode 100644
index 0000000..b6f66b5
--- /dev/null
+++ b/kmer_utils.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+// This function takes a char array containing sequences and converts it
+// into a kmer index (essentially a base 4 radix converstion to base 10, with
+// some mumbo-jumbo of taking each of the characters we want (ACGT) and turning
+// them into a radix-4 number we can use.
+//
+// kind of convoluted but it works.
+//
+// Arguemnts:
+// char *str - a NULL terminated character array
+// long kmer - how long of a index value you want to return
+// long error_pos - what index to return for a non ACGT character
+//
+long convert_kmer_to_index(char *str, long kmer, long error_pos) {
+
+ char **ptr = NULL;
+ char vals[kmer];
+ long i = 0;
+ long ret = 0;
+
+ for(i = 0; i < kmer; i++) {
+ int val = 0;
+ switch(str[i]) {
+ case 'a':
+ case 'A':
+ val = 48;
+ break;
+ case 'c':
+ case 'C':
+ val = 49;
+ break;
+ case 'g':
+ case 'G':
+ val = 50;
+ break;
+ case 't':
+ case 'T':
+ val = 51;
+ break;
+ default:
+ return error_pos;
+ }
+
+
+ vals[i] = val;
+ }
+
+ ret = strtol(vals, ptr, 4);
+ return ret;
+}
diff --git a/kmer_utils.h b/kmer_utils.h
new file mode 100644
index 0000000..a93216e
--- /dev/null
+++ b/kmer_utils.h
@@ -0,0 +1 @@
+long convert_kmer_to_index(char *str, long kmer, long error_pos);