diff options
author | Calvin Morrison <mutantturkey@gmail.com> | 2013-09-10 23:43:56 -0400 |
---|---|---|
committer | Calvin Morrison <mutantturkey@gmail.com> | 2013-09-10 23:43:56 -0400 |
commit | 5b53a787e4d1cefa5660c891791cd0df4a8fd89c (patch) | |
tree | 005983f5d068409c9be44e614b4c1722cde37dd5 /kmer_frequency_per_sequence.c |
Initial commit of some kmer utilities.
there are two utilities included.
one is kmer_frequency_per_sequence,
which outputs an (m x n) matrix in which each of the m rows is a sequence and
each of the n columns is the frequency with which that kmer occurs in the given sequence.
the other tool is kmer_total_count, which counts kmers for the total
file, not just one sequence
Diffstat (limited to 'kmer_frequency_per_sequence.c')
-rw-r--r-- | kmer_frequency_per_sequence.c | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/kmer_frequency_per_sequence.c b/kmer_frequency_per_sequence.c new file mode 100644 index 0000000..e0995f9 --- /dev/null +++ b/kmer_frequency_per_sequence.c @@ -0,0 +1,58 @@ +// Copyright 2013 Calvin Morrison +#include <stdio.h> +#include <math.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdint.h> + +#include "kmer_utils.h" + +int main(int argc, char **argv) { + + char *line = NULL; + long kmer = 6; + size_t len = 0; + ssize_t read; + + if(argc != 3) { + printf("Please supply a filename, and only a filename\n"); + exit(EXIT_FAILURE); + } + + FILE *fh = fopen(argv[1], "r" ); + if(fh == NULL) { + fprintf(stderr, "Couldn't open: %s\n", argv[1]); + exit(EXIT_FAILURE); + } + + + int width = (int)pow(4, kmer); + while ((read = getline(&line, &len, fh)) != -1) { + if(line[0] != '>') { + + unsigned long long *counts = malloc((width+ 1) * sizeof(unsigned long long)); + if(counts == NULL) + exit(EXIT_FAILURE); + + unsigned int i = 0; + for(i = 0; i < strlen(line) - kmer; i++) { + counts[convert_kmer_to_index(&line[i], kmer, width)]++; + } + + unsigned long total = 0; + for(i = 0; i < width; i++) + total += counts[i]; + + for(i = 0; i < width - 1; i++) + printf("%.12f\t", (double)counts[i] / total); + printf("%.12f\n", (double)counts[width - 1] / total); + + free(counts); + } + } + + free(line); + + + return EXIT_SUCCESS; +} |