From b508436d6982205508d531ba19c9980570946ae7 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Thu, 7 Nov 2013 17:25:06 -0500 Subject: rename to counts per sequence, don't provide frequencies only counts --- kmer_counts_per_sequence.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 kmer_counts_per_sequence.c (limited to 'kmer_counts_per_sequence.c') diff --git a/kmer_counts_per_sequence.c b/kmer_counts_per_sequence.c new file mode 100644 index 0000000..d518efc --- /dev/null +++ b/kmer_counts_per_sequence.c @@ -0,0 +1,71 @@ +// Copyright 2013 Calvin Morrison +#include +#include +#include +#include + +#include "kmer_utils.h" + +unsigned long position = 0; +int main(int argc, char **argv) { + + char *line = NULL; + size_t len = 0; + ssize_t read; + + if(argc != 3) { + printf("Please supply a filename and a kmer\n"); + exit(EXIT_FAILURE); + } + + FILE *fh = fopen(argv[1], "r" ); + if(fh == NULL) { + fprintf(stderr, "Error opening %s - %s\n", argv[1], strerror(errno)); + exit(EXIT_FAILURE); + } + + unsigned long kmer = atoi(argv[2]); + if(kmer == 0) { + fprintf(stderr, "Error: invalid kmer.\n"); + exit(EXIT_FAILURE); + } + + const unsigned long width = (unsigned long)1 << (kmer * 2); + + unsigned long long *counts = malloc((width+ 1) * sizeof(unsigned long long)); + if(counts == NULL) + exit(EXIT_FAILURE); + + while ((read = getline(&line, &len, fh)) != -1) { + if(line[0] != '>' && (read > kmer)) { + + unsigned int i = 0; + unsigned long total = 0; + + // reset our count matrix to zero + memset(counts, 0, width * sizeof(unsigned long long)); + + for(i = 0; i < read - kmer; i++) { + line[i] = alpha[(int)line[i]]; + } + + for(i = 0; i < read - kmer; i++) { + counts[num_to_index(&line[i],kmer, width)]++; + } + + for(i = 0; i < width; i++) + total += counts[i]; + + for(i = 0; i < width - 1; i++) + printf("%llu\t", counts[i]); + printf("%llu\n", counts[width - 1]); + + } + } + + free(counts); + free(line); + + + return EXIT_SUCCESS; +} -- cgit v1.2.1