From b508436d6982205508d531ba19c9980570946ae7 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Thu, 7 Nov 2013 17:25:06 -0500 Subject: rename to counts per sequence, don't provide frequencies only counts --- Makefile | 8 ++--- kmer_counts_per_sequence.c | 71 +++++++++++++++++++++++++++++++++++++++++++ kmer_frequency_per_sequence.c | 71 ------------------------------------------- kmer_total_count.c | 1 - 4 files changed, 75 insertions(+), 76 deletions(-) create mode 100644 kmer_counts_per_sequence.c delete mode 100644 kmer_frequency_per_sequence.c diff --git a/Makefile b/Makefile index 2b22db8..249efe8 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ CC = gcc CFLAGS = -O3 -s -mtune=native -Wall -DVERSION=$(VERSION) -Wextra CLIBS = libkmer.a -all: libkmer.o libkmer.so libkmer.a kmer_total_count kmer_frequency_per_sequence +all: libkmer.o libkmer.so libkmer.a kmer_total_count kmer_counts_per_sequence libkmer.o: $(CC) -c kmer_utils.c -o libkmer.o $(CFLAGS) -fPIC -DSHARED=0 @@ -14,8 +14,8 @@ libkmer.a: libkmer.o chmod +x libkmer.a kmer_total_count: libkmer.a $(CC) kmer_total_count.c -o kmer_total_count $(CLIBS) $(CFLAGS) -DSHARED=0 -kmer_frequency_per_sequence: libkmer.a - $(CC) kmer_frequency_per_sequence.c -o kmer_frequency_per_sequence $(CLIBS) $(CFLAGS) +kmer_counts_per_sequence: libkmer.a + $(CC) kmer_counts_per_sequence.c -o kmer_counts_per_sequence $(CLIBS) $(CFLAGS) clean: - rm -vf kmer_total_count kmer_frequency_per_sequence libkmer.so libkmer.a libkmer.o + rm -vf kmer_total_count kmer_counts_per_sequence libkmer.so libkmer.a libkmer.o diff --git a/kmer_counts_per_sequence.c b/kmer_counts_per_sequence.c new file mode 100644 index 0000000..d518efc --- /dev/null +++ b/kmer_counts_per_sequence.c @@ -0,0 +1,71 @@ +// Copyright 2013 Calvin Morrison +#include +#include +#include +#include + +#include "kmer_utils.h" + +unsigned long position = 0; +int main(int argc, char **argv) { + + char *line = NULL; + size_t len = 0; + ssize_t read; + + if(argc != 3) { + printf("Please supply a filename and a kmer\n"); + exit(EXIT_FAILURE); + } + + FILE *fh = fopen(argv[1], "r" ); + if(fh == NULL) { + fprintf(stderr, "Error opening %s - %s\n", argv[1], strerror(errno)); + exit(EXIT_FAILURE); + } + + unsigned long kmer = atoi(argv[2]); + if(kmer == 0) { + fprintf(stderr, "Error: invalid kmer.\n"); + exit(EXIT_FAILURE); + } + + const unsigned long width = (unsigned long)1 << (kmer * 2); + + unsigned long long *counts = malloc((width+ 1) * sizeof(unsigned long long)); + if(counts == NULL) + exit(EXIT_FAILURE); + + while ((read = getline(&line, &len, fh)) != -1) { + if(line[0] != '>' && (read > kmer)) { + + unsigned int i = 0; + unsigned long total = 0; + + // reset our count matrix to zero + memset(counts, 0, width * sizeof(unsigned long long)); + + for(i = 0; i < read - kmer; i++) { + line[i] = alpha[(int)line[i]]; + } + + for(i = 0; i < read - kmer; i++) { + counts[num_to_index(&line[i],kmer, width)]++; + } + + for(i = 0; i < width; i++) + total += counts[i]; + + for(i = 0; i < width - 1; i++) + printf("%llu\t", counts[i]); + printf("%llu\n", counts[width - 1]); + + } + } + + free(counts); + free(line); + + + return EXIT_SUCCESS; +} diff --git a/kmer_frequency_per_sequence.c b/kmer_frequency_per_sequence.c deleted file mode 100644 index 7b14f6d..0000000 --- a/kmer_frequency_per_sequence.c +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2013 Calvin Morrison -#include -#include -#include -#include - -#include "kmer_utils.h" - -unsigned long position = 0; -int main(int argc, char **argv) { - - char *line = NULL; - size_t len = 0; - ssize_t read; - - if(argc != 3) { - printf("Please supply a filename and a kmer\n"); - exit(EXIT_FAILURE); - } - - FILE *fh = fopen(argv[1], "r" ); - if(fh == NULL) { - fprintf(stderr, "Error opening %s - %s\n", argv[1], strerror(errno)); - exit(EXIT_FAILURE); - } - - unsigned long kmer = atoi(argv[2]); - if(kmer == 0) { - fprintf(stderr, "Error: invalid kmer.\n"); - exit(EXIT_FAILURE); - } - - const unsigned long width = (unsigned long)1 << (kmer * 2); - - unsigned long long *counts = malloc((width+ 1) * sizeof(unsigned long long)); - if(counts == NULL) - exit(EXIT_FAILURE); - - while ((read = getline(&line, &len, fh)) != -1) { - if(line[0] != '>' && (read > kmer)) { - - unsigned int i = 0; - unsigned long total = 0; - - // reset our count matrix to zero - memset(counts, 0, width * sizeof(unsigned long long)); - - for(i = 0; i < read - kmer; i++) { - line[i] = alpha[(int)line[i]]; - } - - for(i = 0; i < read - kmer; i++) { - counts[num_to_index(&line[i],kmer, width)]++; - } - - for(i = 0; i < width; i++) - total += counts[i]; - - for(i = 0; i < width - 1; i++) - printf("%.12f\t", (double)counts[i] / total); - printf("%.12f\n", (double)counts[width - 1] / total); - - } - } - - free(counts); - free(line); - - - return EXIT_SUCCESS; -} diff --git a/kmer_total_count.c b/kmer_total_count.c index cc9e625..fd8a676 100644 --- a/kmer_total_count.c +++ b/kmer_total_count.c @@ -28,7 +28,6 @@ int main(int argc, char **argv) { // print out our counts arrray // manually unrolled 4 loops to reduce fprintf calls - if(argc == 3) { for(i = 0; i < width; i=i+4) fprintf(stdout, "%llu\n%llu\n%llu\n%llu\n", counts[i], counts[i+1], counts[i+2], counts[i+3]); -- cgit v1.2.3