diff options
-rw-r--r-- | Makefile | 8 | ||||
-rw-r--r-- | kmer_total_count.c | 7 | ||||
-rw-r--r-- | kmer_utils.c | 99 | ||||
-rw-r--r-- | kmer_utils.h | 14 |
4 files changed, 76 insertions, 52 deletions
@@ -1,6 +1,6 @@ VERSION=\"0.0.2\" -CC = gcc -CFLAGS = -O3 -s -mtune=native -Wall -DVERSION=$(VERSION) -Wextra +CC = g++ +CFLAGS = -O3 -s -mtune=native -Wall -Wextra -DVERSION=$(VERSION) -std=c++11 DESTDIR = /usr/local/ @@ -12,8 +12,8 @@ libkmer.o: kmer_utils.c $(CC) -c kmer_utils.c -o libkmer.o $(CFLAGS) -fPIC libkmer.so: libkmer.o $(CC) kmer_utils.c -o libkmer.so $(CFLAGS) -shared -fPIC -kmer_total_count: sparse.o libkmer.o kmer_total_count.c kmer_utils.h - $(CC) sparse.o libkmer.o kmer_total_count.c -o kmer_total_count $(CLIBS) $(CFLAGS) +kmer_total_count: kmer_utils.c kmer_total_count.c kmer_utils.h + $(CC) kmer_utils.cpp kmer_total_count.c -o kmer_total_count $(CLIBS) $(CFLAGS) kmer_counts_per_sequence: libkmer.o kmer_counts_per_sequence.c kmer_utils.h $(CC) libkmer.o kmer_counts_per_sequence.c -o kmer_counts_per_sequence $(CLIBS) $(CFLAGS) diff --git a/kmer_total_count.c b/kmer_total_count.c index 12f9ab4..c55ffa5 100644 --- a/kmer_total_count.c +++ b/kmer_total_count.c @@ -6,7 +6,6 @@ #include <string.h> #include <getopt.h> -#include "sparse.h" #include "kmer_utils.h" @@ -117,12 +116,12 @@ int main(int argc, char **argv) { } if(kmer > 12 || force_sparse) { - node *root = get_sparse_kmer_counts_from_file(fh, kmer); - print_sparse(root, label, nonzero, kmer); + kmer_map *counts = get_sparse_kmer_counts_from_file(fh, kmer); + print_kmer(counts, label, nonzero, kmer); } else { unsigned long long *counts = get_dense_kmer_counts_from_file(fh, kmer); - print_dense(counts, label, nonzero, kmer); + print_kmer(counts, label, nonzero, kmer); } return EXIT_SUCCESS; diff --git a/kmer_utils.c b/kmer_utils.c index b456f2f..c46e580 100644 --- a/kmer_utils.c +++ b/kmer_utils.c @@ -4,7 +4,11 @@ #include <stdlib.h> #include <string.h> -#include "sparse.h" +#include <unordered_map> + +using namespace std; + +typedef unordered_map<size_t,unsigned long long> kmer_map; const unsigned char alpha[256] = {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -98,9 +102,9 @@ char *index_to_kmer(unsigned long long index, long kmer) { size_t i = 0; size_t j = 0; - char *num_array = calloc(kmer, sizeof(char)); - char *ret = calloc(kmer + 1, sizeof(char)); - if(ret == NULL) + char *num_array = (char *) calloc(kmer, sizeof(char)); + char *ret = (char *) calloc(kmer + 1, sizeof(char)); + if(ret == NULL || num_array == NULL) exit(EXIT_FAILURE); @@ -157,7 +161,7 @@ size_t strnstrip(char *s, int c, size_t len) { } -node * get_sparse_kmer_counts_from_file(FILE *fh, const unsigned int kmer) { +kmer_map *get_sparse_kmer_counts_from_file(FILE *fh, int kmer) { char *line = NULL; size_t len = 0; @@ -166,13 +170,11 @@ node * get_sparse_kmer_counts_from_file(FILE *fh, const unsigned int kmer) { long long i = 0; long long position = 0; + kmer_map *counts = new kmer_map(); + // // width is 4^kmer const unsigned long long width = pow_four(kmer); - node *root = NULL; - - - while ((read = getdelim(&line, &len, '>', fh)) != -1) { size_t k; char *seq; @@ -218,13 +220,7 @@ node * get_sparse_kmer_counts_from_file(FILE *fh, const unsigned int kmer) { next: { // bump up the mer value in the counts array - node *tmp = search(&root, mer); - if(tmp) { - tmp->value++; - } - else { - insert(&root, mer); - } + (*counts)[mer]++; } } } @@ -232,7 +228,7 @@ node * get_sparse_kmer_counts_from_file(FILE *fh, const unsigned int kmer) { free(line); fclose(fh); - return root; + return counts; } @@ -248,7 +244,10 @@ unsigned long long * get_dense_kmer_counts_from_file(FILE *fh, const unsigned in // width is 4^kmer const unsigned long long width = pow_four(kmer); - unsigned long long *counts = malloc(width+1 * sizeof(unsigned long long)); + unsigned long long *counts = (unsigned long long *) calloc(width+1, sizeof(unsigned long long)); + if(counts == NULL) { + exit(EXIT_FAILURE); + } while ((read = getdelim(&line, &len, '>', fh)) != -1) { size_t k; char *seq; @@ -305,7 +304,7 @@ unsigned long long * get_dense_kmer_counts_from_file(FILE *fh, const unsigned in } -unsigned long long * get_kmer_counts_from_filename(const char *fn, const unsigned int kmer) { +unsigned long long * get_dense_kmer_counts_from_filename(const char *fn, const unsigned int kmer) { FILE *fh = fopen(fn, "r"); if(fh == NULL) { fprintf(stderr, "Could not open %s - %s\n", fn, strerror(errno)); @@ -315,7 +314,7 @@ unsigned long long * get_kmer_counts_from_filename(const char *fn, const unsigne return get_dense_kmer_counts_from_file(fh, kmer); } -void print_dense(unsigned long long *counts, bool label, bool nonzero, unsigned int kmer) { +void print_kmer(unsigned long long *counts, bool label, bool nonzero, unsigned int kmer) { size_t width = pow_four(kmer); size_t i = 0; @@ -334,7 +333,7 @@ void print_dense(unsigned long long *counts, bool label, bool nonzero, unsigned else { for(i = 0; i < width; i++) if(counts[i] != 0) - fprintf(stdout, "%llu\t%llu\n", i, counts[i]); + fprintf(stdout, "%zu\t%llu\n", i, counts[i]); } } @@ -356,28 +355,48 @@ void print_dense(unsigned long long *counts, bool label, bool nonzero, unsigned } } -void print_sparse(node *tree, bool label, bool nonzero, unsigned int kmer) { +void print_kmer(kmer_map *counts, bool label, bool nonzero, unsigned int kmer) { + size_t width = pow_four(kmer); + size_t i = 0; + + // If nonzero is set, only print non zeros + if(nonzero) { + // if labels is set, print out our labels + if(label) { + for(i = 0; i < width; i++) + if(counts->count(i) != 0) { + char *kmer_str = index_to_kmer(i, kmer); + fprintf(stdout, "%s\t%llu\n", kmer_str, counts->at(i)); + free(kmer_str); + } - if (tree) { - print_sparse(tree->left, label, nonzero, kmer); - if(label && nonzero) { - char *kmer_str = index_to_kmer(tree->index, kmer); - if (tree->value != 0) - fprintf(stdout, "%s\t%llu\n", kmer_str, tree->value); - free(kmer_str); } - else if(label) { - char *kmer_str = index_to_kmer(tree->index, kmer); - fprintf(stdout, "%s\t%llu\n", kmer_str, tree->value); - free(kmer_str); + else { + for(i = 0; i < width; i++) + if(counts->count(i) != 0) + fprintf(stdout, "%zu\t%llu\n", i, counts->at(i)); + } - else if(nonzero) { - if (tree->value != 0) - fprintf(stdout, "%zu\t%llu\n", tree->index, tree->value); + } + // If we aren't printing nonzeros print everything + else { + if(label) { + for(i = 0; i < width; i++) { + char *kmer_str = index_to_kmer(i, kmer); + if(counts->count(i) != 0) + fprintf(stdout, "%s\t%llu\n", kmer_str, counts->at(i)); + else + fprintf(stdout, "%s\t0\n", kmer_str); + free(kmer_str); + } + } + else { + for(i = 0; i < width; i++) { + if(counts->count(i) != 0) + fprintf(stdout, "%llu\n", counts->at(i)); + else + fprintf(stdout, "0\n"); + } } - else - fprintf(stdout, "%llu\n", tree->value); - - print_sparse(tree->right, label, nonzero, kmer); } } diff --git a/kmer_utils.h b/kmer_utils.h index af13227..cf7b8a1 100644 --- a/kmer_utils.h +++ b/kmer_utils.h @@ -1,3 +1,6 @@ +#include <unordered_map> +using namespace std; + // Kmer functions void convert_kmer_to_num(char *str, const unsigned long length); unsigned long num_to_index(const char *str, const int kmer, const long error_pos, long long *current_position); @@ -8,13 +11,16 @@ size_t strnstrip(char *s, int c, size_t len); unsigned long long pow_four(unsigned long long x); // Variables -const unsigned char alpha[256]; +typedef unordered_map<size_t,unsigned long long> kmer_map; // file loading functions -node * get_sparse_kmer_counts_from_filename(const char *fn, const unsigned int kmer); -node * get_sparse_kmer_counts_from_file(FILE *fh, const int kmer); +kmer_map *get_sparse_kmer_counts_from_filename(const char *fn, const unsigned int kmer); +kmer_map *get_sparse_kmer_counts_from_file(FILE *fh, int kmer); unsigned long long * get_dense_kmer_counts_from_file(FILE *fh, const unsigned int kmer); -void print_dense(unsigned long long *counts, bool label, bool nonzero, unsigned int kmer); +void print_kmer(unsigned long long *counts, bool label, bool nonzero, unsigned int kmer); +void print_kmer(kmer_map *counts, bool label, bool nonzero, unsigned int kmer); + +unsigned char alpha[256]; |