about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Calvin Morrison <mutantturkey@gmail.com>, 2014-03-03 19:19:02 -0500
committer: Calvin Morrison <mutantturkey@gmail.com>, 2014-03-03 19:19:02 -0500
commit: 7ded6cee0153b51080375bf16001789dc392584d (patch)
tree: 2327b0907cbeda9a21dac45e92365f728a210670
parent: c038740be3e0dec1798c0c660081f1b6d2445907 (diff)
C++ version using unordered map
-rw-r--r--  Makefile            8
-rw-r--r--  kmer_total_count.c  7
-rw-r--r--  kmer_utils.c        99
-rw-r--r--  kmer_utils.h        14
4 files changed, 76 insertions, 52 deletions
diff --git a/Makefile b/Makefile
index 5a1f34d..f9db836 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION=\"0.0.2\"
-CC = gcc
-CFLAGS = -O3 -s -mtune=native -Wall -DVERSION=$(VERSION) -Wextra
+CC = g++
+CFLAGS = -O3 -s -mtune=native -Wall -Wextra -DVERSION=$(VERSION) -std=c++11
DESTDIR = /usr/local/
@@ -12,8 +12,8 @@ libkmer.o: kmer_utils.c
$(CC) -c kmer_utils.c -o libkmer.o $(CFLAGS) -fPIC
libkmer.so: libkmer.o
$(CC) kmer_utils.c -o libkmer.so $(CFLAGS) -shared -fPIC
-kmer_total_count: sparse.o libkmer.o kmer_total_count.c kmer_utils.h
- $(CC) sparse.o libkmer.o kmer_total_count.c -o kmer_total_count $(CLIBS) $(CFLAGS)
+kmer_total_count: kmer_utils.c kmer_total_count.c kmer_utils.h
+ $(CC) kmer_utils.cpp kmer_total_count.c -o kmer_total_count $(CLIBS) $(CFLAGS)
kmer_counts_per_sequence: libkmer.o kmer_counts_per_sequence.c kmer_utils.h
$(CC) libkmer.o kmer_counts_per_sequence.c -o kmer_counts_per_sequence $(CLIBS) $(CFLAGS)
diff --git a/kmer_total_count.c b/kmer_total_count.c
index 12f9ab4..c55ffa5 100644
--- a/kmer_total_count.c
+++ b/kmer_total_count.c
@@ -6,7 +6,6 @@
#include <string.h>
#include <getopt.h>
-#include "sparse.h"
#include "kmer_utils.h"
@@ -117,12 +116,12 @@ int main(int argc, char **argv) {
}
if(kmer > 12 || force_sparse) {
- node *root = get_sparse_kmer_counts_from_file(fh, kmer);
- print_sparse(root, label, nonzero, kmer);
+ kmer_map *counts = get_sparse_kmer_counts_from_file(fh, kmer);
+ print_kmer(counts, label, nonzero, kmer);
}
else {
unsigned long long *counts = get_dense_kmer_counts_from_file(fh, kmer);
- print_dense(counts, label, nonzero, kmer);
+ print_kmer(counts, label, nonzero, kmer);
}
return EXIT_SUCCESS;
diff --git a/kmer_utils.c b/kmer_utils.c
index b456f2f..c46e580 100644
--- a/kmer_utils.c
+++ b/kmer_utils.c
@@ -4,7 +4,11 @@
#include <stdlib.h>
#include <string.h>
-#include "sparse.h"
+#include <unordered_map>
+
+using namespace std;
+
+typedef unordered_map<size_t,unsigned long long> kmer_map;
const unsigned char alpha[256] =
{5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
@@ -98,9 +102,9 @@ char *index_to_kmer(unsigned long long index, long kmer) {
size_t i = 0;
size_t j = 0;
- char *num_array = calloc(kmer, sizeof(char));
- char *ret = calloc(kmer + 1, sizeof(char));
- if(ret == NULL)
+ char *num_array = (char *) calloc(kmer, sizeof(char));
+ char *ret = (char *) calloc(kmer + 1, sizeof(char));
+ if(ret == NULL || num_array == NULL)
exit(EXIT_FAILURE);
@@ -157,7 +161,7 @@ size_t strnstrip(char *s, int c, size_t len) {
}
-node * get_sparse_kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
+kmer_map *get_sparse_kmer_counts_from_file(FILE *fh, int kmer) {
char *line = NULL;
size_t len = 0;
@@ -166,13 +170,11 @@ node * get_sparse_kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
long long i = 0;
long long position = 0;
+ kmer_map *counts = new kmer_map();
+ //
// width is 4^kmer
const unsigned long long width = pow_four(kmer);
- node *root = NULL;
-
-
-
while ((read = getdelim(&line, &len, '>', fh)) != -1) {
size_t k;
char *seq;
@@ -218,13 +220,7 @@ node * get_sparse_kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
next:
{
// bump up the mer value in the counts array
- node *tmp = search(&root, mer);
- if(tmp) {
- tmp->value++;
- }
- else {
- insert(&root, mer);
- }
+ (*counts)[mer]++;
}
}
}
@@ -232,7 +228,7 @@ node * get_sparse_kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
free(line);
fclose(fh);
- return root;
+ return counts;
}
@@ -248,7 +244,10 @@ unsigned long long * get_dense_kmer_counts_from_file(FILE *fh, const unsigned in
// width is 4^kmer
const unsigned long long width = pow_four(kmer);
- unsigned long long *counts = malloc(width+1 * sizeof(unsigned long long));
+ unsigned long long *counts = (unsigned long long *) calloc(width+1, sizeof(unsigned long long));
+ if(counts == NULL) {
+ exit(EXIT_FAILURE);
+ }
while ((read = getdelim(&line, &len, '>', fh)) != -1) {
size_t k;
char *seq;
@@ -305,7 +304,7 @@ unsigned long long * get_dense_kmer_counts_from_file(FILE *fh, const unsigned in
}
-unsigned long long * get_kmer_counts_from_filename(const char *fn, const unsigned int kmer) {
+unsigned long long * get_dense_kmer_counts_from_filename(const char *fn, const unsigned int kmer) {
FILE *fh = fopen(fn, "r");
if(fh == NULL) {
fprintf(stderr, "Could not open %s - %s\n", fn, strerror(errno));
@@ -315,7 +314,7 @@ unsigned long long * get_kmer_counts_from_filename(const char *fn, const unsigne
return get_dense_kmer_counts_from_file(fh, kmer);
}
-void print_dense(unsigned long long *counts, bool label, bool nonzero, unsigned int kmer) {
+void print_kmer(unsigned long long *counts, bool label, bool nonzero, unsigned int kmer) {
size_t width = pow_four(kmer);
size_t i = 0;
@@ -334,7 +333,7 @@ void print_dense(unsigned long long *counts, bool label, bool nonzero, unsigned
else {
for(i = 0; i < width; i++)
if(counts[i] != 0)
- fprintf(stdout, "%llu\t%llu\n", i, counts[i]);
+ fprintf(stdout, "%zu\t%llu\n", i, counts[i]);
}
}
@@ -356,28 +355,48 @@ void print_dense(unsigned long long *counts, bool label, bool nonzero, unsigned
}
}
-void print_sparse(node *tree, bool label, bool nonzero, unsigned int kmer) {
+void print_kmer(kmer_map *counts, bool label, bool nonzero, unsigned int kmer) {
+ size_t width = pow_four(kmer);
+ size_t i = 0;
+
+ // If nonzero is set, only print non zeros
+ if(nonzero) {
+ // if labels is set, print out our labels
+ if(label) {
+ for(i = 0; i < width; i++)
+ if(counts->count(i) != 0) {
+ char *kmer_str = index_to_kmer(i, kmer);
+ fprintf(stdout, "%s\t%llu\n", kmer_str, counts->at(i));
+ free(kmer_str);
+ }
- if (tree) {
- print_sparse(tree->left, label, nonzero, kmer);
- if(label && nonzero) {
- char *kmer_str = index_to_kmer(tree->index, kmer);
- if (tree->value != 0)
- fprintf(stdout, "%s\t%llu\n", kmer_str, tree->value);
- free(kmer_str);
}
- else if(label) {
- char *kmer_str = index_to_kmer(tree->index, kmer);
- fprintf(stdout, "%s\t%llu\n", kmer_str, tree->value);
- free(kmer_str);
+ else {
+ for(i = 0; i < width; i++)
+ if(counts->count(i) != 0)
+ fprintf(stdout, "%zu\t%llu\n", i, counts->at(i));
+
}
- else if(nonzero) {
- if (tree->value != 0)
- fprintf(stdout, "%zu\t%llu\n", tree->index, tree->value);
+ }
+ // If we aren't printing nonzeros print everything
+ else {
+ if(label) {
+ for(i = 0; i < width; i++) {
+ char *kmer_str = index_to_kmer(i, kmer);
+ if(counts->count(i) != 0)
+ fprintf(stdout, "%s\t%llu\n", kmer_str, counts->at(i));
+ else
+ fprintf(stdout, "%s\t0\n", kmer_str);
+ free(kmer_str);
+ }
+ }
+ else {
+ for(i = 0; i < width; i++) {
+ if(counts->count(i) != 0)
+ fprintf(stdout, "%llu\n", counts->at(i));
+ else
+ fprintf(stdout, "0\n");
+ }
}
- else
- fprintf(stdout, "%llu\n", tree->value);
-
- print_sparse(tree->right, label, nonzero, kmer);
}
}
diff --git a/kmer_utils.h b/kmer_utils.h
index af13227..cf7b8a1 100644
--- a/kmer_utils.h
+++ b/kmer_utils.h
@@ -1,3 +1,6 @@
+#include <unordered_map>
+using namespace std;
+
// Kmer functions
void convert_kmer_to_num(char *str, const unsigned long length);
unsigned long num_to_index(const char *str, const int kmer, const long error_pos, long long *current_position);
@@ -8,13 +11,16 @@ size_t strnstrip(char *s, int c, size_t len);
unsigned long long pow_four(unsigned long long x);
// Variables
-const unsigned char alpha[256];
+typedef unordered_map<size_t,unsigned long long> kmer_map;
// file loading functions
-node * get_sparse_kmer_counts_from_filename(const char *fn, const unsigned int kmer);
-node * get_sparse_kmer_counts_from_file(FILE *fh, const int kmer);
+kmer_map *get_sparse_kmer_counts_from_filename(const char *fn, const unsigned int kmer);
+kmer_map *get_sparse_kmer_counts_from_file(FILE *fh, int kmer);
unsigned long long * get_dense_kmer_counts_from_file(FILE *fh, const unsigned int kmer);
-void print_dense(unsigned long long *counts, bool label, bool nonzero, unsigned int kmer);
+void print_kmer(unsigned long long *counts, bool label, bool nonzero, unsigned int kmer);
+void print_kmer(kmer_map *counts, bool label, bool nonzero, unsigned int kmer);
+
+unsigned char alpha[256];