aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2014-02-18 12:42:11 -0500
committerCalvin Morrison <mutantturkey@gmail.com>2014-02-18 12:42:11 -0500
commit719c45d9ea49f33f087db899737ad5af4fc4414f (patch)
tree8f1eed693ffe3416f139c59e8481b81147d016ae
parent1c2ce90501d87db6431a7f29a37876d61347aff7 (diff)
working sparse implementation
-rw-r--r--Makefile8
-rw-r--r--kmer_total_count.c45
-rw-r--r--kmer_utils.c32
-rw-r--r--kmer_utils.h4
-rw-r--r--sparse.c88
-rw-r--r--sparse.h17
6 files changed, 140 insertions, 54 deletions
diff --git a/Makefile b/Makefile
index c929cfd..e51a508 100644
--- a/Makefile
+++ b/Makefile
@@ -1,17 +1,19 @@
VERSION=\"0.0.2\"
CC = gcc
-CFLAGS = -O3 -s -mtune=native -Wall -DVERSION=$(VERSION) -Wextra
+CFLAGS = -O3 -s -mtune=native -Wall -DVERSION=$(VERSION) -Wextra -lcsparse
DESTDIR = /usr/local/
all: libkmer.o libkmer.so kmer_total_count kmer_counts_per_sequence
+sparse.o: sparse.c
+ $(CC) -c sparse.c -o sparse.o $(CFLAGS)
libkmer.o: kmer_utils.c
$(CC) -c kmer_utils.c -o libkmer.o $(CFLAGS) -fPIC
libkmer.so: libkmer.o
$(CC) kmer_utils.c -o libkmer.so $(CFLAGS) -shared -fPIC
-kmer_total_count: libkmer.o kmer_total_count.c kmer_utils.h
- $(CC) libkmer.o kmer_total_count.c -o kmer_total_count $(CLIBS) $(CFLAGS)
+kmer_total_count: sparse.o libkmer.o kmer_total_count.c kmer_utils.h
+ $(CC) sparse.o libkmer.o kmer_total_count.c -o kmer_total_count $(CLIBS) $(CFLAGS)
kmer_counts_per_sequence: libkmer.o kmer_counts_per_sequence.c kmer_utils.h
$(CC) libkmer.o kmer_counts_per_sequence.c -o kmer_counts_per_sequence $(CLIBS) $(CFLAGS)
diff --git a/kmer_total_count.c b/kmer_total_count.c
index 6b627f2..87f1b69 100644
--- a/kmer_total_count.c
+++ b/kmer_total_count.c
@@ -6,6 +6,7 @@
#include <string.h>
#include <getopt.h>
+#include "sparse.h"
#include "kmer_utils.h"
@@ -115,43 +116,15 @@ int main(int argc, char **argv) {
width = pow_four(kmer);
- unsigned long long *counts = get_kmer_counts_from_file(fh, kmer);
-
- // If nonzero is set, only print non zeros
- if(nonzero) {
- // if labels is set, print out our labels
- if(label) {
- for(i = 0; i < width; i++)
- if(counts[i] != 0) {
- char *kmer_str = index_to_kmer(i, kmer);
- fprintf(stdout, "%s\t%llu\n", kmer_str, counts[i]);
- free(kmer_str);
- }
-
- }
- else {
- for(i = 0; i < width; i++)
- if(counts[i] != 0)
- fprintf(stdout, "%llu\t%llu\n", i, counts[i]);
-
- }
- }
- // If we aren't printing nonzeros print everything
- else {
- if(label) {
- for(i = 0; i < width; i++) {
- char *kmer_str = index_to_kmer(i, kmer);
- fprintf(stdout, "%s\t%llu\n", kmer_str, counts[i]);
- free(kmer_str);
- }
- }
- else {
- for(i = 0; i < width; i=i+4) {
- fprintf(stdout, "%llu\n%llu\n%llu\n%llu\n", counts[i], counts[i+1], counts[i+2], counts[i+3]);
- }
- }
+ if(kmer > 12) {
+ node *root = get_kmer_counts_from_file(fh, kmer);
+ print_sparse(root, label, nonzero, kmer);
}
+ //else {
+// unsigned long long *counts = get_kmer_counts_from_file(fh, kmer);
+// print(countst, label, nonzero);
+ //free(counts);
+// }
- free(counts);
return EXIT_SUCCESS;
}
diff --git a/kmer_utils.c b/kmer_utils.c
index 060e311..f2057ee 100644
--- a/kmer_utils.c
+++ b/kmer_utils.c
@@ -4,6 +4,7 @@
#include <stdlib.h>
#include <string.h>
+#include "sparse.h"
const unsigned char alpha[256] =
{5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
@@ -156,8 +157,8 @@ size_t strnstrip(char *s, int c, size_t len) {
}
-unsigned long long * get_kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
-
+node * get_kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
+
char *line = NULL;
size_t len = 0;
ssize_t read;
@@ -166,15 +167,11 @@ unsigned long long * get_kmer_counts_from_file(FILE *fh, const unsigned int kmer
long long position = 0;
// width is 4^kmer
- // there's a sneaky bitshift to avoid pow dependency
- const unsigned long width = pow_four(kmer);
+ const unsigned long long width = pow_four(kmer);
+
+ node *root = NULL;
+
- // malloc our return array
- unsigned long long * counts = calloc((width+ 1), sizeof(unsigned long long));
- if(counts == NULL) {
- fprintf(stderr, strerror(errno));
- exit(EXIT_FAILURE);
- }
while ((read = getdelim(&line, &len, '>', fh)) != -1) {
size_t k;
@@ -219,15 +216,24 @@ unsigned long long * get_kmer_counts_from_file(FILE *fh, const unsigned int kmer
}
// use this point to get mer of our loop
next:
- // bump up the mer value in the counts array
- counts[mer]++;
+ {
+ // bump up the mer value in the counts array
+ node *tmp = search(&root, mer);
+ if(tmp) {
+ tmp->value++;
+ }
+ else {
+ insert(&root, mer);
+ }
+ }
}
}
free(line);
fclose(fh);
- return counts;
+ return root;
+
}
unsigned long long * get_kmer_counts_from_filename(const char *fn, const unsigned int kmer) {
diff --git a/kmer_utils.h b/kmer_utils.h
index ceb28eb..bf60dc4 100644
--- a/kmer_utils.h
+++ b/kmer_utils.h
@@ -12,5 +12,5 @@ const unsigned char alpha[256];
// file loading functions
unsigned long long load_specific_mers_from_file(const char *fn, unsigned int kmer, size_t width, size_t *arr);
-unsigned long long * get_kmer_counts_from_filename(const char *fn, const unsigned int kmer);
-unsigned long long * get_kmer_counts_from_file(FILE *fh, const int kmer);
+node * get_kmer_counts_from_filename(const char *fn, const unsigned int kmer);
+node * get_kmer_counts_from_file(FILE *fh, const int kmer);
diff --git a/sparse.c b/sparse.c
new file mode 100644
index 0000000..9ab6902
--- /dev/null
+++ b/sparse.c
@@ -0,0 +1,88 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+struct bin_tree {
+ size_t index;
+ unsigned long long value;
+ struct bin_tree *right;
+ struct bin_tree *left;
+};
+
+typedef struct bin_tree node;
+
+void insert(node ** tree, size_t index) {
+ node *temp = NULL;
+ if(!(*tree)) {
+ temp = (node *)malloc(sizeof(node));
+ temp->left = temp->right = NULL;
+ temp->index = index;
+ temp->value = 1;
+ *tree = temp;
+ return;
+ }
+
+ if(index < (*tree)->index)
+ insert(&(*tree)->left, index);
+ else if(index > (*tree)->index)
+ insert(&(*tree)->right, index);
+
+}
+
+void deltree(node * tree) {
+ if (tree) {
+ deltree(tree->left);
+ deltree(tree->right);
+ free(tree);
+ }
+}
+
+node* search(node ** tree, size_t index) {
+ if(!(*tree))
+ return NULL;
+
+ if(index < (*tree)->index)
+ search(&((*tree)->left), index);
+
+ else if(index > (*tree)->index)
+ search(&((*tree)->right), index);
+
+ else if(index == (*tree)->index)
+ return *tree;
+}
+
+unsigned long long *lookup(node **tree, size_t index) {
+ node *ret = search(tree, index);
+ if(ret)
+ return ret->value;
+ else
+ return 0;
+}
+
+void print_sparse(node *tree, bool label, bool nonzero, unsigned int kmer) {
+
+ if (tree) {
+ print_sparse(tree->left, label, nonzero, kmer);
+ if(label && nonzero) {
+ char *kmer_str = index_to_kmer(tree->index, kmer);
+ if (tree->value != 0)
+ fprintf(stdout, "%s\t%llu\n", kmer_str, tree->value);
+ free(kmer_str);
+ }
+ else if(label) {
+ char *kmer_str = index_to_kmer(tree->index, kmer);
+ fprintf(stdout, "%s\t%llu\n", kmer_str, tree->value);
+ free(kmer_str);
+ }
+ else if(nonzero) {
+ if (tree->value != 0)
+ fprintf(stdout, "%zu\t%llu\n", tree->index, tree->value);
+ }
+ else
+ fprintf(stdout, "%llu\n", tree->value);
+
+ print_sparse(tree->right, label, nonzero, kmer);
+ }
+}
diff --git a/sparse.h b/sparse.h
new file mode 100644
index 0000000..2ffbabd
--- /dev/null
+++ b/sparse.h
@@ -0,0 +1,17 @@
+#include <stdbool.h>
+
+struct b_tree {
+ size_t index;
+ unsigned long long value;
+ struct b_tree *right;
+ struct b_tree *left;
+};
+
+typedef struct b_tree node;
+
+node* search(node **tree, size_t index);
+void insert(node **tree, size_t index);
+void deltree(node *tree);
+unsigned long long lookup(node **tree, size_t index);
+
+void print_sparse(node *tree, bool label, bool nonzero, unsigned int kmer);