aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--kmer_utils.c9
-rw-r--r--mer.c70
-rw-r--r--mer_generator.c28
3 files changed, 102 insertions, 5 deletions
diff --git a/kmer_utils.c b/kmer_utils.c
index e69f485..2f0a923 100644
--- a/kmer_utils.c
+++ b/kmer_utils.c
@@ -3,8 +3,8 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <libjit.h>
+#include "mer.c"
const unsigned char alpha[256] =
{5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
@@ -24,9 +24,6 @@ unsigned long long pow_four(unsigned long long x) {
return (unsigned long long)1 << (x * 2);
}
-unsigned long long mer_6(const char *seq_h) {
- return seq_h[5] + seq_h[4] * 4 + seq_h[3] * 16 + seq_h[2] * 64 + seq_h[1] * 256 + seq_h[0] * 1024;
-}
// convert a string of k-mer size base-4 values into a
// base-10 index
unsigned long num_to_index(const char *str, const int kmer, const long error_pos, long long *current_position) {
@@ -177,6 +174,8 @@ unsigned long long * get_kmer_counts_from_file(FILE *fh, const unsigned int kmer
exit(EXIT_FAILURE);
}
+ merptr_t mer_ptr = get_ptr(kmer);
+
while ((read = getdelim(&line, &len, '>', fh)) != -1) {
size_t k;
long long i;
@@ -209,7 +208,7 @@ unsigned long long * get_kmer_counts_from_file(FILE *fh, const unsigned int kmer
continue;
}
- counts[mer_6(seq_h)]++;
+ counts[(*mer_ptr)(seq_h)]++;
}
}
diff --git a/mer.c b/mer.c
new file mode 100644
index 0000000..b6df711
--- /dev/null
+++ b/mer.c
@@ -0,0 +1,70 @@
+unsigned long long mer_1(const char *seq) { return (seq[0] * 1) + 0; }
+unsigned long long mer_2(const char *seq) { return (seq[1] * 1) + (seq[0] * 4) + 0; }
+unsigned long long mer_3(const char *seq) { return (seq[2] * 1) + (seq[1] * 4) + (seq[0] * 16) + 0; }
+unsigned long long mer_4(const char *seq) { return (seq[3] * 1) + (seq[2] * 4) + (seq[1] * 16) + (seq[0] * 64) + 0; }
+unsigned long long mer_5(const char *seq) { return (seq[4] * 1) + (seq[3] * 4) + (seq[2] * 16) + (seq[1] * 64) + (seq[0] * 256) + 0; }
+unsigned long long mer_6(const char *seq) { return (seq[5] * 1) + (seq[4] * 4) + (seq[3] * 16) + (seq[2] * 64) + (seq[1] * 256) + (seq[0] * 1024) + 0; }
+unsigned long long mer_7(const char *seq) { return (seq[6] * 1) + (seq[5] * 4) + (seq[4] * 16) + (seq[3] * 64) + (seq[2] * 256) + (seq[1] * 1024) + (seq[0] * 4096) + 0; }
+unsigned long long mer_8(const char *seq) { return (seq[7] * 1) + (seq[6] * 4) + (seq[5] * 16) + (seq[4] * 64) + (seq[3] * 256) + (seq[2] * 1024) + (seq[1] * 4096) + (seq[0] * 16384) + 0; }
+unsigned long long mer_9(const char *seq) { return (seq[8] * 1) + (seq[7] * 4) + (seq[6] * 16) + (seq[5] * 64) + (seq[4] * 256) + (seq[3] * 1024) + (seq[2] * 4096) + (seq[1] * 16384) + (seq[0] * 65536) + 0; }
+unsigned long long mer_10(const char *seq) { return (seq[9] * 1) + (seq[8] * 4) + (seq[7] * 16) + (seq[6] * 64) + (seq[5] * 256) + (seq[4] * 1024) + (seq[3] * 4096) + (seq[2] * 16384) + (seq[1] * 65536) + (seq[0] * 262144) + 0; }
+unsigned long long mer_11(const char *seq) { return (seq[10] * 1) + (seq[9] * 4) + (seq[8] * 16) + (seq[7] * 64) + (seq[6] * 256) + (seq[5] * 1024) + (seq[4] * 4096) + (seq[3] * 16384) + (seq[2] * 65536) + (seq[1] * 262144) + (seq[0] * 1048576) + 0; }
+unsigned long long mer_12(const char *seq) { return (seq[11] * 1) + (seq[10] * 4) + (seq[9] * 16) + (seq[8] * 64) + (seq[7] * 256) + (seq[6] * 1024) + (seq[5] * 4096) + (seq[4] * 16384) + (seq[3] * 65536) + (seq[2] * 262144) + (seq[1] * 1048576) + (seq[0] * 4194304) + 0; }
+unsigned long long mer_13(const char *seq) { return (seq[12] * 1) + (seq[11] * 4) + (seq[10] * 16) + (seq[9] * 64) + (seq[8] * 256) + (seq[7] * 1024) + (seq[6] * 4096) + (seq[5] * 16384) + (seq[4] * 65536) + (seq[3] * 262144) + (seq[2] * 1048576) + (seq[1] * 4194304) + (seq[0] * 16777216) + 0; }
+unsigned long long mer_14(const char *seq) { return (seq[13] * 1) + (seq[12] * 4) + (seq[11] * 16) + (seq[10] * 64) + (seq[9] * 256) + (seq[8] * 1024) + (seq[7] * 4096) + (seq[6] * 16384) + (seq[5] * 65536) + (seq[4] * 262144) + (seq[3] * 1048576) + (seq[2] * 4194304) + (seq[1] * 16777216) + (seq[0] * 67108864) + 0; }
+unsigned long long mer_15(const char *seq) { return (seq[14] * 1) + (seq[13] * 4) + (seq[12] * 16) + (seq[11] * 64) + (seq[10] * 256) + (seq[9] * 1024) + (seq[8] * 4096) + (seq[7] * 16384) + (seq[6] * 65536) + (seq[5] * 262144) + (seq[4] * 1048576) + (seq[3] * 4194304) + (seq[2] * 16777216) + (seq[1] * 67108864) + (seq[0] * 268435456) + 0; }
+unsigned long long mer_16(const char *seq) { return (seq[15] * 1) + (seq[14] * 4) + (seq[13] * 16) + (seq[12] * 64) + (seq[11] * 256) + (seq[10] * 1024) + (seq[9] * 4096) + (seq[8] * 16384) + (seq[7] * 65536) + (seq[6] * 262144) + (seq[5] * 1048576) + (seq[4] * 4194304) + (seq[3] * 16777216) + (seq[2] * 67108864) + (seq[1] * 268435456) + (seq[0] * 1073741824) + 0; }
+unsigned long long mer_17(const char *seq) { return (seq[16] * 1) + (seq[15] * 4) + (seq[14] * 16) + (seq[13] * 64) + (seq[12] * 256) + (seq[11] * 1024) + (seq[10] * 4096) + (seq[9] * 16384) + (seq[8] * 65536) + (seq[7] * 262144) + (seq[6] * 1048576) + (seq[5] * 4194304) + (seq[4] * 16777216) + (seq[3] * 67108864) + (seq[2] * 268435456) + (seq[1] * 1073741824) + (seq[0] * 0) + 0; }
+unsigned long long mer_18(const char *seq) { return (seq[17] * 1) + (seq[16] * 4) + (seq[15] * 16) + (seq[14] * 64) + (seq[13] * 256) + (seq[12] * 1024) + (seq[11] * 4096) + (seq[10] * 16384) + (seq[9] * 65536) + (seq[8] * 262144) + (seq[7] * 1048576) + (seq[6] * 4194304) + (seq[5] * 16777216) + (seq[4] * 67108864) + (seq[3] * 268435456) + (seq[2] * 1073741824) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+unsigned long long mer_19(const char *seq) { return (seq[18] * 1) + (seq[17] * 4) + (seq[16] * 16) + (seq[15] * 64) + (seq[14] * 256) + (seq[13] * 1024) + (seq[12] * 4096) + (seq[11] * 16384) + (seq[10] * 65536) + (seq[9] * 262144) + (seq[8] * 1048576) + (seq[7] * 4194304) + (seq[6] * 16777216) + (seq[5] * 67108864) + (seq[4] * 268435456) + (seq[3] * 1073741824) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+unsigned long long mer_20(const char *seq) { return (seq[19] * 1) + (seq[18] * 4) + (seq[17] * 16) + (seq[16] * 64) + (seq[15] * 256) + (seq[14] * 1024) + (seq[13] * 4096) + (seq[12] * 16384) + (seq[11] * 65536) + (seq[10] * 262144) + (seq[9] * 1048576) + (seq[8] * 4194304) + (seq[7] * 16777216) + (seq[6] * 67108864) + (seq[5] * 268435456) + (seq[4] * 1073741824) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+unsigned long long mer_21(const char *seq) { return (seq[20] * 1) + (seq[19] * 4) + (seq[18] * 16) + (seq[17] * 64) + (seq[16] * 256) + (seq[15] * 1024) + (seq[14] * 4096) + (seq[13] * 16384) + (seq[12] * 65536) + (seq[11] * 262144) + (seq[10] * 1048576) + (seq[9] * 4194304) + (seq[8] * 16777216) + (seq[7] * 67108864) + (seq[6] * 268435456) + (seq[5] * 1073741824) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+unsigned long long mer_22(const char *seq) { return (seq[21] * 1) + (seq[20] * 4) + (seq[19] * 16) + (seq[18] * 64) + (seq[17] * 256) + (seq[16] * 1024) + (seq[15] * 4096) + (seq[14] * 16384) + (seq[13] * 65536) + (seq[12] * 262144) + (seq[11] * 1048576) + (seq[10] * 4194304) + (seq[9] * 16777216) + (seq[8] * 67108864) + (seq[7] * 268435456) + (seq[6] * 1073741824) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+unsigned long long mer_23(const char *seq) { return (seq[22] * 1) + (seq[21] * 4) + (seq[20] * 16) + (seq[19] * 64) + (seq[18] * 256) + (seq[17] * 1024) + (seq[16] * 4096) + (seq[15] * 16384) + (seq[14] * 65536) + (seq[13] * 262144) + (seq[12] * 1048576) + (seq[11] * 4194304) + (seq[10] * 16777216) + (seq[9] * 67108864) + (seq[8] * 268435456) + (seq[7] * 1073741824) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+unsigned long long mer_24(const char *seq) { return (seq[23] * 1) + (seq[22] * 4) + (seq[21] * 16) + (seq[20] * 64) + (seq[19] * 256) + (seq[18] * 1024) + (seq[17] * 4096) + (seq[16] * 16384) + (seq[15] * 65536) + (seq[14] * 262144) + (seq[13] * 1048576) + (seq[12] * 4194304) + (seq[11] * 16777216) + (seq[10] * 67108864) + (seq[9] * 268435456) + (seq[8] * 1073741824) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+unsigned long long mer_25(const char *seq) { return (seq[24] * 1) + (seq[23] * 4) + (seq[22] * 16) + (seq[21] * 64) + (seq[20] * 256) + (seq[19] * 1024) + (seq[18] * 4096) + (seq[17] * 16384) + (seq[16] * 65536) + (seq[15] * 262144) + (seq[14] * 1048576) + (seq[13] * 4194304) + (seq[12] * 16777216) + (seq[11] * 67108864) + (seq[10] * 268435456) + (seq[9] * 1073741824) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+unsigned long long mer_26(const char *seq) { return (seq[25] * 1) + (seq[24] * 4) + (seq[23] * 16) + (seq[22] * 64) + (seq[21] * 256) + (seq[20] * 1024) + (seq[19] * 4096) + (seq[18] * 16384) + (seq[17] * 65536) + (seq[16] * 262144) + (seq[15] * 1048576) + (seq[14] * 4194304) + (seq[13] * 16777216) + (seq[12] * 67108864) + (seq[11] * 268435456) + (seq[10] * 1073741824) + (seq[9] * 0) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+unsigned long long mer_27(const char *seq) { return (seq[26] * 1) + (seq[25] * 4) + (seq[24] * 16) + (seq[23] * 64) + (seq[22] * 256) + (seq[21] * 1024) + (seq[20] * 4096) + (seq[19] * 16384) + (seq[18] * 65536) + (seq[17] * 262144) + (seq[16] * 1048576) + (seq[15] * 4194304) + (seq[14] * 16777216) + (seq[13] * 67108864) + (seq[12] * 268435456) + (seq[11] * 1073741824) + (seq[10] * 0) + (seq[9] * 0) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+unsigned long long mer_28(const char *seq) { return (seq[27] * 1) + (seq[26] * 4) + (seq[25] * 16) + (seq[24] * 64) + (seq[23] * 256) + (seq[22] * 1024) + (seq[21] * 4096) + (seq[20] * 16384) + (seq[19] * 65536) + (seq[18] * 262144) + (seq[17] * 1048576) + (seq[16] * 4194304) + (seq[15] * 16777216) + (seq[14] * 67108864) + (seq[13] * 268435456) + (seq[12] * 1073741824) + (seq[11] * 0) + (seq[10] * 0) + (seq[9] * 0) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+unsigned long long mer_29(const char *seq) { return (seq[28] * 1) + (seq[27] * 4) + (seq[26] * 16) + (seq[25] * 64) + (seq[24] * 256) + (seq[23] * 1024) + (seq[22] * 4096) + (seq[21] * 16384) + (seq[20] * 65536) + (seq[19] * 262144) + (seq[18] * 1048576) + (seq[17] * 4194304) + (seq[16] * 16777216) + (seq[15] * 67108864) + (seq[14] * 268435456) + (seq[13] * 1073741824) + (seq[12] * 0) + (seq[11] * 0) + (seq[10] * 0) + (seq[9] * 0) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+unsigned long long mer_30(const char *seq) { return (seq[29] * 1) + (seq[28] * 4) + (seq[27] * 16) + (seq[26] * 64) + (seq[25] * 256) + (seq[24] * 1024) + (seq[23] * 4096) + (seq[22] * 16384) + (seq[21] * 65536) + (seq[20] * 262144) + (seq[19] * 1048576) + (seq[18] * 4194304) + (seq[17] * 16777216) + (seq[16] * 67108864) + (seq[15] * 268435456) + (seq[14] * 1073741824) + (seq[13] * 0) + (seq[12] * 0) + (seq[11] * 0) + (seq[10] * 0) + (seq[9] * 0) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+unsigned long long mer_31(const char *seq) { return (seq[30] * 1) + (seq[29] * 4) + (seq[28] * 16) + (seq[27] * 64) + (seq[26] * 256) + (seq[25] * 1024) + (seq[24] * 4096) + (seq[23] * 16384) + (seq[22] * 65536) + (seq[21] * 262144) + (seq[20] * 1048576) + (seq[19] * 4194304) + (seq[18] * 16777216) + (seq[17] * 67108864) + (seq[16] * 268435456) + (seq[15] * 1073741824) + (seq[14] * 0) + (seq[13] * 0) + (seq[12] * 0) + (seq[11] * 0) + (seq[10] * 0) + (seq[9] * 0) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; }
+
+typedef unsigned long long (*merptr_t)(const char *);
+
+merptr_t get_ptr(int kmer) {
+ switch(kmer) {
+ case 1: return mer_1;
+ case 2: return mer_2;
+ case 3: return mer_3;
+ case 4: return mer_4;
+ case 5: return mer_5;
+ case 6: return mer_6;
+ case 7: return mer_7;
+ case 8: return mer_8;
+ case 9: return mer_9;
+ case 10: return mer_10;
+ case 11: return mer_11;
+ case 12: return mer_12;
+ case 13: return mer_13;
+ case 14: return mer_14;
+ case 15: return mer_15;
+ case 16: return mer_16;
+ case 17: return mer_17;
+ case 18: return mer_18;
+ case 19: return mer_19;
+ case 20: return mer_20;
+ case 21: return mer_21;
+ case 22: return mer_22;
+ case 23: return mer_23;
+ case 24: return mer_24;
+ case 25: return mer_25;
+ case 26: return mer_26;
+ case 27: return mer_27;
+ case 28: return mer_28;
+ case 29: return mer_29;
+ case 30: return mer_30;
+ case 31: return mer_31;
+ }
+ return NULL;
+}
diff --git a/mer_generator.c b/mer_generator.c
new file mode 100644
index 0000000..a20c56f
--- /dev/null
+++ b/mer_generator.c
@@ -0,0 +1,28 @@
+#include "stdio.h"
+#include "limits.h"
+
+#include "kmer_utils.h"
+
+main() {
+
+ int i = 0;
+ int j = 0;
+ unsigned long out = 0;
+
+ for(j = 1; j < 32; j++) {
+ printf("unsigned long long mer_%d(const char *seq) { return ", j);
+ unsigned long multiply = 1;
+ for(i = j - 1; i >= 0; i--){
+ printf("(seq[%d] * %d) + ", i, multiply);
+ multiply = multiply << 2;
+ }
+ printf(" 0; }\n");
+ }
+
+ printf("int (*return_fn())(const char * )mer_ptr(int kmer) { switch(kmer) {");
+ for(j = 1; j < 32; j++)
+ printf("case %d: return mer_%d;", j, j);
+ printf("}");
+
+
+}