diff options
-rw-r--r-- | kmer_utils.c | 9 | ||||
-rw-r--r-- | mer.c | 70 | ||||
-rw-r--r-- | mer_generator.c | 28 |
3 files changed, 102 insertions, 5 deletions
diff --git a/kmer_utils.c b/kmer_utils.c index e69f485..2f0a923 100644 --- a/kmer_utils.c +++ b/kmer_utils.c @@ -3,8 +3,8 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <libjit.h> +#include "mer.c" const unsigned char alpha[256] = {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -24,9 +24,6 @@ unsigned long long pow_four(unsigned long long x) { return (unsigned long long)1 << (x * 2); } -unsigned long long mer_6(const char *seq_h) { - return seq_h[5] + seq_h[4] * 4 + seq_h[3] * 16 + seq_h[2] * 64 + seq_h[1] * 256 + seq_h[0] * 1024; -} // convert a string of k-mer size base-4 values into a // base-10 index unsigned long num_to_index(const char *str, const int kmer, const long error_pos, long long *current_position) { @@ -177,6 +174,8 @@ unsigned long long * get_kmer_counts_from_file(FILE *fh, const unsigned int kmer exit(EXIT_FAILURE); } + merptr_t mer_ptr = get_ptr(kmer); + while ((read = getdelim(&line, &len, '>', fh)) != -1) { size_t k; long long i; @@ -209,7 +208,7 @@ unsigned long long * get_kmer_counts_from_file(FILE *fh, const unsigned int kmer continue; } - counts[mer_6(seq_h)]++; + counts[(*mer_ptr)(seq_h)]++; } } @@ -0,0 +1,70 @@ +unsigned long long mer_1(const char *seq) { return (seq[0] * 1) + 0; } +unsigned long long mer_2(const char *seq) { return (seq[1] * 1) + (seq[0] * 4) + 0; } +unsigned long long mer_3(const char *seq) { return (seq[2] * 1) + (seq[1] * 4) + (seq[0] * 16) + 0; } +unsigned long long mer_4(const char *seq) { return (seq[3] * 1) + (seq[2] * 4) + (seq[1] * 16) + (seq[0] * 64) + 0; } +unsigned long long mer_5(const char *seq) { return (seq[4] * 1) + (seq[3] * 4) + (seq[2] * 16) + (seq[1] * 64) + (seq[0] * 256) + 0; } +unsigned long long mer_6(const char *seq) { return (seq[5] * 1) + (seq[4] * 4) + (seq[3] * 16) + (seq[2] * 64) + (seq[1] * 256) + (seq[0] * 1024) + 0; } +unsigned long long mer_7(const char *seq) { return (seq[6] * 1) + (seq[5] * 4) + (seq[4] * 16) + (seq[3] * 64) + (seq[2] * 256) + (seq[1] * 1024) + (seq[0] * 4096) + 0; } +unsigned long long mer_8(const char *seq) { return (seq[7] * 1) + (seq[6] * 4) + (seq[5] * 16) + (seq[4] * 64) + (seq[3] * 256) + (seq[2] * 1024) + (seq[1] * 4096) + (seq[0] * 16384) + 0; } +unsigned long long mer_9(const char *seq) { return (seq[8] * 1) + (seq[7] * 4) + (seq[6] * 16) + (seq[5] * 64) + (seq[4] * 256) + (seq[3] * 1024) + (seq[2] * 4096) + (seq[1] * 16384) + (seq[0] * 65536) + 0; } +unsigned long long mer_10(const char *seq) { return (seq[9] * 1) + (seq[8] * 4) + (seq[7] * 16) + (seq[6] * 64) + (seq[5] * 256) + (seq[4] * 1024) + (seq[3] * 4096) + (seq[2] * 16384) + (seq[1] * 65536) + (seq[0] * 262144) + 0; } +unsigned long long mer_11(const char *seq) { return (seq[10] * 1) + (seq[9] * 4) + (seq[8] * 16) + (seq[7] * 64) + (seq[6] * 256) + (seq[5] * 1024) + (seq[4] * 4096) + (seq[3] * 16384) + (seq[2] * 65536) + (seq[1] * 262144) + (seq[0] * 1048576) + 0; } +unsigned long long mer_12(const char *seq) { return (seq[11] * 1) + (seq[10] * 4) + (seq[9] * 16) + (seq[8] * 64) + (seq[7] * 256) + (seq[6] * 1024) + (seq[5] * 4096) + (seq[4] * 16384) + (seq[3] * 65536) + (seq[2] * 262144) + (seq[1] * 1048576) + (seq[0] * 4194304) + 0; } +unsigned long long mer_13(const char *seq) { return (seq[12] * 1) + (seq[11] * 4) + (seq[10] * 16) + (seq[9] * 64) + (seq[8] * 256) + (seq[7] * 1024) + (seq[6] * 4096) + (seq[5] * 16384) + (seq[4] * 65536) + (seq[3] * 262144) + (seq[2] * 1048576) + (seq[1] * 4194304) + (seq[0] * 16777216) + 0; } +unsigned long long mer_14(const char *seq) { return (seq[13] * 1) + (seq[12] * 4) + (seq[11] * 16) + (seq[10] * 64) + (seq[9] * 256) + (seq[8] * 1024) + (seq[7] * 4096) + (seq[6] * 16384) + (seq[5] * 65536) + (seq[4] * 262144) + (seq[3] * 1048576) + (seq[2] * 4194304) + (seq[1] * 16777216) + (seq[0] * 67108864) + 0; } +unsigned long long mer_15(const char *seq) { return (seq[14] * 1) + (seq[13] * 4) + (seq[12] * 16) + (seq[11] * 64) + (seq[10] * 256) + (seq[9] * 1024) + (seq[8] * 4096) + (seq[7] * 16384) + (seq[6] * 65536) + (seq[5] * 262144) + (seq[4] * 1048576) + (seq[3] * 4194304) + (seq[2] * 16777216) + (seq[1] * 67108864) + (seq[0] * 268435456) + 0; } +unsigned long long mer_16(const char *seq) { return (seq[15] * 1) + (seq[14] * 4) + (seq[13] * 16) + (seq[12] * 64) + (seq[11] * 256) + (seq[10] * 1024) + (seq[9] * 4096) + (seq[8] * 16384) + (seq[7] * 65536) + (seq[6] * 262144) + (seq[5] * 1048576) + (seq[4] * 4194304) + (seq[3] * 16777216) + (seq[2] * 67108864) + (seq[1] * 268435456) + (seq[0] * 1073741824) + 0; } +unsigned long long mer_17(const char *seq) { return (seq[16] * 1) + (seq[15] * 4) + (seq[14] * 16) + (seq[13] * 64) + (seq[12] * 256) + (seq[11] * 1024) + (seq[10] * 4096) + (seq[9] * 16384) + (seq[8] * 65536) + (seq[7] * 262144) + (seq[6] * 1048576) + (seq[5] * 4194304) + (seq[4] * 16777216) + (seq[3] * 67108864) + (seq[2] * 268435456) + (seq[1] * 1073741824) + (seq[0] * 0) + 0; } +unsigned long long mer_18(const char *seq) { return (seq[17] * 1) + (seq[16] * 4) + (seq[15] * 16) + (seq[14] * 64) + (seq[13] * 256) + (seq[12] * 1024) + (seq[11] * 4096) + (seq[10] * 16384) + (seq[9] * 65536) + (seq[8] * 262144) + (seq[7] * 1048576) + (seq[6] * 4194304) + (seq[5] * 16777216) + (seq[4] * 67108864) + (seq[3] * 268435456) + (seq[2] * 1073741824) + (seq[1] * 0) + (seq[0] * 0) + 0; } +unsigned long long mer_19(const char *seq) { return (seq[18] * 1) + (seq[17] * 4) + (seq[16] * 16) + (seq[15] * 64) + (seq[14] * 256) + (seq[13] * 1024) + (seq[12] * 4096) + (seq[11] * 16384) + (seq[10] * 65536) + (seq[9] * 262144) + (seq[8] * 1048576) + (seq[7] * 4194304) + (seq[6] * 16777216) + (seq[5] * 67108864) + (seq[4] * 268435456) + (seq[3] * 1073741824) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; } +unsigned long long mer_20(const char *seq) { return (seq[19] * 1) + (seq[18] * 4) + (seq[17] * 16) + (seq[16] * 64) + (seq[15] * 256) + (seq[14] * 1024) + (seq[13] * 4096) + (seq[12] * 16384) + (seq[11] * 65536) + (seq[10] * 262144) + (seq[9] * 1048576) + (seq[8] * 4194304) + (seq[7] * 16777216) + (seq[6] * 67108864) + (seq[5] * 268435456) + (seq[4] * 1073741824) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; } +unsigned long long mer_21(const char *seq) { return (seq[20] * 1) + (seq[19] * 4) + (seq[18] * 16) + (seq[17] * 64) + (seq[16] * 256) + (seq[15] * 1024) + (seq[14] * 4096) + (seq[13] * 16384) + (seq[12] * 65536) + (seq[11] * 262144) + (seq[10] * 1048576) + (seq[9] * 4194304) + (seq[8] * 16777216) + (seq[7] * 67108864) + (seq[6] * 268435456) + (seq[5] * 1073741824) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; } +unsigned long long mer_22(const char *seq) { return (seq[21] * 1) + (seq[20] * 4) + (seq[19] * 16) + (seq[18] * 64) + (seq[17] * 256) + (seq[16] * 1024) + (seq[15] * 4096) + (seq[14] * 16384) + (seq[13] * 65536) + (seq[12] * 262144) + (seq[11] * 1048576) + (seq[10] * 4194304) + (seq[9] * 16777216) + (seq[8] * 67108864) + (seq[7] * 268435456) + (seq[6] * 1073741824) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; } +unsigned long long mer_23(const char *seq) { return (seq[22] * 1) + (seq[21] * 4) + (seq[20] * 16) + (seq[19] * 64) + (seq[18] * 256) + (seq[17] * 1024) + (seq[16] * 4096) + (seq[15] * 16384) + (seq[14] * 65536) + (seq[13] * 262144) + (seq[12] * 1048576) + (seq[11] * 4194304) + (seq[10] * 16777216) + (seq[9] * 67108864) + (seq[8] * 268435456) + (seq[7] * 1073741824) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; } +unsigned long long mer_24(const char *seq) { return (seq[23] * 1) + (seq[22] * 4) + (seq[21] * 16) + (seq[20] * 64) + (seq[19] * 256) + (seq[18] * 1024) + (seq[17] * 4096) + (seq[16] * 16384) + (seq[15] * 65536) + (seq[14] * 262144) + (seq[13] * 1048576) + (seq[12] * 4194304) + (seq[11] * 16777216) + (seq[10] * 67108864) + (seq[9] * 268435456) + (seq[8] * 1073741824) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; } +unsigned long long mer_25(const char *seq) { return (seq[24] * 1) + (seq[23] * 4) + (seq[22] * 16) + (seq[21] * 64) + (seq[20] * 256) + (seq[19] * 1024) + (seq[18] * 4096) + (seq[17] * 16384) + (seq[16] * 65536) + (seq[15] * 262144) + (seq[14] * 1048576) + (seq[13] * 4194304) + (seq[12] * 16777216) + (seq[11] * 67108864) + (seq[10] * 268435456) + (seq[9] * 1073741824) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; } +unsigned long long mer_26(const char *seq) { return (seq[25] * 1) + (seq[24] * 4) + (seq[23] * 16) + (seq[22] * 64) + (seq[21] * 256) + (seq[20] * 1024) + (seq[19] * 4096) + (seq[18] * 16384) + (seq[17] * 65536) + (seq[16] * 262144) + (seq[15] * 1048576) + (seq[14] * 4194304) + (seq[13] * 16777216) + (seq[12] * 67108864) + (seq[11] * 268435456) + (seq[10] * 1073741824) + (seq[9] * 0) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; } +unsigned long long mer_27(const char *seq) { return (seq[26] * 1) + (seq[25] * 4) + (seq[24] * 16) + (seq[23] * 64) + (seq[22] * 256) + (seq[21] * 1024) + (seq[20] * 4096) + (seq[19] * 16384) + (seq[18] * 65536) + (seq[17] * 262144) + (seq[16] * 1048576) + (seq[15] * 4194304) + (seq[14] * 16777216) + (seq[13] * 67108864) + (seq[12] * 268435456) + (seq[11] * 1073741824) + (seq[10] * 0) + (seq[9] * 0) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; } +unsigned long long mer_28(const char *seq) { return (seq[27] * 1) + (seq[26] * 4) + (seq[25] * 16) + (seq[24] * 64) + (seq[23] * 256) + (seq[22] * 1024) + (seq[21] * 4096) + (seq[20] * 16384) + (seq[19] * 65536) + (seq[18] * 262144) + (seq[17] * 1048576) + (seq[16] * 4194304) + (seq[15] * 16777216) + (seq[14] * 67108864) + (seq[13] * 268435456) + (seq[12] * 1073741824) + (seq[11] * 0) + (seq[10] * 0) + (seq[9] * 0) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; } +unsigned long long mer_29(const char *seq) { return (seq[28] * 1) + (seq[27] * 4) + (seq[26] * 16) + (seq[25] * 64) + (seq[24] * 256) + (seq[23] * 1024) + (seq[22] * 4096) + (seq[21] * 16384) + (seq[20] * 65536) + (seq[19] * 262144) + (seq[18] * 1048576) + (seq[17] * 4194304) + (seq[16] * 16777216) + (seq[15] * 67108864) + (seq[14] * 268435456) + (seq[13] * 1073741824) + (seq[12] * 0) + (seq[11] * 0) + (seq[10] * 0) + (seq[9] * 0) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; } +unsigned long long mer_30(const char *seq) { return (seq[29] * 1) + (seq[28] * 4) + (seq[27] * 16) + (seq[26] * 64) + (seq[25] * 256) + (seq[24] * 1024) + (seq[23] * 4096) + (seq[22] * 16384) + (seq[21] * 65536) + (seq[20] * 262144) + (seq[19] * 1048576) + (seq[18] * 4194304) + (seq[17] * 16777216) + (seq[16] * 67108864) + (seq[15] * 268435456) + (seq[14] * 1073741824) + (seq[13] * 0) + (seq[12] * 0) + (seq[11] * 0) + (seq[10] * 0) + (seq[9] * 0) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; } +unsigned long long mer_31(const char *seq) { return (seq[30] * 1) + (seq[29] * 4) + (seq[28] * 16) + (seq[27] * 64) + (seq[26] * 256) + (seq[25] * 1024) + (seq[24] * 4096) + (seq[23] * 16384) + (seq[22] * 65536) + (seq[21] * 262144) + (seq[20] * 1048576) + (seq[19] * 4194304) + (seq[18] * 16777216) + (seq[17] * 67108864) + (seq[16] * 268435456) + (seq[15] * 1073741824) + (seq[14] * 0) + (seq[13] * 0) + (seq[12] * 0) + (seq[11] * 0) + (seq[10] * 0) + (seq[9] * 0) + (seq[8] * 0) + (seq[7] * 0) + (seq[6] * 0) + (seq[5] * 0) + (seq[4] * 0) + (seq[3] * 0) + (seq[2] * 0) + (seq[1] * 0) + (seq[0] * 0) + 0; } + +typedef unsigned long long (*merptr_t)(const char *); + +merptr_t get_ptr(int kmer) { + switch(kmer) { + case 1: return mer_1; + case 2: return mer_2; + case 3: return mer_3; + case 4: return mer_4; + case 5: return mer_5; + case 6: return mer_6; + case 7: return mer_7; + case 8: return mer_8; + case 9: return mer_9; + case 10: return mer_10; + case 11: return mer_11; + case 12: return mer_12; + case 13: return mer_13; + case 14: return mer_14; + case 15: return mer_15; + case 16: return mer_16; + case 17: return mer_17; + case 18: return mer_18; + case 19: return mer_19; + case 20: return mer_20; + case 21: return mer_21; + case 22: return mer_22; + case 23: return mer_23; + case 24: return mer_24; + case 25: return mer_25; + case 26: return mer_26; + case 27: return mer_27; + case 28: return mer_28; + case 29: return mer_29; + case 30: return mer_30; + case 31: return mer_31; + } + return NULL; +} diff --git a/mer_generator.c b/mer_generator.c new file mode 100644 index 0000000..a20c56f --- /dev/null +++ b/mer_generator.c @@ -0,0 +1,28 @@ +#include "stdio.h" +#include "limits.h" + +#include "kmer_utils.h" + +main() { + + int i = 0; + int j = 0; + unsigned long out = 0; + + for(j = 1; j < 32; j++) { + printf("unsigned long long mer_%d(const char *seq) { return ", j); + unsigned long multiply = 1; + for(i = j - 1; i >= 0; i--){ + printf("(seq[%d] * %d) + ", i, multiply); + multiply = multiply << 2; + } + printf(" 0; }\n"); + } + + printf("int (*return_fn())(const char * )mer_ptr(int kmer) { switch(kmer) {"); + for(j = 1; j < 32; j++) + printf("case %d: return mer_%d;", j, j); + printf("}"); + + +} |