aboutsummaryrefslogtreecommitdiff
path: root/kmer_utils.c
blob: 89df3b2be803027b08c94074c4c5aea8485f1837 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

// Reads the first `kmer` bytes of str as base-4 digits, most
// significant digit first, and returns the number they spell out.
// Every byte is expected to already hold a value in 0..3 (see
// convert_kmer_to_num); if any of them holds something else,
// error_pos is returned instead of an index.
long num_to_index(const char *str, const int kmer, const long error_pos) {

  unsigned long index = 0;
  int pos;

  for(pos = 0; pos < kmer; pos++) {
    const int digit = str[pos];

    // anything outside 0..3 is not a valid base-4 digit
    if(digit < 0 || digit > 3)
      return error_pos;

    // shift the digits seen so far up by one base-4 place (two bits)
    // and append the new one
    index = (index << 2) + (unsigned long)digit;
  }

  return index;
}

// Rewrites the first `length` bytes of str in place, turning each
// nucleotide letter into its base-4 digit so the buffer can be fed
// to num_to_index: A -> 0, C -> 1, G -> 2, T -> 3 (either case).
// Every other byte becomes 4.
void convert_kmer_to_num(char *str, const long length) {

  char *cursor = str;
  long remaining = length;

  while(remaining > 0) {
    // OR-ing in 0x20 folds upper case onto lower case.
    // this is _not_ portable, only works with ASCII values.
    const int folded = *cursor | 0x20;

    // Error Checking: default to 4 so that shifting right twice
    // leaves a non-zero value, which quickly reveals a non ACGT
    // character
    char code = 4;

    if(folded == 'a')
      code = 0;
    else if(folded == 'c')
      code = 1;
    else if(folded == 'g')
      code = 2;
    else if(folded == 't')
      code = 3;

    *cursor = code;

    cursor++;
    remaining--;
  }

}