blob: 7b483ea97f973faa3b400795977755c24968c6e1 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
// Converts a numerically encoded k-mer into its k-mer index.
//
// The first `kmer` elements of str are read as the digits of a base-4
// number, most significant digit first, and that number is returned. The
// elements must already be digit values (0..3), not ASCII letters, i.e. the
// buffer is expected to have been run through convert_kmer_to_num() first.
// Because an encoded 'A' is a 0 byte, str is NOT treated as a NUL terminated
// string; exactly `kmer` elements are read.
//
// Arguments:
// const char *str - array holding at least `kmer` encoded bases
// int kmer        - number of bases to fold into the index; values <= 0
//                   read nothing and yield 0
// long error_pos  - value to return when any of the `kmer` elements is not
//                   a valid base-4 digit (e.g. the 5 that
//                   convert_kmer_to_num() stores for a non ACGT character)
//
// Returns the index of the k-mer, or error_pos if the window contains an
// invalid element. The accumulator is an unsigned long, so the result wraps
// once 2 * kmer exceeds the bit width of unsigned long.
//
// The definition intentionally carries no bare `inline` specifier: in
// C99/C11 an inline definition without an accompanying external declaration
// emits no external symbol, which leaves non-inlined callers unresolved at
// link time.
long num_to_index(const char *str, int kmer, long error_pos) {
	unsigned long out = 0;
	unsigned long multiply = 1;

	// Nothing to read; also keeps `kmer - 1` below away from signed overflow.
	if (kmer <= 0) {
		return 0;
	}

	// Walk from the least significant base (last element) to the most
	// significant one, scaling by 4 at every step.
	for (int i = kmer - 1; i >= 0; i--) {
		// Go through unsigned char so a negative char cannot slip past
		// the range check on platforms where char is signed.
		const unsigned char digit = (unsigned char)str[i];

		if (digit > 3) {
			return error_pos;
		}
		out += digit * multiply;
		multiply <<= 2;
	}
	return (long)out;
}
// Rewrites a buffer of nucleotide letters in place as small integer codes.
//
// Each of the first `length` elements of str is replaced by:
//   'A' / 'a' -> 0
//   'C' / 'c' -> 1
//   'G' / 'g' -> 2
//   'T' / 't' -> 3
//   anything else -> 5
//
// Arguments:
// char *str   - buffer to convert; it is overwritten
// long length - number of elements to convert; nothing is touched when
//               length <= 0
void convert_kmer_to_num(char *str, long length) {
	long pos;

	for (pos = 0; pos < length; ++pos) {
		// Setting bit 0x20 folds ASCII upper case onto lower case, so one
		// comparison per base covers both spellings.
		const int folded = str[pos] | 0x20;
		char code;

		if (folded == 'a') {
			code = 0;
		} else if (folded == 'c') {
			code = 1;
		} else if (folded == 'g') {
			code = 2;
		} else if (folded == 't') {
			code = 3;
		} else {
			code = 5;
		}
		str[pos] = code;
	}
}
|