aboutsummaryrefslogtreecommitdiff
path: root/kmer_utils.c
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2013-10-01 16:19:21 -0400
committerCalvin Morrison <mutantturkey@gmail.com>2013-10-01 16:19:21 -0400
commit3d5ab03f760814e5e6bb6ac54e29650bc1fe6153 (patch)
tree8181071f59ed7e616611a12b8863057d6ac0a64d /kmer_utils.c
parent4d00f90fd8b1b9e38eea297336ee83a5e5c9e764 (diff)
update headers, use const for better performance (.500ms on ~2gb file), update comments for functions
Diffstat (limited to 'kmer_utils.c')
-rw-r--r--kmer_utils.c29
1 files changed, 14 insertions, 15 deletions
diff --git a/kmer_utils.c b/kmer_utils.c
index 7b483ea..89df3b2 100644
--- a/kmer_utils.c
+++ b/kmer_utils.c
@@ -4,19 +4,9 @@
#include <string.h>
#include <unistd.h>
-// This function takes a char array containing sequences and converts it
-// into a kmer index (essentially a base 4 radix converstion to base 10, with
-// some mumbo-jumbo of taking each of the characters we want (ACGT) and turning
-// them into a radix-4 number we can use.
-//
-// kind of convoluted but it works.
-//
-// Arguemnts:
-// char *str - a NULL terminated character array
-// long kmer - how long of a index value you want to return
-// long error_pos - what index to return for a non ACGT character
-//
-inline long num_to_index(const char *str, int kmer, long error_pos) {
+// convert a string of k-mer size base-4 values into a
+// base-10 index
+long num_to_index(const char *str, const int kmer, const long error_pos) {
int i = 0;
unsigned long out = 0;
@@ -24,6 +14,9 @@ inline long num_to_index(const char *str, int kmer, long error_pos) {
for(i = kmer - 1; i >= 0; i--){
+ if(str[i] >> 2)
+ return error_pos;
+
out += str[i] * multiply;
multiply = multiply << 2;
}
@@ -31,11 +24,15 @@ inline long num_to_index(const char *str, int kmer, long error_pos) {
return out;
}
-void convert_kmer_to_num(char *str, long length) {
+// replaces values in a char array of ACGT's and others with
+// values that correspond to their base 4 value to be used in
+// num_to_index.
+void convert_kmer_to_num(char *str, const long length) {
long i = 0;
for(i = 0; i < length; i++) {
+ // this is _not_ portable, only works with ASCII values.
switch(str[i] | 0x20 ) {
case 'a':
str[i] = 0;
@@ -50,7 +47,9 @@ void convert_kmer_to_num(char *str, long length) {
str[i] = 3;
break;
default:
- str[i] = 5;
+ // Error Checking: use 4 so we can shift right twice
+ // to check quickly if there is a non-ACGT character
+ str[i] = 4;
}
}