aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authororbitz <orbitz@gmail.com>2013-12-31 16:35:07 +0100
committerorbitz <orbitz@gmail.com>2013-12-31 16:35:07 +0100
commitd19a0690f8df4c8c4145eb720eea9740f4a5e450 (patch)
treef9fa53b6f56daad699d7188f1cc8722526bf3761
parent6063803b2dcf602146f0f0590e3ae7e2e0a9b334 (diff)
Refactor out translation and error checking
-rw-r--r--kmer_utils.c21
1 files changed, 15 insertions, 6 deletions
diff --git a/kmer_utils.c b/kmer_utils.c
index 9d31081..bb7f8f8 100644
--- a/kmer_utils.c
+++ b/kmer_utils.c
@@ -6,6 +6,8 @@
#include "kmer_total_count.h"
+#define ERROR_CODE 5
+
const unsigned char kmer_alpha[256] =
{5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
@@ -110,6 +112,17 @@ static char *strnstrip(const char *s, char *dest, int c, unsigned long long len)
return dest;
}
// Rewrite a sequence in place, replacing each byte with its entry in `lookup`.
//
// str    - buffer to translate; its first `len` bytes are overwritten
// len    - number of bytes of `str` to translate (0 leaves `str` untouched)
// lookup - table indexed by byte value; it must provide an entry for every
//          value an unsigned char can hold
void translate_nucleotides_to_numbers(char *str, size_t len, const unsigned char *lookup) {
    size_t i;
    for(i = 0; i < len; ++i) {
        // Index through unsigned char: where plain char is signed, a byte
        // >= 0x80 would otherwise turn into a negative index and read
        // memory in front of the table.
        str[i] = (char)lookup[(unsigned char)str[i]];
    }
}
+
+int is_error_char(char c) {
+ return c == ERROR_CODE;
+}
+
unsigned long long *kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
char *line = NULL;
size_t len = 0;
@@ -149,7 +162,6 @@ unsigned long long *kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
size_t start_len = strlen(start);
-
// if our current str buffer isn't big enough, realloc
if(start_len + 1 > str_size + 1) {
str = realloc(str, start_len + 1);
@@ -159,16 +171,13 @@ unsigned long long *kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
}
}
-
// strip out all other newlines to handle multiline sequences
str = strnstrip(start, str, '\n',start_len);
size_t seq_length = strlen(str);
// replace A, C, G and T with 0, 1, 2, 3 respectively
// everything else is 5
- for(i = 0; i < seq_length; i++) {
- str[i] = kmer_alpha[(int)str[i]];
- }
+ translate_nucleotides_to_numbers(str, seq_length, kmer_alpha);
// loop through our string to process each k-mer
for(position = 0; position < (seq_length - kmer + 1); position++) {
@@ -177,7 +186,7 @@ unsigned long long *kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
// for each char in the k-mer check if it is an error char
for(i = position; i < position + kmer; ++i) {
- if(str[i] == 5) {
+ if(is_error_char(str[i])) {
mer = width;
position = i;
goto next;