diff options
author | orbitz <orbitz@gmail.com> | 2013-12-31 16:35:07 +0100 |
---|---|---|
committer | orbitz <orbitz@gmail.com> | 2013-12-31 16:35:07 +0100 |
commit | d19a0690f8df4c8c4145eb720eea9740f4a5e450 (patch) | |
tree | f9fa53b6f56daad699d7188f1cc8722526bf3761 | |
parent | 6063803b2dcf602146f0f0590e3ae7e2e0a9b334 (diff) |
Refactor out translation and error checking
-rw-r--r-- | kmer_utils.c | 21 |
1 files changed, 15 insertions, 6 deletions
```diff
diff --git a/kmer_utils.c b/kmer_utils.c
index 9d31081..bb7f8f8 100644
--- a/kmer_utils.c
+++ b/kmer_utils.c
@@ -6,6 +6,8 @@
 #include "kmer_total_count.h"
+#define ERROR_CODE 5
+
 const unsigned char kmer_alpha[256] =
 {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
@@ -110,6 +112,17 @@ static char *strnstrip(const char *s, char *dest, int c, unsigned long long len)
   return dest;
 }
+void translate_nucleotides_to_numbers(char *str, size_t len, const unsigned char *lookup) {
+  size_t i;
+  for(i = 0; i < len; ++i) {
+    str[i] = lookup[(int)str[i]];
+  }
+}
+
+int is_error_char(char c) {
+  return c == ERROR_CODE;
+}
+
 unsigned long long *kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
   char *line = NULL;
   size_t len = 0;
@@ -149,7 +162,6 @@ unsigned long long *kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
     size_t start_len = strlen(start);
-
     // if our current str buffer isn't big enough, realloc
     if(start_len + 1 > str_size + 1) {
       str = realloc(str, start_len + 1);
@@ -159,16 +171,13 @@ unsigned long long *kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
       }
     }
-
     // strip out all other newlines to handle multiline sequences
     str = strnstrip(start, str, '\n',start_len);
     size_t seq_length = strlen(str);
     // relace A, C, G and T with 0, 1, 2, 3 respectively
     // everything else is 5
-    for(i = 0; i < seq_length; i++) {
-      str[i] = kmer_alpha[(int)str[i]];
-    }
+    translate_nucleotides_to_numbers(str, seq_length, kmer_alpha);
     // loop through our string to process each k-mer
     for(position = 0; position < (seq_length - kmer + 1); position++) {
@@ -177,7 +186,7 @@ unsigned long long *kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
       // for each char in the k-mer check if it is an error char
       for(i = position; i < position + kmer; ++i) {
-        if(str[i] == 5) {
+        if(is_error_char(str[i])) {
           mer = width;
           position = i;
           goto next;
```