aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2014-01-15 12:58:36 -0500
committerCalvin Morrison <mutantturkey@gmail.com>2014-01-15 12:58:36 -0500
commit489d59d65f95de343ddf56d3725feb06f8b5be13 (patch)
treeccdebe535e725f47fa01f6f4401faf5f4cd50bf6
parent67bf470e1246db3fa15c56315aa0038f393a5b07 (diff)
add handling for overrunning the string length, and if there's a skip char in the first position, also more smartly write (unsigned long long), and clean up our multiplier value
-rw-r--r--kmer_utils.c18
1 files changed, 11 insertions, 7 deletions
diff --git a/kmer_utils.c b/kmer_utils.c
index 73cf819..fb191b1 100644
--- a/kmer_utils.c
+++ b/kmer_utils.c
@@ -33,7 +33,7 @@ const unsigned char kmer_alpha[256] =
static const char reverse_alpha[4] = { 'A', 'C', 'G', 'T' };
unsigned long long kmer_pow_four(unsigned long long x) {
- return (unsigned long long)1 << (x * 2);
+ return 1ULL << (x * 2);
}
// convert an index back into a kmer string
@@ -87,20 +87,25 @@ static void translate_nucleotides_to_numbers(char *str, size_t len, const unsign
str[i] = lookup[(int)str[i]];
}
-static size_t calculate_mer(const char *str, size_t *pos, size_t kmer_len, size_t error_mer) {
+static size_t calculate_mer(const char *str, size_t str_len, size_t *pos, size_t kmer_len, size_t error_mer) {
size_t mer = 0;
size_t multiply;
size_t i;
size_t end;
// set our multiplier to be pow(4, mer) >> 2;
- multiply = (unsigned long long)1 << (kmer_len* 2);
- multiply = multiply >> 2;
+ multiply = kmer_pow_four(kmer_len) >> 2;
- // for each char in the k-mer check if it is an error char
end = *pos + kmer_len;
+
+ // check if space or error, otherwise bump our multiplier and mer
for(i = *pos; i < end; ++i) {
if(str[i] == SPACE) {
+ // if our end has been expanded to the max string length
+ // or if it is the first loop return the error length
+ if(end == str_len || i == *pos)
+ return error_mer;
+
end++;
continue;
}
@@ -114,7 +119,6 @@ static size_t calculate_mer(const char *str, size_t *pos, size_t kmer_len, size_
mer += str[i] * multiply;
multiply = multiply >> 2;
}
-
return mer;
}
@@ -155,7 +159,7 @@ unsigned long long *kmer_counts_from_file(FILE *fh, const unsigned int kmer) {
// loop through our string to process each k-mer
for(position = 0; position < (seq_length - kmer + 1); position++) {
- size_t mer = calculate_mer(seq, &position, kmer, width);
+ size_t mer = calculate_mer(seq, seq_length, &position, kmer, width);
counts[mer]++;
}
}