aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2013-11-24 17:35:17 -0500
committerCalvin Morrison <mutantturkey@gmail.com>2013-11-24 17:35:17 -0500
commitaa3c7ed9d11bcd2779db4cf2170b3a87505898bb (patch)
treea9412c423cee8926e2f874d9197e274773726664
parent1801837951eac87e9bdba643641ed76f9b2d796c (diff)
Performance boost from skipping our first newline. It seems crazy, but this could be up to a 10-15% improvement because of our strstrip function: each time, we were copying the entire array, even if we didn't need to. A single-line sequence file will benefit the most, but all inputs should see a speedup.
-rw-r--r--kmer_utils.c6
1 files changed, 5 insertions, 1 deletions
diff --git a/kmer_utils.c b/kmer_utils.c
index ef46842..dc79700 100644
--- a/kmer_utils.c
+++ b/kmer_utils.c
@@ -82,7 +82,7 @@ char *index_to_kmer(unsigned long long index, long kmer) {
// reverse the array, as j increases, decrease i
for(j = 0; j < start; j++, i--)
- ret[j + offset] = reverse_alpha[num_array[i]];
+ ret[j + offset] = reverse_alpha[(int)num_array[i]];
// set our last character to the null termination byte
ret[kmer + 1] = '\0';
@@ -151,6 +151,9 @@ unsigned long long * get_kmer_counts_from_file(const char *fn, const unsigned in
if(start == NULL)
continue;
+ // point to one past that.
+ start = start + 1;
+
size_t start_len = strlen(start);
@@ -163,6 +166,7 @@ unsigned long long * get_kmer_counts_from_file(const char *fn, const unsigned in
}
}
+
// strip out all other newlines to handle multiline sequences
str = strnstrip(start, str, '\n',start_len);
size_t seq_length = strlen(str);