From aa3c7ed9d11bcd2779db4cf2170b3a87505898bb Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Sun, 24 Nov 2013 17:35:17 -0500 Subject: performance boost from skipping our first newline. It seems crazy, but this could be up to a 10-15% improvement because of our strstrip function. Each time, we were copying the entire array, even when we didn't need to. The benefit will be largest on a single-line sequence file, but all inputs should see a speedup. --- kmer_utils.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kmer_utils.c') diff --git a/kmer_utils.c b/kmer_utils.c index ef46842..dc79700 100644 --- a/kmer_utils.c +++ b/kmer_utils.c @@ -82,7 +82,7 @@ char *index_to_kmer(unsigned long long index, long kmer) { // reverse the array, as j increases, decrease i for(j = 0; j < start; j++, i--) - ret[j + offset] = reverse_alpha[num_array[i]]; + ret[j + offset] = reverse_alpha[(int)num_array[i]]; // set our last character to the null termination byte ret[kmer + 1] = '\0'; @@ -151,6 +151,9 @@ unsigned long long * get_kmer_counts_from_file(const char *fn, const unsigned in if(start == NULL) continue; + // point to one past that. + start = start + 1; + size_t start_len = strlen(start); @@ -163,6 +166,7 @@ unsigned long long * get_kmer_counts_from_file(const char *fn, const unsigned in } } + // strip out all other newlines to handle multiline sequences str = strnstrip(start, str, '\n',start_len); size_t seq_length = strlen(str); -- cgit v1.2.3