about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- kmer_utils.c 11
1 files changed, 8 insertions, 3 deletions
diff --git a/kmer_utils.c b/kmer_utils.c
index 5d3b8a5..a154cb2 100644
--- a/kmer_utils.c
+++ b/kmer_utils.c
@@ -222,6 +222,9 @@ void count_sequence(const char *seq, const size_t seq_length, const unsigned int
unsigned long long multiply = 1;
// for each char in the k-mer check if it is an error char
+
+ // TODO Get rid of the branch (avoid branch mispredictions) - can we do it?
+ // Error flag with bitshifts
for(i = position + kmer - 1; i >= position; i--){
if(seq[i] == 5) {
position = i;
@@ -272,18 +275,19 @@ array_type * get_kmer_counts_from_file(array_type *counts, FILE *fh, const unsig
exit(EXIT_FAILURE);
}
+ // TODO remove getdelim and add a FASTA parsing state machine instead
while ((read = getdelim(&line, &len, '>', fh)) != -1) {
size_t k;
char *seq;
- // find our first \n, this should be the end of the header
+ // TODO optimize out strchr with a while loop
seq = strchr(line, '\n');
if(seq == NULL)
continue;
-
// point to one past that.
seq = seq + 1;
+ // TODO can we loop jam the strnstrip and alpha, so that memory is only accessed once?
// strip out all other newlines to handle multiline sequences
const size_t seq_length = strnstrip(seq, '\n', strlen(seq));
@@ -295,6 +299,7 @@ array_type * get_kmer_counts_from_file(array_type *counts, FILE *fh, const unsig
count_sequence(seq, seq_length, kmer, counts);
if(count_compliment) {
+ // TODO Same for this, can we reverse and complement at the same time?
for(k = 0; k < seq_length; k++) {
seq[k] = compliment[(int)seq[k]];
}
@@ -305,7 +310,7 @@ array_type * get_kmer_counts_from_file(array_type *counts, FILE *fh, const unsig
}
}
- free(line);
+ free(line);
fclose(fh);
return counts;