diff options
author | Calvin Morrison <mutantturkey@gmail.com> | 2013-12-06 16:50:10 -0500 |
---|---|---|
committer | Calvin Morrison <mutantturkey@gmail.com> | 2013-12-06 16:50:10 -0500 |
commit | 72be3fbc74bd89f0930023b21c6baafeed571b11 (patch) | |
tree | b813d0426fd807fe313fda38891e60e536811dc7 /count_nucleobases.c | |
parent | 0a9476fe2aa09610a23b2a54974bbb733b478a92 (diff) |
Use fread instead of getline: lower memory use, static allocation, and much better performance. A win all around.
Diffstat (limited to 'count_nucleobases.c')
-rwxr-xr-x | count_nucleobases.c | 53 |
1 files changed, 27 insertions, 26 deletions
diff --git a/count_nucleobases.c b/count_nucleobases.c index c2aafe5..4bc3ca1 100755 --- a/count_nucleobases.c +++ b/count_nucleobases.c @@ -3,35 +3,36 @@ #include <stdlib.h> #include <unistd.h> #include <stdint.h> -int main(int argc, char **argv) { +#include <stdbool.h> +int main() { - char *line = NULL; - size_t len = 0; - ssize_t read; - - if(argc != 2) { - printf("Please supply a filename, and only a filename\n"); - exit(EXIT_FAILURE); - } - - FILE *fh = fopen(argv[1], "r" ); - if(fh == NULL) { - fprintf(stderr, "Couldn't open: %s\n", argv[1]); - exit(EXIT_FAILURE); - } + ssize_t len = 0; unsigned long long counts[256] = {0}; - - while ((read = getline(&line, &len, fh)) != -1) { - if(line[0] != '>') { - unsigned int i = 0; - for(i = 0; i < strlen(line); i++) { - counts[line[i]]++; - } - } - } - - free(line); + char buffer[4096]; + bool header = false; + + len = fread(&buffer, 1, 1, stdin); + + if(len != NULL) { + if(buffer[0] == '>') { + header = true; + while((len = fread(&buffer, 1, 4096, stdin)) != NULL) { + unsigned int i = 0; + for(i = 0; i < len; i++) { + if(buffer[i] == '>') { + header = true; + continue; + } + else if(buffer[i] == '\n' && header == true) + header = false; + if(header == false ) { + counts[(int)buffer[i]]++; + } + } + } + } else { fprintf(stderr, "this does not look like a fasta file\n"); } + } else { fprintf(stderr, "could not read file\n"); } printf("A:%llu\nC:%llu\nG:%llu\nT:%llu\n", counts['a'] + counts['A'], |