From 72be3fbc74bd89f0930023b21c6baafeed571b11 Mon Sep 17 00:00:00 2001
From: Calvin Morrison
Date: Fri, 6 Dec 2013 16:50:10 -0500
Subject: use fread instead of getline, lower memory, static allocation, better performance (by a lot). all win win

---
 count_nucleobases   | Bin 5496 -> 0 bytes
 count_nucleobases.c | 53 ++++++++++++++++++++++++++--------------------------
 2 files changed, 27 insertions(+), 26 deletions(-)
 delete mode 100755 count_nucleobases

diff --git a/count_nucleobases b/count_nucleobases
deleted file mode 100755
index 8c1b122..0000000
Binary files a/count_nucleobases and /dev/null differ
diff --git a/count_nucleobases.c b/count_nucleobases.c
index c2aafe5..4bc3ca1 100755
--- a/count_nucleobases.c
+++ b/count_nucleobases.c
@@ -3,35 +3,36 @@
 #include
 #include
 #include
-int main(int argc, char **argv) {
+#include <stdbool.h>
+int main() {
 
-  char *line = NULL;
-  size_t len = 0;
-  ssize_t read;
-
-  if(argc != 2) {
-    printf("Please supply a filename, and only a filename\n");
-    exit(EXIT_FAILURE);
-  }
-
-  FILE *fh = fopen(argv[1], "r" );
-  if(fh == NULL) {
-    fprintf(stderr, "Couldn't open: %s\n", argv[1]);
-    exit(EXIT_FAILURE);
-  }
+  ssize_t len = 0;
 
   unsigned long long counts[256] = {0};
-
-  while ((read = getline(&line, &len, fh)) != -1) {
-    if(line[0] != '>') {
-      unsigned int i = 0;
-      for(i = 0; i < strlen(line); i++) {
-        counts[line[i]]++;
-      }
-    }
-  }
-
-  free(line);
+  char buffer[4096];
+  bool header = false;
+
+  len = fread(&buffer, 1, 1, stdin);
+
+  if(len != NULL) {
+    if(buffer[0] == '>') {
+      header = true;
+      while((len = fread(&buffer, 1, 4096, stdin)) != NULL) {
+        unsigned int i = 0;
+        for(i = 0; i < len; i++) {
+          if(buffer[i] == '>') {
+            header = true;
+            continue;
+          }
+          else if(buffer[i] == '\n' && header == true)
+            header = false;
+          if(header == false ) {
+            counts[(int)buffer[i]]++;
+          }
+        }
+      }
+    } else { fprintf(stderr, "this does not look like a fasta file\n"); }
+  } else { fprintf(stderr, "could not read file\n"); }
 
   printf("A:%llu\nC:%llu\nG:%llu\nT:%llu\n",
          counts['a'] + counts['A'],
-- 
cgit v1.2.3