/*
 * Provenance (kept verbatim from the patch this file was extracted from):
 *
 * From adcd4d3c1ef2ba35730e0f9a837ed44cf6d74d51 Mon Sep 17 00:00:00 2001
 * From: Calvin Morrison
 * Date: Thu, 27 Mar 2014 17:19:22 -0400
 * Subject: add strstream
 * ---
 *  Makefile    |   4 ++-
 *  strstream.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 *  2 files changed, 114 insertions(+), 1 deletion(-)
 *  create mode 100644 strstream.c
 *
 * diff --git a/Makefile b/Makefile
 * index 8193e1d..9826622 100644
 * --- a/Makefile
 * +++ b/Makefile
 * @@ -1,7 +1,7 @@
 *  CC = gcc
 *  CFLAGS= -O3 -s -mtune=native -Wextra -Wall
 * -all: count_nucleobases sequence_lengths
 * +all: count_nucleobases sequence_lengths strstream
 *  count_nucleobases: count_nucleobases.c
 *      $(CC) $(CFLAGS) count_nucleobases.c -o count_nucleobases
 * @@ -9,6 +9,8 @@ sequence_lengths: sequence_lengths.c
 *      $(CC) $(CFLAGS) sequence_lengths.c -o sequence_lengths
 *  sequence_end_points: sequence_end_points.c
 *      $(CC) $(CFLAGS) sequence_end_points.c -o sequence_end_points
 * +strstream: strstream.c
 * +    $(CC) $(CFLAGS) strstream.c -o strstream
 *  clean:
 *      rm -vf count_nucleobases sequence_lengths sequence_end_points
 *
 * diff --git a/strstream.c b/strstream.c
 * new file mode 100644
 * index 0000000..19c7614
 * --- /dev/null
 * +++ b/strstream.c
 * @@ -0,0 +1,111 @@
 */

// Report the positions in stdin of every string listed in the file named
// by argv[1].
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h> /* ssize_t */

/*
 * Read one search string ("mer") per line from fh.
 *
 * fh  - open, readable stream; it is not closed here.
 * len - out: number of strings returned, or -1 when none were found.
 *
 * Returns a malloc'd array of *len malloc'd, NUL-terminated strings; the
 * caller owns the array and every element. Returns NULL (with *len == -1)
 * when the stream contains no non-empty line. Terminates the process if an
 * allocation fails.
 *
 * A trailing "\n" or "\r\n" is removed from each line. Blank lines are
 * skipped: an empty search string matches at every position and can never
 * be reported meaningfully. Lines longer than the internal 4096-byte
 * buffer are read in several pieces, each stored as its own entry.
 */
char **load_mers_from_file(FILE *fh, ssize_t *len) {
	char line[4096];
	char **mers = NULL;
	size_t count = 0;
	size_t capacity = 0;

	*len = 0;

	while (fgets(line, sizeof line, fh) != NULL) {
		size_t line_len = strlen(line);

		/* Strip the terminator only if one is present, so the final
		 * line of a file without a trailing newline stays intact. */
		if (line_len > 0 && line[line_len - 1] == '\n') {
			line[--line_len] = '\0';
		}
		if (line_len > 0 && line[line_len - 1] == '\r') {
			line[--line_len] = '\0';
		}
		if (line_len == 0) {
			continue;
		}

		/* Grow geometrically instead of one slot per line. */
		if (count == capacity) {
			size_t new_capacity = capacity ? capacity * 2 : 16;
			char **grown = realloc(mers, new_capacity * sizeof *grown);
			if (grown == NULL) {
				fprintf(stderr, "could not realloc mers\n");
				exit(EXIT_FAILURE);
			}
			mers = grown;
			capacity = new_capacity;
		}

		char *cpy = malloc(line_len + 1);
		if (cpy == NULL) {
			fprintf(stderr, "could not alloc mers\n");
			exit(EXIT_FAILURE);
		}
		memcpy(cpy, line, line_len + 1); /* includes the terminator */
		mers[count++] = cpy;
	}

	if (count == 0) {
		*len = -1;
		return NULL;
	}

	*len = (ssize_t)count;
	return mers;
}
/*
 * Find needle[0..needle_len) in hay[0..hay_len).
 *
 * Returns a pointer to the first occurrence, or NULL when there is none or
 * when needle_len is 0. Works on raw bytes, so the haystack needs no NUL
 * terminator and may contain NUL bytes.
 */
static const char *find_bytes(const char *hay, size_t hay_len,
                              const char *needle, size_t needle_len) {
	if (needle_len == 0 || needle_len > hay_len) {
		return NULL;
	}

	const char *last = hay + (hay_len - needle_len);
	for (const char *p = hay; p <= last; p++) {
		p = memchr(p, needle[0], (size_t)(last - p) + 1);
		if (p == NULL) {
			return NULL;
		}
		if (memcmp(p, needle, needle_len) == 0) {
			return p;
		}
	}
	return NULL;
}

/*
 * usage: strstream merlist.txt < input
 *
 * Loads the search strings from argv[1] and streams stdin through a
 * fixed-size buffer. For every occurrence it prints
 * "<index of string in list> <0-based byte offset in stdin>".
 *
 * To catch matches that straddle two reads, the last (longest - 1) bytes of
 * each pass are carried to the front of the buffer before the next read.
 *
 * Exit status: EXIT_FAILURE on a usage error, an unreadable or empty list,
 * an empty stdin, or a read error; 0 otherwise.
 */
int main(int argc, char **argv) {
	char buffer[BUFSIZ];
	int status = 0;

	if (argc != 2) {
		fprintf(stderr, "usage: strstream merlist.txt\n");
		exit(EXIT_FAILURE);
	}

	// load mers
	FILE *fh = fopen(argv[1], "r");
	if (fh == NULL) {
		fprintf(stderr, "could not open %s\n", argv[1]);
		exit(EXIT_FAILURE);
	}
	ssize_t mer_count = 0;
	char **mers = load_mers_from_file(fh, &mer_count);
	fclose(fh);
	if (mers == NULL || mer_count <= 0) {
		fprintf(stderr, "could not load mers from %s\n", argv[1]);
		exit(EXIT_FAILURE);
	}

	// cache every length once and find the longest
	size_t *mer_lens = malloc((size_t)mer_count * sizeof *mer_lens);
	if (mer_lens == NULL) {
		fprintf(stderr, "could not alloc mer lengths\n");
		exit(EXIT_FAILURE);
	}
	size_t max_len = 0;
	for (ssize_t i = 0; i < mer_count; i++) {
		mer_lens[i] = strlen(mers[i]);
		if (mer_lens[i] > max_len) {
			max_len = mer_lens[i];
		}
	}
	if (max_len == 0) {
		fprintf(stderr, "could not load mers from %s\n", argv[1]);
		exit(EXIT_FAILURE);
	}
	if (max_len > sizeof buffer) {
		fprintf(stderr, "mer too long (limit is %zu bytes)\n", sizeof buffer);
		exit(EXIT_FAILURE);
	}

	const size_t overlap = max_len - 1; /* bytes kept between passes */
	size_t carried = 0;                 /* valid bytes already at buffer[0] */
	unsigned long long base = 0;        /* stdin offset of buffer[0] */
	int saw_input = 0;
	size_t got;

	/* carried <= overlap < sizeof buffer, so every read has room. */
	while ((got = fread(buffer + carried, 1, sizeof buffer - carried, stdin)) != 0) {
		const size_t filled = carried + got;
		saw_input = 1;

		for (ssize_t i = 0; i < mer_count; i++) {
			const size_t mlen = mer_lens[i];
			if (mlen == 0) {
				continue;
			}

			/* A match lying wholly inside the carried prefix was
			 * already printed by the previous pass; begin at the
			 * first position whose match reaches into new data. */
			const size_t from = carried >= mlen ? carried - mlen + 1 : 0;
			const char *p = buffer + from;

			while ((p = find_bytes(p, filled - (size_t)(p - buffer),
			                       mers[i], mlen)) != NULL) {
				printf("%lld %llu\n", (long long)i,
				       base + (unsigned long long)(p - buffer));
				p++;
			}
		}

		/* Slide the tail to the front. The regions overlap whenever
		 * fewer than `overlap` new bytes arrived, hence memmove. */
		const size_t keep = filled < overlap ? filled : overlap;
		memmove(buffer, buffer + (filled - keep), keep);
		base += filled - keep;
		carried = keep;
	}

	if (ferror(stdin)) {
		fprintf(stderr, "error reading stdin\n");
		status = EXIT_FAILURE;
	} else if (!saw_input) {
		/* empty input is still reported as a failure, silently */
		status = EXIT_FAILURE;
	}

	for (ssize_t i = 0; i < mer_count; i++) {
		free(mers[i]);
	}
	free(mers);
	free(mer_lens);

	return status;
}
/* -- cgit v1.2.3 */