diff options
-rw-r--r-- | Makefile | 10 | ||||
-rw-r--r-- | README | 6 | ||||
-rwxr-xr-x | count_nucleobases.c | 16 | ||||
-rw-r--r-- | sequence_end_points.c | 54 | ||||
-rw-r--r-- | sequence_lengths.c | 55 |
5 files changed, 129 insertions, 12 deletions
@@ -1,10 +1,14 @@ CC = gcc CFLAGS= -O3 -s -mtune=native -Wextra -Wall -all: count_nucleobases_ +all: count_nucleobases sequence_lengths -count_nucleobases_: +count_nucleobases: count_nucleobases.c $(CC) $(CFLAGS) count_nucleobases.c -o count_nucleobases +sequence_lengths: sequence_lengths.c + $(CC) $(CFLAGS) sequence_lengths.c -o sequence_lengths +sequence_end_points: sequence_end_points.c + $(CC) $(CFLAGS) sequence_end_points.c -o sequence_end_points clean: - rm -vf count_nucleobases + rm -vf count_nucleobases sequence_lengths sequence_end_points @@ -1 +1,5 @@ -This will count nucleobases of stdin +random fasta tools: + +count_nucleobases - counts nucleobases (A, C, G, T) in fasta files +sequence_lengths - print out length of each sequences +sequence_end_points - print out the end position of each sequence (continuous) diff --git a/count_nucleobases.c b/count_nucleobases.c index 1b432da..d4dca06 100755 --- a/count_nucleobases.c +++ b/count_nucleobases.c @@ -4,24 +4,24 @@ #include <unistd.h> #include <stdint.h> #include <stdbool.h> +#include <errno.h> int main() { size_t len = 0; - - unsigned long long counts[256] = {0}; + unsigned long long counts[256] = {0}; char buffer[4096]; bool header = false; - len = fread(&buffer, 1, 1, stdin); + len = fread(&buffer, 1, 1, stdin); - if(len != NULL) { + if(errno) { if(buffer[0] == '>') { header = true; - while((len = fread(&buffer, 1, 4096, stdin)) != NULL) { + while((len = fread(&buffer, 1, 4096, stdin)) != 0) { unsigned int i = 0; for(i = 0; i < len; i++) { - if(buffer[i] == '>') { + if(buffer[i] == '>') { header = true; continue; } @@ -42,11 +42,11 @@ int main() { exit(EXIT_FAILURE); } - printf("A:%llu\nC:%llu\nG:%llu\nT:%llu\n", + printf("A:%llu\nC:%llu\nG:%llu\nT:%llu\n", counts['a'] + counts['A'], counts['c'] + counts['C'], counts['g'] + counts['G'], counts['t'] + counts['T']); - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/sequence_end_points.c b/sequence_end_points.c new file mode 100644 index 0000000..dc03dd9 --- /dev/null +++ b/sequence_end_points.c @@ -0,0 +1,54 @@ +// Copyright 2013 Calvin Morrison +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdint.h> +#include <stdbool.h> +#include <errno.h> +int main() { + + size_t len = 0; + + char buffer[4096]; + bool header = false; + + len = fread(&buffer, 1, 1, stdin); + + unsigned long long seq_length = 0; + if(!errno) { + if(buffer[0] == '>') { + header = true; + + while((len = fread(&buffer, 1, 4096, stdin)) != 0) { + size_t i = 0; + for(i = 0; i < len; i++) { + if(buffer[i] == '>') { + printf("%llu\n", seq_length); + header = true; + continue; + } + else if(buffer[i] == '\n' && header == true) { + header = false; + continue; + } + if(header == false && buffer[i] != '\n') { + seq_length++; + } + } + } + } + else { + fprintf(stderr, "this does not look like a fasta file\n"); + return EXIT_FAILURE; + } + } + else { + fprintf(stderr, "could not read file\n"); + return EXIT_FAILURE; + } + + printf("%llu\n", seq_length); + + return EXIT_SUCCESS; +} + diff --git a/sequence_lengths.c b/sequence_lengths.c new file mode 100644 index 0000000..70ed43e --- /dev/null +++ b/sequence_lengths.c @@ -0,0 +1,55 @@ +// Copyright 2013 Calvin Morrison +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdint.h> +#include <stdbool.h> +#include <errno.h> +int main() { + + size_t len = 0; + + char buffer[4096]; + bool header = false; + + len = fread(&buffer, 1, 1, stdin); + + unsigned long long seq_length = 0; + if(!errno) { + if(buffer[0] == '>') { + header = true; + + while((len = fread(&buffer, 1, 4096, stdin)) != 0) { + size_t i = 0; + for(i = 0; i < len; i++) { + if(buffer[i] == '>') { + printf("%llu\n", seq_length); + header = true; + seq_length = 0; + continue; + } + else if(buffer[i] == '\n' && header == true) { + header = false; + continue; + } + if(header == false && buffer[i] != '\n') { + seq_length++; + } + } + } + } + else { + fprintf(stderr, "this does not look like a fasta file\n"); + return EXIT_FAILURE; + } + } + else { + fprintf(stderr, "could not read file\n"); + return EXIT_FAILURE; + } + + printf("%llu\n", seq_length); + + return EXIT_SUCCESS; +} + |