diff options
-rw-r--r-- | Makefile | 10 | ||||
-rw-r--r-- | README | 7 | ||||
-rw-r--r-- | sequence_end_points.c | 54 | ||||
-rw-r--r-- | sequence_lengths.c | 55 |
4 files changed, 122 insertions, 4 deletions
@@ -1,10 +1,14 @@
 CC = gcc
 CFLAGS= -O3 -s -mtune=native -Wextra -Wall
 
-all: count_nucleobases_
+all: count_nucleobases sequence_lengths sequence_end_points
 
-count_nucleobases_:
+count_nucleobases: count_nucleobases.c
 	$(CC) $(CFLAGS) count_nucleobases.c -o count_nucleobases
 
+sequence_lengths: sequence_lengths.c
+	$(CC) $(CFLAGS) sequence_lengths.c -o sequence_lengths
+sequence_end_points: sequence_end_points.c
+	$(CC) $(CFLAGS) sequence_end_points.c -o sequence_end_points
 clean:
-	rm -vf count_nucleobases
+	rm -vf count_nucleobases sequence_lengths sequence_end_points
@@ -1 +1,6 @@
-This will count nucleobases of argv[1]
+random fasta tools:
+
+count_nucleobases - counts nucleobases (A, C, G, T) in fasta files
+sequence_lengths - prints the length of each sequence
+sequence_end_points - prints the end position of each sequence (cumulative across the file)
+
diff --git a/sequence_end_points.c b/sequence_end_points.c
new file mode 100644
index 0000000..dc03dd9
--- /dev/null
+++ b/sequence_end_points.c
@@ -0,0 +1,70 @@
+// Copyright 2013 Calvin Morrison
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <errno.h>
+
+/*
+ * Read FASTA text from stdin and print the end position of every sequence,
+ * one per line. The count is cumulative: it keeps running across records,
+ * so each value is where that sequence ends in the concatenation of all
+ * sequences. Line terminators are not counted.
+ *
+ * Exits with EXIT_FAILURE if stdin cannot be read or does not start with a
+ * '>' header marker.
+ */
+int main(void) {
+	char buffer[4096];
+	size_t len;
+	size_t i;
+	bool header;
+	unsigned long long seq_length = 0;
+
+	/*
+	 * Probe the first byte. fread() signals failure through its return
+	 * value and ferror(), not errno, so errno is not consulted here.
+	 */
+	len = fread(buffer, 1, 1, stdin);
+	if (len != 1 && ferror(stdin)) {
+		fprintf(stderr, "could not read file\n");
+		return EXIT_FAILURE;
+	}
+	if (len != 1 || buffer[0] != '>') {
+		fprintf(stderr, "this does not look like a fasta file\n");
+		return EXIT_FAILURE;
+	}
+	header = true;
+
+	while ((len = fread(buffer, 1, sizeof buffer, stdin)) != 0) {
+		for (i = 0; i < len; i++) {
+			const char c = buffer[i];
+
+			if (header) {
+				/* A header runs to end of line and may itself contain '>'. */
+				if (c == '\n') {
+					header = false;
+				}
+			} else if (c == '>') {
+				/* New record: report where the previous sequence ended. */
+				printf("%llu\n", seq_length);
+				header = true;
+			} else if (c != '\n' && c != '\r') {
+				/* Line terminators (LF or CRLF) are not residues. */
+				seq_length++;
+			}
+		}
+	}
+
+	/* A zero-length read means EOF or an error; tell the two apart. */
+	if (ferror(stdin)) {
+		fprintf(stderr, "could not read file\n");
+		return EXIT_FAILURE;
+	}
+
+	printf("%llu\n", seq_length);
+
+	return EXIT_SUCCESS;
+}
+
diff --git
a/sequence_lengths.c b/sequence_lengths.c
new file mode 100644
index 0000000..70ed43e
--- /dev/null
+++ b/sequence_lengths.c
@@ -0,0 +1,69 @@
+// Copyright 2013 Calvin Morrison
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <errno.h>
+
+/*
+ * Read FASTA text from stdin and print the length of every sequence, one
+ * per line, in input order. Line terminators are not counted.
+ *
+ * Exits with EXIT_FAILURE if stdin cannot be read or does not start with a
+ * '>' header marker.
+ */
+int main(void) {
+	char buffer[4096];
+	size_t len;
+	size_t i;
+	bool header;
+	unsigned long long seq_length = 0;
+
+	/*
+	 * Probe the first byte. fread() signals failure through its return
+	 * value and ferror(), not errno, so errno is not consulted here.
+	 */
+	len = fread(buffer, 1, 1, stdin);
+	if (len != 1 && ferror(stdin)) {
+		fprintf(stderr, "could not read file\n");
+		return EXIT_FAILURE;
+	}
+	if (len != 1 || buffer[0] != '>') {
+		fprintf(stderr, "this does not look like a fasta file\n");
+		return EXIT_FAILURE;
+	}
+	header = true;
+
+	while ((len = fread(buffer, 1, sizeof buffer, stdin)) != 0) {
+		for (i = 0; i < len; i++) {
+			const char c = buffer[i];
+
+			if (header) {
+				/* A header runs to end of line and may itself contain '>'. */
+				if (c == '\n') {
+					header = false;
+				}
+			} else if (c == '>') {
+				/* New record: emit the finished sequence and start over. */
+				printf("%llu\n", seq_length);
+				header = true;
+				seq_length = 0;
+			} else if (c != '\n' && c != '\r') {
+				/* Line terminators (LF or CRLF) are not residues. */
+				seq_length++;
+			}
+		}
+	}
+
+	/* A zero-length read means EOF or an error; tell the two apart. */
+	if (ferror(stdin)) {
+		fprintf(stderr, "could not read file\n");
+		return EXIT_FAILURE;
+	}
+
+	printf("%llu\n", seq_length);
+
+	return EXIT_SUCCESS;
+}
+