aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile10
-rw-r--r--README7
-rw-r--r--sequence_end_points.c54
-rw-r--r--sequence_lengths.c55
4 files changed, 122 insertions, 4 deletions
diff --git a/Makefile b/Makefile
index 255c1fb..8193e1d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,14 @@
CC = gcc
CFLAGS= -O3 -s -mtune=native -Wextra -Wall
-all: count_nucleobases_
+all: count_nucleobases sequence_lengths sequence_end_points  # build every tool that has a rule below
-count_nucleobases_:
+count_nucleobases: count_nucleobases.c
$(CC) $(CFLAGS) count_nucleobases.c -o count_nucleobases
+sequence_lengths: sequence_lengths.c
+ $(CC) $(CFLAGS) sequence_lengths.c -o sequence_lengths
+sequence_end_points: sequence_end_points.c
+ $(CC) $(CFLAGS) sequence_end_points.c -o sequence_end_points
clean:
- rm -vf count_nucleobases
+ rm -vf count_nucleobases sequence_lengths sequence_end_points
diff --git a/README b/README
index 19d14ab..83f27d2 100644
--- a/README
+++ b/README
@@ -1 +1,6 @@
-This will count nucleobases of argv[1]
+random fasta tools:
+
+count_nucleobases - counts nucleobases (A, C, G, T) in fasta files
+sequence_lengths - print out the length of each sequence
+sequence_end_points - print out the end position of each sequence (cumulative across the file)
+
diff --git a/sequence_end_points.c b/sequence_end_points.c
new file mode 100644
index 0000000..dc03dd9
--- /dev/null
+++ b/sequence_end_points.c
@@ -0,0 +1,54 @@
+// Copyright 2013 Calvin Morrison
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <errno.h>
+/*
+ * Read FASTA from stdin and print the cumulative end position of each
+ * sequence, one per line. Newlines and header text are not counted.
+ * Returns EXIT_FAILURE when stdin is empty or unreadable, or when the
+ * first byte is not '>'.
+ */
+int main(void) {
+    char buffer[4096];
+    size_t len;
+    bool header = true; /* input starts inside the first header line */
+    unsigned long long seq_length = 0;
+
+    /* fread() need not set errno, so judge the read by its return value. */
+    len = fread(buffer, 1, 1, stdin);
+    if(len != 1 && ferror(stdin)) {
+        fprintf(stderr, "could not read file\n");
+        return EXIT_FAILURE;
+    }
+    if(len != 1 || buffer[0] != '>') {
+        fprintf(stderr, "this does not look like a fasta file\n");
+        return EXIT_FAILURE;
+    }
+
+    while((len = fread(buffer, 1, sizeof buffer, stdin)) != 0) {
+        for(size_t i = 0; i < len; i++) {
+            if(header) {
+                /* A '>' inside a header is plain text, not a new record. */
+                if(buffer[i] == '\n') {
+                    header = false;
+                }
+            } else if(buffer[i] == '>') {
+                printf("%llu\n", seq_length);
+                header = true;
+            } else if(buffer[i] != '\n') {
+                seq_length++;
+            }
+        }
+    }
+    if(ferror(stdin)) {
+        fprintf(stderr, "could not read file\n");
+        return EXIT_FAILURE;
+    }
+
+    printf("%llu\n", seq_length);
+    return EXIT_SUCCESS;
+}
+
diff --git a/sequence_lengths.c b/sequence_lengths.c
new file mode 100644
index 0000000..70ed43e
--- /dev/null
+++ b/sequence_lengths.c
@@ -0,0 +1,55 @@
+// Copyright 2013 Calvin Morrison
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <errno.h>
+/*
+ * Read FASTA from stdin and print the length of each sequence, one per
+ * line. Newlines and header text are not counted.
+ * Returns EXIT_FAILURE when stdin is empty or unreadable, or when the
+ * first byte is not '>'.
+ */
+int main(void) {
+    char buffer[4096];
+    size_t len;
+    bool header = true; /* input starts inside the first header line */
+    unsigned long long seq_length = 0;
+
+    /* fread() need not set errno, so judge the read by its return value. */
+    len = fread(buffer, 1, 1, stdin);
+    if(len != 1 && ferror(stdin)) {
+        fprintf(stderr, "could not read file\n");
+        return EXIT_FAILURE;
+    }
+    if(len != 1 || buffer[0] != '>') {
+        fprintf(stderr, "this does not look like a fasta file\n");
+        return EXIT_FAILURE;
+    }
+
+    while((len = fread(buffer, 1, sizeof buffer, stdin)) != 0) {
+        for(size_t i = 0; i < len; i++) {
+            if(header) {
+                /* A '>' inside a header is plain text, not a new record. */
+                if(buffer[i] == '\n') {
+                    header = false;
+                }
+            } else if(buffer[i] == '>') {
+                printf("%llu\n", seq_length);
+                header = true;
+                seq_length = 0; /* the next record is counted from zero */
+            } else if(buffer[i] != '\n') {
+                seq_length++;
+            }
+        }
+    }
+    if(ferror(stdin)) {
+        fprintf(stderr, "could not read file\n");
+        return EXIT_FAILURE;
+    }
+
+    printf("%llu\n", seq_length);
+    return EXIT_SUCCESS;
+}
+