aboutsummaryrefslogtreecommitdiff
path: root/src/c/quikr_functions.c
diff options
context:
space:
mode:
author Calvin <calvin@EESI> 2013-05-14 21:51:40 -0400
committer Calvin <calvin@EESI> 2013-05-14 21:51:40 -0400
commit 0773aaf89678b967588a902df1f5e6f9ccea393d (patch)
tree 40762e5df1da876d460d8695357ab0835645e8c6 /src/c/quikr_functions.c
parent 1d2becc9af591d37badfe0e77751bbb80932472f (diff)
release 1.0
Diffstat (limited to 'src/c/quikr_functions.c')
-rw-r--r-- src/c/quikr_functions.c 163
1 files changed, 163 insertions, 0 deletions
diff --git a/src/c/quikr_functions.c b/src/c/quikr_functions.c
new file mode 100644
index 0000000..7e18e64
--- /dev/null
+++ b/src/c/quikr_functions.c
@@ -0,0 +1,163 @@
+#include <stdio.h>
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <math.h>
+
+// Count the lines of `filename` that begin with '>' (FASTA header lines).
+//
+// The count is obtained by running `grep -c` through popen() and parsing
+// the single integer it prints.
+//
+// filename: path handed to grep
+// returns:  the number printed by grep, or 0 (with a warning on stderr)
+//           when grep printed no number at all, e.g. for an unreadable file
+//
+// Exits with EXIT_FAILURE when the command line does not fit in the local
+// buffer or the pipe cannot be opened.
+//
+// NOTE(review): filename is pasted unquoted into a shell command, so names
+// containing spaces or shell metacharacters are interpreted by the shell.
+int count_sequences(char *filename) {
+    char command[512];
+    int sequences = 0;
+    int written;
+    FILE *grep_output;
+
+    // snprintf cannot overrun `command`; a truncated command line is
+    // treated as a hard error rather than executed.
+    written = snprintf(command, sizeof command, "grep -c ^\\> %s", filename);
+    if(written < 0 || (size_t)written >= sizeof command) {
+        fprintf(stderr, "Filename is too long to build the grep command: %s\n", filename);
+        exit(EXIT_FAILURE);
+    }
+
+    grep_output = popen(command, "r");
+    if(grep_output == NULL) {
+        fprintf(stderr, "Could not execute %s\n", command);
+        exit(EXIT_FAILURE);
+    }
+
+    // grep prints nothing on stdout when it cannot read the file; keep
+    // returning 0 in that case, but say so.
+    if(fscanf(grep_output, "%d", &sequences) != 1) {
+        fprintf(stderr, "Could not read a sequence count from %s\n", command);
+        sequences = 0;
+    }
+
+    pclose(grep_output);
+    return sequences;
+}
+
+
+// Scale every row of a row-major height x width matrix, in place, so that
+// the entries of the row sum to 1.
+//
+// matrix: height * width doubles; row x occupies matrix[width * x] through
+//         matrix[width * x + width - 1]
+// height: number of rows
+// width:  number of columns
+//
+// A row whose entries sum to exactly zero is left untouched: dividing it
+// by its sum would turn every entry of the row into NaN.
+void normalize_matrix(double *matrix, int height, int width) {
+    int x = 0;
+    int y = 0;
+
+    for(x = 0; x < height; x++) {
+        // offset computed in size_t so width * x cannot overflow int
+        size_t base = (size_t)width * (size_t)x;
+        double row_sum = 0;
+
+        for(y = 0; y < width; y++)
+            row_sum = row_sum + matrix[base + y];
+
+        if(row_sum == 0)
+            continue;
+
+        for(y = 0; y < width; y++)
+            matrix[base + y] = matrix[base + y] / row_sum;
+    }
+}
+
+
+// Build a count vector for `filename` by running the external `count-kmers`
+// program and reading one integer from each line it prints.
+//
+// filename: path handed to count-kmers
+// width:    number of doubles in the returned vector (must be >= 1)
+// kmer:     value passed to the -r option of count-kmers
+// returns:  a malloc'd vector of `width` doubles; element 0 is 0 and
+//           elements 1 .. width-1 hold the first width-1 numbers printed
+//           by count-kmers. The caller releases it with free().
+//
+// Exits with EXIT_FAILURE when width is below 1, memory cannot be
+// allocated, the command does not fit in the local buffer, the pipe cannot
+// be opened, or count-kmers prints fewer than width-1 lines.
+//
+// NOTE(review): filename is pasted unquoted into a shell command.
+double *load_count_matrix(char *filename, int width, int kmer) {
+
+    double *count_matrix = NULL;
+    char count_command[512];
+    int x = 0;
+    int written;
+    char *line = NULL;
+    size_t len = 0;
+    FILE *count_output;
+
+    // element 0 is always written, so an empty vector cannot be honoured
+    if(width < 1) {
+        fprintf(stderr, "count matrix width must be at least 1 (got %d)\n", width);
+        exit(EXIT_FAILURE);
+    }
+
+    count_matrix = malloc((size_t)width * sizeof *count_matrix);
+    if(count_matrix == NULL) {
+        fprintf(stderr, "could not allocate enough memory for the count matrix (%d x double) \n", width);
+        exit(EXIT_FAILURE);
+    }
+
+    // create our count matrix
+    written = snprintf(count_command, sizeof count_command, "count-kmers -r %d -1 -u %s", kmer, filename);
+    if(written < 0 || (size_t)written >= sizeof count_command) {
+        fprintf(stderr, "Filename is too long to build the count-kmers command: %s\n", filename);
+        exit(EXIT_FAILURE);
+    }
+
+    count_output = popen(count_command, "r");
+    if(count_output == NULL) {
+        fprintf(stderr, "could not execute \"%s\"", count_command);
+        exit(EXIT_FAILURE);
+    }
+
+    // set first element to zero.
+    count_matrix[0] = 0;
+
+    // one output line per remaining element; a short read would otherwise
+    // re-parse the previous line (or a NULL buffer), so it is fatal
+    for(x = 1; x < width; x++) {
+        if(getline(&line, &len, count_output) == -1) {
+            fprintf(stderr, "\"%s\" printed %d lines, expected %d\n", count_command, x - 1, width - 1);
+            exit(EXIT_FAILURE);
+        }
+        count_matrix[x] = (double)strtol(line, NULL, 10);
+    }
+
+    // getline's buffer belongs to us
+    free(line);
+    pclose(count_output);
+
+    return count_matrix;
+}
+
+
+// Read a gzip-compressed, tab-separated matrix from `filename` into a newly
+// allocated row-major height x width array of doubles. The file is
+// decompressed by running `gzip -cd` through popen().
+//
+// filename: path handed to gzip
+// height:   number of rows to read (must be >= 1)
+// width:    number of columns in the returned array (must be >= 1)
+// returns:  a buffer of height * width doubles; the caller releases it
+//           with free().
+//
+// Each input line supplies width-1 values, stored in columns 1 .. width-1
+// of its row. Column 0 is never read from the file and is zero here.
+//
+// Exits with EXIT_FAILURE when the dimensions are invalid, memory cannot be
+// allocated, the command does not fit in the local buffer, the pipe cannot
+// be opened, or the input has too few rows or too few values in a row.
+//
+// NOTE(review): filename is pasted unquoted into a shell command.
+double *load_sensing_matrix(char *filename, int height, int width) {
+
+    int x = 0;
+    int y = 0;
+    int written;
+
+    char *line = NULL;
+    char *val;
+    char command[512];
+    size_t len = 0;
+    FILE *sensing_matrix_fh = NULL;
+    double *sensing_matrix = NULL;
+
+    if(height < 1 || width < 1) {
+        fprintf(stderr, "invalid sensing matrix dimensions %d x %d\n", height, width);
+        exit(EXIT_FAILURE);
+    }
+
+    // calloc zero-fills, so column 0 (never read from the file) is defined,
+    // and it rejects a rows * row-size product that would overflow
+    sensing_matrix = calloc((size_t)height, (size_t)width * sizeof *sensing_matrix);
+    if(sensing_matrix == NULL) {
+        fprintf(stderr, "Could not allocate memory for the sensing matrix\n");
+        exit(EXIT_FAILURE);
+    }
+
+    written = snprintf(command, sizeof command, "gzip -cd %s", filename);
+    if(written < 0 || (size_t)written >= sizeof command) {
+        fprintf(stderr, "Filename is too long to build the gzip command: %s\n", filename);
+        exit(EXIT_FAILURE);
+    }
+
+    sensing_matrix_fh = popen(command, "r");
+    if(sensing_matrix_fh == NULL) {
+        fprintf(stderr, "could not open %s", filename);
+        exit(EXIT_FAILURE);
+    }
+
+    // read our sensing matrix in
+    for(x = 0; x < height; x++) {
+        // row start computed in size_t so width * x cannot overflow int
+        double *row = sensing_matrix + (size_t)width * (size_t)x;
+        char *ptr = NULL;
+        char *cursor;
+
+        if(getline(&line, &len, sensing_matrix_fh) == -1) {
+            fprintf(stderr, "%s has only %d rows, expected %d\n", filename, x, height);
+            exit(EXIT_FAILURE);
+        }
+
+        // strtok_r takes the line on the first call and NULL afterwards
+        cursor = line;
+        for (y = 1; y < width; y++) {
+            val = strtok_r(cursor, "\t", &ptr);
+            cursor = NULL;
+            if(val == NULL) {
+                fprintf(stderr, "%s: row %d has only %d values, expected %d\n", filename, x, y - 1, width - 1);
+                exit(EXIT_FAILURE);
+            }
+            row[y] = strtod(val, NULL);
+        }
+    }
+
+    // getline's buffer belongs to us
+    free(line);
+    pclose(sensing_matrix_fh);
+
+    return sensing_matrix;
+}
+
+// Collect the first whitespace-delimited word following '>' from each
+// header line of `filename`. The header lines are found by running grep
+// through popen().
+//
+// filename:  path handed to grep
+// sequences: number of header lines to read
+// returns:   a malloc'd array of `sequences` pointers, each to its own
+//            malloc'd, NUL-terminated string of at most 255 characters
+//            (longer words are cut off; a bare ">" gives an empty string).
+//            The caller frees every string and then the array.
+//
+// Exits with EXIT_FAILURE when the command does not fit in the local
+// buffer, the pipe cannot be opened, memory cannot be allocated, or grep
+// prints fewer than `sequences` lines.
+//
+// NOTE(review): filename is pasted unquoted into a shell command.
+char **load_headers(char *filename, int sequences) {
+    char command[512];
+    char *line= NULL;
+    int x = 0;
+    int written;
+    FILE *grep_output;
+    size_t len = 0;
+    char **headers;
+
+    written = snprintf(command, sizeof command, "grep ^\\> %s", filename);
+    if(written < 0 || (size_t)written >= sizeof command) {
+        fprintf(stderr, "Filename is too long to build the grep command: %s\n", filename);
+        exit(EXIT_FAILURE);
+    }
+
+    grep_output = popen(command, "r");
+    if(grep_output == NULL) {
+        fprintf(stderr, "Could not execute %s\n", command);
+        exit(EXIT_FAILURE);
+    }
+
+    // malloc(0) may return NULL; always ask for at least one slot so that
+    // an empty request is not reported as an allocation failure
+    headers = malloc((size_t)(sequences > 0 ? sequences : 1) * sizeof *headers);
+    if(headers == NULL) {
+        fprintf(stderr, "could not allocated enough memory\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for(x = 0; x < sequences; x++) {
+
+        char *header = malloc(256 * sizeof(char));
+        if(header == NULL) {
+            fprintf(stderr, "could not allocated enough memory\n");
+            exit(EXIT_FAILURE);
+        }
+
+        if(getline(&line, &len, grep_output) == -1) {
+            fprintf(stderr, "%s printed %d header lines, expected %d\n", command, x, sequences);
+            exit(EXIT_FAILURE);
+        }
+
+        // stays an empty string when nothing follows the '>'
+        header[0] = '\0';
+        // skip the leading '>'; %255s keeps the copy inside the 256-byte buffer
+        sscanf(line + 1, "%255s", header);
+        headers[x] = header;
+    }
+
+    // getline's buffer belongs to us
+    free(line);
+    pclose(grep_output);
+
+    return headers;
+}
+