aboutsummaryrefslogtreecommitdiff
path: root/src/c/quikr_functions.c
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2014-05-06 12:58:09 -0400
committerCalvin Morrison <mutantturkey@gmail.com>2014-05-06 12:58:09 -0400
commitf251dc46c40501704bfebf2d778577b1f1dc3484 (patch)
treef09e5c15a0b2ce33f7020310161d3f44cfe1c62f /src/c/quikr_functions.c
parent151602f582746f8374a3f8bc4ea1ffdbcc4a513a (diff)
parent958b167c741c2928021216fddadf24ecdabfbf61 (diff)
werks
Diffstat (limited to 'src/c/quikr_functions.c')
-rw-r--r--src/c/quikr_functions.c160
1 files changed, 153 insertions, 7 deletions
diff --git a/src/c/quikr_functions.c b/src/c/quikr_functions.c
index f690549..5715024 100644
--- a/src/c/quikr_functions.c
+++ b/src/c/quikr_functions.c
@@ -6,11 +6,106 @@
#include <string.h>
#include <unistd.h>
#include <zlib.h>
+#include <assert.h>
#include "kmer_utils.h"
#include "quikr.h"
+/* getdelim.c --- Implementation of replacement getdelim function.
+ Copyright (C) 1994, 1996, 1997, 1998, 2001, 2003, 2005 Free
+ Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/**
+ * Read one '>'-delimited record from fp into a growable buffer.
+ *
+ * Bytes are copied from fp into *lineptr until end of file, or until a '>'
+ * is read after at least one newline has been seen in this call. That
+ * terminating '>' is consumed from the stream and stored as the last byte
+ * of the buffer. The buffer is always NUL-terminated on a normal return.
+ *
+ * @param lineptr  In/out buffer pointer. If *lineptr is NULL or *n is 0 a
+ *                 fresh 120-byte buffer is allocated; the buffer is grown
+ *                 with realloc as needed. The caller frees it.
+ * @param n        In/out capacity of *lineptr in bytes.
+ * @param fp       Stream to read from; locked for the duration of the call.
+ * @return Number of bytes stored (excluding the NUL), or -1 when nothing
+ *         could be read (immediate EOF), on allocation failure, or when an
+ *         argument is NULL (errno is set to EINVAL in that last case).
+ */
+ssize_t getseq(char **lineptr, size_t *n, FILE *fp) {
+    /* ssize_t, not int: a record may be longer than INT_MAX bytes. */
+    ssize_t result = -1;
+    size_t cur_len = 0;
+    int newline = 0;
+
+    if (lineptr == NULL || n == NULL || fp == NULL) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    flockfile(fp);
+
+    if (*lineptr == NULL || *n == 0) {
+        *n = 120;
+        *lineptr = malloc(*n);
+        if (*lineptr == NULL)
+            goto unlock_return;
+    }
+
+    for (;;) {
+        int c = getc(fp);
+
+        if (c == EOF)
+            break;
+
+        /* Keep room for this byte plus the final NUL. */
+        if (cur_len + 2 >= *n) {
+            size_t needed = 2 * (cur_len + 1) + 2; /* Be generous. */
+            char *grown;
+
+            /* The doubling wrapped around size_t. */
+            if (needed < cur_len) {
+                errno = ENOMEM;
+                goto unlock_return;
+            }
+
+            /* Use a temporary so *lineptr stays valid if realloc fails. */
+            grown = realloc(*lineptr, needed);
+            if (grown == NULL)
+                goto unlock_return;
+
+            *lineptr = grown;
+            *n = needed;
+        }
+
+        (*lineptr)[cur_len++] = (char) c;
+
+        if (c == '\n') {
+            newline = 1;
+        } else if (c == '>' && newline) {
+            /* Start of the next record: stop, keeping the '>' we just stored. */
+            break;
+        }
+    }
+
+    (*lineptr)[cur_len] = '\0';
+    if (cur_len > 0)
+        result = (ssize_t) cur_len;
+
+unlock_return:
+    funlockfile(fp);
+    return result;
+}
+
void check_malloc(void *ptr, char *error) {
if (ptr == NULL) {
if(error != NULL) {
@@ -124,6 +219,42 @@ unsigned long long count_sequences(const char *filename) {
}
+/*
+ * * Sally - A Tool for Embedding Strings in Vector Spaces
+ * * Copyright (C) 2010 Konrad Rieck (konrad@mlsec.org)
+ * * --
+ * * This program is free software; you can redistribute it and/or modify it
+ * * under the terms of the GNU General Public License as published by the
+ * * Free Software Foundation; either version 3 of the License, or (at your
+ * * option) any later version. This program is distributed without any
+ * * warranty. See the GNU General Public License for more details.
+ * */
+/**
+ * Read one line, including its trailing '\n', from a gzip stream.
+ *
+ * @param s  In/out buffer pointer; may point to NULL on the first call.
+ *           The buffer is (re)allocated in 256-byte steps and is reused
+ *           across calls. The caller frees it.
+ * @param n  Out: number of bytes stored in *s, excluding the NUL. It is
+ *           reset to 0 on entry, so it is a length, not a capacity.
+ * @param f  Open gzFile to read from; must not be NULL.
+ * @return The line length, or (size_t) -1 when the stream is already at
+ *         EOF, on a read error, or on allocation failure. Callers must
+ *         compare against (size_t) -1, not 0. A final line that lacks a
+ *         trailing newline is returned as-is rather than dropped.
+ */
+size_t gzgetline(char **s, size_t * n, gzFile f) {
+    int c = 0;
+
+    assert(f);
+    *n = 0;
+
+    if (gzeof(f))
+        return (size_t) -1;
+
+    while (c != '\n') {
+        /* Grow whenever the length reaches a multiple of 256 (and on first use). */
+        if (!*s || *n % 256 == 0) {
+            /* Temporary pointer: a failed realloc must not lose the caller's buffer. */
+            char *grown = realloc(*s, *n + 256 + 1);
+            if (!grown)
+                return (size_t) -1;
+            *s = grown;
+        }
+
+        c = gzgetc(f);
+        if (c == -1) {
+            /* EOF after some data: hand back the unterminated last line. */
+            if (*n > 0 && gzeof(f))
+                break;
+            return (size_t) -1;
+        }
+
+        (*s)[(*n)++] = (char) c;
+    }
+
+    (*s)[*n] = '\0';
+    return *n;
+}
+
struct matrix *load_sensing_matrix(const char *filename, unsigned int target_kmer) {
char *line = NULL;
@@ -139,6 +270,7 @@ struct matrix *load_sensing_matrix(const char *filename, unsigned int target_kme
unsigned long long width = 0;
struct matrix *ret = NULL;
+ size_t lineno = 0;
gzFile fh = NULL;
@@ -153,6 +285,7 @@ struct matrix *load_sensing_matrix(const char *filename, unsigned int target_kme
// Check for quikr
line = gzgets(fh, line, 1024);
+ lineno++;
if(strcmp(line, "quikr\n") != 0) {
fprintf(stderr, "This does not look like a quikr sensing matrix. Please check your path: %s\n", filename);
exit(EXIT_FAILURE);
@@ -164,6 +297,7 @@ struct matrix *load_sensing_matrix(const char *filename, unsigned int target_kme
fprintf(stderr, "Sensing Matrix uses an unsupported version, please retrain your matrix\n");
exit(EXIT_FAILURE);
}
+ lineno++;
// get number of sequences
line = gzgets(fh, line, 1024);
@@ -172,6 +306,7 @@ struct matrix *load_sensing_matrix(const char *filename, unsigned int target_kme
fprintf(stderr, "Error parsing sensing matrix, sequence count is zero\n");
exit(EXIT_FAILURE);
}
+ lineno++;
// get kmer
gzgets(fh, line, 1024);
@@ -180,6 +315,7 @@ struct matrix *load_sensing_matrix(const char *filename, unsigned int target_kme
fprintf(stderr, "Error parsing sensing matrix, kmer is zero\n");
exit(EXIT_FAILURE);
}
+ lineno++;
if(kmer != target_kmer) {
fprintf(stderr, "The sensing_matrix was trained with a different kmer than your requested kmer\n");
@@ -198,20 +334,29 @@ struct matrix *load_sensing_matrix(const char *filename, unsigned int target_kme
headers = malloc(sequences * sizeof(char *));
check_malloc(headers, NULL);
- unsigned long long lineno = 0;
+ char *buf = NULL;
+ size_t len = 0;
+ size_t read = 0;
for(i = 0; i < sequences; i++) {
unsigned long long j = 0;
// get header and add it to headers array
- char *header = malloc(512 * sizeof(char));
- check_malloc(header, NULL);
- gzgets(fh, header, 512);
- lineno++;
+ //
+ read = gzgetline(&buf, &len, fh);
+ if(read == 0) {
+ fprintf(stderr, "Error parsing sensing matrix, could not read header\n");
+ exit(EXIT_FAILURE);
+ }
+
+ char *header = malloc(sizeof(char) * read + 1);
+ check_malloc(header, NULL);
+ header = strncpy(header, buf, read - 1);
if(header[0] != '>') {
fprintf(stderr, "Error parsing sensing matrix, could not read header in line %llu\n", lineno);
exit(EXIT_FAILURE);
}
+ lineno++;
- header[strlen(header) - 1] = '\0';
+ header[read - 1] = '\0';
headers[i] = header+1;
row = memset(row, 0, (width) * sizeof(unsigned long long));
@@ -220,9 +365,10 @@ struct matrix *load_sensing_matrix(const char *filename, unsigned int target_kme
line = gzgets(fh, line, 32);
lineno++;
if(line == NULL || line[0] == '>') {
- fprintf(stderr, "Error parsing sensing matrix, line does not look like a value in line %llu\n", lineno);
+ fprintf(stderr, "Error parsing sensing matrix, line %zu does not look like a value\n", lineno);
exit(EXIT_FAILURE);
}
+ lineno++;
row[j] = strtoull(line, NULL, 10);
if(errno) {