summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCalvin <calvin@calvin-ThinkPad-X220.(none)>2013-05-15 10:41:13 -0400
committerCalvin <calvin@calvin-ThinkPad-X220.(none)>2013-05-15 10:41:13 -0400
commit75c8047886584b2beb81402ad2a6903857dfabda (patch)
tree40f4208200efce89bab897aa39aab6b5329c48b6
parent0773aaf89678b967588a902df1f5e6f9ccea393d (diff)
add -d flag for debugging so verbose doesn't take longer, use generate_kmers instead of a hacky awk
-rw-r--r--Makefile2
-rw-r--r--src/c/quikr.14
-rw-r--r--src/c/quikr.c10
-rw-r--r--src/c/quikr_train.c4
4 files changed, 13 insertions, 7 deletions
diff --git a/Makefile b/Makefile
index cc535d0..166255c 100644
--- a/Makefile
+++ b/Makefile
@@ -9,11 +9,13 @@ install:
@cp -vf src/c/quikr_train ${DESTDIR}${PREFIX}/bin/quikr_train
@cp -vf src/c/quikr ${DESTDIR}${PREFIX}/bin/quikr
@cp -vf src/c/multifasta_to_otu ${DESTDIR}${PREFIX}/bin/multifasta_to_otu
+ @cp -vf src/python/generate_kmers ${DESTDIR}${PREFIX}/bin/generate_kmers
chmod -v 755 ${DESTDIR}${PREFIX}/bin/probabilities-by-read
chmod -v 755 ${DESTDIR}${PREFIX}/bin/count-kmers
chmod -v 755 ${DESTDIR}${PREFIX}/bin/quikr
chmod -v 755 ${DESTDIR}${PREFIX}/bin/quikr_train
chmod -v 755 ${DESTDIR}${PREFIX}/bin/multifasta_to_otu
+ chmod -v 755 ${DESTDIR}${PREFIX}/bin/generate_kmers
nbc:
@echo "building nbc"
diff --git a/src/c/quikr.1 b/src/c/quikr.1
index 9982d94..937109b 100644
--- a/src/c/quikr.1
+++ b/src/c/quikr.1
@@ -16,6 +16,7 @@ quikr \- Calculate estimated frequencies of bacteria in a sample.
.RB [ \-o
.IR output ]
.RB [ \-v ]
+.RB [ \-d ]
.P
.BR quikr " ..."
.SH DESCRIPTION
@@ -45,6 +46,9 @@ OTU_FRACTION_PRESENT a vector representing the percentage of database sequence's
.TP
.B \-v, --verbose
verbose mode.
+.TP
+.B \-d, --debug
+debug mode. This saves the sensing matrix and the sample matrix (the A and B matrices) to files named 'sensing.matrix' and 'count.matrix' for debugging purposes.
.SH EXAMPLES
Use quikr to calculate the estimated frequencies for sample.fa, using rdp7.fasta as the sensing matrix we generated with quikr_train. This uses 6-mers by default, and a lambda value of 10000:
.P
diff --git a/src/c/quikr.c b/src/c/quikr.c
index c73e0dd..b85fb3b 100644
--- a/src/c/quikr.c
+++ b/src/c/quikr.c
@@ -12,7 +12,7 @@
#include "quikr_functions.h"
#define sensing_matrix(i,j) (sensing_matrix[width*i + j])
-#define USAGE "Usage:\n\tmultifasta_to_otu [OPTION...] - Calculate estimated frequencies of bacteria in a sample.\n\nOptions:\n\n-i, --input\n\tthe sample's fasta file of NGS READS (fasta format)\n\n-f, --sensing-fasta\n\tlocation of the fasta file database used to create the sensing matrix (fasta format)\n\n-s, --sensing-matrix\n\t location of the sensing matrix. (trained from quikr_train)\n\n-k, --kmer\n\tspecify what size of kmer to use. (default value is 6)\n\n-l, --lambda\n\tlambda value to use. (default value is 10000)\n\n-o, --output\n\tthe sensing matrix. (a gzip'd text file)\n\n-v, --verbose\n\tverbose mode."
+#define USAGE "Usage:\n\tmultifasta_to_otu [OPTION...] - Calculate estimated frequencies of bacteria in a sample.\n\nOptions:\n\n-i, --input\n\tthe sample's fasta file of NGS READS (fasta format)\n\n-f, --sensing-fasta\n\tlocation of the fasta file database used to create the sensing matrix (fasta format)\n\n-s, --sensing-matrix\n\t location of the sensing matrix. (trained from quikr_train)\n\n-k, --kmer\n\tspecify what size of kmer to use. (default value is 6)\n\n-l, --lambda\n\tlambda value to use. (default value is 10000)\n\n-o, --output\n\tthe sensing matrix. (a gzip'd text file)\n\n-v, --verbose\n\tverbose mode.\n\n-d, --debug\n\tdebug mode, this will save our sensing matrix and sample matrix (A and B matricies) in files called 'sensing.matrix' and 'count.matrix' for debugging purposes"
int main(int argc, char **argv) {
@@ -28,9 +28,8 @@ int main(int argc, char **argv) {
int x = 0;
int y = 0;
int verbose = 0;
+ int debug = 0;
int lambda = 0;
-
-
while (1) {
static struct option long_options[] = {
@@ -41,6 +40,7 @@ int main(int argc, char **argv) {
{"sensing-fasta", required_argument, 0, 'f'},
{"sensing-matrix", required_argument, 0, 's'},
{"verbose", no_argument, 0, 'v'},
+ {"debug", no_argument, 0, 'd'},
{0, 0, 0, 0}
};
@@ -71,6 +71,8 @@ int main(int argc, char **argv) {
case 'o':
output_filename = optarg;
break;
+ case 'd':
+ debug = 1;
case 'v':
verbose = 1;
break;
@@ -147,7 +149,7 @@ int main(int argc, char **argv) {
count_matrix[x] = count_matrix[x] * lambda;
// output our matricies if we are in verbose mode
- if(verbose) {
+ if(debug) {
FILE *sensing_matrix_fh = fopen( "sensing.matrix", "w");
if(sensing_matrix_fh == NULL) {
fprintf(stderr, "could not open sensing.matrix for writing.\n");
diff --git a/src/c/quikr_train.c b/src/c/quikr_train.c
index d2a83ef..f19a554 100644
--- a/src/c/quikr_train.c
+++ b/src/c/quikr_train.c
@@ -10,7 +10,6 @@
#include "quikr_functions.h"
-#define AWK_KMER_PERMUTATIONS "awk 'function p(l,v,i){for(i in A) {if(l<%d) p(l+1, (v?v\"\":x)i); else print v\"\"i;}} {A[$0]} END {p(1);} ' <<<$'A\nC\nG\nT'"
#define USAGE "Usage:\n\tquikr_train [OPTION...] - to train a database for use with quikr.\n\nOptions:\n\n-i, --input\n\tthe database of sequences to create the sensing matrix (fasta format)\n\n-k, --kmer\n\tspecify what size of kmer to use. (default value is 6)\n\n-o, --output\n\tthe sensing matrix. (a gzip'd text file)\n\n-v, --verbose\n\tverbose mode."
int main(int argc, char **argv) {
@@ -120,8 +119,7 @@ int main(int argc, char **argv) {
}
// call the probabilities-by-read command
- sprintf(kmers_file, AWK_KMER_PERMUTATIONS, kmer);
- sprintf(probabilities_command, "%s | probabilities-by-read %d %s /dev/stdin", kmers_file, kmer, fasta_file);
+ sprintf(probabilities_command, "generate_kmers %d | probabilities-by-read %d %s /dev/stdin", kmer, kmer, fasta_file);
FILE *probabilities_output = popen(probabilities_command, "r");
if(probabilities_output == NULL) {
fprintf(stderr, "Error could not execute: %s\n", probabilities_command);