From 75c8047886584b2beb81402ad2a6903857dfabda Mon Sep 17 00:00:00 2001 From: Calvin Date: Wed, 15 May 2013 10:41:13 -0400 Subject: add -d flag for debugging so verbose doesn't take longer, use generate_kmers instead of a hacky awk --- Makefile | 2 ++ src/c/quikr.1 | 4 ++++ src/c/quikr.c | 10 ++++++---- src/c/quikr_train.c | 4 +--- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index cc535d0..166255c 100644 --- a/Makefile +++ b/Makefile @@ -9,11 +9,13 @@ install: @cp -vf src/c/quikr_train ${DESTDIR}${PREFIX}/bin/quikr_train @cp -vf src/c/quikr ${DESTDIR}${PREFIX}/bin/quikr @cp -vf src/c/multifasta_to_otu ${DESTDIR}${PREFIX}/bin/multifasta_to_otu + @cp -vf src/python/generate_kmers ${DESTDIR}${PREFIX}/bin/generate_kmers chmod -v 755 ${DESTDIR}${PREFIX}/bin/probabilities-by-read chmod -v 755 ${DESTDIR}${PREFIX}/bin/count-kmers chmod -v 755 ${DESTDIR}${PREFIX}/bin/quikr chmod -v 755 ${DESTDIR}${PREFIX}/bin/quikr_train chmod -v 755 ${DESTDIR}${PREFIX}/bin/multifasta_to_otu + chmod -v 755 ${DESTDIR}${PREFIX}/bin/generate_kmers nbc: @echo "building nbc" diff --git a/src/c/quikr.1 b/src/c/quikr.1 index 9982d94..937109b 100644 --- a/src/c/quikr.1 +++ b/src/c/quikr.1 @@ -16,6 +16,7 @@ quikr \- Calculate estimated frequencies of bacteria in a sample. .RB [ \-o .IR output ] .RB [ \-v ] +.RB [ \-d ] .P .BR quikr " ..." .SH DESCRIPTION @@ -45,6 +46,9 @@ OTU_FRACTION_PRESENT a vector representing the percentage of database sequence's .TP .B \-v, --verbose verbose mode. +.TP +.B \-d, --debug +debug mode. This will save our sensing matrix and sample matrix (A and B matrices) in files called 'sensing.matrix' and 'count.matrix' for debugging purposes. .SH EXAMPLES Use quikr to calculate the estimated frequencies for sample.fa, using rdp7.fasta as the sensing matrix we generated with quikr_train. 
This uses 6-mers by default, and a lambda value of 10000: .P diff --git a/src/c/quikr.c b/src/c/quikr.c index c73e0dd..b85fb3b 100644 --- a/src/c/quikr.c +++ b/src/c/quikr.c @@ -12,7 +12,7 @@ #include "quikr_functions.h" #define sensing_matrix(i,j) (sensing_matrix[width*i + j]) -#define USAGE "Usage:\n\tmultifasta_to_otu [OPTION...] - Calculate estimated frequencies of bacteria in a sample.\n\nOptions:\n\n-i, --input\n\tthe sample's fasta file of NGS READS (fasta format)\n\n-f, --sensing-fasta\n\tlocation of the fasta file database used to create the sensing matrix (fasta format)\n\n-s, --sensing-matrix\n\t location of the sensing matrix. (trained from quikr_train)\n\n-k, --kmer\n\tspecify what size of kmer to use. (default value is 6)\n\n-l, --lambda\n\tlambda value to use. (default value is 10000)\n\n-o, --output\n\tthe sensing matrix. (a gzip'd text file)\n\n-v, --verbose\n\tverbose mode." +#define USAGE "Usage:\n\tmultifasta_to_otu [OPTION...] - Calculate estimated frequencies of bacteria in a sample.\n\nOptions:\n\n-i, --input\n\tthe sample's fasta file of NGS READS (fasta format)\n\n-f, --sensing-fasta\n\tlocation of the fasta file database used to create the sensing matrix (fasta format)\n\n-s, --sensing-matrix\n\t location of the sensing matrix. (trained from quikr_train)\n\n-k, --kmer\n\tspecify what size of kmer to use. (default value is 6)\n\n-l, --lambda\n\tlambda value to use. (default value is 10000)\n\n-o, --output\n\tthe sensing matrix. 
(a gzip'd text file)\n\n-v, --verbose\n\tverbose mode.\n\n-d, --debug\n\tdebug mode, this will save our sensing matrix and sample matrix (A and B matricies) in files called 'sensing.matrix' and 'count.matrix' for debugging purposes" int main(int argc, char **argv) { @@ -28,9 +28,8 @@ int main(int argc, char **argv) { int x = 0; int y = 0; int verbose = 0; + int debug = 0; int lambda = 0; - - while (1) { static struct option long_options[] = { @@ -41,6 +40,7 @@ int main(int argc, char **argv) { {"sensing-fasta", required_argument, 0, 'f'}, {"sensing-matrix", required_argument, 0, 's'}, {"verbose", no_argument, 0, 'v'}, + {"debug", no_argument, 0, 'd'}, {0, 0, 0, 0} }; @@ -71,6 +71,8 @@ int main(int argc, char **argv) { case 'o': output_filename = optarg; break; + case 'd': + debug = 1; case 'v': verbose = 1; break; @@ -147,7 +149,7 @@ int main(int argc, char **argv) { count_matrix[x] = count_matrix[x] * lambda; // output our matricies if we are in verbose mode - if(verbose) { + if(debug) { FILE *sensing_matrix_fh = fopen( "sensing.matrix", "w"); if(sensing_matrix_fh == NULL) { fprintf(stderr, "could not open sensing.matrix for writing.\n"); diff --git a/src/c/quikr_train.c b/src/c/quikr_train.c index d2a83ef..f19a554 100644 --- a/src/c/quikr_train.c +++ b/src/c/quikr_train.c @@ -10,7 +10,6 @@ #include "quikr_functions.h" -#define AWK_KMER_PERMUTATIONS "awk 'function p(l,v,i){for(i in A) {if(l<%d) p(l+1, (v?v\"\":x)i); else print v\"\"i;}} {A[$0]} END {p(1);} ' <<<$'A\nC\nG\nT'" #define USAGE "Usage:\n\tquikr_train [OPTION...] - to train a database for use with quikr.\n\nOptions:\n\n-i, --input\n\tthe database of sequences to create the sensing matrix (fasta format)\n\n-k, --kmer\n\tspecify what size of kmer to use. (default value is 6)\n\n-o, --output\n\tthe sensing matrix. (a gzip'd text file)\n\n-v, --verbose\n\tverbose mode." 
int main(int argc, char **argv) { @@ -120,8 +119,7 @@ int main(int argc, char **argv) { } // call the probabilities-by-read command - sprintf(kmers_file, AWK_KMER_PERMUTATIONS, kmer); - sprintf(probabilities_command, "%s | probabilities-by-read %d %s /dev/stdin", kmers_file, kmer, fasta_file); + sprintf(probabilities_command, "generate_kmers %d | probabilities-by-read %d %s /dev/stdin", kmer, kmer, fasta_file); FILE *probabilities_output = popen(probabilities_command, "r"); if(probabilities_output == NULL) { fprintf(stderr, "Error could not execute: %s\n", probabilities_command); -- cgit v1.2.3