diff options
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | src/c/quikr.1 | 4 | ||||
| -rw-r--r-- | src/c/quikr.c | 10 | ||||
| -rw-r--r-- | src/c/quikr_train.c | 4 | 
4 files changed, 13 insertions, 7 deletions
| @@ -9,11 +9,13 @@ install:  	@cp -vf src/c/quikr_train ${DESTDIR}${PREFIX}/bin/quikr_train  	@cp -vf src/c/quikr ${DESTDIR}${PREFIX}/bin/quikr  	@cp -vf src/c/multifasta_to_otu ${DESTDIR}${PREFIX}/bin/multifasta_to_otu  +	@cp -vf src/python/generate_kmers ${DESTDIR}${PREFIX}/bin/generate_kmers  	chmod -v 755 ${DESTDIR}${PREFIX}/bin/probabilities-by-read  	chmod -v 755 ${DESTDIR}${PREFIX}/bin/count-kmers  	chmod -v 755 ${DESTDIR}${PREFIX}/bin/quikr  	chmod -v 755 ${DESTDIR}${PREFIX}/bin/quikr_train  	chmod -v 755 ${DESTDIR}${PREFIX}/bin/multifasta_to_otu +	chmod -v 755 ${DESTDIR}${PREFIX}/bin/generate_kmers  nbc:  	@echo "building nbc" diff --git a/src/c/quikr.1 b/src/c/quikr.1 index 9982d94..937109b 100644 --- a/src/c/quikr.1 +++ b/src/c/quikr.1 @@ -16,6 +16,7 @@ quikr \- Calculate estimated frequencies of bacteria in a sample.  .RB [ \-o  .IR output ]  .RB [ \-v ] +.RB [ \-d ]  .P  .BR quikr " ..."  .SH DESCRIPTION @@ -45,6 +46,9 @@ OTU_FRACTION_PRESENT a vector representing the percentage of database sequence's  .TP  .B \-v, --verbose  verbose mode. +.TP +.B \-d, --debug +debug mode, this will save our sensing matrix and sample matrix (A and B matricies) in files called 'sensing.matrix' and 'count.matrix' for debugging purposes  .SH EXAMPLES  Use quikr to calculate the estimated frequencies for sample.fa, using rdp7.fasta as the sensing matrix we generated with quikr_train. This uses 6-mers by default, and a lambda value of 10000:  .P diff --git a/src/c/quikr.c b/src/c/quikr.c index c73e0dd..b85fb3b 100644 --- a/src/c/quikr.c +++ b/src/c/quikr.c @@ -12,7 +12,7 @@  #include "quikr_functions.h"  #define sensing_matrix(i,j) (sensing_matrix[width*i + j]) -#define USAGE "Usage:\n\tmultifasta_to_otu [OPTION...] - Calculate estimated frequencies of bacteria in a sample.\n\nOptions:\n\n-i, --input\n\tthe sample's fasta file of NGS READS (fasta format)\n\n-f, --sensing-fasta\n\tlocation of the fasta file database used to create the sensing matrix (fasta format)\n\n-s, --sensing-matrix\n\t location of the sensing matrix. (trained from quikr_train)\n\n-k, --kmer\n\tspecify what size of kmer to use. (default value is 6)\n\n-l, --lambda\n\tlambda value to use. (default value is 10000)\n\n-o, --output\n\tthe sensing matrix. (a gzip'd text file)\n\n-v, --verbose\n\tverbose mode." +#define USAGE "Usage:\n\tmultifasta_to_otu [OPTION...] - Calculate estimated frequencies of bacteria in a sample.\n\nOptions:\n\n-i, --input\n\tthe sample's fasta file of NGS READS (fasta format)\n\n-f, --sensing-fasta\n\tlocation of the fasta file database used to create the sensing matrix (fasta format)\n\n-s, --sensing-matrix\n\t location of the sensing matrix. (trained from quikr_train)\n\n-k, --kmer\n\tspecify what size of kmer to use. (default value is 6)\n\n-l, --lambda\n\tlambda value to use. (default value is 10000)\n\n-o, --output\n\tthe sensing matrix. (a gzip'd text file)\n\n-v, --verbose\n\tverbose mode.\n\n-d, --debug\n\tdebug mode, this will save our sensing matrix and sample matrix (A and B matricies) in files called 'sensing.matrix' and 'count.matrix' for debugging purposes"  int main(int argc, char **argv) { @@ -28,9 +28,8 @@ int main(int argc, char **argv) {    int x = 0;    int y = 0;    int verbose = 0; +  int debug = 0;    int lambda = 0; -   -    while (1) {      static struct option long_options[] = { @@ -41,6 +40,7 @@ int main(int argc, char **argv) {        {"sensing-fasta",  required_argument, 0, 'f'},        {"sensing-matrix", required_argument, 0, 's'},        {"verbose", no_argument, 0, 'v'}, +      {"debug", no_argument, 0, 'd'},        {0, 0, 0, 0}      }; @@ -71,6 +71,8 @@ int main(int argc, char **argv) {        case 'o':          output_filename = optarg;          break; +      case 'd': +        debug = 1;        case 'v':          verbose = 1;          break; @@ -147,7 +149,7 @@ int main(int argc, char **argv) {      count_matrix[x] = count_matrix[x] * lambda;    // output our matricies if we are in verbose mode -  if(verbose) {  +  if(debug) {       FILE *sensing_matrix_fh = fopen( "sensing.matrix", "w");      if(sensing_matrix_fh == NULL) {        fprintf(stderr, "could not open sensing.matrix for writing.\n"); diff --git a/src/c/quikr_train.c b/src/c/quikr_train.c index d2a83ef..f19a554 100644 --- a/src/c/quikr_train.c +++ b/src/c/quikr_train.c @@ -10,7 +10,6 @@  #include "quikr_functions.h" -#define AWK_KMER_PERMUTATIONS "awk 'function p(l,v,i){for(i in A) {if(l<%d) p(l+1, (v?v\"\":x)i); else print v\"\"i;}} {A[$0]} END {p(1);} ' <<<$'A\nC\nG\nT'"  #define USAGE "Usage:\n\tquikr_train [OPTION...] - to train a database for use with quikr.\n\nOptions:\n\n-i, --input\n\tthe database of sequences to create the sensing matrix (fasta format)\n\n-k, --kmer\n\tspecify what size of kmer to use. (default value is 6)\n\n-o, --output\n\tthe sensing matrix. (a gzip'd text file)\n\n-v, --verbose\n\tverbose mode."  int main(int argc, char **argv) { @@ -120,8 +119,7 @@ int main(int argc, char **argv) {    }    // call the probabilities-by-read command -  sprintf(kmers_file, AWK_KMER_PERMUTATIONS, kmer); -  sprintf(probabilities_command, "%s | probabilities-by-read %d %s /dev/stdin", kmers_file, kmer, fasta_file); +  sprintf(probabilities_command, "generate_kmers %d | probabilities-by-read %d %s /dev/stdin", kmer, kmer, fasta_file);    FILE *probabilities_output = popen(probabilities_command, "r");    if(probabilities_output == NULL) {      fprintf(stderr, "Error could not execute: %s\n", probabilities_command); | 
