add -d flag for debugging so verbose doesn't take longer, use generate_kmers instead of a hacky awk

author: Calvin <calvin@calvin-ThinkPad-X220.(none)> 2013-05-15 10:41:13 -0400
committer: Calvin <calvin@calvin-ThinkPad-X220.(none)> 2013-05-15 10:41:13 -0400
commit: 75c8047886584b2beb81402ad2a6903857dfabda (patch)
tree: 40f4208200efce89bab897aa39aab6b5329c48b6 /src/c
parent: 0773aaf89678b967588a902df1f5e6f9ccea393d (diff)
3 files changed, 11 insertions, 7 deletions
diff --git a/src/c/quikr.1 b/src/c/quikr.1
index 9982d94..937109b 100644
--- a/src/c/quikr.1
+++ b/src/c/quikr.1
@@ -16,6 +16,7 @@ quikr \- Calculate estimated frequencies of bacteria in a sample.
 .RB [ \-o
 .IR output ]
 .RB [ \-v ]
+.RB [ \-d ]
 .P
 .BR quikr " ..."
 .SH DESCRIPTION
@@ -45,6 +46,9 @@ OTU_FRACTION_PRESENT a vector representing the percentage of database sequence's
 .TP
 .B \-v, --verbose
 verbose mode.
+.TP
+.B \-d, --debug
+debug mode. This saves the sensing matrix and the sample matrix (the A and B matrices) to files named 'sensing.matrix' and 'count.matrix' for debugging purposes.
 .SH EXAMPLES
 Use quikr to calculate the estimated frequencies for sample.fa, using rdp7.fasta as the sensing matrix we generated with quikr_train. This uses 6-mers by default, and a lambda value of 10000:
 .P
diff --git a/src/c/quikr.c b/src/c/quikr.c
index c73e0dd..b85fb3b 100644
--- a/src/c/quikr.c
+++ b/src/c/quikr.c
@@ -12,7 +12,7 @@
 #include "quikr_functions.h"
 
 #define sensing_matrix(i,j) (sensing_matrix[width*i + j])
-#define USAGE "Usage:\n\tmultifasta_to_otu [OPTION...] - Calculate estimated frequencies of bacteria in a sample.\n\nOptions:\n\n-i, --input\n\tthe sample's fasta file of NGS READS (fasta format)\n\n-f, --sensing-fasta\n\tlocation of the fasta file database used to create the sensing matrix (fasta format)\n\n-s, --sensing-matrix\n\t location of the sensing matrix. (trained from quikr_train)\n\n-k, --kmer\n\tspecify what size of kmer to use. (default value is 6)\n\n-l, --lambda\n\tlambda value to use. (default value is 10000)\n\n-o, --output\n\tthe sensing matrix. (a gzip'd text file)\n\n-v, --verbose\n\tverbose mode."
+#define USAGE "Usage:\n\tmultifasta_to_otu [OPTION...] - Calculate estimated frequencies of bacteria in a sample.\n\nOptions:\n\n-i, --input\n\tthe sample's fasta file of NGS READS (fasta format)\n\n-f, --sensing-fasta\n\tlocation of the fasta file database used to create the sensing matrix (fasta format)\n\n-s, --sensing-matrix\n\t location of the sensing matrix. (trained from quikr_train)\n\n-k, --kmer\n\tspecify what size of kmer to use. (default value is 6)\n\n-l, --lambda\n\tlambda value to use. (default value is 10000)\n\n-o, --output\n\tthe sensing matrix. (a gzip'd text file)\n\n-v, --verbose\n\tverbose mode.\n\n-d, --debug\n\tdebug mode, this will save our sensing matrix and sample matrix (A and B matricies) in files called 'sensing.matrix' and 'count.matrix' for debugging purposes"
 
 int main(int argc, char **argv) {
 
@@ -28,9 +28,8 @@ int main(int argc, char **argv) {
   int x = 0;
   int y = 0;
   int verbose = 0;
+  int debug = 0;
   int lambda = 0;
-  
-
 
   while (1) {
     static struct option long_options[] = {
@@ -41,6 +40,7 @@ int main(int argc, char **argv) {
       {"sensing-fasta",  required_argument, 0, 'f'},
       {"sensing-matrix", required_argument, 0, 's'},
       {"verbose", no_argument, 0, 'v'},
+      {"debug", no_argument, 0, 'd'},
       {0, 0, 0, 0}
     };
 
@@ -71,6 +71,8 @@ int main(int argc, char **argv) {
       case 'o':
         output_filename = optarg;
         break;
+      case 'd':
+        debug = 1;
       case 'v':
         verbose = 1;
         break;
@@ -147,7 +149,7 @@ int main(int argc, char **argv) {
     count_matrix[x] = count_matrix[x] * lambda;
   
   // output our matricies if we are in verbose mode
-  if(verbose) { 
+  if(debug) { 
     FILE *sensing_matrix_fh = fopen( "sensing.matrix", "w");
     if(sensing_matrix_fh == NULL) {
       fprintf(stderr, "could not open sensing.matrix for writing.\n");
diff --git a/src/c/quikr_train.c b/src/c/quikr_train.c
index d2a83ef..f19a554 100644
--- a/src/c/quikr_train.c
+++ b/src/c/quikr_train.c
@@ -10,7 +10,6 @@
 
 #include "quikr_functions.h"
 
-#define AWK_KMER_PERMUTATIONS "awk 'function p(l,v,i){for(i in A) {if(l<%d) p(l+1, (v?v\"\":x)i); else print v\"\"i;}} {A[$0]} END {p(1);} ' <<<$'A\nC\nG\nT'"
 #define USAGE "Usage:\n\tquikr_train [OPTION...] - to train a database for use with quikr.\n\nOptions:\n\n-i, --input\n\tthe database of sequences to create the sensing matrix (fasta format)\n\n-k, --kmer\n\tspecify what size of kmer to use. (default value is 6)\n\n-o, --output\n\tthe sensing matrix. (a gzip'd text file)\n\n-v, --verbose\n\tverbose mode."
 
 int main(int argc, char **argv) {
@@ -120,8 +119,7 @@ int main(int argc, char **argv) {
   }
 
   // call the probabilities-by-read command
-  sprintf(kmers_file, AWK_KMER_PERMUTATIONS, kmer);
-  sprintf(probabilities_command, "%s | probabilities-by-read %d %s /dev/stdin", kmers_file, kmer, fasta_file);
+  sprintf(probabilities_command, "generate_kmers %d | probabilities-by-read %d %s /dev/stdin", kmer, kmer, fasta_file);
   FILE *probabilities_output = popen(probabilities_command, "r");
   if(probabilities_output == NULL) {
     fprintf(stderr, "Error could not execute: %s\n", probabilities_command);
author	Calvin <calvin@calvin-ThinkPad-X220.(none)>	2013-05-15 10:41:13 -0400
committer	Calvin <calvin@calvin-ThinkPad-X220.(none)>	2013-05-15 10:41:13 -0400
commit	75c8047886584b2beb81402ad2a6903857dfabda (patch)
tree	40f4208200efce89bab897aa39aab6b5329c48b6 /src/c
parent	0773aaf89678b967588a902df1f5e6f9ccea393d (diff)