From fdc2ab95726b27ffabf70a73c00c266aa2717873 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Wed, 12 Mar 2014 16:06:25 -0400 Subject: update cli for reverse-complement counting --- kmer_continuous_count.c | 20 +++++++++++++------- kmer_counts_per_sequence.c | 36 +++++++++++++++++++++++------------- kmer_total_count.c | 20 +++++++++++++------- 3 files changed, 49 insertions(+), 27 deletions(-) diff --git a/kmer_continuous_count.c b/kmer_continuous_count.c index a7c35f0..7eb617d 100644 --- a/kmer_continuous_count.c +++ b/kmer_continuous_count.c @@ -11,13 +11,14 @@ void help() { - printf("usage: kmer_continuous_count -i input_file -k kmer [-n] [-l] ...\n\n" + printf("usage: kmer_continuous_count -i input_file -k kmer [-c] [-n] [-l] ...\n\n" "count mers in size k from a fasta file, but do so continuously\n" "\n" - " --input -i input fasta file to count\n" - " --kmer -k size of mers to count\n" - " --nonzero -n only print non-zero values\n" - " --label -l print mer along with value\n" + " --input -i input fasta file to count\n" + " --kmer -k size of mers to count\n" + " --compliment -c count compliment of sequences\n" + " --nonzero -n only print non-zero values\n" + " --label -l print mer along with value\n" "\n" "Report all bugs to mutantturkey@gmail.com\n" "\n" @@ -40,6 +41,7 @@ int main(int argc, char **argv) { bool nonzero = false; bool label = false; bool kmer_set = false; + bool count_compliment = false; unsigned long long width = 0; @@ -48,6 +50,7 @@ int main(int argc, char **argv) { static struct option long_options[] = { {"input", required_argument, 0, 'i'}, {"kmer", required_argument, 0, 'k'}, + {"compliment", required_argument, 0, 'c'}, {"nonzero", no_argument, 0, 'n'}, {"label", no_argument, 0, 'l'}, {"help", no_argument, 0, 'h'}, @@ -59,7 +62,7 @@ int main(int argc, char **argv) { int option_index = 0; int c = 0; - c = getopt_long (argc, argv, "i:k:nlvh", long_options, &option_index); + c = getopt_long (argc, argv, "i:k:cnlvh", long_options, 
&option_index); if (c == -1) break; @@ -72,6 +75,9 @@ int main(int argc, char **argv) { kmer = atoi(optarg); kmer_set = true; break; + case 'c': + count_compliment = true; + break; case 'n': nonzero = true; break; @@ -116,7 +122,7 @@ int main(int argc, char **argv) { width = pow_four(kmer); - unsigned long long *counts = get_continuous_kmer_counts_from_file(fh, kmer); + unsigned long long *counts = get_continuous_kmer_counts_from_file(fh, kmer, count_compliment); // If nonzero is set, only print non zeros if(nonzero) { diff --git a/kmer_counts_per_sequence.c b/kmer_counts_per_sequence.c index 7e0e119..21aca5a 100644 --- a/kmer_counts_per_sequence.c +++ b/kmer_counts_per_sequence.c @@ -12,13 +12,14 @@ void help() { printf("usage: kmer_counts_per_sequence input_file kmer [kmer-file] ...\n\n" "count mers in each sequence of size k from a fasta file\n" "\n" - " --input -i input fasta file to count\n" - " --kmer -k size of mers to count\n" - " --mer-file -m a file containing a list of mers you are interested\n" - " in opening. this will enable output your results in\n" - " a sparse format \n" - " --sparse -s output values in a sparse format. output is in the\n" - " order sequence_number, mer_index, value\n" + " --input -i input fasta file to count\n" + " --kmer -k size of mers to count\n" + " --compliment -c count compliment of sequences\n" + " --mer-file -m a file containing a list of mers you are interested\n" + " in opening. this will enable output your results in\n" + " a sparse format \n" + " --sparse -s output values in a sparse format. 
output is in the\n" + " order sequence_number, mer_index, value\n" "\n" "Report all bugs to mutantturkey@gmail.com\n" "\n" @@ -55,10 +56,12 @@ int main(int argc, char **argv) { bool sparse = false; bool kmer_set = false; bool specific_mers = false; + bool count_compliment = false; static struct option long_options[] = { {"input", required_argument, 0, 'i'}, {"kmer", required_argument, 0, 'k'}, + {"compliment", required_argument, 0, 'c'}, {"sparse", no_argument, 0, 's'}, {"mer-file", required_argument, 0, 'm'}, {"help", no_argument, 0, 'h'}, @@ -70,7 +73,7 @@ int main(int argc, char **argv) { int option_index = 0; int c = 0; - c = getopt_long (argc, argv, "i:k:m:vsh", long_options, &option_index); + c = getopt_long (argc, argv, "i:k:m:cvsh", long_options, &option_index); if (c == -1) break; @@ -83,6 +86,8 @@ int main(int argc, char **argv) { kmer = atoi(optarg); kmer_set = true; break; + case 'c': + count_compliment = true; case 's': sparse = true; break; @@ -147,7 +152,6 @@ int main(int argc, char **argv) { unsigned long long sequence = 0; while ((read = getdelim(&line, &len, '>', fh)) != -1) { - long long i = 0; size_t k = 0; memset(counts, 0, width * sizeof(unsigned long long)); @@ -170,11 +174,17 @@ int main(int argc, char **argv) { seq[k] = alpha[(int)seq[k]]; } - for(i = 0; i < (signed long long)(seq_length - kmer + 1); i++) { - size_t mer = num_to_index(&seq[i],kmer, width, &i); - counts[mer]++; + count_sequence(seq, seq_length, kmer, counts); + + if(count_compliment) { + for(k = 0; k < seq_length; k++) { + seq[k] = compliment[(int)seq[k]]; + } + + reverse_string(seq, seq_length); + count_sequence(seq, seq_length, kmer, counts); + } - if(specific_mers) { for(k = 0; k < num_desired_indicies; k++) { diff --git a/kmer_total_count.c b/kmer_total_count.c index 6b627f2..dd29a53 100644 --- a/kmer_total_count.c +++ b/kmer_total_count.c @@ -10,13 +10,14 @@ void help() { - printf("usage: kmer_total_count -i input_file -k kmer [-n] [-l] ...\n\n" + printf("usage: 
kmer_total_count -i input_file -k kmer [-c] [-n] [-l] ...\n\n" "count mers in size k from a fasta file\n" "\n" - " --input -i input fasta file to count\n" - " --kmer -k size of mers to count\n" - " --nonzero -n only print non-zero values\n" - " --label -l print mer along with value\n" + " --input -i input fasta file to count\n" + " --kmer -k size of mers to count\n" + " --compliment -c count compliment of sequences\n" + " --nonzero -n only print non-zero values\n" + " --label -l print mer along with value\n" "\n" "Report all bugs to mutantturkey@gmail.com\n" "\n" @@ -39,6 +40,7 @@ int main(int argc, char **argv) { bool nonzero = false; bool label = false; bool kmer_set = false; + bool count_compliment = false; unsigned long long width = 0; @@ -47,6 +49,7 @@ int main(int argc, char **argv) { static struct option long_options[] = { {"input", required_argument, 0, 'i'}, {"kmer", required_argument, 0, 'k'}, + {"compliment", required_argument, 0, 'c'}, {"nonzero", no_argument, 0, 'n'}, {"label", no_argument, 0, 'l'}, {"help", no_argument, 0, 'h'}, @@ -58,7 +61,7 @@ int main(int argc, char **argv) { int option_index = 0; int c = 0; - c = getopt_long (argc, argv, "i:k:nlvh", long_options, &option_index); + c = getopt_long (argc, argv, "i:k:cnlvh", long_options, &option_index); if (c == -1) break; @@ -71,6 +74,9 @@ int main(int argc, char **argv) { kmer = atoi(optarg); kmer_set = true; break; + case 'c': + count_compliment = true; + break; case 'n': nonzero = true; break; @@ -115,7 +121,7 @@ int main(int argc, char **argv) { width = pow_four(kmer); - unsigned long long *counts = get_kmer_counts_from_file(fh, kmer); + unsigned long long *counts = get_kmer_counts_from_file(fh, kmer, count_compliment); // If nonzero is set, only print non zeros if(nonzero) { -- cgit v1.2.3