diff options
| -rw-r--r-- | kmer_continuous_count.c | 20 | ||||
| -rw-r--r-- | kmer_counts_per_sequence.c | 36 | ||||
| -rw-r--r-- | kmer_total_count.c | 20 | 
3 files changed, 49 insertions, 27 deletions
| diff --git a/kmer_continuous_count.c b/kmer_continuous_count.c index a7c35f0..7eb617d 100644 --- a/kmer_continuous_count.c +++ b/kmer_continuous_count.c @@ -11,13 +11,14 @@  void help() { -	printf("usage: kmer_continuous_count -i input_file -k kmer [-n] [-l] ...\n\n" +	printf("usage: kmer_continuous_count -i input_file -k kmer [-c] [-n] [-l] ...\n\n"  				 "count mers in size k from a fasta file, but do so continuously\n"  				 "\n" -				 "  --input    -i  input fasta file to count\n" -				 "  --kmer     -k  size of mers to count\n" -				 "  --nonzero  -n  only print non-zero values\n" -				 "  --label    -l  print mer along with value\n" +				 "  --input      -i  input fasta file to count\n" +				 "  --kmer       -k  size of mers to count\n" +				 "  --compliment -c  count compliment of sequences\n" +				 "  --nonzero    -n  only print non-zero values\n" +				 "  --label      -l  print mer along with value\n"  				 "\n"  				 "Report all bugs to mutantturkey@gmail.com\n"  				 "\n" @@ -40,6 +41,7 @@ int main(int argc, char **argv) {  	bool nonzero = false;  	bool label = false;  	bool kmer_set = false; +	bool count_compliment = false;  	unsigned long long width = 0; @@ -48,6 +50,7 @@ int main(int argc, char **argv) {  	static struct option long_options[] = {  		{"input", required_argument, 0, 'i'},  		{"kmer",  required_argument, 0, 'k'}, +		{"compliment", required_argument, 0, 'c'},  		{"nonzero", no_argument, 0, 'n'},  		{"label", no_argument, 0, 'l'},  		{"help", no_argument, 0, 'h'}, @@ -59,7 +62,7 @@ int main(int argc, char **argv) {  		int option_index = 0;  		int c = 0; -		c = getopt_long (argc, argv, "i:k:nlvh", long_options, &option_index); +		c = getopt_long (argc, argv, "i:k:cnlvh", long_options, &option_index);  		if (c == -1)  			break; @@ -72,6 +75,9 @@ int main(int argc, char **argv) {  				kmer = atoi(optarg);  				kmer_set = true;  				break; +			case 'c': +				count_compliment = true; +				break;  			case 'n':  				nonzero = true;   				break; @@ -116,7 +122,7 @@ int main(int argc, char **argv) {  	width = pow_four(kmer); -	unsigned long long *counts = get_continuous_kmer_counts_from_file(fh, kmer); +	unsigned long long *counts = get_continuous_kmer_counts_from_file(fh, kmer, count_compliment);  	// If nonzero is set, only print non zeros  	if(nonzero) { diff --git a/kmer_counts_per_sequence.c b/kmer_counts_per_sequence.c index 7e0e119..21aca5a 100644 --- a/kmer_counts_per_sequence.c +++ b/kmer_counts_per_sequence.c @@ -12,13 +12,14 @@ void help() {  	printf("usage: kmer_counts_per_sequence input_file kmer [kmer-file] ...\n\n"  				 "count mers in each sequence of size k from a fasta file\n"  				 "\n" -				 "  --input    -i  input fasta file to count\n" -				 "  --kmer     -k  size of mers to count\n" -				 "  --mer-file -m  a file containing a list of mers you are interested\n" -				 "                 in opening. this will enable output your results in\n" -				 "                 a sparse format \n" -				 "  --sparse   -s  output values in a sparse format. output is in the\n" -				 "                 order sequence_number, mer_index, value\n" +				 "  --input      -i  input fasta file to count\n" +				 "  --kmer       -k  size of mers to count\n" +				 "  --compliment -c  count compliment of sequences\n" +				 "  --mer-file   -m  a file containing a list of mers you are interested\n" +				 "                   in opening. this will enable output your results in\n" +				 "                   a sparse format \n" +				 "  --sparse     -s  output values in a sparse format. output is in the\n" +				 "                   order sequence_number, mer_index, value\n"  				 "\n"  				 "Report all bugs to mutantturkey@gmail.com\n"  				 "\n" @@ -55,10 +56,12 @@ int main(int argc, char **argv) {  	bool sparse = false;  	bool kmer_set = false;  	bool specific_mers = false; +	bool count_compliment = false;  	static struct option long_options[] = {  		{"input", required_argument, 0, 'i'},  		{"kmer",  required_argument, 0, 'k'}, +		{"compliment", required_argument, 0, 'c'},  		{"sparse", no_argument, 0, 's'},  		{"mer-file", required_argument, 0, 'm'},  		{"help", no_argument, 0, 'h'}, @@ -70,7 +73,7 @@ int main(int argc, char **argv) {  		int option_index = 0;  		int c = 0; -		c = getopt_long (argc, argv, "i:k:m:vsh", long_options, &option_index); +		c = getopt_long (argc, argv, "i:k:m:cvsh", long_options, &option_index);  		if (c == -1)  			break; @@ -83,6 +86,8 @@ int main(int argc, char **argv) {  				kmer = atoi(optarg);  				kmer_set = true;  				break; +			case 'c': +				count_compliment = true;  			case 's':  				sparse = true;  				break; @@ -147,7 +152,6 @@ int main(int argc, char **argv) {  	unsigned long long sequence = 0;  	while ((read = getdelim(&line, &len, '>', fh)) != -1) { -		long long i = 0;  		size_t k = 0;  		memset(counts, 0, width * sizeof(unsigned long long)); @@ -170,11 +174,17 @@ int main(int argc, char **argv) {  			seq[k] = alpha[(int)seq[k]];  		} -		for(i = 0; i < (signed long long)(seq_length - kmer + 1); i++) { -			size_t mer = num_to_index(&seq[i],kmer, width, &i); -			counts[mer]++; +		count_sequence(seq, seq_length, kmer, counts); + +		if(count_compliment) { +			for(k = 0; k < seq_length; k++) {  +				seq[k] = compliment[(int)seq[k]]; +			} +			 +			reverse_string(seq, seq_length); +			count_sequence(seq, seq_length, kmer, counts); +			  		} -		  		if(specific_mers) {  				for(k = 0; k < num_desired_indicies; k++) { diff --git a/kmer_total_count.c b/kmer_total_count.c index 6b627f2..dd29a53 100644 --- a/kmer_total_count.c +++ b/kmer_total_count.c @@ -10,13 +10,14 @@  void help() { -	printf("usage: kmer_total_count -i input_file -k kmer [-n] [-l] ...\n\n" +	printf("usage: kmer_total_count -i input_file -k kmer [-c] [-n] [-l] ...\n\n"  				 "count mers in size k from a fasta file\n"  				 "\n" -				 "  --input    -i  input fasta file to count\n" -				 "  --kmer     -k  size of mers to count\n" -				 "  --nonzero  -n  only print non-zero values\n" -				 "  --label    -l  print mer along with value\n" +				 "  --input      -i  input fasta file to count\n" +				 "  --kmer       -k  size of mers to count\n" +				 "  --compliment -c  count compliment of sequences\n" +				 "  --nonzero    -n  only print non-zero values\n" +				 "  --label      -l  print mer along with value\n"  				 "\n"  				 "Report all bugs to mutantturkey@gmail.com\n"  				 "\n" @@ -39,6 +40,7 @@ int main(int argc, char **argv) {  	bool nonzero = false;  	bool label = false;  	bool kmer_set = false; +	bool count_compliment = false;  	unsigned long long width = 0; @@ -47,6 +49,7 @@ int main(int argc, char **argv) {  	static struct option long_options[] = {  		{"input", required_argument, 0, 'i'},  		{"kmer",  required_argument, 0, 'k'}, +		{"compliment", required_argument, 0, 'c'},  		{"nonzero", no_argument, 0, 'n'},  		{"label", no_argument, 0, 'l'},  		{"help", no_argument, 0, 'h'}, @@ -58,7 +61,7 @@ int main(int argc, char **argv) {  		int option_index = 0;  		int c = 0; -		c = getopt_long (argc, argv, "i:k:nlvh", long_options, &option_index); +		c = getopt_long (argc, argv, "i:k:cnlvh", long_options, &option_index);  		if (c == -1)  			break; @@ -71,6 +74,9 @@ int main(int argc, char **argv) {  				kmer = atoi(optarg);  				kmer_set = true;  				break; +			case 'c': +				count_compliment = true; +				break;  			case 'n':  				nonzero = true;   				break; @@ -115,7 +121,7 @@ int main(int argc, char **argv) {  	width = pow_four(kmer); -	unsigned long long *counts = get_kmer_counts_from_file(fh, kmer); +	unsigned long long *counts = get_kmer_counts_from_file(fh, kmer, count_compliment);  	// If nonzero is set, only print non zeros  	if(nonzero) { | 
