about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- kmer_frequency_per_sequence.c | 20
-rw-r--r-- kmer_total_count.c | 34
-rw-r--r-- kmer_total_count.h | 2
-rw-r--r-- kmer_utils.c | 7
4 files changed, 44 insertions, 19 deletions
diff --git a/kmer_frequency_per_sequence.c b/kmer_frequency_per_sequence.c
index aa18dd6..02a221b 100644
--- a/kmer_frequency_per_sequence.c
+++ b/kmer_frequency_per_sequence.c
@@ -7,7 +7,19 @@
#include "kmer_utils.h"
-long position = 0;
+unsigned long position = 0;
+
+const unsigned char alpha[256] =
+{5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5};
int main(int argc, char **argv) {
@@ -31,14 +43,16 @@ int main(int argc, char **argv) {
width = (int)pow(4, kmer);
while ((read = getline(&line, &len, fh)) != -1) {
- if(line[0] != '>' && read > kmer) {
+ if(line[0] != '>' && (read > kmer)) {
unsigned int i = 0;
unsigned long long *counts = malloc((width+ 1) * sizeof(unsigned long long));
if(counts == NULL)
exit(EXIT_FAILURE);
- convert_kmer_to_num(line, read);
+ for(i = 0; i < read - kmer; i++) {
+ line[i] = alpha[line[i]];
+ }
for(i = 0; i < read - kmer; i++) {
counts[num_to_index(&line[i],kmer, width)]++;
diff --git a/kmer_total_count.c b/kmer_total_count.c
index 4fadf03..113c979 100644
--- a/kmer_total_count.c
+++ b/kmer_total_count.c
@@ -7,7 +7,19 @@
#include "kmer_utils.h"
-long position = 0;
+unsigned long position = 0;
+
+const unsigned char alpha[256] =
+{5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5};
int main(int argc, char **argv) {
@@ -16,14 +28,12 @@ int main(int argc, char **argv) {
ssize_t read;
long i = 0;
- unsigned long long *counts;
-
if(argc != 3) {
printf("Please supply a filename and a kmer\n");
exit(EXIT_FAILURE);
}
- FILE *fh = fopen(argv[1], "r");
+ FILE * const fh = fopen(argv[1], "r");
if(fh == NULL) {
fprintf(stderr, "Error opening %s - %s\n", argv[1], strerror(errno));
exit(EXIT_FAILURE);
@@ -33,16 +43,20 @@ int main(int argc, char **argv) {
const unsigned int kmer = atoi(argv[2]);
// width is 4^kmer
- const unsigned long width = (int)pow(4, kmer);
+ const unsigned long width = pow(4, kmer);
// malloc our counts matrix
- counts = malloc((width+ 1) * sizeof(unsigned long long));
+ unsigned long long * const counts = malloc((width+ 1) * sizeof(unsigned long long));
+
if(counts == NULL)
exit(EXIT_FAILURE);
-
+
while ((read = getline(&line, &len, fh)) != -1) {
if(line[0] != '>' && read > kmer) {
- convert_kmer_to_num(line, read);
+
+ for(i = 0; i < read; i++) {
+ line[i] = alpha[line[i]];
+ }
for(position = 0; position < (read - kmer); position++) {
counts[num_to_index(&line[position],kmer, width)]++;
@@ -50,8 +64,8 @@ int main(int argc, char **argv) {
}
}
- for(i = 0; i < (unsigned)width; i++)
- printf("%llu\n", counts[i]);
+ for(i = 0; i < (unsigned)width; i=i+4)
+ printf("%llu\n%llu\n%llu\n%llu\n", counts[i], counts[i+1], counts[i+2], counts[i+3]);
return EXIT_SUCCESS;
diff --git a/kmer_total_count.h b/kmer_total_count.h
index e782906..0c3d12d 100644
--- a/kmer_total_count.h
+++ b/kmer_total_count.h
@@ -1 +1 @@
-extern long position;
+extern unsigned long position;
diff --git a/kmer_utils.c b/kmer_utils.c
index f4f8d2f..a1ff9b2 100644
--- a/kmer_utils.c
+++ b/kmer_utils.c
@@ -2,7 +2,7 @@
// convert a string of k-mer size base-4 values into a
// base-10 index
-unsigned long num_to_index(const char *str, const int kmer, const long error_pos) {
+inline unsigned long num_to_index(const char *str, const int kmer, const long error_pos) {
int i = 0;
unsigned long out = 0;
@@ -17,7 +17,6 @@ unsigned long num_to_index(const char *str, const int kmer, const long error_pos
return error_pos;
}
-
out += str[i] * multiply;
multiply = multiply << 2;
}
@@ -25,9 +24,6 @@ unsigned long num_to_index(const char *str, const int kmer, const long error_pos
return out;
}
-// replaces values in a char array of ACGT's and others with
-// values that correspond to their base 4 value to be used in
-// num_to_index.
void convert_kmer_to_num(char *str, const unsigned long length) {
unsigned long i = 0;
@@ -56,3 +52,4 @@ void convert_kmer_to_num(char *str, const unsigned long length) {
}
}
+