diff options
author | Calvin Morrison <mutantturkey@gmail.com> | 2013-09-10 23:43:56 -0400 |
---|---|---|
committer | Calvin Morrison <mutantturkey@gmail.com> | 2013-09-10 23:43:56 -0400 |
commit | 5b53a787e4d1cefa5660c891791cd0df4a8fd89c (patch) | |
tree | 005983f5d068409c9be44e614b4c1722cde37dd5 /kmer_utils.c |
Initial commit of some kmer utilities.
there are two utilities included.
one is kmer_frequency_per_sequence,
which outputs a (m x n) matrix where m is the sequence, and n is the
frequency of that nmer to occur in the given sequence.
the other tool is kmer_total_count, which counts kmers for the total
file, not just one sequence
Diffstat (limited to 'kmer_utils.c')
-rw-r--r-- | kmer_utils.c | 54 |
1 files changed, 54 insertions, 0 deletions
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

// Convert the first `kmer` characters of a nucleotide sequence into a kmer
// index. Each base is treated as one radix-4 digit (A = 0, C = 1, G = 2,
// T = 3, case-insensitive), most significant digit first, so "ACGT" with
// kmer = 4 yields 0*64 + 1*16 + 2*4 + 3 = 27.
//
// The value is accumulated directly instead of building a digit string and
// handing it to strtol(): the previous temporary buffer was never
// NUL-terminated, so strtol() could read past its end.
//
// Arguments:
//	char *str      - character array holding at least `kmer` readable chars
//	                 (a NUL inside the first `kmer` chars counts as a
//	                 non-ACGT character)
//	long kmer      - number of leading characters to convert
//	long error_pos - index to return when a non-ACGT character is found
//
// Returns:
//	the radix-4 index of the kmer, `error_pos` if any of the first `kmer`
//	characters is not one of ACGT/acgt, 0 when `kmer` <= 0, and LONG_MAX
//	when the index does not fit in a long (the saturation strtol() applies).
//
long convert_kmer_to_index(char *str, long kmer, long error_pos) {
	long index = 0;
	int saturated = 0;

	for (long i = 0; i < kmer; i++) {
		long digit;

		switch (str[i]) {
		case 'a':
		case 'A':
			digit = 0;
			break;
		case 'c':
		case 'C':
			digit = 1;
			break;
		case 'g':
		case 'G':
			digit = 2;
			break;
		case 't':
		case 'T':
			digit = 3;
			break;
		default:
			return error_pos;
		}

		// Keep scanning after saturation so that an invalid character
		// later in the kmer is still reported through error_pos.
		if (saturated) {
			continue;
		}

		// Check before multiplying: signed overflow is undefined behavior.
		if (index > (LONG_MAX - digit) / 4) {
			index = LONG_MAX;
			saturated = 1;
		} else {
			index = index * 4 + digit;
		}
	}

	return index;
}