From e4ec2d985021f8ca95a76ef8d0ca5088c0d64e5d Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Wed, 9 Apr 2014 17:12:34 -0400 Subject: kmer utils too --- kmer_utils.h | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/kmer_utils.h b/kmer_utils.h index a727da8..9fcc03c 100644 --- a/kmer_utils.h +++ b/kmer_utils.h @@ -1,5 +1,9 @@ // dna-util's function library. +#include <unordered_map> +using namespace std; +// Kmer functions +void convert_kmer_to_num(char *str, const unsigned long length); unsigned long num_to_index(const char *str, const int kmer, const long error_pos, long long *current_position); char *index_to_kmer(unsigned long long index, long kmer); @@ -15,31 +19,52 @@ void reverse_string(char *s, size_t len); // quicky calculate 4^x unsigned long long pow_four(unsigned long long x); -// check if pointer is null. a helper for dealing with NULL +// check if pointer is null. a helper for dealing with NULL // return values as errors. 
Calls strerror and quits if // ptr is null, optionally takes *error char array as // a error to output void check_null_ptr(void *ptr, const char *error); -void count_sequence(const char *seq, const size_t seq_length, const unsigned int kmer, unsigned long long *counts); +template <typename array_type> +void count_sequence(const char *seq, const size_t seq_length, const unsigned int kmer, array_type *counts); // Variables -// -const unsigned char alpha[256]; -const unsigned char reverse_alpha[4]; -const unsigned char compliment[5]; +typedef struct { + size_t operator() (const size_t &k) const { + return k; + } +} kmer_noHash_hash; +typedef struct { + bool operator() (const size_t &x, const size_t &y) const { + return x == y; + } +} kmer_eq; -// file loading functions +typedef unordered_map<size_t, unsigned long long, kmer_noHash_hash, kmer_eq> kmer_map; + +unsigned char alpha[256]; +unsigned char reverse_alpha[4]; +unsigned char compliment[5]; // open file from filename in char array *fn, and try and parse in one mer per // line, of size kmer, and store the indicies of those mers in the *arr // pointer; unsigned long long load_specific_mers_from_file(const char *fn, unsigned int kmer, size_t width, size_t *arr); -unsigned long long * get_kmer_counts_from_filename(const char *fn, const unsigned int kmer, const bool count_compliment); -unsigned long long * get_kmer_counts_from_file(FILE *fh, const int kmer, const bool count_compliment); - unsigned long long * get_continuous_kmer_counts_from_filename(const char *fn, const unsigned int kmer, const bool count_compliment); unsigned long long * get_continuous_kmer_counts_from_file(FILE *fh, const unsigned int kmer, const bool count_compliment); + +template <typename array_type> +array_type * get_kmer_counts_from_file(array_type *counts, FILE *fh, const unsigned int kmer, const bool count_compliment); + +kmer_map *get_kmer_counts_from_filename(kmer_map *counts, const char *fn, const unsigned int kmer, const bool count_compliment); +unsigned long long *get_kmer_counts_from_filename(unsigned long long *counts, const char 
*fn, const unsigned int kmer, const bool count_compliment); + + +size_t load_specific_mers_from_file(char *fn, unsigned int kmer, size_t width, size_t *arr); + +// print functions +void print_kmer(unsigned long long *counts, bool label, bool nonzero, unsigned int kmer); +void print_kmer(kmer_map *counts, bool label, bool nonzero, unsigned int kmer); -- cgit v1.2.3