aboutsummaryrefslogtreecommitdiff
path: root/kmer_utils.h
blob: f5087f2016fc9b4f089248301375b71814f32c6d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
// dna-util's function library.
#include <unordered_map>
using namespace std;

// Kmer functions
// NOTE(review): none of these three functions is documented in this header;
// the summaries below are inferred from names and signatures only — confirm
// against the definitions.
//
// Presumably converts the `length` characters in `str` to numeric base codes
// in place (`str` is non-const and nothing is returned) — TODO confirm.
void convert_kmer_to_num(char *str, const unsigned long length);
// Presumably computes the index of the k-mer of size `kmer` read from `str`.
// The roles of `error_pos` and of the `current_position` pointer are not
// visible from this header — TODO confirm.
unsigned long num_to_index(const char *str, const int kmer, const long error_pos, long long *current_position);
// Presumably the inverse mapping: produces the text of the k-mer of size
// `kmer` that corresponds to `index`.
// NOTE(review): ownership of the returned char* is not stated here — confirm
// whether the caller is expected to free it.
char *index_to_kmer(unsigned long long index, long kmer);

// Utility functions


// Strip the character `c` out of the char array `s` of length `len`.
// NOTE(review): the meaning of the size_t return value is not stated in this
// header — presumably the length of `s` after stripping; confirm in the
// definition.
size_t strnstrip(char *s, int c, size_t len);

// Reverse the char array `s` of length `len`.
// NOTE(review): the non-const pointer and void return suggest the reversal
// happens in place — confirm in the definition.
void reverse_string(char *s, size_t len);

// Quickly calculate 4^x.
// NOTE(review): with a 64-bit unsigned long long, 4^x no longer fits once
// x >= 32; how the function behaves for such x is not specified here.
unsigned long long pow_four(unsigned long long x);

// Check whether `ptr` is null; a helper for treating NULL return values as
// errors. If `ptr` is null, calls strerror and quits. `error` is an optional
// char array holding an error message to output.
// NOTE(review): whether a null `error` is accepted (the "optional" case) is
// not visible from this header — confirm in the definition.
void check_null_ptr(void *ptr, const char *error);

// Presumably counts the k-mers of size `kmer` found in the `seq_length`
// characters of `seq`, accumulating into `counts` — inferred from the name
// and parameters, TODO confirm.
// NOTE(review): only the template declaration is visible in this header; the
// definition and the set of supported `array_type` instantiations live
// elsewhere — confirm which types are instantiated.
template <typename array_type>
void count_sequence(const char *seq, const size_t seq_length, const unsigned int kmer, array_type *counts);

// Variables
// Pass-through "hash" functor used as the hasher of kmer_map: the size_t key
// is returned unchanged, with no mixing applied.
//
// Declared as a named struct instead of `typedef struct { ... } name;`
// because an unnamed class that contains member functions may not be given
// a typedef name for linkage purposes (C++20 / P1766R1; clang diagnoses it).
// The type name and call signature are unchanged for existing users.
struct kmer_noHash_hash {
	// Returns `k` itself as the hash value.
	size_t operator() (const size_t &k) const {
		return k;
	}
};

// Key-equality functor used by kmer_map: two size_t keys are equal exactly
// when they compare equal with ==.
//
// Declared as a named struct instead of `typedef struct { ... } name;`
// because an unnamed class that contains member functions may not be given
// a typedef name for linkage purposes (C++20 / P1766R1; clang diagnoses it).
// The type name and call signature are unchanged for existing users.
struct kmer_eq {
	// Returns true when `x` and `y` hold the same value.
	bool operator() (const size_t &x, const size_t &y) const {
		return x == y;
	}
};

// Hash map from a size_t key to an unsigned long long value, using the
// pass-through hasher and the equality functor declared in this header.
using kmer_map = std::unordered_map<size_t, unsigned long long, kmer_noHash_hash, kmer_eq>;

// Lookup table indexed by a character's unsigned byte value.
// The four nucleotide letters map to codes 0..3 in either case
// (A/a -> 0, C/c -> 1, G/g -> 2, T/t -> 3); every other byte maps to 5.
// Laid out 16 entries per row so each row is one hexadecimal "decade".
const unsigned char alpha[256] = {
	// 0x00 - 0x0F
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0x10 - 0x1F
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0x20 - 0x2F
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0x30 - 0x3F
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0x40 - 0x4F: '@', 'A'..'O'  (A = 0, C = 1, G = 2)
	5, 0, 5, 1, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0x50 - 0x5F: 'P'..'_'       (T = 3)
	5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0x60 - 0x6F: '`', 'a'..'o'  (a = 0, c = 1, g = 2)
	5, 0, 5, 1, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0x70 - 0x7F: 'p'..DEL       (t = 3)
	5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0x80 - 0x8F
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0x90 - 0x9F
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0xA0 - 0xAF
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0xB0 - 0xBF
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0xC0 - 0xCF
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0xD0 - 0xDF
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0xE0 - 0xEF
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// 0xF0 - 0xFF
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
};
// Maps a numeric base code (0..3) back to its uppercase nucleotide letter.
const char reverse_alpha[4] = {
	'A',  // code 0
	'C',  // code 1
	'G',  // code 2
	'T'   // code 3
};

													// complements
													// A  C  G  T  E  E
													// T  G  C  A  E  E
const char compliment[6] = {
	3,  // index 0 (A) -> 3 (T)
	2,  // index 1 (C) -> 2 (G)
	1,  // index 2 (G) -> 1 (C)
	0,  // index 3 (T) -> 0 (A)
	5,  // index 4 (E) -> 5
	5   // index 5 (E) -> 5
};

// Open the file named by the char array `fn` and try to parse one mer per
// line, each of size `kmer`, storing the indices of those mers through the
// `arr` pointer.
// NOTE(review): the meaning of `width` and of the returned value is not
// documented in this header — confirm in the definition.
// NOTE(review): this header also declares a second
// load_specific_mers_from_file that takes a non-const `char *fn` and returns
// size_t. The two are distinct overloads — confirm both are intended and
// defined.
unsigned long long load_specific_mers_from_file(const char *fn, unsigned int kmer, size_t width, size_t *arr);

// Presumably count k-mers of size `kmer` read from the file named `fn`
// (first form) or from the already-open stream `fh` (second form) and return
// a pointer to the resulting array of counts. `count_compliment` presumably
// controls whether complement k-mers are counted as well. Inferred from the
// names only — TODO confirm, including who owns the returned array.
// NOTE(review): FILE is used here, but this header itself only includes
// <unordered_map>; includers presumably need <cstdio> in scope first.
unsigned long long * get_continuous_kmer_counts_from_filename(const char *fn, const unsigned int kmer, const bool count_compliment);
unsigned long long * get_continuous_kmer_counts_from_file(FILE *fh, const unsigned int kmer, const bool count_compliment);


// Presumably counts k-mers of size `kmer` read from the open stream `fh`
// into the caller-supplied `counts` container and returns a pointer to it —
// inferred from the signature, TODO confirm (including what happens when
// `counts` is null).
// NOTE(review): only the template declaration is visible in this header; the
// supported `array_type` instantiations are defined elsewhere.
template <typename array_type>
array_type * get_kmer_counts_from_file(array_type *counts, FILE *fh, const unsigned int kmer, const bool count_compliment);

// Two overloads selected by the type of `counts`: a kmer_map or a plain
// unsigned long long array. Presumably each counts the k-mers of size `kmer`
// in the file named `fn` into `counts` and returns a pointer to it —
// inferred from the signatures, TODO confirm.
kmer_map           *get_kmer_counts_from_filename(kmer_map           *counts, const char *fn, const unsigned int kmer, const bool count_compliment);
unsigned long long *get_kmer_counts_from_filename(unsigned long long *counts, const char *fn, const unsigned int kmer, const bool count_compliment);


// NOTE(review): same name and parameter list as the
// load_specific_mers_from_file declared earlier in this header, except that
// `fn` is a non-const char* and the return type is size_t instead of
// unsigned long long. As written this is a separate overload, chosen only
// for non-const char* arguments — confirm whether it is defined anywhere or
// is a leftover of an older signature.
size_t load_specific_mers_from_file(char *fn, unsigned int kmer, size_t width, size_t *arr);

// Print functions.
// Two overloads that print the k-mer counts held in `counts`, either a plain
// unsigned long long array or a kmer_map, for k-mers of size `kmer`.
// Presumably `label` adds the k-mer text next to each count and `nonzero`
// limits the output to non-zero counts — inferred from the parameter names,
// TODO confirm.
void print_kmer(unsigned long long *counts, bool label, bool nonzero, unsigned int kmer);
void print_kmer(kmer_map *counts, bool label, bool nonzero, unsigned int kmer);