aboutsummaryrefslogtreecommitdiff
path: root/kmer_counts_per_sequence.c
blob: 6ff815d232a85745bc5e77616a4a0ad11a34c2f5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
// Copyright 2013 Calvin Morrison
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "kmer_utils.h"

// File-scope counter, zero-initialised. Nothing in this file reads or writes it.
// NOTE(review): it has external linkage, so another translation unit may
// reference it — confirm before removing or making it static.
unsigned long position = 0;
// Print one row of k-mer counts for every sequence line of a file.
//
// Usage: <program> <filename> <kmer>
//
//   argv[1]  path of the input file; lines beginning with '>' are skipped
//   argv[2]  k-mer length, a positive decimal integer small enough that
//            4^k fits in an unsigned long
//
// For each remaining line that holds at least k characters, the count table
// is reset, every k-mer of the line is tallied, and the 4^k counts are written
// to stdout as one tab-separated row. Lines shorter than k produce no output.
//
// Returns EXIT_SUCCESS on completion; exits with EXIT_FAILURE on bad
// arguments, an unopenable file, or allocation failure.
int main(int argc, char **argv) {

	char *line = NULL;
	size_t len = 0;
	ssize_t read;

	if(argc != 3) {
		printf("Please supply a filename and a kmer\n");
		exit(EXIT_FAILURE);
	}

	FILE *fh = fopen(argv[1], "r" );
	if(fh == NULL) {
		fprintf(stderr, "Error opening %s - %s\n", argv[1], strerror(errno));
		exit(EXIT_FAILURE);
	}

	// strtol (unlike atoi) lets us reject trailing garbage and out-of-range input.
	char *end = NULL;
	errno = 0;
	const long kmer = strtol(argv[2], &end, 10);

	// width is 1 << (2 * kmer); the shift must stay below the bit width of
	// unsigned long or it is undefined. Assuming 8 bits per byte can only
	// make this bound stricter, never looser.
	const long max_kmer = (long)(sizeof(unsigned long) * 8 / 2) - 1;

	if(end == argv[2] || *end != '\0' || errno == ERANGE || kmer <= 0 || kmer > max_kmer) {
		fprintf(stderr, "Error: invalid kmer.\n");
		fclose(fh);
		exit(EXIT_FAILURE);
	}

	const unsigned long width = (unsigned long)1 << (kmer * 2);

	// One slot beyond the 4^k real k-mers: kmer_num_to_index is handed `width`
	// as its third argument, presumably the index it returns for k-mers with
	// invalid characters — confirm against kmer_utils.h.
	// calloc checks the element-count * size multiplication for overflow.
	unsigned long long *counts = calloc(width + 1, sizeof *counts);
	if(counts == NULL) {
		fprintf(stderr, "Error: could not allocate memory for counts.\n");
		fclose(fh);
		exit(EXIT_FAILURE);
	}

	while ((read = getline(&line, &len, fh)) != -1) {
		// getline keeps the line terminator; exclude it so only sequence
		// characters are counted and a final line without '\n' is handled
		// the same way as every other line.
		size_t seq_len = (size_t)read;
		if(seq_len > 0 && line[seq_len - 1] == '\n')
			seq_len--;

		// skip header lines and sequences too short to hold a single k-mer
		if(line[0] == '>' || seq_len < (size_t)kmer)
			continue;

		// reset our count matrix to zero, including the extra trailing slot
		memset(counts, 0, (width + 1) * sizeof *counts);

		// Translate the whole sequence, not just the k-mer start positions:
		// the last k-mers read up to the final character of the line.
		// Indexing through unsigned char avoids a negative table index for
		// bytes >= 0x80 on platforms where char is signed.
		for(size_t i = 0; i < seq_len; i++)
			line[i] = kmer_alpha[(unsigned char)line[i]];

		const size_t num_kmers = seq_len - (size_t)kmer + 1;
		for(size_t i = 0; i < num_kmers; i++)
			counts[kmer_num_to_index(&line[i],kmer, width)]++;

		// unsigned long index: width can exceed UINT_MAX for large k
		for(unsigned long i = 0; i < width - 1; i++)
			printf("%llu\t", counts[i]);
		printf("%llu\n", counts[width - 1]);
	}

	free(counts);
	free(line);
	fclose(fh);

	return EXIT_SUCCESS;
}