aboutsummaryrefslogtreecommitdiff
path: root/kmer_frequency_per_sequence.c
blob: 02a221b99655fcbd6a6184d79473c0bfd7e0c4eb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
// Copyright 2013 Calvin Morrison
#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "kmer_utils.h"

// File-scope counter, initialised to zero. Nothing in this file reads or
// writes it; presumably it is consumed by the helpers declared in
// kmer_utils.h -- TODO confirm before removing or making it static.
unsigned long position = 0;

// Byte -> nucleotide code lookup table, indexed by the (unsigned) byte value.
// With ASCII input the non-5 entries are:
//   'A' (65) / 'a' (97)  -> 0
//   'C' (67) / 'c' (99)  -> 1
//   'G' (71) / 'g' (103) -> 2
//   'T' (84) / 't' (116) -> 3
// Every other byte value maps to 5, which marks a character that is not one
// of the four bases. Index with an unsigned char: a plain (possibly signed)
// char would produce a negative index for bytes >= 0x80.
const unsigned char alpha[256] = 
{5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 1, 5, 5, 5, 2,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5};

// Print the k-mer frequency vector of every sequence line in a file.
//
// Usage: <program> <filename> <kmer>
//
//   argv[1]  path of the input file; lines starting with '>' are skipped.
//   argv[2]  k-mer size, a positive decimal integer.
//
// For each remaining line that holds at least `kmer` characters (trailing
// '\n' / '\r' excluded) one output line is written to stdout: 4^kmer
// tab-separated values, each being the count of that k-mer index divided by
// the sum of the counts in slots 0 .. 4^kmer - 1. If that sum is zero the
// division is 0/0 and the values print as NaN on IEEE-754 platforms.
//
// Returns EXIT_SUCCESS on success; exits with EXIT_FAILURE on a usage error,
// an invalid or too-large kmer, a file that cannot be opened or read, or an
// allocation failure.
int main(int argc, char **argv) {

  if (argc != 3) {
    fprintf(stderr, "Please supply a filename and a kmer\n");
    exit(EXIT_FAILURE);
  }

  // Parse the k-mer size from the command line instead of silently ignoring it.
  errno = 0;
  char *end = NULL;
  long kmer = strtol(argv[2], &end, 10);
  if (errno != 0 || end == argv[2] || *end != '\0' || kmer < 1) {
    fprintf(stderr, "Error: invalid kmer - %s\n", argv[2]);
    exit(EXIT_FAILURE);
  }

  // width = 4^kmer, computed in integer arithmetic with an overflow guard.
  unsigned long width = 1;
  for (long k = 0; k < kmer; k++) {
    if (width > (unsigned long)-1 / 4) {
      fprintf(stderr, "Error: kmer %ld is too large\n", kmer);
      exit(EXIT_FAILURE);
    }
    width *= 4;
  }

  FILE *fh = fopen(argv[1], "r");
  if (fh == NULL) {
    fprintf(stderr, "Error opening %s - %s\n", argv[1], strerror(errno));
    exit(EXIT_FAILURE);
  }

  // One extra slot at index `width`: that value is handed to num_to_index()
  // as its third argument, presumably the index it returns for a k-mer that
  // contains a non-ACGT character -- TODO confirm against kmer_utils.
  // calloc checks the size multiplication for overflow and zeroes the buffer.
  unsigned long long *counts = calloc(width + 1, sizeof *counts);
  if (counts == NULL) {
    fprintf(stderr, "Error: could not allocate the count table for kmer %ld\n", kmer);
    fclose(fh);
    exit(EXIT_FAILURE);
  }

  char *line = NULL;
  size_t len = 0;
  ssize_t nread;

  while ((nread = getline(&line, &len, fh)) != -1) {
    if (line[0] == '>')
      continue;

    // Drop the line terminator so it never takes part in a k-mer.
    size_t seq_len = (size_t)nread;
    while (seq_len > 0 &&
           (line[seq_len - 1] == '\n' || line[seq_len - 1] == '\r'))
      seq_len--;

    if (seq_len < (size_t)kmer)
      continue;

    size_t n_kmers = seq_len - (size_t)kmer + 1;

    // The table is reused across lines, so reset it for every sequence.
    memset(counts, 0, (width + 1) * sizeof *counts);

    // Translate the whole sequence in place. Every k-mer window, including
    // the last one, must consist of alpha codes; the cast keeps the table
    // index non-negative where plain char is signed.
    for (size_t i = 0; i < seq_len; i++)
      line[i] = (char)alpha[(unsigned char)line[i]];

    for (size_t i = 0; i < n_kmers; i++)
      counts[num_to_index(&line[i], kmer, width)]++;

    // Slot `width` is deliberately left out of the total.
    unsigned long long total = 0;
    for (unsigned long i = 0; i < width; i++)
      total += counts[i];

    for (unsigned long i = 0; i < width - 1; i++)
      printf("%.12f\t", (double)counts[i] / (double)total);
    printf("%.12f\n", (double)counts[width - 1] / (double)total);
  }

  // getline() returns -1 for both end-of-file and read errors; tell them apart.
  int read_failed = ferror(fh);
  if (read_failed)
    fprintf(stderr, "Error reading %s\n", argv[1]);

  free(counts);
  free(line);
  fclose(fh);

  return read_failed ? EXIT_FAILURE : EXIT_SUCCESS;
}