about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Calvin Morrison <mutantturkey@gmail.com> 2014-03-06 20:37:04 -0500
committer Calvin Morrison <mutantturkey@gmail.com> 2014-03-06 20:37:04 -0500
commit d3a935d76bc5ee6c211184c50790e6576654ca67 (patch)
tree df5c6ab8913e6820ea5973ae1edf4641dd038369
parent 36e44b275cc40c7151bbc5c311bef4fd31d6f89c (diff)
UNVERIFIED but progress on converting quikr.m to dna-utils
-rw-r--r-- Makefile 12
-rw-r--r-- src/matlab/quikr.m 4
-rw-r--r-- src/matlab/quikrCustomTrained.m 4
-rw-r--r-- src/matlab/quikrTrain.m 9
4 files changed, 13 insertions, 16 deletions
diff --git a/Makefile b/Makefile
index c4f5827..88b698b 100644
--- a/Makefile
+++ b/Makefile
@@ -10,12 +10,12 @@ install: c
@cp -vf src/c/quikr ${DESTDIR}${PREFIX}/bin/quikr
@cp -vf src/c/multifasta_to_otu ${DESTDIR}${PREFIX}/bin/multifasta_to_otu
@cp -vf src/python/generate_kmers ${DESTDIR}${PREFIX}/bin/generate_kmers
- chmod -v 755 ${DESTDIR}${PREFIX}/bin/probabilities-by-read
- chmod -v 755 ${DESTDIR}${PREFIX}/bin/count-kmers
- chmod -v 755 ${DESTDIR}${PREFIX}/bin/quikr
- chmod -v 755 ${DESTDIR}${PREFIX}/bin/quikr_train
- chmod -v 755 ${DESTDIR}${PREFIX}/bin/multifasta_to_otu
- chmod -v 755 ${DESTDIR}${PREFIX}/bin/generate_kmers
+ chmod -v 555 ${DESTDIR}${PREFIX}/bin/probabilities-by-read
+ chmod -v 555 ${DESTDIR}${PREFIX}/bin/count-kmers
+ chmod -v 555 ${DESTDIR}${PREFIX}/bin/quikr
+ chmod -v 555 ${DESTDIR}${PREFIX}/bin/quikr_train
+ chmod -v 555 ${DESTDIR}${PREFIX}/bin/multifasta_to_otu
+ chmod -v 555 ${DESTDIR}${PREFIX}/bin/generate_kmers
@cp -vf src/c/quikr.1 ${DESTDIR}${PREFIX}/share/man/man1/quikr.1
@cp -vf src/c/quikr_train.1 ${DESTDIR}${PREFIX}/share/man/man1/quikr_train.1
@cp -vf src/c/multifasta_to_otu.1 ${DESTDIR}${PREFIX}/share/man/man1/multifasta_to_otu.1
diff --git a/src/matlab/quikr.m b/src/matlab/quikr.m
index f295d2b..aa2a271 100644
--- a/src/matlab/quikr.m
+++ b/src/matlab/quikr.m
@@ -15,9 +15,9 @@ if nargin>1
error('too many input arguments');
end
-[status, counts]=unix(['count-kmers -r 6 -1 -u ' inputfasta]); %count the 6-mers in the fasta file, in the forward direction, return the counts without labels
+[status, counts]=unix(['kmer_total_count -k 6 -i ' inputfasta]); %count the 6-mers in the fasta file, in the forward direction, return the counts without labels
if status ~= 0
- error('count-kmers failed: ensure count-kmers is in your path.');
+ error('kmer_total_count failed: ensure kmer_total_count is in your path.');
end
counts=textscan(counts,'%f'); %convert into floats
diff --git a/src/matlab/quikrCustomTrained.m b/src/matlab/quikrCustomTrained.m
index 9331e06..9a45256 100644
--- a/src/matlab/quikrCustomTrained.m
+++ b/src/matlab/quikrCustomTrained.m
@@ -15,9 +15,9 @@ if rows~=4^k
error('Wrong k-mer size for input training matrix');
end
-[status, counts]=unix([sprintf('count-kmers -r %d -1 -u ', k) ' ' inputfasta]); %count the k-mers in the fasta file, in the forward direction, return the counts without labels.
+[status, counts]=unix([sprintf('kmer_total_count -k %d', k) ' -i ' inputfasta]); %count the k-mers in the fasta file, in the forward direction, return the counts without labels.
if status ~= 0
- error('count-kmers failed: ensure count-kmers is in your path.');
+ error('kmer_total_count failed: ensure kmer_total_count is in your path.');
end
counts=textscan(counts,'%f'); %read them in as floats.
diff --git a/src/matlab/quikrTrain.m b/src/matlab/quikrTrain.m
index 86a994b..be4a85b 100644
--- a/src/matlab/quikrTrain.m
+++ b/src/matlab/quikrTrain.m
@@ -19,17 +19,14 @@ outputfilename=fullfile(pathtofile, [filename sprintf('-sensingmatrixK%d.txt',k)
%crashes when unix() returns as many entries as ./probabilities-by-read
%does (on the order of ~2*10^10).
-kmerfilename=sprintf('%dmers.txt',k); %This contains the list of 6-mers to count. In future versions this will be computed locally instead of being read in.
-
-unix(['probabilities-by-read ' sprintf('%d',k) ' ' inputfasta ' <( generate_kmers ' sprintf('%d',k) ') > ' outputfilename]); %obtain the k-mer counts of the inputfasta read-by-read
+unix(['kmer_counts_per_sequence -k ' sprintf('%d',k) ' -i ' inputfasta '>' outputfilename]);
fid=fopen(outputfilename); %open the output file
-
-%A=textscan(fid,'%f'); %get all the counts
-%A=A{:};
A=fscanf(fid,'%f');
+
mat=sparse(reshape(A,4^k,length(A)/4^k)); %form into a matrix
mat=bsxfun(@rdivide,mat,sum(mat,1)); %column-normalize
+
fclose(fid); %close file
delete(outputfilename); %delete the file