From d3a935d76bc5ee6c211184c50790e6576654ca67 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Thu, 6 Mar 2014 20:37:04 -0500 Subject: UNVERIFIED but progress on converting quikr.m to dna-utils --- src/matlab/quikr.m | 4 ++-- src/matlab/quikrCustomTrained.m | 4 ++-- src/matlab/quikrTrain.m | 9 +++------ 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'src/matlab') diff --git a/src/matlab/quikr.m b/src/matlab/quikr.m index f295d2b..aa2a271 100644 --- a/src/matlab/quikr.m +++ b/src/matlab/quikr.m @@ -15,9 +15,9 @@ if nargin>1 error('too many input arguments'); end -[status, counts]=unix(['count-kmers -r 6 -1 -u ' inputfasta]); %count the 6-mers in the fasta file, in the forward direction, return the counts without labels +[status, counts]=unix(['kmer_total_count -k 6 -i ' inputfasta]); %count the 6-mers in the fasta file, in the forward direction, return the counts without labels if status ~= 0 - error('count-kmers failed: ensure count-kmers is in your path.'); + error('kmer_total_count failed: ensure kmer_total_count is in your path.'); end counts=textscan(counts,'%f'); %convert into floats diff --git a/src/matlab/quikrCustomTrained.m b/src/matlab/quikrCustomTrained.m index 9331e06..9a45256 100644 --- a/src/matlab/quikrCustomTrained.m +++ b/src/matlab/quikrCustomTrained.m @@ -15,9 +15,9 @@ if rows~=4^k error('Wrong k-mer size for input training matrix'); end -[status, counts]=unix([sprintf('count-kmers -r %d -1 -u ', k) ' ' inputfasta]); %count the k-mers in the fasta file, in the forward direction, return the counts without labels. +[status, counts]=unix([sprintf('kmer_total_count -k %d', k) ' -i ' inputfasta]); %count the k-mers in the fasta file, in the forward direction, return the counts without labels. if status ~= 0 - error('count-kmers failed: ensure count-kmers is in your path.'); + error('kmer_total_count failed: ensure kmer_total_count is in your path.'); end counts=textscan(counts,'%f'); %read them in as floats. diff --git a/src/matlab/quikrTrain.m b/src/matlab/quikrTrain.m index 86a994b..be4a85b 100644 --- a/src/matlab/quikrTrain.m +++ b/src/matlab/quikrTrain.m @@ -19,17 +19,14 @@ outputfilename=fullfile(pathtofile, [filename sprintf('-sensingmatrixK%d.txt',k) %crashes when unix() returns as many entries as ./probabilities-by-read %does (on the order of ~2*10^10). -kmerfilename=sprintf('%dmers.txt',k); %This contains the list of 6-mers to count. In future versions this will be computed locally instead of being read in. - -unix(['probabilities-by-read ' sprintf('%d',k) ' ' inputfasta ' <( generate_kmers ' sprintf('%d',k) ') > ' outputfilename]); %obtain the k-mer counts of the inputfasta read-by-read +unix(['kmer_counts_per_sequence -k ' sprintf('%d',k) ' -i ' inputfasta '>' outputfilename]); fid=fopen(outputfilename); %open the output file - -%A=textscan(fid,'%f'); %get all the counts -%A=A{:}; A=fscanf(fid,'%f'); + mat=sparse(reshape(A,4^k,length(A)/4^k)); %form into a matrix mat=bsxfun(@rdivide,mat,sum(mat,1)); %column-normalize + fclose(fid); %close file delete(outputfilename); %delete the file -- cgit v1.2.3