summaryrefslogtreecommitdiff
path: root/src/matlab/quikr.m
diff options
context:
space:
mode:
authorCalvin <calvin@EESI>2013-03-11 12:36:42 -0400
committerCalvin <calvin@EESI>2013-03-11 12:36:42 -0400
commit4ca6f92ceb4b2f8c504431cf56f8a6135187a61c (patch)
treebda30f1f0710d19660c04c57f31be37cf4865c50 /src/matlab/quikr.m
parentc18793c210ea44cef63db3568791cbad537bdb79 (diff)
adding matlab code
Diffstat (limited to 'src/matlab/quikr.m')
-rw-r--r--src/matlab/quikr.m52
1 files changed, 52 insertions, 0 deletions
diff --git a/src/matlab/quikr.m b/src/matlab/quikr.m
new file mode 100644
index 0000000..d24596e
--- /dev/null
+++ b/src/matlab/quikr.m
@@ -0,0 +1,52 @@
+function xstar = quikr(inputfasta)
+%xstar=quikr('path/to/input/fasta/file') returns the estimated frequencies
+%of bacteria present when given an input FASTA file of amplicon (454)
+%reads. A k-mer based, L1-regularized, sparsity promoting algorithm is
+%utilized. This is the default implementation of Quikr, here lambda=10,000, k-mer
+%size is 6, and we use the trainset7_112011 database from RDP to estimate
+%the present species. In practice, reconstruction is accurate only down to
+%the genus level (not species or strain).
+%The program ./count-linux or ./count-osx needs to be placed in the same
+%directory as this script. In Linux, you might need to: chmod a+rx count.
+%Also the inputfasta needs to be a standard FASTA file. The input FASTA file must have reads all in the same
+%orientation, so as a preprocessing step, make sure everything is in the
+%forward (+) orientation.
+if nargin>1
+ error('too many input arguments');
+end
+if (isunix && not(ismac)) %If linux, use ./count-linux
+ [status, counts]=unix(['./count-linux -r 6 -1 -u ' inputfasta]); %count the 6-mers in the fasta file, in the forward direction, return the counts without labels
+ if status ~= 0
+ error('./count-linux failed: ensure count-linux is an executable. Try chmod a+rx count-linux. Be sure matlab/octave is in the same directory as count-linux');
+ end
+elseif ismac %If mac, then use ./count-osx
+ [status, counts]=unix(['./count-osx -r 6 -1 -u ' inputfasta]); %count the 6-mers in the fasta file, in the forward direction, return the counts without labels
+ if status ~= 0
+ error('./count-osx failed: ensure count-linux is an executable. Try chmod a+rx count-osx. Be sure matlab/octave is in the same directory as count-osx');
+ end
+elseif ispc
+ error('Windows is not yet supported');
+end
+counts=textscan(counts,'%f'); %convert into floats
+counts=counts{:}; %make into a vector
+counts=counts/sum(counts); %form a probability vector from the counts
+lambda=10000; %this is the default lambda value (see the paper), and is used in the formation of trainset7_112011N6Aaux.mat
+yaux=[0;lambda*counts]; % form the sample vector
+load('trainset7_112011N6Aaux.mat') %load the 6-mer sensing matrix...I need to rename this so the variable is just Aaux.
+if not(exist('Aaux2')) %make sure Aaux2 loaded
+ error('The default training database trainset7_112011N6Aaux.mat failed to load. Be sure it exists in the Quikr folder or current directory');
+end
+%based on RDP's trainset 7 from 11/2011
+isOctave = exist('OCTAVE_VERSION') ~= 0; %check to see if running Octave or Matlab
+if isOctave %if octave, need to manually read in sparse-matrix format
+ n=length(Aaux2.jc)-1;
+ ii=repelems(1:n,[1:n; diff(Aaux2.jc)]);
+ Aaux=sparse(Aaux2.ir+1,ii,Aaux2.data);
+else
+ Aaux=Aaux2; %fix the names
+ clear Aaux2;
+end %Matlab automatically reads it in correctly, just need to rename it
+warning off
+xstar=lsqnonneg(Aaux,yaux); %perform the nonnegative least squares
+warning on
+xstar=xstar/sum(xstar); %transform the output into a probability vector. Note this vector is on the same basis as Aaux, so the entries correspond to sequences in trainset7_112011.fa \ No newline at end of file