From 0594196bd9f9980e78d5560f6b16a756ad462e79 Mon Sep 17 00:00:00 2001 From: Calvin Date: Fri, 3 May 2013 16:33:46 -0400 Subject: added a contributor --- .../multifasta2otu/multifasta2otutable_rdp.m | 105 +++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 src/matlab/multifasta2otu/multifasta2otutable_rdp.m (limited to 'src/matlab/multifasta2otu/multifasta2otutable_rdp.m') diff --git a/src/matlab/multifasta2otu/multifasta2otutable_rdp.m b/src/matlab/multifasta2otu/multifasta2otutable_rdp.m new file mode 100644 index 0000000..f3225f7 --- /dev/null +++ b/src/matlab/multifasta2otu/multifasta2otutable_rdp.m @@ -0,0 +1,105 @@ +%This is an example of how to run Quikr on the default RDP_7 training set, trainset7_112011.fa + +%make sure Matlab/Octave is in your path +%cd /path/to/Quikr + +%User-defined variables +input_directory='../../separated_samples'; %path to input directory of samples +output_directory='quikr_results_32'; %path to where want output files to go +otu_table_name='rdp_otu_table.txt'; %name of output otu_table filename +%Do not have to define trainingdatabasename here + +mkdir([output_directory]) +thedirs=dir([input_directory]); +thetime=zeros(numel(thedirs),1); +names={}; + +species=containers.Map; + +tic() + +[headers,~]=fastaread('trainset7_112011.fa'); %read in the training database + +i=0; +%for numdirs=3:6 +for numdirs=3:numel(thedirs) +i=i+1; +[num2str(i) ' out of ' num2str(numel(thedirs)-2)] +fastafilename=[input_directory '/' thedirs(numdirs).name]; +[loadfasta,~]=fastaread(fastafilename); +numreads=numel(loadfasta); +xstar=quikr(fastafilename); %this will give the predicted reconstruction frequencies + +nonzeroentries=find(xstar); %get the indicies of the sequences quikr predicts are in your sample +proportionscell=num2cell(xstar(nonzeroentries)); %convert the concentrations into a cell array +namescell=headers(nonzeroentries); %Get the names of the sequences +namesandproportions={namescell{:}; proportionscell{:}}; %This cell array contains the (unsorted) names of the reconstructed sequences and their concentrations (in the first and second columns respectively) + +[a cols]=size(namesandproportions); +amount=zeros(cols,1); +for j=1:cols + names{j}=namesandproportions{1,j}; + amount(j)=namesandproportions{2,j}; + if isKey(species,names{j}) + temp=species(names{j}); + temp(i)=round(amount(j).*numreads); + species(names{j})=temp; + else + temp=zeros(numel(thedirs)-3+1,1); + temp(i)=round(amount(j).*numreads); + species(names{j})=temp; + end +end + +thefa=strfind(fastafilename,'.fa'); + +if ~isempty(fastafilename(1:thefa-1)) + sampleid{i}=fastafilename(1:thefa-1); +else + sampleid{i}='empty_sampleid'; +end +thetime(i)=toc(); +thetime(i) + +end + +'Total time to compute Quikr' +toc() +'Quickr Average time per file' +mean(diff(thetime)) + +tic +numits=i; + + +fid=fopen([output_directory '/' otu_table_name],'w'); +fprintf(fid,'# QIIME vGail OTU table\n'); +fprintf(fid,'#OTU_ID\t'); +for i=1:numits +if i