From 47cc54decd8d2a14a413bab57f8414214825155b Mon Sep 17 00:00:00 2001 From: Calvin Date: Thu, 16 May 2013 14:53:58 -0400 Subject: used newlines in readme for clarity, update our references to the ../../../data/ --- .../multifasta2otu/multifasta2otutable_rdp.m | 105 --------------------- 1 file changed, 105 deletions(-) delete mode 100644 src/matlab/multifasta2otu/multifasta2otutable_rdp.m (limited to 'src/matlab/multifasta2otu/multifasta2otutable_rdp.m') diff --git a/src/matlab/multifasta2otu/multifasta2otutable_rdp.m b/src/matlab/multifasta2otu/multifasta2otutable_rdp.m deleted file mode 100644 index f3225f7..0000000 --- a/src/matlab/multifasta2otu/multifasta2otutable_rdp.m +++ /dev/null @@ -1,105 +0,0 @@ -%This is an example of how to run Quikr on the default RDP_7 training set, trainset7_112011.fa - -%make sure Matlab/Octave is in your path -%cd /path/to/Quikr - -%User-defined variables -input_directory='../../separated_samples'; %path to input directory of samples -output_directory='quikr_results_32'; %path to where want output files to go -otu_table_name='rdp_otu_table.txt'; %name of output otu_table filename -%Do not have to define trainingdatabasename here - -mkdir([output_directory]) -thedirs=dir([input_directory]); -thetime=zeros(numel(thedirs),1); -names={}; - -species=containers.Map; - -tic() - -[headers,~]=fastaread('trainset7_112011.fa'); %read in the training database - -i=0; -%for numdirs=3:6 -for numdirs=3:numel(thedirs) -i=i+1; -[num2str(i) ' out of ' num2str(numel(thedirs)-2)] -fastafilename=[input_directory '/' thedirs(numdirs).name]; -[loadfasta,~]=fastaread(fastafilename); -numreads=numel(loadfasta); -xstar=quikr(fastafilename); %this will give the predicted reconstruction frequencies - -nonzeroentries=find(xstar); %get the indicies of the sequences quikr predicts are in your sample -proportionscell=num2cell(xstar(nonzeroentries)); %convert the concentrations into a cell array -namescell=headers(nonzeroentries); %Get the names of the sequences -namesandproportions={namescell{:}; proportionscell{:}}; %This cell array contains the (unsorted) names of the reconstructed sequences and their concentrations (in the first and second columns respectively) - -[a cols]=size(namesandproportions); -amount=zeros(cols,1); -for j=1:cols - names{j}=namesandproportions{1,j}; - amount(j)=namesandproportions{2,j}; - if isKey(species,names{j}) - temp=species(names{j}); - temp(i)=round(amount(j).*numreads); - species(names{j})=temp; - else - temp=zeros(numel(thedirs)-3+1,1); - temp(i)=round(amount(j).*numreads); - species(names{j})=temp; - end -end - -thefa=strfind(fastafilename,'.fa'); - -if ~isempty(fastafilename(1:thefa-1)) - sampleid{i}=fastafilename(1:thefa-1); -else - sampleid{i}='empty_sampleid'; -end -thetime(i)=toc(); -thetime(i) - -end - -'Total time to compute Quikr' -toc() -'Quickr Average time per file' -mean(diff(thetime)) - -tic -numits=i; - - -fid=fopen([output_directory '/' otu_table_name],'w'); -fprintf(fid,'# QIIME vGail OTU table\n'); -fprintf(fid,'#OTU_ID\t'); -for i=1:numits -if i