aboutsummaryrefslogtreecommitdiff
path: root/fly-tools/cci-calculator/LeastSquareSolution.m
blob: 85d1326d6e7deb63702e4519b107b97d661c8093 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
function [x, e] = LeastSquareSolution(fileNameA, fileNameB, output)
%LEASTSQUARESOLUTION 10-fold cross-validated least-squares fit via the SVD.
%   [x, e] = LeastSquareSolution(fileNameA, fileNameB, output) loads the
%   data matrix A from fileNameA and the target vector b from fileNameB
%   (both read with LOAD), splits the samples (rows) into 10 contiguous
%   folds and, for every fold, solves  min ||A_train*x - b_train||  with a
%   singular value decomposition. The fitted x is then applied to the
%   held-out rows to produce their "ci" values.
%
%   Inputs
%     fileNameA - file holding A, one sample per row.
%     fileNameB - file holding b, one value per sample.
%     output    - existing folder that receives the per-fold debug files
%                 Fold_1.txt ... Fold_10.txt.
%
%   Outputs (both taken from the LAST fold only)
%     x - least-squares solution vector of the last fold.
%     e - residual A_train*x - b_train of the last fold.
%
%   Files written
%     <output>/Fold_<k>.txt - predicted ci, actual ci and their difference
%                             for fold k, followed by the training rows and
%                             the test rows (tab separated).
%     newCIs.txt            - predicted ci of every sample, in sample order
%                             (current folder).
%     Solution_vectors.txt  - one solution vector per row, one row per fold
%                             (current folder).
%
%   Samples that do not divide evenly into 10 folds are added to the last
%   fold.

    A = load(fileNameA);
    b = load(fileNameB);

    % A row-vector b would break b(train) / U'*b_ below; use a column.
    if isvector(b)
        b = b(:);
    end

    total_folds = 10;
    num_samples = length(b);

    if size(A, 1) ~= num_samples
        error('LeastSquareSolution:sizeMismatch', ...
              'A has %d rows but b has %d entries.', size(A, 1), num_samples);
    end
    if num_samples < total_folds
        error('LeastSquareSolution:tooFewSamples', ...
              'At least %d samples are required, got %d.', ...
              total_folds, num_samples);
    end

    % Integer fold size; the left-over samples go into the last fold.
    % (The fold size must be floored, otherwise the row indices below are
    % fractional whenever num_samples is not a multiple of total_folds.)
    fold_size = floor(num_samples / total_folds);
    remainder_of_total_folds = num_samples - total_folds * fold_size;
    elements_per_fold = fold_size;

    CI = [];                                   % predicted ci values, all folds
    solution = zeros(total_folds, size(A, 2)); % one solution vector per row

    for i = 0:(total_folds - 1)

        % <output>/Fold_<i+1>.txt
        file_id_number = num2str((i + 1), '%2d');
        debug_file = strcat(output, '/', 'Fold_', file_id_number, '.txt');

        if (i == (total_folds - 1))
            elements_per_fold = fold_size + remainder_of_total_folds;
        end

        % Logical masks: rows of the current fold are the test set, every
        % other row is used for training.
        first_row = i * fold_size + 1;
        last_row  = i * fold_size + elements_per_fold;
        train = true(num_samples, 1);
        train(first_row:last_row) = false;
        test = ~train;

        A_ = A(train, :);   % training data
        b_ = b(train);      % training targets

        % Least-squares solution through the economy-size SVD. Only the
        % singular values above the numerical-rank tolerance are inverted,
        % so rank-deficient or wide training matrices give the minimum-norm
        % solution instead of Inf/NaN or an index error.
        [U, S, V] = svd(A_, 'econ');
        sv = diag(S);
        tol = max(size(A_)) * eps(max(sv));
        r = sum(sv > tol);
        y = (U(:, 1:r)' * b_) ./ sv(1:r);
        x = V(:, 1:r) * y;

        % residual on the training data
        e = A_ * x - b_;

        solution(i + 1, :) = x';

        % predicted ci values of the held-out rows
        test_data = A(test, :);
        ci = test_data * x;
        CI = [CI; ci]; %#ok<AGROW>

        actual_ci = b(test, :);

        fid_debug = fopen(debug_file, 'w');
        if fid_debug < 0
            error('LeastSquareSolution:cannotOpenFile', ...
                  'Cannot open "%s" for writing.', debug_file);
        end
        fprintf(fid_debug, '%s\n\n', 'new CI');
        fprintf(fid_debug, '%8.6f\n', ci);
        fprintf(fid_debug, '%s\n\n', 'actual CI');
        fprintf(fid_debug, '%8.6f\n', actual_ci);
        fprintf(fid_debug, '%s\n\n', 'error');
        fprintf(fid_debug, '%8.6f\n', (ci - actual_ci));
        fclose(fid_debug);

        % append the training rows, then the test rows
        dlmwrite(debug_file, A_, 'delimiter', '\t', '-append');
        dlmwrite(debug_file, test_data, 'delimiter', '\t', '-append');

    end

    % write out the ci values
    fid_ci = fopen('newCIs.txt', 'w');
    if fid_ci < 0
        error('LeastSquareSolution:cannotOpenFile', ...
              'Cannot open "%s" for writing.', 'newCIs.txt');
    end
    fprintf(fid_ci, '%8.6f\n', CI);
    fclose(fid_ci);

    % write out the per-fold solution vectors
    dlmwrite('Solution_vectors.txt', solution, 'delimiter', '\t');