aboutsummaryrefslogtreecommitdiff
path: root/fly-tools/cci-calculator/LeastSquareSolution.m
blob: 275f822000233e62e618077524c5c4d8df48b2d3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
function [x, e] = LeastSquareSolution(fileNameA, fileNameB, output)
% LEASTSQUARESOLUTION  Cross-validated least-squares fit of A*x = b via the SVD.
%
%   [x, e] = LeastSquareSolution(fileNameA, fileNameB, output)
%
%   Inputs
%     fileNameA - file readable by LOAD holding the data matrix A
%                 (one row per sample).
%     fileNameB - file readable by LOAD holding the vector b
%                 (one value per sample).
%     output    - directory that receives the per-fold debug files
%                 'Fold_<k>.txt'.
%
%   Outputs
%     x - least-squares solution computed on the training set of the LAST fold.
%     e - training residual A_*x - b_ of the LAST fold.
%
%   Side effects
%     <output>/Fold_<k>.txt - predicted values, actual values and their
%                             difference for fold k, followed by the
%                             tab-delimited training and test rows.
%     newCIs.txt            - predicted values of every held-out sample
%                             (written to the current directory).
%     Solution_vectors.txt  - one solution vector per row, one row per fold
%                             (written to the current directory).
%
%   The samples are split into up to 10 contiguous folds. Each fold is held
%   out once while the remaining samples are used to solve the least-squares
%   problem; the last fold also absorbs the samples left over when the
%   sample count is not a multiple of the fold count.

    A = load(fileNameA);
    b = load(fileNameB);

    % The algebra below (U'*b_) needs b as a column vector.
    if size(b, 1) == 1
        b = b(:);
    end

    n_samples = length(b);
    if size(A, 1) ~= n_samples
        error('LeastSquareSolution:sizeMismatch', ...
              'A has %d rows but b has %d entries.', size(A, 1), n_samples);
    end
    if n_samples < 2
        error('LeastSquareSolution:tooFewSamples', ...
              'At least 2 samples are required, got %d.', n_samples);
    end

    % Never use more folds than samples, otherwise some folds would be empty.
    total_folds = min(10, n_samples);

    % Integer fold size; the leftover samples go to the last fold.
    fold_size = floor(n_samples / total_folds);
    remainder_of_total_folds = n_samples - total_folds * fold_size;

    CI = [];
    solution = zeros(total_folds, size(A, 2));

    for i = 0:(total_folds - 1)

        % Path of the debug file for this fold (1-based fold number).
        debug_file = strcat(output, '/', 'Fold_', num2str((i + 1), '%2d'), '.txt');

        % Contiguous index range of the held-out samples.
        first_test = i * fold_size + 1;
        last_test = (i + 1) * fold_size;
        if i == (total_folds - 1)
            last_test = last_test + remainder_of_total_folds;
        end

        % Logical masks: test marks the held-out rows, train everything else.
        test = false(n_samples, 1);
        test(first_test:last_test) = true;
        train = ~test;

        A_ = A(train, :);   % training data
        b_ = b(train);      % training targets

        % Economy-size SVD: avoids building the full m-by-m U matrix.
        [U, S, V] = svd(A_, 'econ');
        sv = diag(S);

        % Drop singular values that are numerically zero (same tolerance
        % rule as pinv) so rank-deficient data yields the minimum-norm
        % solution instead of Inf/NaN.
        tol = max(size(A_)) * eps(max(sv));
        r = sum(sv > tol);

        % x = V * (S^-1 * U' * b_) restricted to the retained components.
        x = V(:, 1:r) * ((U(:, 1:r)' * b_) ./ sv(1:r));

        % Error estimate on the training data.
        e = A_ * x - b_;

        % Keep the solution vector of this fold.
        solution(i + 1, :) = x';

        % Predict the held-out samples.
        test_data = A(test, :);
        ci = test_data * x;
        CI = [CI; ci];

        actual_ci = b(test, :);

        fid_debug = fopen(debug_file, 'w');
        if fid_debug == -1
            error('LeastSquareSolution:fileOpen', ...
                  'Cannot open %s for writing.', debug_file);
        end
        fprintf(fid_debug, '%s\n\n', 'new CI');
        fprintf(fid_debug, '%8.6f\n', ci);
        fprintf(fid_debug, '%s\n\n', 'actual CI');
        fprintf(fid_debug, '%8.6f\n', actual_ci);
        fprintf(fid_debug, '%s\n\n', 'error');
        fprintf(fid_debug, '%8.6f\n', (ci - actual_ci));
        fclose(fid_debug);

        % Append the raw training and test rows after the summary.
        dlmwrite(debug_file, A_, 'delimiter', '\t', '-append');
        dlmwrite(debug_file, test_data, 'delimiter', '\t', '-append');

    end

    % Write out the predicted values of all held-out samples.
    fid_ci = fopen('newCIs.txt', 'w');
    if fid_ci == -1
        error('LeastSquareSolution:fileOpen', ...
              'Cannot open %s for writing.', 'newCIs.txt');
    end
    fprintf(fid_ci, '%8.6f\n', CI);
    fclose(fid_ci);

    % Write out one solution vector per fold.
    dlmwrite('Solution_vectors.txt', solution, 'delimiter', '\t');