aboutsummaryrefslogtreecommitdiff
path: root/FEAST/FSToolbox
diff options
context:
space:
mode:
Diffstat (limited to 'FEAST/FSToolbox')
-rw-r--r--FEAST/FSToolbox/BetaGamma.c188
-rw-r--r--FEAST/FSToolbox/CMIM.c142
-rw-r--r--FEAST/FSToolbox/CompileFEAST.m4
-rw-r--r--FEAST/FSToolbox/CondMI.c166
-rw-r--r--FEAST/FSToolbox/DISR.c182
-rw-r--r--FEAST/FSToolbox/FCBF.m58
-rw-r--r--FEAST/FSToolbox/FSAlgorithms.h138
-rw-r--r--FEAST/FSToolbox/FSToolbox.h70
-rw-r--r--FEAST/FSToolbox/FSToolboxMex.c290
-rwxr-xr-xFEAST/FSToolbox/FSToolboxMex.mexa64bin0 -> 22290 bytes
-rw-r--r--FEAST/FSToolbox/ICAP.c184
-rw-r--r--FEAST/FSToolbox/JMI.c177
-rw-r--r--FEAST/FSToolbox/MIM.m17
-rw-r--r--FEAST/FSToolbox/Makefile96
-rw-r--r--FEAST/FSToolbox/README80
-rw-r--r--FEAST/FSToolbox/RELIEF.m61
-rw-r--r--FEAST/FSToolbox/feast.bib122
-rw-r--r--FEAST/FSToolbox/feast.m100
-rw-r--r--FEAST/FSToolbox/license.txt32
-rw-r--r--FEAST/FSToolbox/mRMR_D.c170
20 files changed, 2277 insertions, 0 deletions
diff --git a/FEAST/FSToolbox/BetaGamma.c b/FEAST/FSToolbox/BetaGamma.c
new file mode 100644
index 0000000..925ef8b
--- /dev/null
+++ b/FEAST/FSToolbox/BetaGamma.c
@@ -0,0 +1,188 @@
+/*******************************************************************************
+** betaGamma() implements the Beta-Gamma space from Brown (2009).
+** This incorporates MIFS, CIFE, and CondRed.
+**
+** MIFS - "Using mutual information for selecting features in supervised neural net learning"
+** R. Battiti, IEEE Transactions on Neural Networks, 1994
+**
+** CIFE - "Conditional Infomax Learning: An Integrated Framework for Feature Extraction and Fusion"
+** D. Lin and X. Tang, European Conference on Computer Vision (2006)
+**
+** The Beta Gamma space is explained in Brown (2009) and Brown et al. (2011)
+**
+** Initial Version - 13/06/2008
+** Updated - 23/06/2011
+**
+** Author - Adam Pocock
+**
+** Part of the Feature Selection Toolbox, please reference
+** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual
+** Information Feature Selection"
+** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan
+** Journal of Machine Learning Research (JMLR), 2011
+**
+** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates.
+**
+** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester
+** All rights reserved.
+**
+** Redistribution and use in source and binary forms, with or without modification,
+** are permitted provided that the following conditions are met:
+**
+** - Redistributions of source code must retain the above copyright notice, this
+** list of conditions and the following disclaimer.
+** - Redistributions in binary form must reproduce the above copyright notice,
+** this list of conditions and the following disclaimer in the documentation
+** and/or other materials provided with the distribution.
+** - Neither the name of The University of Manchester nor the names of its
+** contributors may be used to endorse or promote products derived from this
+** software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**
+*******************************************************************************/
+
+#include "FSAlgorithms.h"
+#include "FSToolbox.h"
+
+/* MIToolbox includes */
+#include "MutualInformation.h"
+
+double* BetaGamma(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures, double betaParam, double gammaParam)
+{
+    /*
+    ** Greedy forward selection of k features over the Beta-Gamma space:
+    **   J(X_j) = I(X_j;Y) - sum_{m in S} [beta*I(X_j;X_m) - gamma*I(X_j;X_m|Y)]
+    ** Writes the selected features into outputFeatures (1-based indices,
+    ** length k) and returns that same pointer.
+    */
+    double **feature2D = (double **) CALLOC_FUNC(noOfFeatures,sizeof(double *));
+
+    /*holds the class MI values*/
+    double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double));
+    char *selectedFeatures = (char *)CALLOC_FUNC(noOfFeatures,sizeof(char));
+
+    /*holds the intra feature MI values, lazily computed and cached*/
+    int sizeOfMatrix = k*noOfFeatures;
+    double *featureMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double));
+
+    double maxMI = 0.0;
+    int maxMICounter = -1;
+
+    double score, currentScore, totalFeatureMI;
+    int currentHighestFeature, arrayPosition;
+
+    int i,j,m;
+
+    /***********************************************************
+    ** because the array is passed as
+    **  s a m p l e s
+    ** f
+    ** e
+    ** a
+    ** t
+    ** u
+    ** r
+    ** e
+    ** s
+    **
+    ** this pulls out a pointer to the first sample of
+    ** each feature and stores it as a multidimensional array
+    ** so it can be indexed nicely
+    ***********************************************************/
+    for(j = 0; j < noOfFeatures; j++)
+    {
+        feature2D[j] = featureMatrix + (int)j*noOfSamples;
+    }
+
+    /*-1 marks a pairwise penalty which has not been computed yet*/
+    for (i = 0; i < sizeOfMatrix; i++)
+    {
+        featureMIMatrix[i] = -1;
+    }/*for featureMIMatrix - blank to -1*/
+
+    /***********************************************************
+    ** SETUP COMPLETE
+    ** Algorithm starts here
+    ***********************************************************/
+
+    for (i = 0; i < noOfFeatures; i++)
+    {
+        classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);
+
+        if (classMI[i] > maxMI)
+        {
+            maxMI = classMI[i];
+            maxMICounter = i;
+        }/*if bigger than current maximum*/
+    }/*for noOfFeatures - filling classMI*/
+
+    selectedFeatures[maxMICounter] = 1;
+    outputFeatures[0] = maxMICounter;
+
+    /*************
+    ** Now we have populated the classMI array, and selected the highest
+    ** MI feature as the first output feature
+    ** Now we move into the Beta-Gamma algorithm
+    *************/
+
+    for (i = 1; i < k; i++)
+    {
+        /************************************************************
+        ** -HUGE_VAL ensures a feature is always selected; if this
+        ** started at zero no feature would be picked whenever the
+        ** redundancy penalty exceeds the relevance
+        ************************************************************/
+        score = -HUGE_VAL;
+        currentHighestFeature = 0;
+        currentScore = 0.0;
+        totalFeatureMI = 0.0;
+
+        for (j = 0; j < noOfFeatures; j++)
+        {
+            /*if we haven't selected j*/
+            if (!selectedFeatures[j])
+            {
+                currentScore = classMI[j];
+                totalFeatureMI = 0.0;
+
+                for (m = 0; m < i; m++)
+                {
+                    arrayPosition = m*noOfFeatures + j;
+                    if (featureMIMatrix[arrayPosition] == -1)
+                    {
+                        /*cache beta*I(X_j;X_m) - gamma*I(X_j;X_m|Y) for the pair (j,m)*/
+                        featureMIMatrix[arrayPosition] = betaParam*calculateMutualInformation(feature2D[(int) outputFeatures[m]], feature2D[j], noOfSamples);
+                        featureMIMatrix[arrayPosition] -= gammaParam*calculateConditionalMutualInformation(feature2D[(int) outputFeatures[m]], feature2D[j], classColumn, noOfSamples);
+                    }/*if not already known*/
+
+                    totalFeatureMI += featureMIMatrix[arrayPosition];
+                }/*for the number of already selected features*/
+
+                currentScore -= (totalFeatureMI);
+
+                if (currentScore > score)
+                {
+                    score = currentScore;
+                    currentHighestFeature = j;
+                }
+            }/*if j is unselected*/
+        }/*for number of features*/
+
+        selectedFeatures[currentHighestFeature] = 1;
+        outputFeatures[i] = currentHighestFeature;
+
+    }/*for the number of features to select*/
+
+    for (i = 0; i < k; i++)
+    {
+        outputFeatures[i] += 1; /*C indexes from 0 not 1*/
+    }/*for number of selected features*/
+
+    /*BUGFIX: release the working buffers. Previously every one of these
+    **leaked on each call; the sibling criteria (CMIM, CondMI, DISR) all
+    **free their equivalents before returning.*/
+    FREE_FUNC(classMI);
+    FREE_FUNC(feature2D);
+    FREE_FUNC(featureMIMatrix);
+    FREE_FUNC(selectedFeatures);
+
+    classMI = NULL;
+    feature2D = NULL;
+    featureMIMatrix = NULL;
+    selectedFeatures = NULL;
+
+    return outputFeatures;
+}/*BetaGamma(int,int,int,double[][],double[],double[],double,double)*/
+
diff --git a/FEAST/FSToolbox/CMIM.c b/FEAST/FSToolbox/CMIM.c
new file mode 100644
index 0000000..9ef21ad
--- /dev/null
+++ b/FEAST/FSToolbox/CMIM.c
@@ -0,0 +1,142 @@
+/*******************************************************************************
+** CMIM.c, implements a discrete version of the
+** Conditional Mutual Information Maximisation criterion, using the fast
+** exact implementation from
+**
+** "Fast Binary Feature Selection using Conditional Mutual Information Maximisation"
+** F. Fleuret, JMLR (2004)
+**
+** Initial Version - 13/06/2008
+** Updated - 23/06/2011
+**
+** Author - Adam Pocock
+**
+** Part of the Feature Selection Toolbox, please reference
+** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual
+** Information Feature Selection"
+** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan
+** Journal of Machine Learning Research (JMLR), 2011
+**
+** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates.
+**
+** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester
+** All rights reserved.
+**
+** Redistribution and use in source and binary forms, with or without modification,
+** are permitted provided that the following conditions are met:
+**
+** - Redistributions of source code must retain the above copyright notice, this
+** list of conditions and the following disclaimer.
+** - Redistributions in binary form must reproduce the above copyright notice,
+** this list of conditions and the following disclaimer in the documentation
+** and/or other materials provided with the distribution.
+** - Neither the name of The University of Manchester nor the names of its
+** contributors may be used to endorse or promote products derived from this
+** software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**
+*******************************************************************************/
+
+#include "FSAlgorithms.h"
+#include "FSToolbox.h"
+
+/* MIToolbox includes */
+#include "MutualInformation.h"
+
+/*CMIM() selects k features using Fleuret's fast exact CMIM algorithm.
+**classMI[j] holds a running upper bound (the "partial score") on
+**min_m I(X_j;Y|X_m); it is only tightened lazily, when feature j is still
+**a contender in the current round. Returns outputFeatures (1-based
+**indices, length k).
+*/
+double* CMIM(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures)
+{
+ /*holds the class MI values
+ **the class MI doubles as the partial score from the CMIM paper
+ */
+ double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double));
+ /*in the CMIM paper, m = lastUsedFeature*/
+ int *lastUsedFeature = (int *)CALLOC_FUNC(noOfFeatures,sizeof(int));
+
+ double score, conditionalInfo;
+ /*NOTE(review): iMinus is assigned each round but never read*/
+ int iMinus, currentFeature;
+
+ double maxMI = 0.0;
+ int maxMICounter = -1;
+
+ int j,i;
+
+ /*pointer to the first sample of each feature, for 2D-style indexing*/
+ double **feature2D = (double**) CALLOC_FUNC(noOfFeatures,sizeof(double*));
+
+ for(j = 0; j < noOfFeatures; j++)
+ {
+ feature2D[j] = featureMatrix + (int)j*noOfSamples;
+ }
+
+ /*each feature's partial score starts at its relevance I(X_j;Y)*/
+ for (i = 0; i < noOfFeatures;i++)
+ {
+ classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);
+
+ if (classMI[i] > maxMI)
+ {
+ maxMI = classMI[i];
+ maxMICounter = i;
+ }/*if bigger than current maximum*/
+ }/*for noOfFeatures - filling classMI*/
+
+ outputFeatures[0] = maxMICounter;
+
+ /*****************************************************************************
+ ** We have populated the classMI array, and selected the highest
+ ** MI feature as the first output feature
+ ** Now we move into the CMIM algorithm
+ *****************************************************************************/
+
+ for (i = 1; i < k; i++)
+ {
+ score = 0.0;
+ iMinus = i-1;
+
+ for (j = 0; j < noOfFeatures; j++)
+ {
+ /*lazily tighten j's partial score: only while it still beats the
+ **best score seen this round, and only against the selected
+ **features it has not yet been conditioned on*/
+ while ((classMI[j] > score) && (lastUsedFeature[j] < i))
+ {
+ /*double calculateConditionalMutualInformation(double *firstVector, double *targetVector, double *conditionVector, int vectorLength);*/
+ currentFeature = (int) outputFeatures[lastUsedFeature[j]];
+ conditionalInfo = calculateConditionalMutualInformation(feature2D[j],classColumn,feature2D[currentFeature],noOfSamples);
+ if (classMI[j] > conditionalInfo)
+ {
+ classMI[j] = conditionalInfo;
+ }/*reset classMI*/
+ /*moved due to C indexing from 0 rather than 1*/
+ lastUsedFeature[j] += 1;
+ }/*while partial score greater than score & not reached last feature*/
+ /*NOTE(review): if every remaining partial score is 0, outputFeatures[i]
+ **is never written this round - presumably the caller zero-initialises
+ **the output vector; confirm against FSToolboxMex.c*/
+ if (classMI[j] > score)
+ {
+ score = classMI[j];
+ outputFeatures[i] = j;
+ }/*if partial score still greater than score*/
+ }/*for number of features*/
+ }/*for the number of features to select*/
+
+
+ for (i = 0; i < k; i++)
+ {
+ outputFeatures[i] += 1; /*C indexes from 0 not 1*/
+ }/*for number of selected features*/
+
+ FREE_FUNC(classMI);
+ FREE_FUNC(lastUsedFeature);
+ FREE_FUNC(feature2D);
+
+ classMI = NULL;
+ lastUsedFeature = NULL;
+ feature2D = NULL;
+
+ return outputFeatures;
+}/*CMIM(int,int,int,double[][],double[],double[])*/
+
diff --git a/FEAST/FSToolbox/CompileFEAST.m b/FEAST/FSToolbox/CompileFEAST.m
new file mode 100644
index 0000000..f5dad48
--- /dev/null
+++ b/FEAST/FSToolbox/CompileFEAST.m
@@ -0,0 +1,4 @@
+%Compiles the FEAST Toolbox into a mex executable for use with MATLAB
+%Run from the FSToolbox directory; expects the MIToolbox sources in ../MIToolbox
+
+mex -I../MIToolbox FSToolboxMex.c BetaGamma.c CMIM.c CondMI.c DISR.c ICAP.c JMI.c mRMR_D.c ../MIToolbox/MutualInformation.c ../MIToolbox/Entropy.c ../MIToolbox/CalculateProbability.c ../MIToolbox/ArrayOperations.c
+
diff --git a/FEAST/FSToolbox/CondMI.c b/FEAST/FSToolbox/CondMI.c
new file mode 100644
index 0000000..ea6f8ee
--- /dev/null
+++ b/FEAST/FSToolbox/CondMI.c
@@ -0,0 +1,166 @@
+/*******************************************************************************
+** CondMI.c, implements the CMI criterion using a greedy forward search
+**
+** Initial Version - 19/08/2010
+** Updated - 23/06/2011
+**
+** Author - Adam Pocock
+**
+** Part of the Feature Selection Toolbox, please reference
+** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual
+** Information Feature Selection"
+** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan
+** Journal of Machine Learning Research (JMLR), 2011
+**
+** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates.
+**
+** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester
+** All rights reserved.
+**
+** Redistribution and use in source and binary forms, with or without modification,
+** are permitted provided that the following conditions are met:
+**
+** - Redistributions of source code must retain the above copyright notice, this
+** list of conditions and the following disclaimer.
+** - Redistributions in binary form must reproduce the above copyright notice,
+** this list of conditions and the following disclaimer in the documentation
+** and/or other materials provided with the distribution.
+** - Neither the name of The University of Manchester nor the names of its
+** contributors may be used to endorse or promote products derived from this
+** software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**
+*******************************************************************************/
+
+#include "FSAlgorithms.h"
+#include "FSToolbox.h"
+
+/* for memcpy */
+#include <string.h>
+
+/* MIToolbox includes */
+#include "MutualInformation.h"
+#include "ArrayOperations.h"
+
+double* CondMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures)
+{
+    /*
+    ** Greedy forward selection maximising I(X_j;Y|S), where S is the
+    ** merged joint variable of all previously selected features
+    ** (built up incrementally in conditionVector via mergeArrays).
+    **
+    ** NOTE(review): unlike the sibling criteria (CMIM/DISR/BetaGamma),
+    ** CondMI does not convert to 1-based indices before returning, and a
+    ** round with no positive score leaves -1 in the output - confirm the
+    ** caller compensates for both.
+    **
+    ** CHANGED: removed the featureMIMatrix cache (k*noOfFeatures doubles)
+    ** and the locals totalFeatureMI/arrayPosition/mi/tripEntropy, all of
+    ** which were allocated/assigned but never read.
+    */
+
+    /*holds the class MI values*/
+    double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double));
+
+    char *selectedFeatures = (char *)CALLOC_FUNC(noOfFeatures,sizeof(char));
+
+    double maxMI = 0.0;
+    int maxMICounter = -1;
+
+    double **feature2D = (double**) CALLOC_FUNC(noOfFeatures,sizeof(double*));
+
+    double score, currentScore;
+    int currentHighestFeature;
+
+    /*joint variable of the features selected so far*/
+    double *conditionVector = (double *) CALLOC_FUNC(noOfSamples,sizeof(double));
+
+    int i,j;
+
+    /*pointer to the first sample of each feature, for 2D-style indexing*/
+    for(j = 0; j < noOfFeatures; j++)
+    {
+        feature2D[j] = featureMatrix + (int)j*noOfSamples;
+    }
+
+    /*select the feature with the largest I(X;Y) first*/
+    for (i = 0; i < noOfFeatures;i++)
+    {
+        classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);
+
+        if (classMI[i] > maxMI)
+        {
+            maxMI = classMI[i];
+            maxMICounter = i;
+        }/*if bigger than current maximum*/
+    }/*for noOfFeatures - filling classMI*/
+
+    selectedFeatures[maxMICounter] = 1;
+    outputFeatures[0] = maxMICounter;
+
+    memcpy(conditionVector,feature2D[maxMICounter],sizeof(double)*noOfSamples);
+
+    /*****************************************************************************
+    ** We have populated the classMI array, and selected the highest
+    ** MI feature as the first output feature
+    ** Now we move into the CondMI algorithm
+    *****************************************************************************/
+
+    for (i = 1; i < k; i++)
+    {
+        score = 0.0;
+        /*-1 means no candidate achieved a positive conditional MI this round*/
+        currentHighestFeature = -1;
+        currentScore = 0.0;
+
+        for (j = 0; j < noOfFeatures; j++)
+        {
+            /*if we haven't selected j*/
+            if (selectedFeatures[j] == 0)
+            {
+                currentScore = calculateConditionalMutualInformation(feature2D[j],classColumn,conditionVector,noOfSamples);
+
+                if (currentScore > score)
+                {
+                    score = currentScore;
+                    currentHighestFeature = j;
+                }
+            }/*if j is unselected*/
+        }/*for number of features*/
+
+        outputFeatures[i] = currentHighestFeature;
+
+        if (currentHighestFeature != -1)
+        {
+            selectedFeatures[currentHighestFeature] = 1;
+            /*fold the new feature into the conditioning variable*/
+            mergeArrays(feature2D[currentHighestFeature],conditionVector,conditionVector,noOfSamples);
+        }
+
+    }/*for the number of features to select*/
+
+    FREE_FUNC(classMI);
+    FREE_FUNC(conditionVector);
+    FREE_FUNC(feature2D);
+    FREE_FUNC(selectedFeatures);
+
+    classMI = NULL;
+    conditionVector = NULL;
+    feature2D = NULL;
+    selectedFeatures = NULL;
+
+    return outputFeatures;
+}/*CondMI(int,int,int,double[][],double[],double[])*/
+
diff --git a/FEAST/FSToolbox/DISR.c b/FEAST/FSToolbox/DISR.c
new file mode 100644
index 0000000..3ec7676
--- /dev/null
+++ b/FEAST/FSToolbox/DISR.c
@@ -0,0 +1,182 @@
+/*******************************************************************************
+** DISR.c, implements the Double Input Symmetrical Relevance criterion
+** from
+**
+** "On the Use of Variable Complementarity for Feature Selection in Cancer Classification"
+** P. Meyer and G. Bontempi, (2006)
+**
+** Initial Version - 13/06/2008
+** Updated - 23/06/2011
+**
+** Author - Adam Pocock
+**
+** Part of the Feature Selection Toolbox, please reference
+** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual
+** Information Feature Selection"
+** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan
+** Journal of Machine Learning Research (JMLR), 2011
+**
+** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates.
+**
+** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester
+** All rights reserved.
+**
+** Redistribution and use in source and binary forms, with or without modification,
+** are permitted provided that the following conditions are met:
+**
+** - Redistributions of source code must retain the above copyright notice, this
+** list of conditions and the following disclaimer.
+** - Redistributions in binary form must reproduce the above copyright notice,
+** this list of conditions and the following disclaimer in the documentation
+** and/or other materials provided with the distribution.
+** - Neither the name of The University of Manchester nor the names of its
+** contributors may be used to endorse or promote products derived from this
+** software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**
+*******************************************************************************/
+#include "FSAlgorithms.h"
+#include "FSToolbox.h"
+
+/* MIToolbox includes */
+#include "MutualInformation.h"
+#include "Entropy.h"
+#include "ArrayOperations.h"
+
+double* DISR(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures)
+{
+    /*
+    ** Greedy forward selection scoring each candidate X_j by
+    **   sum_{x in S} I(X_j,X_x;Y) / H(X_j,X_x,Y)
+    ** i.e. the summed symmetrical relevance of X_j paired with each
+    ** already-selected feature. Pairwise scores are cached lazily in
+    ** featureMIMatrix. Returns outputFeatures (1-based indices, length k).
+    **
+    ** CHANGED: removed the totalFeatureMI local, which was assigned twice
+    ** but never read.
+    */
+
+    /*holds the class MI values*/
+    double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double));
+
+    char *selectedFeatures = (char *)CALLOC_FUNC(noOfFeatures,sizeof(char));
+
+    /*holds the cached pairwise scores; -1 = not yet computed*/
+    int sizeOfMatrix = k*noOfFeatures;
+    double *featureMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double));
+
+    double maxMI = 0.0;
+    int maxMICounter = -1;
+
+    double **feature2D = (double**) CALLOC_FUNC(noOfFeatures,sizeof(double*));
+
+    double score, currentScore;
+    int currentHighestFeature;
+
+    /*workspace holding the merged (joint) variable of a feature pair*/
+    double *mergedVector = (double *) CALLOC_FUNC(noOfSamples,sizeof(double));
+
+    int arrayPosition;
+    double mi, tripEntropy;
+
+    int i,j,x;
+
+    /*pointer to the first sample of each feature, for 2D-style indexing*/
+    for(j = 0; j < noOfFeatures; j++)
+    {
+        feature2D[j] = featureMatrix + (int)j*noOfSamples;
+    }
+
+    for (i = 0; i < sizeOfMatrix;i++)
+    {
+        featureMIMatrix[i] = -1;
+    }/*for featureMIMatrix - blank to -1*/
+
+    /*select the feature with the largest I(X;Y) first*/
+    for (i = 0; i < noOfFeatures;i++)
+    {
+        classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);
+
+        if (classMI[i] > maxMI)
+        {
+            maxMI = classMI[i];
+            maxMICounter = i;
+        }/*if bigger than current maximum*/
+    }/*for noOfFeatures - filling classMI*/
+
+    selectedFeatures[maxMICounter] = 1;
+    outputFeatures[0] = maxMICounter;
+
+    /*****************************************************************************
+    ** We have populated the classMI array, and selected the highest
+    ** MI feature as the first output feature
+    ** Now we move into the DISR algorithm
+    *****************************************************************************/
+
+    for (i = 1; i < k; i++)
+    {
+        score = 0.0;
+        currentHighestFeature = 0;
+        currentScore = 0.0;
+
+        for (j = 0; j < noOfFeatures; j++)
+        {
+            /*if we haven't selected j*/
+            if (selectedFeatures[j] == 0)
+            {
+                currentScore = 0.0;
+
+                for (x = 0; x < i; x++)
+                {
+                    arrayPosition = x*noOfFeatures + j;
+                    if (featureMIMatrix[arrayPosition] == -1)
+                    {
+                        /*symmetrical relevance of the pair:
+                        **I(X_j,X_x;Y) / H(X_j,X_x,Y)*/
+                        mergeArrays(feature2D[(int) outputFeatures[x]], feature2D[j],mergedVector,noOfSamples);
+                        mi = calculateMutualInformation(mergedVector, classColumn, noOfSamples);
+                        tripEntropy = calculateJointEntropy(mergedVector, classColumn, noOfSamples);
+
+                        featureMIMatrix[arrayPosition] = mi / tripEntropy;
+                    }/*if not already known*/
+                    currentScore += featureMIMatrix[arrayPosition];
+                }/*for the number of already selected features*/
+
+                if (currentScore > score)
+                {
+                    score = currentScore;
+                    currentHighestFeature = j;
+                }
+            }/*if j is unselected*/
+        }/*for number of features*/
+
+        selectedFeatures[currentHighestFeature] = 1;
+        outputFeatures[i] = currentHighestFeature;
+
+    }/*for the number of features to select*/
+
+    for (i = 0; i < k; i++)
+    {
+        outputFeatures[i] += 1; /*C indexes from 0 not 1*/
+    }/*for number of selected features*/
+
+    FREE_FUNC(classMI);
+    FREE_FUNC(mergedVector);
+    FREE_FUNC(feature2D);
+    FREE_FUNC(featureMIMatrix);
+    FREE_FUNC(selectedFeatures);
+
+    classMI = NULL;
+    mergedVector = NULL;
+    feature2D = NULL;
+    featureMIMatrix = NULL;
+    selectedFeatures = NULL;
+
+    return outputFeatures;
+}/*DISR(int,int,int,double[][],double[],double[])*/
+
diff --git a/FEAST/FSToolbox/FCBF.m b/FEAST/FSToolbox/FCBF.m
new file mode 100644
index 0000000..dcaf3bf
--- /dev/null
+++ b/FEAST/FSToolbox/FCBF.m
@@ -0,0 +1,58 @@
+function [selectedFeatures] = FCBF(featureMatrix,classColumn,threshold)
+%function [selectedFeatures] = FCBF(featureMatrix,classColumn,threshold)
+%
+%Performs feature selection using the FCBF measure by Yu and Liu 2004.
+%
+%Instead of selecting a fixed number of features it provides a relevancy threshold and selects all
+%features which score above that and are not redundant
+%
+% The license is in the license.txt provided.
+
+numFeatures = size(featureMatrix,2);
+classScore = zeros(numFeatures,1);
+
+%symmetrical uncertainty of each feature with the class
+for i = 1:numFeatures
+    classScore(i) = SU(featureMatrix(:,i),classColumn);
+end
+
+[classScore indexScore] = sort(classScore,1,'descend');
+
+%keep only the features whose relevancy clears the threshold
+indexScore = indexScore(classScore > threshold);
+classScore = classScore(classScore > threshold);
+
+%BUGFIX: curPosition is now always 1. The previous code set it to 0 when
+%indexScore was empty, which made the loop condition (0 <= 0) true and
+%then errored on indexScore(0); with curPosition = 1 the condition
+%(1 <= 0) simply skips the loop.
+curPosition = 1;
+
+%drop each feature that is more strongly associated with an earlier
+%(more relevant) surviving feature than with the class
+while curPosition <= length(indexScore)
+    j = curPosition + 1;
+    curFeature = indexScore(curPosition);
+    while j <= length(indexScore)
+        scoreij = SU(featureMatrix(:,curFeature),featureMatrix(:,indexScore(j)));
+        if scoreij > classScore(j)
+            indexScore(j) = [];
+            classScore(j) = [];
+        else
+            j = j + 1;
+        end
+    end
+    curPosition = curPosition + 1;
+end
+
+selectedFeatures = indexScore;
+
+end
+
+function [score] = SU(firstVector,secondVector)
+%function [score] = SU(firstVector,secondVector)
+%
+%Computes the symmetrical uncertainty SU(X;Y) = 2*I(X;Y)/(H(X)+H(Y))
+
+mutualInfo = mi(firstVector,secondVector);
+entropySum = h(firstVector) + h(secondVector);
+
+score = (2 * mutualInfo) / entropySum;
+end
diff --git a/FEAST/FSToolbox/FSAlgorithms.h b/FEAST/FSToolbox/FSAlgorithms.h
new file mode 100644
index 0000000..e6ddba8
--- /dev/null
+++ b/FEAST/FSToolbox/FSAlgorithms.h
@@ -0,0 +1,138 @@
+/*******************************************************************************
+**
+** FSAlgorithms.h
+** Provides the function definitions for the list of algorithms implemented
+** in the FSToolbox.
+**
+** Author: Adam Pocock
+** Created: 27/06/2011
+**
+** Copyright 2010/2011 Adam Pocock, The University Of Manchester
+** www.cs.manchester.ac.uk
+**
+** Part of the FEAture Selection Toolbox (FEAST), please reference
+** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual
+** Information Feature Selection"
+** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan
+** Journal of Machine Learning Research (JMLR), 2011
+**
+**
+** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates.
+**
+** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester
+** All rights reserved.
+**
+** Redistribution and use in source and binary forms, with or without modification,
+** are permitted provided that the following conditions are met:
+**
+** - Redistributions of source code must retain the above copyright notice, this
+** list of conditions and the following disclaimer.
+** - Redistributions in binary form must reproduce the above copyright notice,
+** this list of conditions and the following disclaimer in the documentation
+** and/or other materials provided with the distribution.
+** - Neither the name of The University of Manchester nor the names of its
+** contributors may be used to endorse or promote products derived from this
+** software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**
+*******************************************************************************/
+
+/*******************************************************************************
+ * All algorithms take an integer k which determines how many features to
+ * select, the number of samples and the number of features. Additionally each
+ * algorithm takes pointers to the data matrix, and the label vector, and
+ * a pointer to the output vector. The output vector should be pre-allocated
+ * with sizeof(double)*k bytes.
+ *
+ * Some algorithms take additional parameters, which are given at the end of
+ * the standard parameter list.
+ *
+ * Each algorithm uses a forward search, and selects the feature which has
+ * the maximum MI with the labels first.
+ *
+ * All the algorithms except CMIM use an optimised variant which caches the
+ * previously calculated MI values. This trades space for time, but can
+ * allocate large amounts of memory. CMIM uses the optimised implementation
+ * given in Fleuret (2004).
+ *****************************************************************************/
+
+#ifndef __FSAlgorithms_H
+#define __FSAlgorithms_H
+
+/*******************************************************************************
+** mRMR_D() implements the Maximum Relevance Minimum Redundancy (mRMR)
+** criterion, using the difference variant, from
+**
+** "Feature Selection Based on Mutual Information: Criteria of Max-Dependency, Max-Relevance, and Min-Redundancy"
+** H. Peng et al. IEEE Pattern Analysis and Machine Intelligence (PAMI) (2005)
+*******************************************************************************/
+double* mRMR_D(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures);
+
+/*******************************************************************************
+** CMIM() implements a discrete version of the
+** Conditional Mutual Information Maximisation criterion, using the fast
+** exact implementation from
+**
+** "Fast Binary Feature Selection using Conditional Mutual Information Maximisation"
+** F. Fleuret, JMLR (2004)
+*******************************************************************************/
+double* CMIM(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures);
+
+/*******************************************************************************
+** JMI() implements the JMI criterion from
+**
+** "Data Visualization and Feature Selection: New Algorithms for Nongaussian Data"
+** H. Yang and J. Moody, NIPS (1999)
+*******************************************************************************/
+double* JMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures);
+
+/*******************************************************************************
+** DISR() implements the Double Input Symmetrical Relevance criterion
+** from
+**
+** "On the Use of Variable Complementarity for Feature Selection in Cancer Classification"
+** P. Meyer and G. Bontempi, (2006)
+*******************************************************************************/
+double* DISR(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures);
+
+/*******************************************************************************
+** ICAP() implements the Interaction Capping criterion from
+**
+** "Machine Learning Based on Attribute Interactions"
+** A. Jakulin, PhD Thesis (2005)
+*******************************************************************************/
+double* ICAP(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures);
+
+/*******************************************************************************
+** CondMI() implements the CMI criterion using a greedy forward search
+*******************************************************************************/
+double* CondMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures);
+
+/*******************************************************************************
+** betaGamma() implements the Beta-Gamma space from Brown (2009).
+** This incorporates MIFS, CIFE, and CondRed.
+**
+** MIFS - "Using mutual information for selecting features in supervised neural net learning"
+** R. Battiti, IEEE Transactions on Neural Networks, 1994
+**
+** CIFE - "Conditional Infomax Learning: An Integrated Framework for Feature Extraction and Fusion"
+** D. Lin and X. Tang, European Conference on Computer Vision (2006)
+**
+** The Beta Gamma space is explained in our paper
+** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual Information Feature Selection"
+** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan
+** Journal of Machine Learning Research (JMLR), 2011
+*******************************************************************************/
+double* BetaGamma(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures, double beta, double gamma);
+
+#endif
diff --git a/FEAST/FSToolbox/FSToolbox.h b/FEAST/FSToolbox/FSToolbox.h
new file mode 100644
index 0000000..bf8662b
--- /dev/null
+++ b/FEAST/FSToolbox/FSToolbox.h
@@ -0,0 +1,70 @@
+/*******************************************************************************
+** FSToolbox.h
+** Provides the header files and #defines to ensure compatibility with MATLAB
+** and C/C++. By default it compiles to MATLAB, if COMPILE_C is defined it
+** links to the C memory allocation functions.
+**
+** Author: Adam Pocock
+** Created: 27/06/2011
+**
+** Copyright 2010/2011 Adam Pocock, The University Of Manchester
+** www.cs.manchester.ac.uk
+**
+** Part of the FEAture Selection Toolbox (FEAST), please reference
+** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual
+** Information Feature Selection"
+** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan
+** Journal of Machine Learning Research (JMLR), 2011
+**
+** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates.
+**
+** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester
+** All rights reserved.
+**
+** Redistribution and use in source and binary forms, with or without modification,
+** are permitted provided that the following conditions are met:
+**
+** - Redistributions of source code must retain the above copyright notice, this
+** list of conditions and the following disclaimer.
+** - Redistributions in binary form must reproduce the above copyright notice,
+** this list of conditions and the following disclaimer in the documentation
+** and/or other materials provided with the distribution.
+** - Neither the name of The University of Manchester nor the names of its
+** contributors may be used to endorse or promote products derived from this
+** software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**
+*******************************************************************************/
+
+#ifndef __FSToolbox_H
+#define __FSToolbox_H
+
+#include <math.h>
+#include <string.h>
+
+#ifdef COMPILE_C
+ #define C_IMPLEMENTATION
+ #include <stdio.h>
+ #include <stdlib.h>
+ #define CALLOC_FUNC calloc
+ #define FREE_FUNC free
+#else
+ #define MEX_IMPLEMENTATION
+ #include "mex.h"
+ #define CALLOC_FUNC mxCalloc
+ #define FREE_FUNC mxFree
+ #define printf mexPrintf /*for Octave-3.2*/
+#endif
+
+#endif
+
diff --git a/FEAST/FSToolbox/FSToolboxMex.c b/FEAST/FSToolbox/FSToolboxMex.c
new file mode 100644
index 0000000..73a9197
--- /dev/null
+++ b/FEAST/FSToolbox/FSToolboxMex.c
@@ -0,0 +1,290 @@
+/*******************************************************************************
+** FSToolboxMex.c is the entry point for the feature selection toolbox.
+** It provides a MATLAB interface to the various selection algorithms.
+**
+** Initial Version - 27/06/2011
+**
+** Author - Adam Pocock
+**
+** Part of the Feature Selection Toolbox, please reference
+** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual
+** Information Feature Selection"
+** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan
+** Journal of Machine Learning Research (JMLR), 2011
+**
+** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates.
+**
+** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester
+** All rights reserved.
+**
+** Redistribution and use in source and binary forms, with or without modification,
+** are permitted provided that the following conditions are met:
+**
+** - Redistributions of source code must retain the above copyright notice, this
+** list of conditions and the following disclaimer.
+** - Redistributions in binary form must reproduce the above copyright notice,
+** this list of conditions and the following disclaimer in the documentation
+** and/or other materials provided with the distribution.
+** - Neither the name of The University of Manchester nor the names of its
+** contributors may be used to endorse or promote products derived from this
+** software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**
+*******************************************************************************/
+
+#include "FSToolbox.h"
+#include "FSAlgorithms.h"
+#include "Entropy.h"
+
+/******************************************************************************
+** entry point for the mex call
+** nlhs - number of outputs
+** plhs - pointer to array of outputs
+** nrhs - number of inputs
+** prhs - pointer to array of inputs
+******************************************************************************/
+void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
+{
+ /*************************************************************
+ ** this function takes 4-6 arguments:
+ ** flag = which algorithm to use,
+ ** k = number of features to select,
+ ** featureMatrix[][] = matrix of features,
+ ** classColumn[] = targets,
+ ** optionalParam1 = (path angle or beta value),
+ ** optionalParam2 = (gamma value),
+ ** the arguments should all be discrete integers.
+ ** and has one output:
+ ** selectedFeatures[] of size k
+ *************************************************************/
+
+ int flag, k;
+ double optionalParam1, optionalParam2;
+ int numberOfFeatures, numberOfSamples, numberOfTargets;
+ double *featureMatrix, *targets, *output, *outputFeatures;
+
+ double entropyTest;
+ int i,j;
+
+ /************************************************************
+ ** number to function map
+ ** 1 = MIFS
+ ** 2 = mRMR
+ ** 3 = CMIM
+ ** 4 = JMI
+ ** 5 = DISR
+ ** 6 = CIFE
+ ** 7 = ICAP
+ ** 8 = CondRed
+ ** 9 = BetaGamma
+ ** 10 = CMI
+ *************************************************************/
+ if (nlhs > 1)
+ {
+ printf("Incorrect number of output arguments");
+ }/*if not 1 output*/
+ if ((nrhs < 4) || (nrhs > 6))
+ {
+ printf("Incorrect number of input arguments");
+ return;
+ }/*if not 4-6 inputs*/
+
+ /*get the flag for which algorithm*/
+ flag = (int) mxGetScalar(prhs[0]);
+
+ /*get the number of features to select, cast out as it is a double*/
+ k = (int) mxGetScalar(prhs[1]);
+
+ numberOfFeatures = mxGetN(prhs[2]);
+ numberOfSamples = mxGetM(prhs[2]);
+
+ numberOfTargets = mxGetM(prhs[3]);
+
+ if (nrhs == 6)
+ {
+ optionalParam1 = (double) mxGetScalar(prhs[4]);
+ optionalParam2 = (double) mxGetScalar(prhs[5]);
+ }
+ else if (nrhs == 5)
+ {
+ optionalParam1 = (double) mxGetScalar(prhs[4]);
+ optionalParam2 = 0.0;
+ }
+
+ if (numberOfTargets != numberOfSamples)
+ {
+ printf("Number of targets must match number of samples\n");
+ printf("Number of targets = %d, Number of Samples = %d, Number of Features = %d\n",numberOfTargets,numberOfSamples,numberOfFeatures);
+
+ plhs[0] = mxCreateDoubleMatrix(0,0,mxREAL);
+ return;
+ }/*if size mismatch*/
+ else if ((k < 1) || (k > numberOfFeatures))
+ {
+ printf("You have requested k = %d features, which is not possible\n",k);
+ plhs[0] = mxCreateDoubleMatrix(0,0,mxREAL);
+ return;
+ }
+ else
+ {
+ featureMatrix = mxGetPr(prhs[2]);
+ targets = mxGetPr(prhs[3]);
+
+ /*double calculateEntropy(double *dataVector, int vectorLength)*/
+ entropyTest = calculateEntropy(targets,numberOfSamples);
+ if (entropyTest < 0.0000001)
+ {
+ printf("The class label Y has entropy of 0, therefore all mutual informations containing Y will be 0. No feature selection is performed\n");
+ plhs[0] = mxCreateDoubleMatrix(0,0,mxREAL);
+ return;
+ }
+ else
+ {
+ /*printf("Flag = %d, k = %d, numFeatures = %d, numSamples = %d\n",flag,k,numberOfFeatures,numberOfSamples);*/
+ switch (flag)
+ {
+ case 1: /* MIFS */
+ {
+ plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL);
+ output = (double *)mxGetPr(plhs[0]);
+ if (nrhs == 4)
+ {
+ /* MIFS is Beta = 1, Gamma = 0 */
+ optionalParam1 = 1.0;
+ optionalParam2 = 0.0;
+ }
+
+          /*double* BetaGamma(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures, double beta, double gamma)*/
+ BetaGamma(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output,optionalParam1,optionalParam2);
+ break;
+ }
+ case 2: /* mRMR */
+ {
+ plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL);
+ output = (double *)mxGetPr(plhs[0]);
+
+ /*void mRMR_D(int k, int noOfSamples, int noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures)*/
+ mRMR_D(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output);
+ break;
+ }
+ case 3: /* CMIM */
+ {
+ plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL);
+ output = (double *)mxGetPr(plhs[0]);
+
+ /*void CMIM(int k, int noOfSamples, int noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures)*/
+ CMIM(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output);
+ break;
+ }
+ case 4: /* JMI */
+ {
+ plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL);
+ output = (double *)mxGetPr(plhs[0]);
+
+ /*void JMI(int k, int noOfSamples, int noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures)*/
+ JMI(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output);
+ break;
+ }
+ case 5: /* DISR */
+ {
+ plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL);
+ output = (double *)mxGetPr(plhs[0]);
+
+ /*void DISR(int k, int noOfSamples, int noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures)*/
+ DISR(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output);
+ break;
+ }
+ case 6: /* CIFE */
+ {
+ plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL);
+ output = (double *)mxGetPr(plhs[0]);
+
+ /* CIFE is Beta = 1, Gamma = 1 */
+ optionalParam1 = 1.0;
+ optionalParam2 = 1.0;
+
+          /*double* BetaGamma(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures, double beta, double gamma)*/
+ BetaGamma(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output,optionalParam1,optionalParam2);
+ break;
+ }
+ case 7: /* ICAP */
+ {
+ plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL);
+ output = (double *)mxGetPr(plhs[0]);
+
+          /*double* ICAP(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures)*/
+ ICAP(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output);
+ break;
+ }
+ case 8: /* CondRed */
+ {
+ plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL);
+ output = (double *)mxGetPr(plhs[0]);
+
+ /* CondRed is Beta = 0, Gamma = 1 */
+ optionalParam1 = 0.0;
+ optionalParam2 = 1.0;
+
+          /*double* BetaGamma(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures, double beta, double gamma)*/
+ BetaGamma(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output,optionalParam1,optionalParam2);
+ break;
+ }
+ case 9: /* BetaGamma */
+ {
+ if (nrhs != 6)
+ {
+ printf("Insufficient arguments specified for Beta Gamma FS\n");
+ plhs[0] = mxCreateDoubleMatrix(0,0,mxREAL);
+ return;
+ }
+ else
+ {
+ plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL);
+ output = (double *)mxGetPr(plhs[0]);
+
+            /*double* BetaGamma(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures, double beta, double gamma)*/
+ BetaGamma(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output,optionalParam1,optionalParam2);
+ }
+ break;
+ }
+ case 10: /* CMI */
+ {
+ output = (double *)mxCalloc(k,sizeof(double));
+
+ /*void CondMI(int k, int noOfSamples, int noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures)*/
+ CondMI(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output);
+
+ i = 0;
+
+ while((output[i] != -1) && (i < k))
+ {
+ i++;
+ }
+
+ plhs[0] = mxCreateDoubleMatrix(i,1,mxREAL);
+ outputFeatures = (double *)mxGetPr(plhs[0]);
+
+ for (j = 0; j < i; j++)
+ {
+ outputFeatures[j] = output[j] + 1; /*C indexes from 0 not 1*/
+ }/*for number of selected features*/
+
+ mxFree(output);
+ output = NULL;
+ break;
+ }
+ }/*switch on flag*/
+ return;
+ }
+ }
+}/*mex function entry*/
diff --git a/FEAST/FSToolbox/FSToolboxMex.mexa64 b/FEAST/FSToolbox/FSToolboxMex.mexa64
new file mode 100755
index 0000000..0015254
--- /dev/null
+++ b/FEAST/FSToolbox/FSToolboxMex.mexa64
Binary files differ
diff --git a/FEAST/FSToolbox/ICAP.c b/FEAST/FSToolbox/ICAP.c
new file mode 100644
index 0000000..00953a7
--- /dev/null
+++ b/FEAST/FSToolbox/ICAP.c
@@ -0,0 +1,184 @@
+/*******************************************************************************
+** ICAP.c implements the Interaction Capping criterion from
+**
+** "Machine Learning Based on Attribute Interactions"
+** A. Jakulin, PhD Thesis (2005)
+**
+** Initial Version - 19/08/2010
+** Updated - 23/06/2011
+**
+** Author - Adam Pocock
+**
+** Part of the Feature Selection Toolbox, please reference
+** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual
+** Information Feature Selection"
+** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan
+** Journal of Machine Learning Research (JMLR), 2011
+**
+** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates.
+**
+** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester
+** All rights reserved.
+**
+** Redistribution and use in source and binary forms, with or without modification,
+** are permitted provided that the following conditions are met:
+**
+** - Redistributions of source code must retain the above copyright notice, this
+** list of conditions and the following disclaimer.
+** - Redistributions in binary form must reproduce the above copyright notice,
+** this list of conditions and the following disclaimer in the documentation
+** and/or other materials provided with the distribution.
+** - Neither the name of The University of Manchester nor the names of its
+** contributors may be used to endorse or promote products derived from this
+** software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**
+*******************************************************************************/
+
+#include "FSAlgorithms.h"
+#include "FSToolbox.h"
+
+/* MIToolbox includes */
+#include "MutualInformation.h"
+
+double* ICAP(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures)
+{
+ /*holds the class MI values*/
+ double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double));
+ char *selectedFeatures = (char *)CALLOC_FUNC(noOfFeatures,sizeof(char));
+
+ /*separates out the features*/
+ double **feature2D = (double **) CALLOC_FUNC(noOfFeatures,sizeof(double *));
+
+ /*holds the intra feature MI values*/
+ int sizeOfMatrix = k*noOfFeatures;
+ double *featureMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double));
+ double *featureCMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double));
+
+ double maxMI = 0.0;
+ int maxMICounter = -1;
+
+ double score, currentScore, totalFeatureInteraction, interactionInfo;
+ int currentHighestFeature, arrayPosition;
+
+ int i, j, m;
+
+ for (j = 0; j < noOfFeatures; j++)
+ feature2D[j] = featureMatrix + (int) j * noOfSamples;
+
+ for (i = 0; i < sizeOfMatrix; i++)
+ {
+ featureMIMatrix[i] = -1;
+ featureCMIMatrix[i] = -1;
+ }/*for featureMIMatrix and featureCMIMatrix - blank to -1*/
+
+ /*SETUP COMPLETE*/
+ /*Algorithm starts here*/
+
+ for (i = 0; i < noOfFeatures;i++)
+ {
+ classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);
+
+ if (classMI[i] > maxMI)
+ {
+ maxMI = classMI[i];
+ maxMICounter = i;
+ }/*if bigger than current maximum*/
+ }/*for noOfFeatures - filling classMI*/
+
+ selectedFeatures[maxMICounter] = 1;
+ outputFeatures[0] = maxMICounter;
+
+ /*************
+ ** Now we have populated the classMI array, and selected the highest
+ ** MI feature as the first output feature
+ *************/
+
+ for (i = 1; i < k; i++)
+ {
+ /**********************************************************************
+ ** to ensure it selects some features
+ **if this is zero then it will not pick features where the redundancy is greater than the
+ **relevance
+ **********************************************************************/
+ score = -HUGE_VAL;
+ currentHighestFeature = 0;
+ currentScore = 0.0;
+
+ for (j = 0; j < noOfFeatures; j++)
+ {
+ /*if we haven't selected j*/
+ if (!selectedFeatures[j])
+ {
+ currentScore = classMI[j];
+ totalFeatureInteraction = 0.0;
+
+ for (m = 0; m < i; m++)
+ {
+ arrayPosition = m*noOfFeatures + j;
+
+ if (featureMIMatrix[arrayPosition] == -1)
+ {
+ /*work out interaction*/
+
+ /*double calculateMutualInformation(double *firstVector, double *secondVector, int vectorLength);*/
+ featureMIMatrix[arrayPosition] = calculateMutualInformation(feature2D[(int) outputFeatures[m]], feature2D[j], noOfSamples);
+ /*double calculateConditionalMutualInformation(double *firstVector, double *targetVector, double* conditionVector, int vectorLength);*/
+ featureCMIMatrix[arrayPosition] = calculateConditionalMutualInformation(feature2D[(int) outputFeatures[m]], feature2D[j], classColumn, noOfSamples);
+ }/*if not already known*/
+
+ interactionInfo = featureCMIMatrix[arrayPosition] - featureMIMatrix[arrayPosition];
+
+ if (interactionInfo < 0)
+ {
+ totalFeatureInteraction += interactionInfo;
+ }
+ }/*for the number of already selected features*/
+
+ currentScore += totalFeatureInteraction;
+
+
+ if (currentScore > score)
+ {
+ score = currentScore;
+ currentHighestFeature = j;
+ }
+ }/*if j is unselected*/
+ }/*for number of features*/
+
+ selectedFeatures[currentHighestFeature] = 1;
+ outputFeatures[i] = currentHighestFeature;
+
+ }/*for the number of features to select*/
+
+  /*C indexes from 0 not 1, so we need to increment all the feature indices*/
+ for (i = 0; i < k; i++)
+ {
+ outputFeatures[i] += 1;
+ }/*for number of selected features*/
+
+ FREE_FUNC(classMI);
+ FREE_FUNC(feature2D);
+ FREE_FUNC(featureMIMatrix);
+ FREE_FUNC(featureCMIMatrix);
+ FREE_FUNC(selectedFeatures);
+
+ classMI = NULL;
+ feature2D = NULL;
+ featureMIMatrix = NULL;
+ featureCMIMatrix = NULL;
+ selectedFeatures = NULL;
+
+ return outputFeatures;
+}/*ICAP(int,int,int,double[][],double[],double[])*/
+
diff --git a/FEAST/FSToolbox/JMI.c b/FEAST/FSToolbox/JMI.c
new file mode 100644
index 0000000..30ef1bc
--- /dev/null
+++ b/FEAST/FSToolbox/JMI.c
@@ -0,0 +1,177 @@
+/*******************************************************************************
+** JMI.c implements the JMI criterion from
+**
+** "Data Visualization and Feature Selection: New Algorithms for Nongaussian Data"
+** H. Yang and J. Moody, NIPS (1999)
+**
+** Initial Version - 19/08/2010
+** Updated - 23/06/2011
+**
+** Author - Adam Pocock
+**
+** Part of the Feature Selection Toolbox, please reference
+** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual
+** Information Feature Selection"
+** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan
+** Journal of Machine Learning Research (JMLR), 2011
+**
+** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates.
+**
+** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester
+** All rights reserved.
+**
+** Redistribution and use in source and binary forms, with or without modification,
+** are permitted provided that the following conditions are met:
+**
+** - Redistributions of source code must retain the above copyright notice, this
+** list of conditions and the following disclaimer.
+** - Redistributions in binary form must reproduce the above copyright notice,
+** this list of conditions and the following disclaimer in the documentation
+** and/or other materials provided with the distribution.
+** - Neither the name of The University of Manchester nor the names of its
+** contributors may be used to endorse or promote products derived from this
+** software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**
+*******************************************************************************/
+
+#include "FSAlgorithms.h"
+#include "FSToolbox.h"
+
+/* MIToolbox includes */
+#include "MutualInformation.h"
+#include "ArrayOperations.h"
+
+double* JMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures)
+{
+ /*holds the class MI values*/
+ double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double));
+
+ char *selectedFeatures = (char *)CALLOC_FUNC(noOfFeatures,sizeof(char));
+
+ /*holds the intra feature MI values*/
+ int sizeOfMatrix = k*noOfFeatures;
+ double *featureMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double));
+
+ double maxMI = 0.0;
+ int maxMICounter = -1;
+
+ double **feature2D = (double**) CALLOC_FUNC(noOfFeatures,sizeof(double*));
+
+ double score, currentScore, totalFeatureMI;
+ int currentHighestFeature;
+
+ double *mergedVector = (double *) CALLOC_FUNC(noOfSamples,sizeof(double));
+
+ int arrayPosition;
+ double mi, tripEntropy;
+
+ int i,j,x;
+
+ for(j = 0; j < noOfFeatures; j++)
+ {
+ feature2D[j] = featureMatrix + (int)j*noOfSamples;
+ }
+
+ for (i = 0; i < sizeOfMatrix;i++)
+ {
+ featureMIMatrix[i] = -1;
+ }/*for featureMIMatrix - blank to -1*/
+
+
+ for (i = 0; i < noOfFeatures;i++)
+ {
+ /*calculate mutual info
+ **double calculateMutualInformation(double *firstVector, double *secondVector, int vectorLength);
+ */
+ classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);
+
+ if (classMI[i] > maxMI)
+ {
+ maxMI = classMI[i];
+ maxMICounter = i;
+ }/*if bigger than current maximum*/
+ }/*for noOfFeatures - filling classMI*/
+
+ selectedFeatures[maxMICounter] = 1;
+ outputFeatures[0] = maxMICounter;
+
+ /*****************************************************************************
+ ** We have populated the classMI array, and selected the highest
+ ** MI feature as the first output feature
+ ** Now we move into the JMI algorithm
+ *****************************************************************************/
+
+ for (i = 1; i < k; i++)
+ {
+ score = 0.0;
+ currentHighestFeature = 0;
+ currentScore = 0.0;
+ totalFeatureMI = 0.0;
+
+ for (j = 0; j < noOfFeatures; j++)
+ {
+ /*if we haven't selected j*/
+ if (selectedFeatures[j] == 0)
+ {
+ currentScore = 0.0;
+ totalFeatureMI = 0.0;
+
+ for (x = 0; x < i; x++)
+ {
+ arrayPosition = x*noOfFeatures + j;
+ if (featureMIMatrix[arrayPosition] == -1)
+ {
+ mergeArrays(feature2D[(int) outputFeatures[x]], feature2D[j],mergedVector,noOfSamples);
+ /*double calculateMutualInformation(double *firstVector, double *secondVector, int vectorLength);*/
+ mi = calculateMutualInformation(mergedVector, classColumn, noOfSamples);
+
+ featureMIMatrix[arrayPosition] = mi;
+ }/*if not already known*/
+ currentScore += featureMIMatrix[arrayPosition];
+ }/*for the number of already selected features*/
+
+ if (currentScore > score)
+ {
+ score = currentScore;
+ currentHighestFeature = j;
+ }
+ }/*if j is unselected*/
+ }/*for number of features*/
+
+ selectedFeatures[currentHighestFeature] = 1;
+ outputFeatures[i] = currentHighestFeature;
+
+ }/*for the number of features to select*/
+
+ for (i = 0; i < k; i++)
+ {
+ outputFeatures[i] += 1; /*C indexes from 0 not 1*/
+ }/*for number of selected features*/
+
+ FREE_FUNC(classMI);
+ FREE_FUNC(feature2D);
+ FREE_FUNC(featureMIMatrix);
+ FREE_FUNC(mergedVector);
+ FREE_FUNC(selectedFeatures);
+
+ classMI = NULL;
+ feature2D = NULL;
+ featureMIMatrix = NULL;
+ mergedVector = NULL;
+ selectedFeatures = NULL;
+
+ return outputFeatures;
+
+}/*JMI(int,int,int,double[][],double[],double[])*/
+
diff --git a/FEAST/FSToolbox/MIM.m b/FEAST/FSToolbox/MIM.m
new file mode 100644
index 0000000..31695e4
--- /dev/null
+++ b/FEAST/FSToolbox/MIM.m
@@ -0,0 +1,17 @@
+function [selectedFeatures scoreVector] = MIM(k, data, labels)
+%function [selectedFeatures scoreVector] = MIM(k, data, labels)
+%
+%Mutual information Maximisation
+%
+% The license is in the license.txt provided.
+
+numf = size(data,2);
+classMI = zeros(numf,1);
+
+for n = 1 : numf
+ classMI(n) = mi(data(:,n),labels);
+end
+
+[scoreVector index] = sort(classMI,'descend');
+
+selectedFeatures = index(1:k);
diff --git a/FEAST/FSToolbox/Makefile b/FEAST/FSToolbox/Makefile
new file mode 100644
index 0000000..c0806a9
--- /dev/null
+++ b/FEAST/FSToolbox/Makefile
@@ -0,0 +1,96 @@
+# makefile for FEAST
+# Author: Adam Pocock, apocock@cs.man.ac.uk
+# Created: 29/06/2011
+#
+# Part of the Feature Selection Toolbox, please reference
+# "Conditional Likelihood Maximisation: A Unifying Framework for Mutual
+# Information Feature Selection"
+# G. Brown, A. Pocock, M.-J. Zhao, M. Lujan
+# Journal of Machine Learning Research (JMLR), 2012
+#
+# Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates.
+#
+# Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# - Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of The University of Manchester nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+CXXFLAGS = -O3 -fPIC
+COMPILER = gcc
+MITOOLBOXPATH = ../MIToolbox/
+objects = mRMR_D.o CMIM.o JMI.o DISR.o CondMI.o ICAP.o BetaGamma.o
+
+libFSToolbox.so : $(objects)
+ $(COMPILER) $(CXXFLAGS) -lMIToolbox -L$(MITOOLBOXPATH) -shared -o libFSToolbox.so $(objects)
+
+mRMR_D.o: mRMR_D.c
+ $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c mRMR_D.c -I$(MITOOLBOXPATH)
+
+CMIM.o: CMIM.c
+ $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c CMIM.c -I$(MITOOLBOXPATH)
+
+JMI.o: JMI.c
+ $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c JMI.c -I$(MITOOLBOXPATH)
+
+DISR.o: DISR.c
+ $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c DISR.c -I$(MITOOLBOXPATH)
+
+CondMI.o: CondMI.c
+ $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c CondMI.c -I$(MITOOLBOXPATH)
+
+ICAP.o: ICAP.c
+ $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c ICAP.c -I$(MITOOLBOXPATH)
+
+BetaGamma.o: BetaGamma.c
+ $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c BetaGamma.c -I$(MITOOLBOXPATH)
+
+.PHONY : debug
+debug:
+ $(MAKE) libFSToolbox.so "CXXFLAGS = -g -DDEBUG -fPIC"
+
+.PHONY : x86
+x86:
+ $(MAKE) libFSToolbox.so "CXXFLAGS = -O3 -fPIC -m32"
+
+.PHONY : x64
+x64:
+ $(MAKE) libFSToolbox.so "CXXFLAGS = -O3 -fPIC -m64"
+
+.PHONY : matlab
+matlab:
+ mex -I$(MITOOLBOXPATH) FSToolboxMex.c BetaGamma.c CMIM.c CondMI.c DISR.c ICAP.c JMI.c mRMR_D.c $(MITOOLBOXPATH)MutualInformation.c $(MITOOLBOXPATH)Entropy.c $(MITOOLBOXPATH)CalculateProbability.c $(MITOOLBOXPATH)ArrayOperations.c
+
+.PHONY : matlab-debug
+matlab-debug:
+ mex -g -I$(MITOOLBOXPATH) FSToolboxMex.c BetaGamma.c CMIM.c CondMI.c DISR.c ICAP.c JMI.c mRMR_D.c $(MITOOLBOXPATH)MutualInformation.c $(MITOOLBOXPATH)Entropy.c $(MITOOLBOXPATH)CalculateProbability.c $(MITOOLBOXPATH)ArrayOperations.c
+
+.PHONY : intel
+intel:
+ $(MAKE) libFSToolbox.so "COMPILER = icc" "CXXFLAGS = -O2 -fPIC -xHost"
+
+.PHONY : clean
+clean:
+ rm *.o
+ rm libFSToolbox.so
+
diff --git a/FEAST/FSToolbox/README b/FEAST/FSToolbox/README
new file mode 100644
index 0000000..1aae2d7
--- /dev/null
+++ b/FEAST/FSToolbox/README
@@ -0,0 +1,80 @@
+FEAST v1.0
+A feature selection toolbox for C/C++ and MATLAB/OCTAVE
+
+FEAST provides implementations of common mutual information based filter
+feature selection algorithms, and an implementation of RELIEF. All
+functions expect discrete inputs (except RELIEF, which does not depend
+on the MIToolbox), and they return the selected feature indices. These
+implementations were developed to help our research into the similarities
+between these algorithms, and our results are presented in the following paper:
+
+ Conditional Likelihood Maximisation: A Unifying Framework for Mutual Information Feature Selection
+ G.Brown, A.Pocock, M.Lujan, M.-J.Zhao
+ Journal of Machine Learning Research (in press, to appear 2012)
+
+All FEAST code is licensed under the BSD 3-Clause License.
+If you use these implementations for academic research please cite the paper above.
+
+Contains implementations of:
+ mim, mrmr, mifs, cmim, jmi, disr, cife, icap, condred, cmi, relief, fcbf, betagamma
+
+References for these algorithms are provided in the accompanying feast.bib file (in BibTeX format).
+
+MATLAB Example (using "data" as our feature matrix, and "labels" as the class label vector):
+
+>> size(data)
+ans =
+ (569,30) %% denoting 569 examples, and 30 features
+
+>> selectedIndices = feast('jmi',5,data,labels) %% selecting the top 5 features using the jmi algorithm
+selectedIndices =
+
+ 28
+ 21
+ 8
+ 27
+ 23
+
+>> selectedIndices = feast('mrmr',10,data,labels) %% selecting the top 10 features using the mrmr algorithm
+selectedIndices =
+
+ 28
+ 24
+ 22
+ 8
+ 27
+ 21
+ 29
+ 4
+ 7
+ 25
+
+>> selectedIndices = feast('mifs',5,data,labels,0.7) %% selecting the top 5 features using the mifs algorithm with beta = 0.7
+selectedIndices =
+
+ 28
+ 24
+ 22
+ 20
+ 29
+
+The library is written in ANSI C for compatibility with the MATLAB mex compiler,
+except for MIM, FCBF and RELIEF, which are written in MATLAB/OCTAVE script.
+
+If you wish to use MIM in a C program you can use the BetaGamma function with
+Beta = 0, Gamma = 0, as this is equivalent to MIM (but slower than the other implementation).
+MIToolbox is required to compile these algorithms, and these implementations
+supersede the example implementations given in that package (they have more robust behaviour
+when used with unexpected inputs).
+
+MIToolbox can be found at:
+ http://www.cs.man.ac.uk/~gbrown/mitoolbox/
+and v1.03 is included in the ZIP for the FEAST package.
+
+Compilation instructions:
+ MATLAB/OCTAVE - run CompileFEAST.m,
+ Linux C shared library - use the included makefile
+
+Update History
+08/11/2011 - v1.0 - Public Release to complement the JMLR publication.
+
diff --git a/FEAST/FSToolbox/RELIEF.m b/FEAST/FSToolbox/RELIEF.m
new file mode 100644
index 0000000..194ce7b
--- /dev/null
+++ b/FEAST/FSToolbox/RELIEF.m
@@ -0,0 +1,61 @@
+% RELIEF - Kira & Rendell 1992
+% T is number of patterns to use
+% Defaults to all patterns if not specified.
+%
+% The license is in the license.txt provided.
+%
+% function w = RELIEF( data, labels, T )
+%
+function [w bestidx] = RELIEF ( data, labels, T )
+
+if ~exist('T','var')
+ T=size(data,1);
+end
+
+idx = randperm(length(labels));
+idx = idx(1:T);
+
+w = zeros(size(data,2),1);
+for t = 1:T
+
+ x = data(idx(t),:);
+ y = labels(idx(t));
+
+ %copy the x
+ protos = repmat(x, length(labels), 1);
+ %measure the distance from x to every other example
+ distances = [sqrt(sum((data-protos).^2,2)) labels];
+ %sort them according to distances (find nearest neighbours)
+ [distances originalidx] = sortrows(distances,1);
+
+ foundhit = false; hitidx=0;
+ foundmiss = false; missidx=0;
+ i=2; %start from the second one
+ while (~foundhit || ~foundmiss)
+
+ if distances(i,2) == y
+ hitidx = originalidx(i);
+ foundhit = true;
+ end
+ if distances(i,2) ~= y
+ missidx = originalidx(i);
+ foundmiss = true;
+ end
+
+ i=i+1;
+
+ end
+
+ alpha = 1/T;
+ for f = 1:size(data,2)%each feature
+ hitpenalty = (x(f)-data(hitidx,f)) / (max(data(:,f))-min(data(:,f)));
+ misspenalty = (x(f)-data(missidx,f)) / (max(data(:,f))-min(data(:,f)));
+
+ w(f) = w(f) - alpha*hitpenalty^2 + alpha*misspenalty^2;
+ end
+
+end
+
+[~,bestidx] = sort(w,'descend');
+
+
diff --git a/FEAST/FSToolbox/feast.bib b/FEAST/FSToolbox/feast.bib
new file mode 100644
index 0000000..2c58f0d
--- /dev/null
+++ b/FEAST/FSToolbox/feast.bib
@@ -0,0 +1,122 @@
+% MIM (Mutual Information Maximisation)
+@inproceedings{MIM,
+ author = {David D. Lewis},
+ title = {Feature Selection and Feature Extraction for Text Categorization},
+ booktitle = {In Proceedings of Speech and Natural Language Workshop},
+ year = {1992},
+ pages = {212--217},
+ publisher = {Morgan Kaufmann}
+}
+
+% MIFS (Mutual Information Feature Selection )
+@article{MIFS,
+ author={Battiti, R.},
+ journal={Neural Networks, IEEE Transactions on},
+ title={Using mutual information for selecting features in supervised neural net learning},
+ year={1994},
+ month={jul},
+ volume={5},
+ number={4},
+ pages={537 -550},
+ ISSN={1045-9227}
+}
+
+% mRMR (minimum Redundancy Maximum Relevance)
+@article{mRMR,
+ title={Feature selection based on mutual information: criteria of max-dependency, max-relevance, and min-redundancy},
+ author={Peng, H. and Long, F. and Ding, C.},
+ journal={IEEE Transactions on pattern analysis and machine intelligence},
+ pages={1226--1238},
+ year={2005},
+ publisher={Published by the IEEE Computer Society}
+}
+
+% CMIM (Conditional Mutual Information Maximisation)
+@article{CMIM,
+ author = {Fleuret, Fran\c{c}ois},
+ title = {Fast Binary Feature Selection with Conditional Mutual Information},
+ journal = {Journal of Machine Learning Research},
+ volume = {5},
+ month = {December},
+ year = {2004},
+ issn = {1532-4435},
+ pages = {1531--1555},
+ publisher = {JMLR.org}
+}
+
+% JMI (Joint Mutual Information)
+@inproceedings{JMI,
+ title={Feature selection based on joint mutual information},
+ author={Yang, H. and Moody, J.},
+ booktitle={Proceedings of International ICSC Symposium on Advances in Intelligent Data Analysis},
+ pages={22--25},
+ year={1999}
+}
+
+% DISR (Double Input Symmetrical Relevance)
+@incollection{DISR,
+ author = {Meyer, Patrick and Bontempi, Gianluca},
+ title = {On the Use of Variable Complementarity for Feature Selection in Cancer Classification},
+ booktitle = {Applications of Evolutionary Computing},
+ publisher = {Springer Berlin / Heidelberg},
+ pages = {91-102},
+ volume = {3907},
+ url = {http://dx.doi.org/10.1007/11732242_9},
+ year = {2006}
+}
+
+% ICAP (Interaction Capping)
+@article{ICAP,
+ title={Machine learning based on attribute interactions},
+ author={Jakulin, A.},
+ journal={Fakulteta za racunalni{\v{s}}tvo in informatiko, Univerza v Ljubljani},
+ year={2005}
+}
+
+% CIFE (Conditional Informative Feature Extraction)
+@incollection{CIFE,
+ author = {Lin, Dahua and Tang, Xiaoou},
+ title = {Conditional Infomax Learning: An Integrated Framework for Feature Extraction and Fusion},
+ booktitle = {Computer Vision – ECCV 2006},
+ series = {Lecture Notes in Computer Science},
+ publisher = {Springer Berlin / Heidelberg},
+ pages = {68-82},
+ volume = {3951},
+ url = {http://dx.doi.org/10.1007/11744023_6},
+ year = {2006}
+}
+
+% Beta Gamma Space
+@inproceedings{BetaGamma,
+ title={A new perspective for information theoretic feature selection},
+ author={Brown, G.},
+ booktitle={12th International Conference on Artificial Intelligence and Statistics},
+ volume={5},
+ pages={49--56},
+ year={2009}
+}
+
+% FCBF (Fast Correlation-Based Filter)
+@article{FCBF,
+ author = {Yu, Lei and Liu, Huan},
+ title = {Efficient Feature Selection via Analysis of Relevance and Redundancy},
+ journal = {Journal of Machine Learning Research},
+ volume = {5},
+ year = {2004},
+ issn = {1532-4435},
+ pages = {1205--1224},
+ publisher = {JMLR.org}
+}
+
+% RELIEF
+@inproceedings{RELIEF,
+ author = {Kira, Kenji and Rendell, Larry A.},
+ title = {The feature selection problem: traditional methods and a new algorithm},
+ booktitle = {Proceedings of the tenth national conference on Artificial intelligence},
+ series = {AAAI'92},
+ year = {1992},
+ pages = {129--134},
+ publisher = {AAAI Press}
+}
+
+
diff --git a/FEAST/FSToolbox/feast.m b/FEAST/FSToolbox/feast.m
new file mode 100644
index 0000000..96a685e
--- /dev/null
+++ b/FEAST/FSToolbox/feast.m
@@ -0,0 +1,100 @@
+function [selectedFeatures] = feast(criteria,numToSelect,data,labels,varargin)
+%function [selectedFeatures] = feast(criteria,numToSelect,data,labels,varargin)
+%
+%Provides access to the feature selection algorithms in FSToolboxMex
+%
+%Expects the features to be columns of the data matrix, and
+%requires that both the features and labels are integers.
+%
+%Algorithms are called as follows
+%
+%[selectedFeatures] = feast('algName',numToSelect,data,labels)
+% where algName is:
+% mim, mrmr, cmim, jmi, disr, cife, icap, condred, cmi, relief
+%
+%[selectedFeatures] = feast('algName',numToSelect,data,labels,beta)
+% where algName is:
+% mifs (defaults to beta = 1.0 if unspecified)
+%
+%[selectedFeatures] = feast('algName',numToSelect,data,labels,beta,gamma)
+% where algName is:
+% betagamma
+%[selectedFeatures] = feast('algName',numToSelect,data,labels,threshold)
+% where algName is:
+% fcbf (note this ignores the numToSelect)
+%
+% The license is in the license.txt provided.
+
+
+%Internal FSToolbox Criteria to number mapping
+%MIFS = 1
+%mRMR = 2
+%CMIM = 3
+%JMI = 4
+%DISR = 5
+%CIFE = 6
+%ICAP = 7
+%CondRed = 8
+%BetaGamma = 9
+%CMI = 10
+%
+
+if ((numToSelect < 1) || (numToSelect > size(data,2)))
+ error(['You have requested ' num2str(numToSelect) ' features, which is not possible']);
+end
+
+finiteDataCount = sum(sum(isfinite(data)));
+finiteLabelsCount = sum(sum(isfinite(labels)));
+
+totalData = numel(data);
+totalLabels = numel(labels);
+
+if ((finiteDataCount ~= totalData) || (finiteLabelsCount ~= totalLabels))
+ error(['Some elements are NaNs or infinite. Please check your data']);
+end
+
+if (strcmpi(criteria,'mim'))
+ selectedFeatures = MIM(numToSelect,data,labels);
+elseif (strcmpi(criteria,'mifs'))
+ if (nargin == 4)
+ beta = 1;
+ else
+ beta = varargin{1};
+ end
+ selectedFeatures = FSToolboxMex(1,numToSelect,data,labels,beta);
+elseif (strcmpi(criteria,'mrmr'))
+ selectedFeatures = FSToolboxMex(2,numToSelect,data,labels);
+elseif (strcmpi(criteria,'cmim'))
+ selectedFeatures = FSToolboxMex(3,numToSelect,data,labels);
+elseif (strcmpi(criteria,'jmi'))
+ selectedFeatures = FSToolboxMex(4,numToSelect,data,labels);
+elseif (strcmpi(criteria,'disr'))
+ selectedFeatures = FSToolboxMex(5,numToSelect,data,labels);
+elseif ((strcmpi(criteria,'cife')) || (strcmpi(criteria,'fou')))
+ selectedFeatures = FSToolboxMex(6,numToSelect,data,labels);
+elseif (strcmpi(criteria,'icap'))
+ selectedFeatures = FSToolboxMex(7,numToSelect,data,labels);
+elseif (strcmpi(criteria,'condred'))
+ selectedFeatures = FSToolboxMex(8,numToSelect,data,labels);
+elseif (strcmpi(criteria,'betagamma'))
+ if (nargin ~= 6)
+ error('BetaGamma criteria expects a beta and a gamma');
+ else
+ beta = varargin{1};
+ gamma = varargin{2};
+ end
+ selectedFeatures = FSToolboxMex(9,numToSelect,data,labels,beta,gamma);
+elseif (strcmpi(criteria,'cmi'))
+ selectedFeatures = FSToolboxMex(10,numToSelect,data,labels);
+elseif (strcmpi(criteria,'fcbf'))
+ if (nargin == 4)
+ error('Threshold for FCBF not supplied');
+ else
+ selectedFeatures = FCBF(data,labels,varargin{1});
+ end
+elseif (strcmpi(criteria,'relief'))
+ [tmp selectedFeatures] = RELIEF(data,labels);
+else
+ selectedFeatures = [];
+ disp(['Unrecognised criteria ' criteria]);
+end
diff --git a/FEAST/FSToolbox/license.txt b/FEAST/FSToolbox/license.txt
new file mode 100644
index 0000000..798960e
--- /dev/null
+++ b/FEAST/FSToolbox/license.txt
@@ -0,0 +1,32 @@
+This is FEAST (a FEAture Selection Toolbox for C and MATLAB)
+if you use this code in academic work could you please reference:
+"Conditional Likelihood Maximisation: A Unifying Framework for Mutual
+Information Feature Selection"
+G. Brown, A. Pocock, M.-J. Zhao, M. Lujan
+Journal of Machine Learning Research (JMLR), 2011
+
+Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ - Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ - Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ - Neither the name of The University of Manchester nor the names of its
+ contributors may be used to endorse or promote products derived from this
+ software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/FEAST/FSToolbox/mRMR_D.c b/FEAST/FSToolbox/mRMR_D.c
new file mode 100644
index 0000000..3eeb2a4
--- /dev/null
+++ b/FEAST/FSToolbox/mRMR_D.c
@@ -0,0 +1,170 @@
+/*******************************************************************************
+** mRMR_D.c implements the minimum Redundancy Maximum Relevance criterion
+** using the difference variant, from
+**
+** "Feature Selection Based on Mutual Information: Criteria of Max-Dependency, Max-Relevance, and Min-Redundancy"
+** H. Peng et al. IEEE PAMI (2005)
+**
+** Initial Version - 13/06/2008
+** Updated - 23/06/2011
+**
+** Author - Adam Pocock
+**
+** Part of the Feature Selection Toolbox, please reference
+** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual
+** Information Feature Selection"
+** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan
+** Journal of Machine Learning Research (JMLR), 2011
+**
+** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates.
+**
+** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester
+** All rights reserved.
+**
+** Redistribution and use in source and binary forms, with or without modification,
+** are permitted provided that the following conditions are met:
+**
+** - Redistributions of source code must retain the above copyright notice, this
+** list of conditions and the following disclaimer.
+** - Redistributions in binary form must reproduce the above copyright notice,
+** this list of conditions and the following disclaimer in the documentation
+** and/or other materials provided with the distribution.
+** - Neither the name of The University of Manchester nor the names of its
+** contributors may be used to endorse or promote products derived from this
+** software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**
+*******************************************************************************/
+
+#include "FSAlgorithms.h"
+#include "FSToolbox.h"
+
+/* MIToolbox includes */
+#include "MutualInformation.h"
+
+double* mRMR_D(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures)
+{
+ double **feature2D = (double**) CALLOC_FUNC(noOfFeatures,sizeof(double*));
+ /*holds the class MI values*/
+ double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double));
+ int *selectedFeatures = (int *)CALLOC_FUNC(noOfFeatures,sizeof(int));
+ /*holds the intra feature MI values*/
+ int sizeOfMatrix = k*noOfFeatures;
+ double *featureMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double));
+
+ double maxMI = 0.0;
+ int maxMICounter = -1;
+
+ /*init variables*/
+
+ double score, currentScore, totalFeatureMI;
+ int currentHighestFeature;
+
+ int arrayPosition, i, j, x;
+
+ for(j = 0; j < noOfFeatures; j++)
+ {
+ feature2D[j] = featureMatrix + (int)j*noOfSamples;
+ }
+
+ for (i = 0; i < sizeOfMatrix;i++)
+ {
+ featureMIMatrix[i] = -1;
+ }/*for featureMIMatrix - blank to -1*/
+
+
+ for (i = 0; i < noOfFeatures;i++)
+ {
+ classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples);
+ if (classMI[i] > maxMI)
+ {
+ maxMI = classMI[i];
+ maxMICounter = i;
+ }/*if bigger than current maximum*/
+ }/*for noOfFeatures - filling classMI*/
+
+ selectedFeatures[maxMICounter] = 1;
+ outputFeatures[0] = maxMICounter;
+
+ /*************
+ ** Now we have populated the classMI array, and selected the highest
+ ** MI feature as the first output feature
+ ** Now we move into the mRMR-D algorithm
+ *************/
+
+ for (i = 1; i < k; i++)
+ {
+ /****************************************************
+ ** to ensure it selects some features
+ **if this is zero then it will not pick features where the redundancy is greater than the
+ **relevance
+ ****************************************************/
+ score = -HUGE_VAL;
+ currentHighestFeature = 0;
+ currentScore = 0.0;
+ totalFeatureMI = 0.0;
+
+ for (j = 0; j < noOfFeatures; j++)
+ {
+ /*if we haven't selected j*/
+ if (selectedFeatures[j] == 0)
+ {
+ currentScore = classMI[j];
+ totalFeatureMI = 0.0;
+
+ for (x = 0; x < i; x++)
+ {
+ arrayPosition = x*noOfFeatures + j;
+ if (featureMIMatrix[arrayPosition] == -1)
+ {
+ /*work out intra MI*/
+
+ /*double calculateMutualInformation(double *firstVector, double *secondVector, int vectorLength);*/
+ featureMIMatrix[arrayPosition] = calculateMutualInformation(feature2D[(int) outputFeatures[x]], feature2D[j], noOfSamples);
+ }
+
+ totalFeatureMI += featureMIMatrix[arrayPosition];
+ }/*for the number of already selected features*/
+
+ currentScore -= (totalFeatureMI/i);
+ if (currentScore > score)
+ {
+ score = currentScore;
+ currentHighestFeature = j;
+ }
+ }/*if j is unselected*/
+ }/*for number of features*/
+
+ selectedFeatures[currentHighestFeature] = 1;
+ outputFeatures[i] = currentHighestFeature;
+
+ }/*for the number of features to select*/
+
+ for (i = 0; i < k; i++)
+ {
+ outputFeatures[i] += 1; /*C indexes from 0 not 1*/
+ }/*for number of selected features*/
+
+ FREE_FUNC(classMI);
+ FREE_FUNC(feature2D);
+ FREE_FUNC(featureMIMatrix);
+ FREE_FUNC(selectedFeatures);
+
+ classMI = NULL;
+ feature2D = NULL;
+ featureMIMatrix = NULL;
+ selectedFeatures = NULL;
+
+ return outputFeatures;
+}/*mRMR(int,int,int,double[][],double[],double[])*/
+