diff options
Diffstat (limited to 'FEAST/FSToolbox')
-rw-r--r-- | FEAST/FSToolbox/BetaGamma.c | 188 | ||||
-rw-r--r-- | FEAST/FSToolbox/CMIM.c | 142 | ||||
-rw-r--r-- | FEAST/FSToolbox/CompileFEAST.m | 4 | ||||
-rw-r--r-- | FEAST/FSToolbox/CondMI.c | 166 | ||||
-rw-r--r-- | FEAST/FSToolbox/DISR.c | 182 | ||||
-rw-r--r-- | FEAST/FSToolbox/FCBF.m | 58 | ||||
-rw-r--r-- | FEAST/FSToolbox/FSAlgorithms.h | 138 | ||||
-rw-r--r-- | FEAST/FSToolbox/FSToolbox.h | 70 | ||||
-rw-r--r-- | FEAST/FSToolbox/FSToolboxMex.c | 290 | ||||
-rwxr-xr-x | FEAST/FSToolbox/FSToolboxMex.mexa64 | bin | 0 -> 22290 bytes | |||
-rw-r--r-- | FEAST/FSToolbox/ICAP.c | 184 | ||||
-rw-r--r-- | FEAST/FSToolbox/JMI.c | 177 | ||||
-rw-r--r-- | FEAST/FSToolbox/MIM.m | 17 | ||||
-rw-r--r-- | FEAST/FSToolbox/Makefile | 96 | ||||
-rw-r--r-- | FEAST/FSToolbox/README | 80 | ||||
-rw-r--r-- | FEAST/FSToolbox/RELIEF.m | 61 | ||||
-rw-r--r-- | FEAST/FSToolbox/feast.bib | 122 | ||||
-rw-r--r-- | FEAST/FSToolbox/feast.m | 100 | ||||
-rw-r--r-- | FEAST/FSToolbox/license.txt | 32 | ||||
-rw-r--r-- | FEAST/FSToolbox/mRMR_D.c | 170 |
20 files changed, 2277 insertions, 0 deletions
diff --git a/FEAST/FSToolbox/BetaGamma.c b/FEAST/FSToolbox/BetaGamma.c new file mode 100644 index 0000000..925ef8b --- /dev/null +++ b/FEAST/FSToolbox/BetaGamma.c @@ -0,0 +1,188 @@ +/******************************************************************************* +** betaGamma() implements the Beta-Gamma space from Brown (2009). +** This incoporates MIFS, CIFE, and CondRed. +** +** MIFS - "Using mutual information for selecting features in supervised neural net learning" +** R. Battiti, IEEE Transactions on Neural Networks, 1994 +** +** CIFE - "Conditional Infomax Learning: An Integrated Framework for Feature Extraction and Fusion" +** D. Lin and X. Tang, European Conference on Computer Vision (2006) +** +** The Beta Gamma space is explained in Brown (2009) and Brown et al. (2011) +** +** Initial Version - 13/06/2008 +** Updated - 23/06/2011 +** +** Author - Adam Pocock +** +** Part of the Feature Selection Toolbox, please reference +** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual +** Information Feature Selection" +** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan +** Journal of Machine Learning Research (JMLR), 2011 +** +** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates. +** +** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without modification, +** are permitted provided that the following conditions are met: +** +** - Redistributions of source code must retain the above copyright notice, this +** list of conditions and the following disclaimer. +** - Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. 
+** - Neither the name of The University of Manchester nor the names of its +** contributors may be used to endorse or promote products derived from this +** software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+** +*******************************************************************************/ + +#include "FSAlgorithms.h" +#include "FSToolbox.h" + +/* MIToolbox includes */ +#include "MutualInformation.h" + +double* BetaGamma(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures, double betaParam, double gammaParam) +{ + double **feature2D = (double **) CALLOC_FUNC(noOfFeatures,sizeof(double *)); + + /*holds the class MI values*/ + double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double)); + char *selectedFeatures = (char *)CALLOC_FUNC(noOfFeatures,sizeof(char)); + + /*holds the intra feature MI values*/ + int sizeOfMatrix = k*noOfFeatures; + double *featureMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double)); + + double maxMI = 0.0; + int maxMICounter = -1; + + double score, currentScore, totalFeatureMI; + int currentHighestFeature, arrayPosition; + + int i,j,m; + + /*********************************************************** + ** because the array is passed as + ** s a m p l e s + ** f + ** e + ** a + ** t + ** u + ** r + ** e + ** s + ** + ** this pulls out a pointer to the first sample of + ** each feature and stores it as a multidimensional array + ** so it can be indexed nicely + ***********************************************************/ + for(j = 0; j < noOfFeatures; j++) + { + feature2D[j] = featureMatrix + (int)j*noOfSamples; + } + + for (i = 0; i < sizeOfMatrix; i++) + { + featureMIMatrix[i] = -1; + }/*for featureMIMatrix - blank to -1*/ + + /*********************************************************** + ** SETUP COMPLETE + ** Algorithm starts here + ***********************************************************/ + + for (i = 0; i < noOfFeatures; i++) + { + classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples); + + if (classMI[i] > maxMI) + { + maxMI = classMI[i]; + maxMICounter = i; + }/*if bigger than current maximum*/ + }/*for noOfFeatures - filling classMI*/ + 
+ selectedFeatures[maxMICounter] = 1; + outputFeatures[0] = maxMICounter; + + /************* + ** Now we have populated the classMI array, and selected the highest + ** MI feature as the first output feature + ** Now we move into the JMI algorithm + *************/ + + for (i = 1; i < k; i++) + { + /************************************************************ + ** to ensure it selects some features + ** if this is zero then it will not pick features where the + ** redundancy is greater than the relevance + ************************************************************/ + score = -HUGE_VAL; + currentHighestFeature = 0; + currentScore = 0.0; + totalFeatureMI = 0.0; + + for (j = 0; j < noOfFeatures; j++) + { + /*if we haven't selected j*/ + if (!selectedFeatures[j]) + { + currentScore = classMI[j]; + totalFeatureMI = 0.0; + + for (m = 0; m < i; m++) + { + arrayPosition = m*noOfFeatures + j; + if (featureMIMatrix[arrayPosition] == -1) + { + /*double calculateMutualInformation(double *firstVector, double *secondVector, int vectorLength);*/ + featureMIMatrix[arrayPosition] = betaParam*calculateMutualInformation(feature2D[(int) outputFeatures[m]], feature2D[j], noOfSamples); + + /*double calculateConditionalMutualInformation(double *firstVector, double *targetVector, double* conditionVector, int vectorLength);*/ + featureMIMatrix[arrayPosition] -= gammaParam*calculateConditionalMutualInformation(feature2D[(int) outputFeatures[m]], feature2D[j], classColumn, noOfSamples); + }/*if not already known*/ + + totalFeatureMI += featureMIMatrix[arrayPosition]; + }/*for the number of already selected features*/ + + currentScore -= (totalFeatureMI); + + if (currentScore > score) + { + score = currentScore; + currentHighestFeature = j; + } + }/*if j is unselected*/ + }/*for number of features*/ + + selectedFeatures[currentHighestFeature] = 1; + outputFeatures[i] = currentHighestFeature; + + }/*for the number of features to select*/ + + for (i = 0; i < k; i++) + { + outputFeatures[i] += 
1; /*C++ indexes from 0 not 1*/ + }/*for number of selected features*/ + + return outputFeatures; +}/*BetaGamma(int,int,int,double[][],double[],double[],double,double)*/ + diff --git a/FEAST/FSToolbox/CMIM.c b/FEAST/FSToolbox/CMIM.c new file mode 100644 index 0000000..9ef21ad --- /dev/null +++ b/FEAST/FSToolbox/CMIM.c @@ -0,0 +1,142 @@ +/******************************************************************************* +** CMIM.c, implements a discrete version of the +** Conditional Mutual Information Maximisation criterion, using the fast +** exact implementation from +** +** "Fast Binary Feature Selection using Conditional Mutual Information Maximisation" +** F. Fleuret, JMLR (2004) +** +** Initial Version - 13/06/2008 +** Updated - 23/06/2011 +** +** Author - Adam Pocock +** +** Part of the Feature Selection Toolbox, please reference +** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual +** Information Feature Selection" +** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan +** Journal of Machine Learning Research (JMLR), 2011 +** +** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates. +** +** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without modification, +** are permitted provided that the following conditions are met: +** +** - Redistributions of source code must retain the above copyright notice, this +** list of conditions and the following disclaimer. +** - Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** - Neither the name of The University of Manchester nor the names of its +** contributors may be used to endorse or promote products derived from this +** software without specific prior written permission. 
+** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +** +*******************************************************************************/ + +#include "FSAlgorithms.h" +#include "FSToolbox.h" + +/* MIToolbox includes */ +#include "MutualInformation.h" + +double* CMIM(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures) +{ + /*holds the class MI values + **the class MI doubles as the partial score from the CMIM paper + */ + double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double)); + /*in the CMIM paper, m = lastUsedFeature*/ + int *lastUsedFeature = (int *)CALLOC_FUNC(noOfFeatures,sizeof(int)); + + double score, conditionalInfo; + int iMinus, currentFeature; + + double maxMI = 0.0; + int maxMICounter = -1; + + int j,i; + + double **feature2D = (double**) CALLOC_FUNC(noOfFeatures,sizeof(double*)); + + for(j = 0; j < noOfFeatures; j++) + { + feature2D[j] = featureMatrix + (int)j*noOfSamples; + } + + for (i = 0; i < noOfFeatures;i++) + { + classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples); + + if (classMI[i] > maxMI) + { + maxMI = classMI[i]; + maxMICounter = i; + }/*if bigger than current maximum*/ + }/*for noOfFeatures - filling 
classMI*/ + + outputFeatures[0] = maxMICounter; + + /***************************************************************************** + ** We have populated the classMI array, and selected the highest + ** MI feature as the first output feature + ** Now we move into the CMIM algorithm + *****************************************************************************/ + + for (i = 1; i < k; i++) + { + score = 0.0; + iMinus = i-1; + + for (j = 0; j < noOfFeatures; j++) + { + while ((classMI[j] > score) && (lastUsedFeature[j] < i)) + { + /*double calculateConditionalMutualInformation(double *firstVector, double *targetVector, double *conditionVector, int vectorLength);*/ + currentFeature = (int) outputFeatures[lastUsedFeature[j]]; + conditionalInfo = calculateConditionalMutualInformation(feature2D[j],classColumn,feature2D[currentFeature],noOfSamples); + if (classMI[j] > conditionalInfo) + { + classMI[j] = conditionalInfo; + }/*reset classMI*/ + /*moved due to C indexing from 0 rather than 1*/ + lastUsedFeature[j] += 1; + }/*while partial score greater than score & not reached last feature*/ + if (classMI[j] > score) + { + score = classMI[j]; + outputFeatures[i] = j; + }/*if partial score still greater than score*/ + }/*for number of features*/ + }/*for the number of features to select*/ + + + for (i = 0; i < k; i++) + { + outputFeatures[i] += 1; /*C indexes from 0 not 1*/ + }/*for number of selected features*/ + + FREE_FUNC(classMI); + FREE_FUNC(lastUsedFeature); + FREE_FUNC(feature2D); + + classMI = NULL; + lastUsedFeature = NULL; + feature2D = NULL; + + return outputFeatures; +}/*CMIM(int,int,int,double[][],double[],double[])*/ + diff --git a/FEAST/FSToolbox/CompileFEAST.m b/FEAST/FSToolbox/CompileFEAST.m new file mode 100644 index 0000000..f5dad48 --- /dev/null +++ b/FEAST/FSToolbox/CompileFEAST.m @@ -0,0 +1,4 @@ +%Compiles the FEAST Toolbox into a mex executable for use with MATLAB + +mex -I../MIToolbox FSToolboxMex.c BetaGamma.c CMIM.c CondMI.c DISR.c ICAP.c JMI.c 
mRMR_D.c ../MIToolbox/MutualInformation.c ../MIToolbox/Entropy.c ../MIToolbox/CalculateProbability.c ../MIToolbox/ArrayOperations.c + diff --git a/FEAST/FSToolbox/CondMI.c b/FEAST/FSToolbox/CondMI.c new file mode 100644 index 0000000..ea6f8ee --- /dev/null +++ b/FEAST/FSToolbox/CondMI.c @@ -0,0 +1,166 @@ +/******************************************************************************* +** CondMI.c, implements the CMI criterion using a greedy forward search +** +** Initial Version - 19/08/2010 +** Updated - 23/06/2011 +** +** Author - Adam Pocock +** +** Part of the Feature Selection Toolbox, please reference +** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual +** Information Feature Selection" +** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan +** Journal of Machine Learning Research (JMLR), 2011 +** +** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates. +** +** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without modification, +** are permitted provided that the following conditions are met: +** +** - Redistributions of source code must retain the above copyright notice, this +** list of conditions and the following disclaimer. +** - Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** - Neither the name of The University of Manchester nor the names of its +** contributors may be used to endorse or promote products derived from this +** software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +** +*******************************************************************************/ + +#include "FSAlgorithms.h" +#include "FSToolbox.h" + +/* for memcpy */ +#include <string.h> + +/* MIToolbox includes */ +#include "MutualInformation.h" +#include "ArrayOperations.h" + +double* CondMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures) +{ + /*holds the class MI values*/ + double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double)); + + char *selectedFeatures = (char *)CALLOC_FUNC(noOfFeatures,sizeof(char)); + + /*holds the intra feature MI values*/ + int sizeOfMatrix = k*noOfFeatures; + double *featureMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double)); + + double maxMI = 0.0; + int maxMICounter = -1; + + double **feature2D = (double**) CALLOC_FUNC(noOfFeatures,sizeof(double*)); + + double score, currentScore, totalFeatureMI; + int currentHighestFeature; + + double *conditionVector = (double *) CALLOC_FUNC(noOfSamples,sizeof(double)); + + int arrayPosition; + double mi, tripEntropy; + + int i,j,x; + + for(j = 0; j < noOfFeatures; j++) + { + feature2D[j] = featureMatrix + (int)j*noOfSamples; + } + + for (i = 0; i < sizeOfMatrix;i++) + { + featureMIMatrix[i] = -1; + }/*for featureMIMatrix - blank to -1*/ + + for (i = 0; i < noOfFeatures;i++) + { + /*calculate mutual info + **double calculateMutualInformation(double *firstVector, double *secondVector, int 
vectorLength); + */ + classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples); + + if (classMI[i] > maxMI) + { + maxMI = classMI[i]; + maxMICounter = i; + }/*if bigger than current maximum*/ + }/*for noOfFeatures - filling classMI*/ + + selectedFeatures[maxMICounter] = 1; + outputFeatures[0] = maxMICounter; + + memcpy(conditionVector,feature2D[maxMICounter],sizeof(double)*noOfSamples); + + /***************************************************************************** + ** We have populated the classMI array, and selected the highest + ** MI feature as the first output feature + ** Now we move into the CondMI algorithm + *****************************************************************************/ + + for (i = 1; i < k; i++) + { + score = 0.0; + currentHighestFeature = -1; + currentScore = 0.0; + totalFeatureMI = 0.0; + + for (j = 0; j < noOfFeatures; j++) + { + /*if we haven't selected j*/ + if (selectedFeatures[j] == 0) + { + currentScore = 0.0; + totalFeatureMI = 0.0; + + /*double calculateConditionalMutualInformation(double *firstVector, double *targetVector, double *conditionVector, int vectorLength);*/ + currentScore = calculateConditionalMutualInformation(feature2D[j],classColumn,conditionVector,noOfSamples); + + if (currentScore > score) + { + score = currentScore; + currentHighestFeature = j; + } + }/*if j is unselected*/ + }/*for number of features*/ + + outputFeatures[i] = currentHighestFeature; + + if (currentHighestFeature != -1) + { + selectedFeatures[currentHighestFeature] = 1; + mergeArrays(feature2D[currentHighestFeature],conditionVector,conditionVector,noOfSamples); + } + + }/*for the number of features to select*/ + + FREE_FUNC(classMI); + FREE_FUNC(conditionVector); + FREE_FUNC(feature2D); + FREE_FUNC(featureMIMatrix); + FREE_FUNC(selectedFeatures); + + classMI = NULL; + conditionVector = NULL; + feature2D = NULL; + featureMIMatrix = NULL; + selectedFeatures = NULL; + + return outputFeatures; 
+}/*CondMI(int,int,int,double[][],double[],double[])*/ + diff --git a/FEAST/FSToolbox/DISR.c b/FEAST/FSToolbox/DISR.c new file mode 100644 index 0000000..3ec7676 --- /dev/null +++ b/FEAST/FSToolbox/DISR.c @@ -0,0 +1,182 @@ +/******************************************************************************* +** DISR.c, implements the Double Input Symmetrical Relevance criterion +** from +** +** "On the Use of Variable Complementarity for Feature Selection in Cancer Classification" +** P. Meyer and G. Bontempi, (2006) +** +** Initial Version - 13/06/2008 +** Updated - 23/06/2011 +** +** Author - Adam Pocock +** +** Part of the Feature Selection Toolbox, please reference +** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual +** Information Feature Selection" +** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan +** Journal of Machine Learning Research (JMLR), 2011 +** +** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates. +** +** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without modification, +** are permitted provided that the following conditions are met: +** +** - Redistributions of source code must retain the above copyright notice, this +** list of conditions and the following disclaimer. +** - Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** - Neither the name of The University of Manchester nor the names of its +** contributors may be used to endorse or promote products derived from this +** software without specific prior written permission. 
+** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +** +*******************************************************************************/ +#include "FSAlgorithms.h" +#include "FSToolbox.h" + +/* MIToolbox includes */ +#include "MutualInformation.h" +#include "Entropy.h" +#include "ArrayOperations.h" + +double* DISR(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures) +{ + /*holds the class MI values*/ + double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double)); + + char *selectedFeatures = (char *)CALLOC_FUNC(noOfFeatures,sizeof(char)); + + /*holds the intra feature MI values*/ + int sizeOfMatrix = k*noOfFeatures; + double *featureMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double)); + + double maxMI = 0.0; + int maxMICounter = -1; + + double **feature2D = (double**) CALLOC_FUNC(noOfFeatures,sizeof(double*)); + + double score, currentScore, totalFeatureMI; + int currentHighestFeature; + + double *mergedVector = (double *) CALLOC_FUNC(noOfSamples,sizeof(double)); + + int arrayPosition; + double mi, tripEntropy; + + int i,j,x; + + for(j = 0; j < noOfFeatures; j++) + { + feature2D[j] = featureMatrix + (int)j*noOfSamples; + } + + for (i = 0; i < 
sizeOfMatrix;i++) + { + featureMIMatrix[i] = -1; + }/*for featureMIMatrix - blank to -1*/ + + + for (i = 0; i < noOfFeatures;i++) + { + /*calculate mutual info + **double calculateMutualInformation(double *firstVector, double *secondVector, int vectorLength); + */ + classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples); + + if (classMI[i] > maxMI) + { + maxMI = classMI[i]; + maxMICounter = i; + }/*if bigger than current maximum*/ + }/*for noOfFeatures - filling classMI*/ + + selectedFeatures[maxMICounter] = 1; + outputFeatures[0] = maxMICounter; + + /***************************************************************************** + ** We have populated the classMI array, and selected the highest + ** MI feature as the first output feature + ** Now we move into the DISR algorithm + *****************************************************************************/ + + for (i = 1; i < k; i++) + { + score = 0.0; + currentHighestFeature = 0; + currentScore = 0.0; + totalFeatureMI = 0.0; + + for (j = 0; j < noOfFeatures; j++) + { + /*if we haven't selected j*/ + if (selectedFeatures[j] == 0) + { + currentScore = 0.0; + totalFeatureMI = 0.0; + + for (x = 0; x < i; x++) + { + arrayPosition = x*noOfFeatures + j; + if (featureMIMatrix[arrayPosition] == -1) + { + /* + **double calculateMutualInformation(double *firstVector, double *secondVector, int vectorLength); + **double calculateJointEntropy(double *firstVector, double *secondVector, int vectorLength); + */ + + mergeArrays(feature2D[(int) outputFeatures[x]], feature2D[j],mergedVector,noOfSamples); + mi = calculateMutualInformation(mergedVector, classColumn, noOfSamples); + tripEntropy = calculateJointEntropy(mergedVector, classColumn, noOfSamples); + + featureMIMatrix[arrayPosition] = mi / tripEntropy; + }/*if not already known*/ + currentScore += featureMIMatrix[arrayPosition]; + }/*for the number of already selected features*/ + + if (currentScore > score) + { + score = currentScore; + 
currentHighestFeature = j; + } + }/*if j is unselected*/ + }/*for number of features*/ + + selectedFeatures[currentHighestFeature] = 1; + outputFeatures[i] = currentHighestFeature; + + }/*for the number of features to select*/ + + for (i = 0; i < k; i++) + { + outputFeatures[i] += 1; /*C indexes from 0 not 1*/ + }/*for number of selected features*/ + + FREE_FUNC(classMI); + FREE_FUNC(mergedVector); + FREE_FUNC(feature2D); + FREE_FUNC(featureMIMatrix); + FREE_FUNC(selectedFeatures); + + classMI = NULL; + mergedVector = NULL; + feature2D = NULL; + featureMIMatrix = NULL; + selectedFeatures = NULL; + + return outputFeatures; +}/*DISR(int,int,int,double[][],double[],double[])*/ + diff --git a/FEAST/FSToolbox/FCBF.m b/FEAST/FSToolbox/FCBF.m new file mode 100644 index 0000000..dcaf3bf --- /dev/null +++ b/FEAST/FSToolbox/FCBF.m @@ -0,0 +1,58 @@ +function [selectedFeatures] = FCBF(featureMatrix,classColumn,threshold) +%function [selectedFeatures] = FCBF(featureMatrix,classColumn,threshold) +% +%Performs feature selection using the FCBF measure by Yu and Liu 2004. +% +%Instead of selecting a fixed number of features it provides a relevancy threshold and selects all +%features which score above that and are not redundant +% +% The license is in the license.txt provided. 
+ +numFeatures = size(featureMatrix,2); +classScore = zeros(numFeatures,1); + +for i = 1:numFeatures + classScore(i) = SU(featureMatrix(:,i),classColumn); +end + +[classScore indexScore] = sort(classScore,1,'descend'); + +indexScore = indexScore(classScore > threshold); +classScore = classScore(classScore > threshold); + +if ~isempty(indexScore) + curPosition = 1; +else + curPosition = 0; +end + +while curPosition <= length(indexScore) + j = curPosition + 1; + curFeature = indexScore(curPosition); + while j <= length(indexScore) + scoreij = SU(featureMatrix(:,curFeature),featureMatrix(:,indexScore(j))); + if scoreij > classScore(j) + indexScore(j) = []; + classScore(j) = []; + else + j = j + 1; + end + end + curPosition = curPosition + 1; +end + +selectedFeatures = indexScore; + +end + +function [score] = SU(firstVector,secondVector) +%function [score] = SU(firstVector,secondVector) +% +%calculates SU = 2 * (I(X;Y)/(H(X) + H(Y))) + +hX = h(firstVector); +hY = h(secondVector); +iXY = mi(firstVector,secondVector); + +score = (2 * iXY) / (hX + hY); +end diff --git a/FEAST/FSToolbox/FSAlgorithms.h b/FEAST/FSToolbox/FSAlgorithms.h new file mode 100644 index 0000000..e6ddba8 --- /dev/null +++ b/FEAST/FSToolbox/FSAlgorithms.h @@ -0,0 +1,138 @@ +/******************************************************************************* +** +** FSAlgorithms.h +** Provides the function definitions for the list of algorithms implemented +** in the FSToolbox. +** +** Author: Adam Pocock +** Created: 27/06/2011 +** +** Copyright 2010/2011 Adam Pocock, The University Of Manchester +** www.cs.manchester.ac.uk +** +** Part of the FEAture Selection Toolbox (FEAST), please reference +** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual +** Information Feature Selection" +** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan +** Journal of Machine Learning Research (JMLR), 2011 +** +** +** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates. 
+** +** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without modification, +** are permitted provided that the following conditions are met: +** +** - Redistributions of source code must retain the above copyright notice, this +** list of conditions and the following disclaimer. +** - Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** - Neither the name of The University of Manchester nor the names of its +** contributors may be used to endorse or promote products derived from this +** software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +** +*******************************************************************************/ + +/******************************************************************************* + * All algorithms take an integer k which determines how many features to + * select, the number of samples and the number of features. 
Additionally each + * algorithm takes pointers to the data matrix, and the label vector, and + * a pointer to the output vector. The output vector should be pre-allocated + * with sizeof(double)*k bytes. + * + * Some algorithms take additional parameters, which given at the end of the + * standard parameter list. + * + * Each algorithm uses a forward search, and selects the feature which has + * the maxmimum MI with the labels first. + * + * All the algorithms except CMIM use an optimised variant which caches the + * previously calculated MI values. This trades space for time, but can + * allocate large amounts of memory. CMIM uses the optimised implementation + * given in Fleuret (2004). + *****************************************************************************/ + +#ifndef __FSAlgorithms_H +#define __FSAlgorithms_H + +/******************************************************************************* +** mRMR_D() implements the minimum Relevance Maximum Redundancy criterion +** using the difference variant, from +** +** "Feature Selection Based on Mutual Information: Criteria of Max-Dependency, Max-Relevance, and Min-Redundancy" +** H. Peng et al. IEEE Pattern Analysis and Machine Intelligence (PAMI) (2005) +*******************************************************************************/ +double* mRMR_D(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures); + +/******************************************************************************* +** CMIM() implements a discrete version of the +** Conditional Mutual Information Maximisation criterion, using the fast +** exact implementation from +** +** "Fast Binary Feature Selection using Conditional Mutual Information Maximisation" +** F. 
Fleuret, JMLR (2004) +*******************************************************************************/ +double* CMIM(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures); + +/******************************************************************************* +** JMI() implements the JMI criterion from +** +** "Data Visualization and Feature Selection: New Algorithms for Nongaussian Data" +** H. Yang and J. Moody, NIPS (1999) +*******************************************************************************/ +double* JMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures); + +/******************************************************************************* +** DISR() implements the Double Input Symmetrical Relevance criterion +** from +** +** "On the Use of Variable Complementarity for Feature Selection in Cancer Classification" +** P. Meyer and G. Bontempi, (2006) +*******************************************************************************/ +double* DISR(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures); + +/******************************************************************************* +** ICAP() implements the Interaction Capping criterion from +** +** "Machine Learning Based on Attribute Interactions" +** A. 
Jakulin, PhD Thesis (2005)
+*******************************************************************************/
+double* ICAP(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures);
+
+/*******************************************************************************
+** CondMI() implements the CMI criterion using a greedy forward search
+*******************************************************************************/
+double* CondMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures);
+
+/*******************************************************************************
+** betaGamma() implements the Beta-Gamma space from Brown (2009).
+** This incorporates MIFS, CIFE, and CondRed.
+**
+** MIFS - "Using mutual information for selecting features in supervised neural net learning"
+** R. Battiti, IEEE Transactions on Neural Networks, 1994
+**
+** CIFE - "Conditional Infomax Learning: An Integrated Framework for Feature Extraction and Fusion"
+** D. Lin and X. Tang, European Conference on Computer Vision (2006)
+**
+** The Beta Gamma space is explained in our paper
+** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual Information Feature Selection"
+** G. Brown, A. Pocock, M.-J. Zhao, M.
Lujan +** Journal of Machine Learning Research (JMLR), 2011 +*******************************************************************************/ +double* BetaGamma(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures, double beta, double gamma); + +#endif diff --git a/FEAST/FSToolbox/FSToolbox.h b/FEAST/FSToolbox/FSToolbox.h new file mode 100644 index 0000000..bf8662b --- /dev/null +++ b/FEAST/FSToolbox/FSToolbox.h @@ -0,0 +1,70 @@ +/******************************************************************************* ** +** FSToolbox.h +** Provides the header files and #defines to ensure compatibility with MATLAB +** and C/C++. By default it compiles to MATLAB, if COMPILE_C is defined it +** links to the C memory allocation functions. +** +** Author: Adam Pocock +** Created: 27/06/2011 +** +** Copyright 2010/2011 Adam Pocock, The University Of Manchester +** www.cs.manchester.ac.uk +** +** Part of the FEAture Selection Toolbox (FEAST), please reference +** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual +** Information Feature Selection" +** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan +** Journal of Machine Learning Research (JMLR), 2011 +** +** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates. +** +** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without modification, +** are permitted provided that the following conditions are met: +** +** - Redistributions of source code must retain the above copyright notice, this +** list of conditions and the following disclaimer. +** - Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. 
+** - Neither the name of The University of Manchester nor the names of its +** contributors may be used to endorse or promote products derived from this +** software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +** +*******************************************************************************/ + +#ifndef __FSToolbox_H +#define __FSToolbox_H + +#include <math.h> +#include <string.h> + +#ifdef COMPILE_C + #define C_IMPLEMENTATION + #include <stdio.h> + #include <stdlib.h> + #define CALLOC_FUNC calloc + #define FREE_FUNC free +#else + #define MEX_IMPLEMENTATION + #include "mex.h" + #define CALLOC_FUNC mxCalloc + #define FREE_FUNC mxFree + #define printf mexPrintf /*for Octave-3.2*/ +#endif + +#endif + diff --git a/FEAST/FSToolbox/FSToolboxMex.c b/FEAST/FSToolbox/FSToolboxMex.c new file mode 100644 index 0000000..73a9197 --- /dev/null +++ b/FEAST/FSToolbox/FSToolboxMex.c @@ -0,0 +1,290 @@ +/******************************************************************************* +** FSToolboxMex.c is the entry point for the feature selection toolbox. +** It provides a MATLAB interface to the various selection algorithms. 
+** +** Initial Version - 27/06/2011 +** +** Author - Adam Pocock +** +** Part of the Feature Selection Toolbox, please reference +** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual +** Information Feature Selection" +** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan +** Journal of Machine Learning Research (JMLR), 2011 +** +** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates. +** +** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without modification, +** are permitted provided that the following conditions are met: +** +** - Redistributions of source code must retain the above copyright notice, this +** list of conditions and the following disclaimer. +** - Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** - Neither the name of The University of Manchester nor the names of its +** contributors may be used to endorse or promote products derived from this +** software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +** +*******************************************************************************/ + +#include "FSToolbox.h" +#include "FSAlgorithms.h" +#include "Entropy.h" + +/****************************************************************************** +** entry point for the mex call +** nlhs - number of outputs +** plhs - pointer to array of outputs +** nrhs - number of inputs +** prhs - pointer to array of inputs +******************************************************************************/ +void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) +{ + /************************************************************* + ** this function takes 4-6 arguments: + ** flag = which algorithm to use, + ** k = number of features to select, + ** featureMatrix[][] = matrix of features, + ** classColumn[] = targets, + ** optionalParam1 = (path angle or beta value), + ** optionalParam2 = (gamma value), + ** the arguments should all be discrete integers. 
+ ** and has one output: + ** selectedFeatures[] of size k + *************************************************************/ + + int flag, k; + double optionalParam1, optionalParam2; + int numberOfFeatures, numberOfSamples, numberOfTargets; + double *featureMatrix, *targets, *output, *outputFeatures; + + double entropyTest; + int i,j; + + /************************************************************ + ** number to function map + ** 1 = MIFS + ** 2 = mRMR + ** 3 = CMIM + ** 4 = JMI + ** 5 = DISR + ** 6 = CIFE + ** 7 = ICAP + ** 8 = CondRed + ** 9 = BetaGamma + ** 10 = CMI + *************************************************************/ + if (nlhs > 1) + { + printf("Incorrect number of output arguments"); + }/*if not 1 output*/ + if ((nrhs < 4) || (nrhs > 6)) + { + printf("Incorrect number of input arguments"); + return; + }/*if not 4-6 inputs*/ + + /*get the flag for which algorithm*/ + flag = (int) mxGetScalar(prhs[0]); + + /*get the number of features to select, cast out as it is a double*/ + k = (int) mxGetScalar(prhs[1]); + + numberOfFeatures = mxGetN(prhs[2]); + numberOfSamples = mxGetM(prhs[2]); + + numberOfTargets = mxGetM(prhs[3]); + + if (nrhs == 6) + { + optionalParam1 = (double) mxGetScalar(prhs[4]); + optionalParam2 = (double) mxGetScalar(prhs[5]); + } + else if (nrhs == 5) + { + optionalParam1 = (double) mxGetScalar(prhs[4]); + optionalParam2 = 0.0; + } + + if (numberOfTargets != numberOfSamples) + { + printf("Number of targets must match number of samples\n"); + printf("Number of targets = %d, Number of Samples = %d, Number of Features = %d\n",numberOfTargets,numberOfSamples,numberOfFeatures); + + plhs[0] = mxCreateDoubleMatrix(0,0,mxREAL); + return; + }/*if size mismatch*/ + else if ((k < 1) || (k > numberOfFeatures)) + { + printf("You have requested k = %d features, which is not possible\n",k); + plhs[0] = mxCreateDoubleMatrix(0,0,mxREAL); + return; + } + else + { + featureMatrix = mxGetPr(prhs[2]); + targets = mxGetPr(prhs[3]); + + /*double 
calculateEntropy(double *dataVector, int vectorLength)*/ + entropyTest = calculateEntropy(targets,numberOfSamples); + if (entropyTest < 0.0000001) + { + printf("The class label Y has entropy of 0, therefore all mutual informations containing Y will be 0. No feature selection is performed\n"); + plhs[0] = mxCreateDoubleMatrix(0,0,mxREAL); + return; + } + else + { + /*printf("Flag = %d, k = %d, numFeatures = %d, numSamples = %d\n",flag,k,numberOfFeatures,numberOfSamples);*/ + switch (flag) + { + case 1: /* MIFS */ + { + plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL); + output = (double *)mxGetPr(plhs[0]); + if (nrhs == 4) + { + /* MIFS is Beta = 1, Gamma = 0 */ + optionalParam1 = 1.0; + optionalParam2 = 0.0; + } + + /*void BetaGamma(int k, long noOfSamples, long noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures, double beta, double gamma)*/ + BetaGamma(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output,optionalParam1,optionalParam2); + break; + } + case 2: /* mRMR */ + { + plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL); + output = (double *)mxGetPr(plhs[0]); + + /*void mRMR_D(int k, int noOfSamples, int noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures)*/ + mRMR_D(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output); + break; + } + case 3: /* CMIM */ + { + plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL); + output = (double *)mxGetPr(plhs[0]); + + /*void CMIM(int k, int noOfSamples, int noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures)*/ + CMIM(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output); + break; + } + case 4: /* JMI */ + { + plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL); + output = (double *)mxGetPr(plhs[0]); + + /*void JMI(int k, int noOfSamples, int noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures)*/ + JMI(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output); + break; + } + case 5: /* DISR */ + { + 
plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL); + output = (double *)mxGetPr(plhs[0]); + + /*void DISR(int k, int noOfSamples, int noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures)*/ + DISR(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output); + break; + } + case 6: /* CIFE */ + { + plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL); + output = (double *)mxGetPr(plhs[0]); + + /* CIFE is Beta = 1, Gamma = 1 */ + optionalParam1 = 1.0; + optionalParam2 = 1.0; + + /*void BetaGamma(int k, long noOfSamples, long noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures, double beta, double gamma)*/ + BetaGamma(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output,optionalParam1,optionalParam2); + break; + } + case 7: /* ICAP */ + { + plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL); + output = (double *)mxGetPr(plhs[0]); + + /*void ICAP(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output);*/ + ICAP(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output); + break; + } + case 8: /* CondRed */ + { + plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL); + output = (double *)mxGetPr(plhs[0]); + + /* CondRed is Beta = 0, Gamma = 1 */ + optionalParam1 = 0.0; + optionalParam2 = 1.0; + + /*void BetaGamma(int k, long noOfSamples, long noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures, double beta, double gamma)*/ + BetaGamma(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output,optionalParam1,optionalParam2); + break; + } + case 9: /* BetaGamma */ + { + if (nrhs != 6) + { + printf("Insufficient arguments specified for Beta Gamma FS\n"); + plhs[0] = mxCreateDoubleMatrix(0,0,mxREAL); + return; + } + else + { + plhs[0] = mxCreateDoubleMatrix(k,1,mxREAL); + output = (double *)mxGetPr(plhs[0]); + + /*void BetaGamma(int k, long noOfSamples, long noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures, double beta, double gamma)*/ + 
BetaGamma(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output,optionalParam1,optionalParam2); + } + break; + } + case 10: /* CMI */ + { + output = (double *)mxCalloc(k,sizeof(double)); + + /*void CondMI(int k, int noOfSamples, int noOfFeatures,double *featureMatrix, double *classColumn, double *outputFeatures)*/ + CondMI(k,numberOfSamples,numberOfFeatures,featureMatrix,targets,output); + + i = 0; + + while((output[i] != -1) && (i < k)) + { + i++; + } + + plhs[0] = mxCreateDoubleMatrix(i,1,mxREAL); + outputFeatures = (double *)mxGetPr(plhs[0]); + + for (j = 0; j < i; j++) + { + outputFeatures[j] = output[j] + 1; /*C indexes from 0 not 1*/ + }/*for number of selected features*/ + + mxFree(output); + output = NULL; + break; + } + }/*switch on flag*/ + return; + } + } +}/*mex function entry*/ diff --git a/FEAST/FSToolbox/FSToolboxMex.mexa64 b/FEAST/FSToolbox/FSToolboxMex.mexa64 Binary files differnew file mode 100755 index 0000000..0015254 --- /dev/null +++ b/FEAST/FSToolbox/FSToolboxMex.mexa64 diff --git a/FEAST/FSToolbox/ICAP.c b/FEAST/FSToolbox/ICAP.c new file mode 100644 index 0000000..00953a7 --- /dev/null +++ b/FEAST/FSToolbox/ICAP.c @@ -0,0 +1,184 @@ +/******************************************************************************* +** ICAP.c implements the Interaction Capping criterion from +** +** "Machine Learning Based on Attribute Interactions" +** A. Jakulin, PhD Thesis (2005) +** +** Initial Version - 19/08/2010 +** Updated - 23/06/2011 +** +** Author - Adam Pocock +** +** Part of the Feature Selection Toolbox, please reference +** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual +** Information Feature Selection" +** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan +** Journal of Machine Learning Research (JMLR), 2011 +** +** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates. +** +** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester +** All rights reserved. 
+** +** Redistribution and use in source and binary forms, with or without modification, +** are permitted provided that the following conditions are met: +** +** - Redistributions of source code must retain the above copyright notice, this +** list of conditions and the following disclaimer. +** - Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** - Neither the name of The University of Manchester nor the names of its +** contributors may be used to endorse or promote products derived from this +** software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+** +*******************************************************************************/ + +#include "FSAlgorithms.h" +#include "FSToolbox.h" + +/* MIToolbox includes */ +#include "MutualInformation.h" + +double* ICAP(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures) +{ + /*holds the class MI values*/ + double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double)); + char *selectedFeatures = (char *)CALLOC_FUNC(noOfFeatures,sizeof(char)); + + /*separates out the features*/ + double **feature2D = (double **) CALLOC_FUNC(noOfFeatures,sizeof(double *)); + + /*holds the intra feature MI values*/ + int sizeOfMatrix = k*noOfFeatures; + double *featureMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double)); + double *featureCMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double)); + + double maxMI = 0.0; + int maxMICounter = -1; + + double score, currentScore, totalFeatureInteraction, interactionInfo; + int currentHighestFeature, arrayPosition; + + int i, j, m; + + for (j = 0; j < noOfFeatures; j++) + feature2D[j] = featureMatrix + (int) j * noOfSamples; + + for (i = 0; i < sizeOfMatrix; i++) + { + featureMIMatrix[i] = -1; + featureCMIMatrix[i] = -1; + }/*for featureMIMatrix and featureCMIMatrix - blank to -1*/ + + /*SETUP COMPLETE*/ + /*Algorithm starts here*/ + + for (i = 0; i < noOfFeatures;i++) + { + classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples); + + if (classMI[i] > maxMI) + { + maxMI = classMI[i]; + maxMICounter = i; + }/*if bigger than current maximum*/ + }/*for noOfFeatures - filling classMI*/ + + selectedFeatures[maxMICounter] = 1; + outputFeatures[0] = maxMICounter; + + /************* + ** Now we have populated the classMI array, and selected the highest + ** MI feature as the first output feature + *************/ + + for (i = 1; i < k; i++) + { + /********************************************************************** + ** to ensure it selects some 
features + **if this is zero then it will not pick features where the redundancy is greater than the + **relevance + **********************************************************************/ + score = -HUGE_VAL; + currentHighestFeature = 0; + currentScore = 0.0; + + for (j = 0; j < noOfFeatures; j++) + { + /*if we haven't selected j*/ + if (!selectedFeatures[j]) + { + currentScore = classMI[j]; + totalFeatureInteraction = 0.0; + + for (m = 0; m < i; m++) + { + arrayPosition = m*noOfFeatures + j; + + if (featureMIMatrix[arrayPosition] == -1) + { + /*work out interaction*/ + + /*double calculateMutualInformation(double *firstVector, double *secondVector, int vectorLength);*/ + featureMIMatrix[arrayPosition] = calculateMutualInformation(feature2D[(int) outputFeatures[m]], feature2D[j], noOfSamples); + /*double calculateConditionalMutualInformation(double *firstVector, double *targetVector, double* conditionVector, int vectorLength);*/ + featureCMIMatrix[arrayPosition] = calculateConditionalMutualInformation(feature2D[(int) outputFeatures[m]], feature2D[j], classColumn, noOfSamples); + }/*if not already known*/ + + interactionInfo = featureCMIMatrix[arrayPosition] - featureMIMatrix[arrayPosition]; + + if (interactionInfo < 0) + { + totalFeatureInteraction += interactionInfo; + } + }/*for the number of already selected features*/ + + currentScore += totalFeatureInteraction; + + + if (currentScore > score) + { + score = currentScore; + currentHighestFeature = j; + } + }/*if j is unselected*/ + }/*for number of features*/ + + selectedFeatures[currentHighestFeature] = 1; + outputFeatures[i] = currentHighestFeature; + + }/*for the number of features to select*/ + + /*C++ indexes from 0 not 1, so we need to increment all the feature indices*/ + for (i = 0; i < k; i++) + { + outputFeatures[i] += 1; + }/*for number of selected features*/ + + FREE_FUNC(classMI); + FREE_FUNC(feature2D); + FREE_FUNC(featureMIMatrix); + FREE_FUNC(featureCMIMatrix); + FREE_FUNC(selectedFeatures); + + 
classMI = NULL; + feature2D = NULL; + featureMIMatrix = NULL; + featureCMIMatrix = NULL; + selectedFeatures = NULL; + + return outputFeatures; +}/*ICAP(int,int,int,double[][],double[],double[])*/ + diff --git a/FEAST/FSToolbox/JMI.c b/FEAST/FSToolbox/JMI.c new file mode 100644 index 0000000..30ef1bc --- /dev/null +++ b/FEAST/FSToolbox/JMI.c @@ -0,0 +1,177 @@ +/******************************************************************************* +** JMI.c implements the JMI criterion from +** +** "Data Visualization and Feature Selection: New Algorithms for Nongaussian Data" +** H. Yang and J. Moody, NIPS (1999) +** +** Initial Version - 19/08/2010 +** Updated - 23/06/2011 +** +** Author - Adam Pocock +** +** Part of the Feature Selection Toolbox, please reference +** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual +** Information Feature Selection" +** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan +** Journal of Machine Learning Research (JMLR), 2011 +** +** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates. +** +** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without modification, +** are permitted provided that the following conditions are met: +** +** - Redistributions of source code must retain the above copyright notice, this +** list of conditions and the following disclaimer. +** - Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** - Neither the name of The University of Manchester nor the names of its +** contributors may be used to endorse or promote products derived from this +** software without specific prior written permission. 
+** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +** +*******************************************************************************/ + +#include "FSAlgorithms.h" +#include "FSToolbox.h" + +/* MIToolbox includes */ +#include "MutualInformation.h" +#include "ArrayOperations.h" + +double* JMI(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures) +{ + /*holds the class MI values*/ + double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double)); + + char *selectedFeatures = (char *)CALLOC_FUNC(noOfFeatures,sizeof(char)); + + /*holds the intra feature MI values*/ + int sizeOfMatrix = k*noOfFeatures; + double *featureMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double)); + + double maxMI = 0.0; + int maxMICounter = -1; + + double **feature2D = (double**) CALLOC_FUNC(noOfFeatures,sizeof(double*)); + + double score, currentScore, totalFeatureMI; + int currentHighestFeature; + + double *mergedVector = (double *) CALLOC_FUNC(noOfSamples,sizeof(double)); + + int arrayPosition; + double mi, tripEntropy; + + int i,j,x; + + for(j = 0; j < noOfFeatures; j++) + { + feature2D[j] = featureMatrix + (int)j*noOfSamples; + } + + for (i = 0; i < sizeOfMatrix;i++) + { + 
featureMIMatrix[i] = -1; + }/*for featureMIMatrix - blank to -1*/ + + + for (i = 0; i < noOfFeatures;i++) + { + /*calculate mutual info + **double calculateMutualInformation(double *firstVector, double *secondVector, int vectorLength); + */ + classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples); + + if (classMI[i] > maxMI) + { + maxMI = classMI[i]; + maxMICounter = i; + }/*if bigger than current maximum*/ + }/*for noOfFeatures - filling classMI*/ + + selectedFeatures[maxMICounter] = 1; + outputFeatures[0] = maxMICounter; + + /***************************************************************************** + ** We have populated the classMI array, and selected the highest + ** MI feature as the first output feature + ** Now we move into the JMI algorithm + *****************************************************************************/ + + for (i = 1; i < k; i++) + { + score = 0.0; + currentHighestFeature = 0; + currentScore = 0.0; + totalFeatureMI = 0.0; + + for (j = 0; j < noOfFeatures; j++) + { + /*if we haven't selected j*/ + if (selectedFeatures[j] == 0) + { + currentScore = 0.0; + totalFeatureMI = 0.0; + + for (x = 0; x < i; x++) + { + arrayPosition = x*noOfFeatures + j; + if (featureMIMatrix[arrayPosition] == -1) + { + mergeArrays(feature2D[(int) outputFeatures[x]], feature2D[j],mergedVector,noOfSamples); + /*double calculateMutualInformation(double *firstVector, double *secondVector, int vectorLength);*/ + mi = calculateMutualInformation(mergedVector, classColumn, noOfSamples); + + featureMIMatrix[arrayPosition] = mi; + }/*if not already known*/ + currentScore += featureMIMatrix[arrayPosition]; + }/*for the number of already selected features*/ + + if (currentScore > score) + { + score = currentScore; + currentHighestFeature = j; + } + }/*if j is unselected*/ + }/*for number of features*/ + + selectedFeatures[currentHighestFeature] = 1; + outputFeatures[i] = currentHighestFeature; + + }/*for the number of features to select*/ + + for (i 
= 0; i < k; i++) + { + outputFeatures[i] += 1; /*C indexes from 0 not 1*/ + }/*for number of selected features*/ + + FREE_FUNC(classMI); + FREE_FUNC(feature2D); + FREE_FUNC(featureMIMatrix); + FREE_FUNC(mergedVector); + FREE_FUNC(selectedFeatures); + + classMI = NULL; + feature2D = NULL; + featureMIMatrix = NULL; + mergedVector = NULL; + selectedFeatures = NULL; + + return outputFeatures; + +}/*JMI(int,int,int,double[][],double[],double[])*/ + diff --git a/FEAST/FSToolbox/MIM.m b/FEAST/FSToolbox/MIM.m new file mode 100644 index 0000000..31695e4 --- /dev/null +++ b/FEAST/FSToolbox/MIM.m @@ -0,0 +1,17 @@ +function [selectedFeatures scoreVector] = MIM(k, data, labels)
+%function [selectedFeatures scoreVector] = MIM(k, data, labels)
+%
+%Mutual Information Maximisation (MIM): scores each feature by its mutual information with the labels and returns the indices of the top k features.
+%
+% The license is in the license.txt provided.
+
+numf = size(data,2);
+classMI = zeros(numf,1);
+
+for n = 1 : numf
+ classMI(n) = mi(data(:,n),labels);
+end
+
+[scoreVector index] = sort(classMI,'descend');
+
+selectedFeatures = index(1:k);
diff --git a/FEAST/FSToolbox/Makefile b/FEAST/FSToolbox/Makefile new file mode 100644 index 0000000..c0806a9 --- /dev/null +++ b/FEAST/FSToolbox/Makefile @@ -0,0 +1,96 @@ +# makefile for FEAST +# Author: Adam Pocock, apocock@cs.man.ac.uk +# Created: 29/06/2011 +# +# Part of the Feature Selection Toolbox, please reference +# "Conditional Likelihood Maximisation: A Unifying Framework for Mutual +# Information Feature Selection" +# G. Brown, A. Pocock, M.-J. Zhao, M. Lujan +# Journal of Machine Learning Research (JMLR), 2012 +# +# Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates. +# +# Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# - Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of The University of Manchester nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +CXXFLAGS = -O3 -fPIC +COMPILER = gcc +MITOOLBOXPATH = ../MIToolbox/ +objects = mRMR_D.o CMIM.o JMI.o DISR.o CondMI.o ICAP.o BetaGamma.o + +libFSToolbox.so : $(objects) + $(COMPILER) $(CXXFLAGS) -lMIToolbox -L$(MITOOLBOXPATH) -shared -o libFSToolbox.so $(objects) + +mRMR_D.o: mRMR_D.c + $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c mRMR_D.c -I$(MITOOLBOXPATH) + +CMIM.o: CMIM.c + $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c CMIM.c -I$(MITOOLBOXPATH) + +JMI.o: JMI.c + $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c JMI.c -I$(MITOOLBOXPATH) + +DISR.o: DISR.c + $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c DISR.c -I$(MITOOLBOXPATH) + +CondMI.o: CondMI.c + $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c CondMI.c -I$(MITOOLBOXPATH) + +ICAP.o: ICAP.c + $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c ICAP.c -I$(MITOOLBOXPATH) + +BetaGamma.o: BetaGamma.c + $(COMPILER) $(CXXFLAGS) -DCOMPILE_C -c BetaGamma.c -I$(MITOOLBOXPATH) + +.PHONY : debug +debug: + $(MAKE) libFSToolbox.so "CXXFLAGS = -g -DDEBUG -fPIC" + +.PHONY : x86 +x86: + $(MAKE) libFSToolbox.so "CXXFLAGS = -O3 -fPIC -m32" + +.PHONY : x64 +x64: + $(MAKE) libFSToolbox.so "CXXFLAGS = -O3 -fPIC -m64" + +.PHONY : matlab +matlab: + mex -I$(MITOOLBOXPATH) FSToolboxMex.c BetaGamma.c CMIM.c CondMI.c DISR.c ICAP.c JMI.c mRMR_D.c $(MITOOLBOXPATH)MutualInformation.c $(MITOOLBOXPATH)Entropy.c $(MITOOLBOXPATH)CalculateProbability.c $(MITOOLBOXPATH)ArrayOperations.c + +.PHONY : matlab-debug +matlab-debug: + mex -g -I$(MITOOLBOXPATH) FSToolboxMex.c 
BetaGamma.c CMIM.c CondMI.c DISR.c ICAP.c JMI.c mRMR_D.c $(MITOOLBOXPATH)MutualInformation.c $(MITOOLBOXPATH)Entropy.c $(MITOOLBOXPATH)CalculateProbability.c $(MITOOLBOXPATH)ArrayOperations.c + +.PHONY : intel +intel: + $(MAKE) libFSToolbox.so "COMPILER = icc" "CXXFLAGS = -O2 -fPIC -xHost" + +.PHONY : clean +clean: + rm *.o + rm libFSToolbox.so + diff --git a/FEAST/FSToolbox/README b/FEAST/FSToolbox/README new file mode 100644 index 0000000..1aae2d7 --- /dev/null +++ b/FEAST/FSToolbox/README @@ -0,0 +1,80 @@ +FEAST v1.0 +A feature selection toolbox for C/C++ and MATLAB/OCTAVE + +FEAST provides implementations of common mutual information based filter +feature selection algorithms, and an implementation of RELIEF. All +functions expect discrete inputs (except RELIEF, which does not depend +on the MIToolbox), and they return the selected feature indices. These +implementations were developed to help our research into the similarities +between these algorithms, and our results are presented in the following paper: + + Conditional Likelihood Maximisation: A Unifying Framework for Mutual Information Feature Selection + G.Brown, A.Pocock, M.Lujan, M.-J.Zhao + Journal of Machine Learning Research (in press, to appear 2012) + +All FEAST code is licensed under the BSD 3-Clause License. +If you use these implementations for academic research please cite the paper above. + +Contains implementations of: + mim, mrmr, mifs, cmim, jmi, disr, cife, icap, condred, cmi, relief, fcbf, betagamma + +References for these algorithms are provided in the accompanying feast.bib file (in BibTeX format). 
+ +MATLAB Example (using "data" as our feature matrix, and "labels" as the class label vector): + +>> size(data) +ans = + (569,30) %% denoting 569 examples, and 30 features + +>> selectedIndices = feast('jmi',5,data,labels) %% selecting the top 5 features using the jmi algorithm +selectedIndices = + + 28 + 21 + 8 + 27 + 23 + +>> selectedIndices = feast('mrmr',10,data,labels) %% selecting the top 10 features using the mrmr algorithm +selectedIndices = + + 28 + 24 + 22 + 8 + 27 + 21 + 29 + 4 + 7 + 25 + +>> selectedIndices = feast('mifs',5,data,labels,0.7) %% selecting the top 5 features using the mifs algorithm with beta = 0.7 +selectedIndices = + + 28 + 24 + 22 + 20 + 29 + +The library is written in ANSI C for compatibility with the MATLAB mex compiler, +except for MIM, FCBF and RELIEF, which are written in MATLAB/OCTAVE script. + +If you wish to use MIM in a C program you can use the BetaGamma function with +Beta = 0, Gamma = 0, as this is equivalent to MIM (but slower than the other implementation). +MIToolbox is required to compile these algorithms, and these implementations +supersede the example implementations given in that package (they have more robust behaviour +when used with unexpected inputs). + +MIToolbox can be found at: + http://www.cs.man.ac.uk/~gbrown/mitoolbox/ +and v1.03 is included in the ZIP for the FEAST package. + +Compilation instructions: + MATLAB/OCTAVE - run CompileFEAST.m, + Linux C shared library - use the included makefile + +Update History +08/11/2011 - v1.0 - Public Release to complement the JMLR publication. + diff --git a/FEAST/FSToolbox/RELIEF.m b/FEAST/FSToolbox/RELIEF.m new file mode 100644 index 0000000..194ce7b --- /dev/null +++ b/FEAST/FSToolbox/RELIEF.m @@ -0,0 +1,61 @@ +% RELIEF - Kira & Rendell 1992 +% T is number of patterns to use +% Defaults to all patterns if not specified. +% +% The license is in the license.txt provided. 
+% +% function w = RELIEF( data, labels, T ) +% +function [w bestidx] = RELIEF ( data, labels, T ) + +if ~exist('T','var') + T=size(data,1); +end + +idx = randperm(length(labels)); +idx = idx(1:T); + +w = zeros(size(data,2),1); +for t = 1:T + + x = data(idx(t),:); + y = labels(idx(t)); + + %copy the x + protos = repmat(x, length(labels), 1); + %measure the distance from x to every other example + distances = [sqrt(sum((data-protos).^2,2)) labels]; + %sort them according to distances (find nearest neighbours) + [distances originalidx] = sortrows(distances,1); + + foundhit = false; hitidx=0; + foundmiss = false; missidx=0; + i=2; %start from the second one + while (~foundhit || ~foundmiss) + + if distances(i,2) == y + hitidx = originalidx(i); + foundhit = true; + end + if distances(i,2) ~= y + missidx = originalidx(i); + foundmiss = true; + end + + i=i+1; + + end + + alpha = 1/T; + for f = 1:size(data,2)%each feature + hitpenalty = (x(f)-data(hitidx,f)) / (max(data(:,f))-min(data(:,f))); + misspenalty = (x(f)-data(missidx,f)) / (max(data(:,f))-min(data(:,f))); + + w(f) = w(f) - alpha*hitpenalty^2 + alpha*misspenalty^2; + end + +end + +[~,bestidx] = sort(w,'descend'); + + diff --git a/FEAST/FSToolbox/feast.bib b/FEAST/FSToolbox/feast.bib new file mode 100644 index 0000000..2c58f0d --- /dev/null +++ b/FEAST/FSToolbox/feast.bib @@ -0,0 +1,122 @@ +% MIM (Mutual Information Maximisation) +@inproceedings{MIM, + author = {David D. 
Lewis}, + title = {Feature Selection and Feature Extraction for Text Categorization}, + booktitle = {In Proceedings of Speech and Natural Language Workshop}, + year = {1992}, + pages = {212--217}, + publisher = {Morgan Kaufmann} +} + +% MIFS (Mutual Information Feature Selection ) +@article{MIFS, + author={Battiti, R.}, + journal={Neural Networks, IEEE Transactions on}, + title={Using mutual information for selecting features in supervised neural net learning}, + year={1994}, + month={jul}, + volume={5}, + number={4}, + pages={537 -550}, + ISSN={1045-9227} +} + +% mRMR (minimum Redundancy Maximum Relevance) +@article{mRMR, + title={Feature selection based on mutual information: criteria of max-dependency, max-relevance, and min-redundancy}, + author={Peng, H. and Long, F. and Ding, C.}, + journal={IEEE Transactions on pattern analysis and machine intelligence}, + pages={1226--1238}, + year={2005}, + publisher={Published by the IEEE Computer Society} +} + +% CMIM (Conditional Mutual Information Maximisation) +@article{CMIM, + author = {Fleuret, Fran\c{c}ois}, + title = {Fast Binary Feature Selection with Conditional Mutual Information}, + journal = {Journal of Machine Learning Research}, + volume = {5}, + month = {December}, + year = {2004}, + issn = {1532-4435}, + pages = {1531--1555}, + publisher = {JMLR.org} +} + +% JMI (Joint Mutual Information) +@inproceedings{JMI, + title={Feature selection based on joint mutual information}, + author={Yang, H. 
and Moody, J.}, + booktitle={Proceedings of International ICSC Symposium on Advances in Intelligent Data Analysis}, + pages={22--25}, + year={1999} +} + +% DISR (Double Input Symmetrical Relevance) +@incollection{DISR, + author = {Meyer, Patrick and Bontempi, Gianluca}, + title = {On the Use of Variable Complementarity for Feature Selection in Cancer Classification}, + booktitle = {Applications of Evolutionary Computing}, + publisher = {Springer Berlin / Heidelberg}, + pages = {91-102}, + volume = {3907}, + url = {http://dx.doi.org/10.1007/11732242_9}, + year = {2006} +} + +% ICAP (Interaction Capping) +@article{ICAP, + title={Machine learning based on attribute interactions}, + author={Jakulin, A.}, + journal={Fakulteta za racunalni{\v{s}}tvo in informatiko, Univerza v Ljubljani}, + year={2005} +} + +% CIFE (Conditional Informative Feature Extraction) +@incollection{CIFE, + author = {Lin, Dahua and Tang, Xiaoou}, + title = {Conditional Infomax Learning: An Integrated Framework for Feature Extraction and Fusion}, + booktitle = {Computer Vision – ECCV 2006}, + series = {Lecture Notes in Computer Science}, + publisher = {Springer Berlin / Heidelberg}, + pages = {68-82}, + volume = {3951}, + url = {http://dx.doi.org/10.1007/11744023_6}, + year = {2006} +} + +% Beta Gamma Space +@inproceedings{BetaGamma, + title={A new perspective for information theoretic feature selection}, + author={Brown, G.}, + booktitle={12th International Conference on Artificial Intelligence and Statistics}, + volume={5}, + pages={49--56}, + year={2009} +} + +% FCBF (Fast Correlation-Based Filter) +@article{FCBF, + author = {Yu, Lei and Liu, Huan}, + title = {Efficient Feature Selection via Analysis of Relevance and Redundancy}, + journal = {Journal of Machine Learning Research}, + volume = {5}, + year = {2004}, + issn = {1532-4435}, + pages = {1205--1224}, + publisher = {JMLR.org} +} + +% RELIEF +@inproceedings{RELIEF, + author = {Kira, Kenji and Rendell, Larry A.}, + title = {The feature 
selection problem: traditional methods and a new algorithm}, + booktitle = {Proceedings of the tenth national conference on Artificial intelligence}, + series = {AAAI'92}, + year = {1992}, + pages = {129--134}, + publisher = {AAAI Press} +} + + diff --git a/FEAST/FSToolbox/feast.m b/FEAST/FSToolbox/feast.m new file mode 100644 index 0000000..96a685e --- /dev/null +++ b/FEAST/FSToolbox/feast.m @@ -0,0 +1,100 @@ +function [selectedFeatures] = feast(criteria,numToSelect,data,labels,varargin) +%function [selectedFeatures] = feast(criteria,numToSelect,data,labels,varargin) +% +%Provides access to the feature selection algorithms in FSToolboxMex +% +%Expects the features to be columns of the data matrix, and +%requires that both the features and labels are integers. +% +%Algorithms are called as follows +% +%[selectedFeatures] = feast('algName',numToSelect,data,labels) +% where algName is: +% mim, mrmr, cmim, jmi, disr, cife, icap, condred, cmi, relief +% +%[selectedFeatures] = feast('algName',numToSelect,data,labels,beta) +% where algName is: +% mifs (defaults to beta = 1.0 if unspecified) +% +%[selectedFeatures] = feast('algName',numToSelect,data,labels,beta,gamma) +% where algName is: +% betagamma +%[selectedFeatures] = feast('algName',numToSelect,data,labels,threshold) +% where algName is: +% fcbf (note this ignores the numToSelect) +% +% The license is in the license.txt provided. 
+ + +%Internal FSToolbox Criteria to number mapping +%MIFS = 1 +%mRMR = 2 +%CMIM = 3 +%JMI = 4 +%DISR = 5 +%CIFE = 6 +%ICAP = 7 +%CondRed = 8 +%BetaGamma = 9 +%CMI = 10 +% + +if ((numToSelect < 1) || (numToSelect > size(data,2))) + error(['You have requested ' num2str(numToSelect) ' features, which is not possible']); +end + +finiteDataCount = sum(sum(isfinite(data))); +finiteLabelsCount = sum(sum(isfinite(labels))); + +totalData = numel(data); +totalLabels = numel(labels); + +if ((finiteDataCount ~= totalData) || (finiteLabelsCount ~= totalLabels)) + error(['Some elements are NaNs or infinite. Please check your data']); +end + +if (strcmpi(criteria,'mim')) + selectedFeatures = MIM(numToSelect,data,labels); +elseif (strcmpi(criteria,'mifs')) + if (nargin == 4) + beta = 1; + else + beta = varargin{1}; + end + selectedFeatures = FSToolboxMex(1,numToSelect,data,labels,beta); +elseif (strcmpi(criteria,'mrmr')) + selectedFeatures = FSToolboxMex(2,numToSelect,data,labels); +elseif (strcmpi(criteria,'cmim')) + selectedFeatures = FSToolboxMex(3,numToSelect,data,labels); +elseif (strcmpi(criteria,'jmi')) + selectedFeatures = FSToolboxMex(4,numToSelect,data,labels); +elseif (strcmpi(criteria,'disr')) + selectedFeatures = FSToolboxMex(5,numToSelect,data,labels); +elseif ((strcmpi(criteria,'cife')) || (strcmpi(criteria,'fou'))) + selectedFeatures = FSToolboxMex(6,numToSelect,data,labels); +elseif (strcmpi(criteria,'icap')) + selectedFeatures = FSToolboxMex(7,numToSelect,data,labels); +elseif (strcmpi(criteria,'condred')) + selectedFeatures = FSToolboxMex(8,numToSelect,data,labels); +elseif (strcmpi(criteria,'betagamma')) + if (nargin ~= 6) + error('BetaGamma criteria expects a beta and a gamma'); + else + beta = varargin{1}; + gamma = varargin{2}; + end + selectedFeatures = FSToolboxMex(9,numToSelect,data,labels,beta,gamma); +elseif (strcmpi(criteria,'cmi')) + selectedFeatures = FSToolboxMex(10,numToSelect,data,labels); +elseif (strcmpi(criteria,'fcbf')) + if (nargin == 4) + 
error('Threshold for FCBF not supplied'); + else + selectedFeatures = FCBF(data,labels,varargin{1}); + end +elseif (strcmpi(criteria,'relief')) + [tmp selectedFeatures] = RELIEF(data,labels); +else + selectedFeatures = []; + disp(['Unrecognised criteria ' criteria]); +end diff --git a/FEAST/FSToolbox/license.txt b/FEAST/FSToolbox/license.txt new file mode 100644 index 0000000..798960e --- /dev/null +++ b/FEAST/FSToolbox/license.txt @@ -0,0 +1,32 @@ +This is FEAST (a FEAture Selection Toolbox for C and MATLAB) +if you use this code in academic work could you please reference: +"Conditional Likelihood Maximisation: A Unifying Framework for Mutual +Information Feature Selection" +G. Brown, A. Pocock, M.-J. Zhao, M. Lujan +Journal of Machine Learning Research (JMLR), 2011 + +Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + - Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + - Neither the name of The University of Manchester nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/FEAST/FSToolbox/mRMR_D.c b/FEAST/FSToolbox/mRMR_D.c new file mode 100644 index 0000000..3eeb2a4 --- /dev/null +++ b/FEAST/FSToolbox/mRMR_D.c @@ -0,0 +1,170 @@ +/******************************************************************************* +** mRMR_D.c implements the minimum Redundancy Maximum Relevance criterion +** using the difference variant, from +** +** "Feature Selection Based on Mutual Information: Criteria of Max-Dependency, Max-Relevance, and Min-Redundancy" +** H. Peng et al. IEEE PAMI (2005) +** +** Initial Version - 13/06/2008 +** Updated - 23/06/2011 +** +** Author - Adam Pocock +** +** Part of the Feature Selection Toolbox, please reference +** "Conditional Likelihood Maximisation: A Unifying Framework for Mutual +** Information Feature Selection" +** G. Brown, A. Pocock, M.-J. Zhao, M. Lujan +** Journal of Machine Learning Research (JMLR), 2011 +** +** Please check www.cs.manchester.ac.uk/~gbrown/fstoolbox for updates. +** +** Copyright (c) 2010-2011, A. Pocock, G. Brown, The University of Manchester +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without modification, +** are permitted provided that the following conditions are met: +** +** - Redistributions of source code must retain the above copyright notice, this +** list of conditions and the following disclaimer. 
+** - Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** - Neither the name of The University of Manchester nor the names of its +** contributors may be used to endorse or promote products derived from this +** software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +** ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +** ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+** +*******************************************************************************/ + +#include "FSAlgorithms.h" +#include "FSToolbox.h" + +/* MIToolbox includes */ +#include "MutualInformation.h" + +double* mRMR_D(int k, int noOfSamples, int noOfFeatures, double *featureMatrix, double *classColumn, double *outputFeatures) +{ + double **feature2D = (double**) CALLOC_FUNC(noOfFeatures,sizeof(double*)); + /*holds the class MI values*/ + double *classMI = (double *)CALLOC_FUNC(noOfFeatures,sizeof(double)); + int *selectedFeatures = (int *)CALLOC_FUNC(noOfFeatures,sizeof(int)); + /*holds the intra feature MI values*/ + int sizeOfMatrix = k*noOfFeatures; + double *featureMIMatrix = (double *)CALLOC_FUNC(sizeOfMatrix,sizeof(double)); + + double maxMI = 0.0; + int maxMICounter = -1; + + /*init variables*/ + + double score, currentScore, totalFeatureMI; + int currentHighestFeature; + + int arrayPosition, i, j, x; + + for(j = 0; j < noOfFeatures; j++) + { + feature2D[j] = featureMatrix + (int)j*noOfSamples; + } + + for (i = 0; i < sizeOfMatrix;i++) + { + featureMIMatrix[i] = -1; + }/*for featureMIMatrix - blank to -1*/ + + + for (i = 0; i < noOfFeatures;i++) + { + classMI[i] = calculateMutualInformation(feature2D[i], classColumn, noOfSamples); + if (classMI[i] > maxMI) + { + maxMI = classMI[i]; + maxMICounter = i; + }/*if bigger than current maximum*/ + }/*for noOfFeatures - filling classMI*/ + + selectedFeatures[maxMICounter] = 1; + outputFeatures[0] = maxMICounter; + + /************* + ** Now we have populated the classMI array, and selected the highest + ** MI feature as the first output feature + ** Now we move into the mRMR-D algorithm + *************/ + + for (i = 1; i < k; i++) + { + /**************************************************** + ** to ensure it selects some features + **if this is zero then it will not pick features where the redundancy is greater than the + **relevance + ****************************************************/ + score = -HUGE_VAL; + 
currentHighestFeature = 0; + currentScore = 0.0; + totalFeatureMI = 0.0; + + for (j = 0; j < noOfFeatures; j++) + { + /*if we haven't selected j*/ + if (selectedFeatures[j] == 0) + { + currentScore = classMI[j]; + totalFeatureMI = 0.0; + + for (x = 0; x < i; x++) + { + arrayPosition = x*noOfFeatures + j; + if (featureMIMatrix[arrayPosition] == -1) + { + /*work out intra MI*/ + + /*double calculateMutualInformation(double *firstVector, double *secondVector, int vectorLength);*/ + featureMIMatrix[arrayPosition] = calculateMutualInformation(feature2D[(int) outputFeatures[x]], feature2D[j], noOfSamples); + } + + totalFeatureMI += featureMIMatrix[arrayPosition]; + }/*for the number of already selected features*/ + + currentScore -= (totalFeatureMI/i); + if (currentScore > score) + { + score = currentScore; + currentHighestFeature = j; + } + }/*if j is unselected*/ + }/*for number of features*/ + + selectedFeatures[currentHighestFeature] = 1; + outputFeatures[i] = currentHighestFeature; + + }/*for the number of features to select*/ + + for (i = 0; i < k; i++) + { + outputFeatures[i] += 1; /*C indexes from 0 not 1*/ + }/*for number of selected features*/ + + FREE_FUNC(classMI); + FREE_FUNC(feature2D); + FREE_FUNC(featureMIMatrix); + FREE_FUNC(selectedFeatures); + + classMI = NULL; + feature2D = NULL; + featureMIMatrix = NULL; + selectedFeatures = NULL; + + return outputFeatures; +}/*mRMR(int,int,int,double[][],double[],double[])*/ + |