| author | Calvin Morrison <mutantturkey@gmail.com> | 2014-10-27 14:08:54 -0400 |
|---|---|---|
| committer | Calvin Morrison <mutantturkey@gmail.com> | 2014-10-27 14:08:54 -0400 |
| commit | 556ae3823ce8105668cf22bb966acdec1ef954e6 (patch) | |
| tree | cff905d1cec43db71ad4310633e6c2d7fa3bc342 | |
| parent | ba67abd20e413a9672ce26f5cfcfb1251ecfde62 (diff) | |
| parent | 719281d5f02872ad83bf1a6f206e10622a383976 (diff) | |
Force Column major
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | README.markdown | 6 |
| -rw-r--r-- | feast.py | 149 |
| -rw-r--r-- | test/test.py | 15 |

3 files changed, 71 insertions, 99 deletions
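What the commit is doing: NumPy builds row-major (C-order) arrays by default, while the commit subject and the new README warning indicate that libFSToolbox expects column-major data, so `check_data` now forces every input through `np.array(..., order="F")` rather than trusting the caller (see the feast.py and test/test.py hunks below). A minimal sketch of the layout difference, using plain NumPy only and no PyFeast code:

```python
import numpy as np

data = np.arange(6, dtype=np.float64).reshape(2, 3)

# C order (numpy's default): rows are contiguous in memory.
print(data.flags['C_CONTIGUOUS'])    # True

# Fortran order: columns are contiguous, which is the layout a
# column-major C library sees when handed the raw buffer.
data_f = np.asfortranarray(data)
print(data_f.flags['F_CONTIGUOUS'])  # True
print(data_f.ravel(order='K'))       # [0. 3. 1. 4. 2. 5.] -- column by column
```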
diff --git a/README.markdown b/README.markdown
index 5916a28..2004fb8 100644
--- a/README.markdown
+++ b/README.markdown
@@ -1,5 +1,5 @@
 # PyFeast
-Python bindings to the FEAST Feature Selection Toolbox.
+Python bindings to the FEAST Feature Selection Toolbox..

 ## Download

@@ -22,8 +22,8 @@ In order to use the feast module, you will need the following dependencies
 * Python 2.7
 * Numpy
 * Linux or OS X
-* [FEAST](https://github.com/Craigacp/FEAST)
 * [MIToolbox](https://github.com/Craigacp/MIToolbox)
+* [FEAST](https://github.com/Craigacp/FEAST) v1.1.1 or higher

 ## Installation

@@ -33,7 +33,7 @@ In order to use the feast module, you will need the following dependencies
 ## Demonstration
 See test/test.py for an example with uniform data and an image data set.
 The image data set was collected from the digits example in
-the Scikits-Learn toolbox.
+the Scikits-Learn toolbox. Make sure that if you are loading the data from a file and converting the data to a `numpy` array that you set `order="F"`. This is *very* important.

 ## Documentation
 We have documentation for each of the functions available [here](http://mutantturkey.github.com/PyFeast/feast-module.html)
diff --git a/feast.py b/feast.py
--- a/feast.py
+++ b/feast.py
@@ -1,4 +1,4 @@
-'''
+"""
 The FEAST module provides an interface between the C-library
 for feature selection to Python.

@@ -8,7 +8,7 @@
 theoretic feature selection," Journal of Machine Learning
 Research, vol. 13, pp. 27-66, 2012.

-'''
+"""
 __author__ = "Calvin Morrison"
 __copyright__ = "Copyright 2013, EESI Laboratory"
 __credits__ = ["Calvin Morrison", "Gregory Ditzler"]
@@ -21,14 +21,10 @@ __status__ = "Release"
 import numpy as np
 import ctypes as c

-try:
-    libFSToolbox = c.CDLL("libFSToolbox.so");
-except:
-    raise Exception("Error: could not load libFSToolbox.so")
-
+libFSToolbox = c.CDLL("libFSToolbox.so");

 def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
-    '''
+    """
     This algorithm implements conditional mutual information
     feature select, such that beta and gamma control the
     weight attached to the redundant mutual and conditional
@@ -52,7 +48,7 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
     @type gamma: float between 0 and 1.0
     @return: features in the order they were selected.
     @rtype: list
-    '''
+    """
     data, labels = check_data(data, labels)

     # python values
@@ -77,20 +73,14 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
         c_gamma
         )

-    # turn our output into a list
     selected_features = []
     for i in features.contents:
-        # recall that feast was implemented with Matlab in mind, so the
-        # authors assumed the indexing started a one; however, in Python
-        # the indexing starts at zero.
-        selected_features.append(i - 1)
-
+        selected_features.append(i)
     return selected_features

-
 def CIFE(data, labels, n_select):
-    '''
+    """
     This function implements the Condred feature selection algorithm.
     beta = 1; gamma = 1;

@@ -105,15 +95,11 @@ def CIFE(data, labels, n_select):
     @type n_select: integer
     @return selected_features: features in the order they were selected.
     @rtype: list
-    '''
-
+    """
     return BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0)

-
-
-
 def CMIM(data, labels, n_select):
-    '''
+    """
     This function implements the conditional mutual information
     maximization feature selection algorithm. Note that this
     implementation does not allow for the weighting of the
@@ -130,7 +116,7 @@ def CMIM(data, labels, n_select):
     @type n_select: integer
     @return: features in the order that they were selected.
     @rtype: list
-    '''
+    """
     data, labels = check_data(data, labels)

     # python values
@@ -151,21 +137,16 @@ def CMIM(data, labels, n_select):
         output.ctypes.data_as(c.POINTER(c.c_double))
         )

-
-    # turn our output into a list
     selected_features = []
     for i in features.contents:
-        # recall that feast was implemented with Matlab in mind, so the
-        # authors assumed the indexing started a one; however, in Python
-        # the indexing starts at zero.
-        selected_features.append(i - 1)
+        selected_features.append(i)

     return selected_features


 def CondMI(data, labels, n_select):
-    '''
+    """
     This function implements the conditional mutual information
     maximization feature selection algorithm.

@@ -180,7 +161,7 @@ def CondMI(data, labels, n_select):
     @type n_select: integer
     @return: features in the order they were selected.
     @rtype list
-    '''
+    """
     data, labels = check_data(data, labels)

     # python values
@@ -200,21 +181,16 @@ def CondMI(data, labels, n_select):
         labels.ctypes.data_as(c.POINTER(c.c_double)),
         output.ctypes.data_as(c.POINTER(c.c_double))
         )

-
-    # turn our output into a list
     selected_features = []
     for i in features.contents:
-        # recall that feast was implemented with Matlab in mind, so the
-        # authors assumed the indexing started a one; however, in Python
-        # the indexing starts at zero.
-        selected_features.append(i - 1)
+        selected_features.append(i)

     return selected_features


 def Condred(data, labels, n_select):
-    '''
+    """
     This function implements the Condred feature selection algorithm.
     beta = 0; gamma = 1;

@@ -229,15 +205,14 @@ def Condred(data, labels, n_select):
     @type n_select: integer
     @return: the features in the order they were selected.
     @rtype: list
-    '''
+    """
     data, labels = check_data(data, labels)
-
     return BetaGamma(data, labels, n_select, beta=0.0, gamma=1.0)


 def DISR(data, labels, n_select):
-    '''
+    """
     This function implements the double input symmetrical relevance
     feature selection algorithm.

@@ -252,7 +227,7 @@ def DISR(data, labels, n_select):
     @type n_select: integer
     @return: the features in the order they were selected.
     @rtype: list
-    '''
+    """
     data, labels = check_data(data, labels)

     # python values
@@ -272,23 +247,15 @@ def DISR(data, labels, n_select):
         labels.ctypes.data_as(c.POINTER(c.c_double)),
         output.ctypes.data_as(c.POINTER(c.c_double))
         )

-
-    # turn our output into a list
     selected_features = []
     for i in features.contents:
-        # recall that feast was implemented with Matlab in mind, so the
-        # authors assumed the indexing started a one; however, in Python
-        # the indexing starts at zero.
-        selected_features.append(i - 1)
+        selected_features.append(i)

     return selected_features

-
-
-
 def ICAP(data, labels, n_select):
-    '''
+    """
     This function implements the interaction capping feature
     selection algorithm.

@@ -303,7 +270,7 @@ def ICAP(data, labels, n_select):
     @type n_select: integer
     @return: the features in the order they were selected.
     @rtype: list
-    '''
+    """
     data, labels = check_data(data, labels)

     # python values
@@ -323,24 +290,15 @@ def ICAP(data, labels, n_select):
         labels.ctypes.data_as(c.POINTER(c.c_double)),
         output.ctypes.data_as(c.POINTER(c.c_double))
         )

-
-    # turn our output into a list
     selected_features = []
     for i in features.contents:
-        # recall that feast was implemented with Matlab in mind, so the
-        # authors assumed the indexing started a one; however, in Python
-        # the indexing starts at zero.
-        selected_features.append(i - 1)
+        selected_features.append(i)

     return selected_features

-
-
-
-
 def JMI(data, labels, n_select):
-    '''
+    """
     This function implements the joint mutual information feature
     selection algorithm.

@@ -355,7 +313,7 @@ def JMI(data, labels, n_select):
     @type n_select: integer
     @return: the features in the order they were selected.
     @rtype: list
-    '''
+    """
     data, labels = check_data(data, labels)

     # python values
@@ -376,21 +334,15 @@ def JMI(data, labels, n_select):
         output.ctypes.data_as(c.POINTER(c.c_double))
         )

-
-    # turn our output into a list
     selected_features = []
     for i in features.contents:
-        # recall that feast was implemented with Matlab in mind, so the
-        # authors assumed the indexing started a one; however, in Python
-        # the indexing starts at zero.
-        selected_features.append(i - 1)
-
+        selected_features.append(i)
     return selected_features


 def MIFS(data, labels, n_select):
-    '''
+    """
     This function implements the MIFS algorithm.
     beta = 1; gamma = 0;

@@ -405,13 +357,12 @@ def MIFS(data, labels, n_select):
     @type n_select: integer
     @return: the features in the order they were selected.
     @rtype: list
-    '''
-
+    """
     return BetaGamma(data, labels, n_select, beta=0.0, gamma=0.0)


 def MIM(data, labels, n_select):
-    '''
+    """
     This function implements the MIM algorithm.
     beta = 0; gamma = 0;

@@ -426,15 +377,35 @@ def MIM(data, labels, n_select):
     @type n_select: integer
     @return: the features in the order they were selected.
     @rtype: list
-    '''
+    """
     data, labels = check_data(data, labels)
+
+    # python values
+    n_observations, n_features = data.shape
+    output = np.zeros(n_select)

-    return BetaGamma(data, labels, n_select, beta=0.0, gamma=0.0)
+    # cast as C types
+    c_n_observations = c.c_int(n_observations)
+    c_n_select = c.c_int(n_select)
+    c_n_features = c.c_int(n_features)
+
+    libFSToolbox.MIM.restype = c.POINTER(c.c_double * n_select)
+    features = libFSToolbox.MIM(c_n_select,
+        c_n_observations,
+        c_n_features,
+        data.ctypes.data_as(c.POINTER(c.c_double)),
+        labels.ctypes.data_as(c.POINTER(c.c_double)),
+        output.ctypes.data_as(c.POINTER(c.c_double))
+        )
+
+    selected_features = []
+    for i in features.contents:
+        selected_features.append(i)
+    return selected_features


 def mRMR(data, labels, n_select):
-    '''
+    """
     This funciton implements the max-relevance min-redundancy feature
     selection algorithm.

@@ -449,7 +420,7 @@ def mRMR(data, labels, n_select):
     @type n_select: integer
     @return: the features in the order they were selected.
     @rtype: list
-    '''
+    """
     data, labels = check_data(data, labels)

     # python values
@@ -470,19 +441,13 @@ def mRMR(data, labels, n_select):
         output.ctypes.data_as(c.POINTER(c.c_double))
         )

-
-    # turn our output into a list
     selected_features = []
     for i in features.contents:
-        # recall that feast was implemented with Matlab in mind, so the
-        # authors assumed the indexing started a one; however, in Python
-        # the indexing starts at zero.
-        selected_features.append(i - 1)
-
+        selected_features.append(i)
     return selected_features


 def check_data(data, labels):
-    '''
+    """
     Check dimensions of the data and the labels.  Raise and exception
     if there is a problem.

@@ -493,7 +458,7 @@ def check_data(data, labels):
     @param labels: the labels
     @return (data, labels): ndarray of floats
     @rtype: tuple
-    '''
+    """
     if isinstance(data, np.ndarray) is False:
         raise Exception("data must be an numpy ndarray.")
@@ -502,5 +467,5 @@ def check_data(data, labels):
     if len(data) != len(labels):
         raise Exception("data and labels must be the same length")
-
-    return 1.0*data, 1.0*labels
+
+    return 1.0*np.array(data, order="F"), 1.0*np.array(labels, order="F")
diff --git a/test/test.py b/test/test.py
index b5e16d1..7b90b3b 100644
--- a/test/test.py
+++ b/test/test.py
@@ -25,7 +25,7 @@ def read_digits(fname='digit.txt'):
     data = []
     for line in fw:
         data.append( [float(x) for x in line] )
-    data = np.array(data)
+    data = np.array(data, order="F")
     labels = data[:,len(data.transpose())-1]
     data = data[:,:len(data.transpose())-1]
     return data, labels
@@ -47,7 +47,6 @@ def uniform_data(n_observations = 1000, n_features = 50, n_relevant = 5):
     else:
         labels[m] = 2
     data = data.transpose()
-
     return data, labels


@@ -66,7 +65,7 @@ elif data_source == 'digits':
 n_observations = len(data)         # number of samples in the data set
 n_features = len(data.transpose()) # number of features in the data set
 n_select = 15                      # how many features to select
-method = 'JMI'                     # feature selection algorithm
+method = 'MIM'                     # feature selection algorithm

 print '---> Information'

@@ -87,7 +86,6 @@ if check_result(sf, n_relevant) == True:
 else:
     print ' BetaGamma failed!'
-
 #################################################################
 #################################################################
 print ' Running CMIM... '
@@ -147,6 +145,15 @@ if check_result(sf, n_relevant) == True:
 else:
     print ' mRMR failed!'

+#################################################################
+#################################################################
+print ' Running MIM...'
+sf = MIM(data, labels, n_select)
+if check_result(sf, n_relevant) == True:
+    print ' MIM passed!'
+else:
+    print ' MIM failed!'
+
 print '---> Done unit tests!'
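And a hedged sketch of calling the reworked module end to end, mirroring the new MIM test above. The function name `MIM` and the `n_select = 15` value come from this diff; `digit.txt` and the labels-in-the-last-column layout come from test/test.py, while the whitespace-splitting line parser is an assumption about the file format (test/test.py iterates the raw characters of each line instead):

```python
import numpy as np
from feast import MIM  # feast.py as of this commit

# Load a matrix whose last column holds the labels, as in test/test.py.
rows = []
with open('digit.txt') as fw:
    for line in fw:
        rows.append([float(x) for x in line.split()])  # assumed format

raw = np.array(rows, order="F")  # column-major, per this commit
labels = raw[:, -1]
data = raw[:, :-1]

# check_data() inside feast.py re-coerces everything to order="F",
# so passing Fortran-ordered arrays here just avoids an extra copy.
selected = MIM(data, labels, 15)
print(selected)  # the 15 selected features, in selection order
```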