diff options
author | Gregory Ditzler <gditzler@Gregorys-MacBook-Pro.local> | 2013-03-26 12:27:20 -0400 |
---|---|---|
committer | Gregory Ditzler <gditzler@Gregorys-MacBook-Pro.local> | 2013-03-26 12:27:20 -0400 |
commit | eee9760003139088685bcf4e69e729ff57cb1d69 (patch) | |
tree | d2717e31193c67ba7cb27a8d3b0fd4bf735101c4 | |
parent | 0a0fadb1281df594452ac239d5d8362a9e0c5e66 (diff) |
improved unit tests and documented the scripts.
-rw-r--r-- | python/demo_feast_wrapper.py | 96 | ||||
-rw-r--r-- | python/feast.py | 245 |
2 files changed, 312 insertions, 29 deletions
diff --git a/python/demo_feast_wrapper.py b/python/demo_feast_wrapper.py index 813c2f5..fd40515 100644 --- a/python/demo_feast_wrapper.py +++ b/python/demo_feast_wrapper.py @@ -1,12 +1,21 @@ #!/usr/bin/env python -import feast +from feast import * import numpy as np import import_data -print '---> Loading digit data' +def check_result(selected_features, n_select): + selected_features = sorted(selected_features) + success = True + for k in range(n_select): + if k != selected_features[k]: + success = False + return success -data_source = 'uniform' + + + +data_source = 'uniform' # set the data set we want to test if data_source == 'uniform': @@ -14,9 +23,6 @@ if data_source == 'uniform': elif data_source == 'digits': data, labels = import_data.read_digits('digit.txt') -print data - - n_observations = len(data) # number of samples in the data set n_features = len(data.transpose()) # number of features in the data set n_select = 15 # how many features to select @@ -28,7 +34,81 @@ print ' :n_observations - ' + str(n_observations) print ' :n_features - ' + str(n_features) print ' :n_select - ' + str(n_select) print ' :algorithm - ' + str(method) +print ' ' +print '---> Running unit tests on FEAST 4 Python... ' + + +################################################################# +################################################################# +print ' Running BetaGamma... ' +sf = BetaGamma(data, labels, n_select, beta=0.5, gamma=0.5) +if check_result(sf) == True: + print ' BetaGamma passed!' +else: + print ' BetaGamma failed!' + + +################################################################# +################################################################# +print ' Running CMIM... ' +sf = CMIM(data, labels, n_select) +if check_result(sf) == True: + print ' CMIM passed!' +else: + print ' CMIM failed!' + + +################################################################# +################################################################# +print ' Running CondMI... 
' +sf = CondMI(data, labels, n_select) +if check_result(sf) == True: + print ' CondMI passed!' +else: + print ' CondMI failed!' + + +################################################################# +################################################################# +print ' Running DISR... ' +sf = DISR(data, labels, n_select) +if check_result(sf) == True: + print ' DISR passed!' +else: + print ' DISR failed!' + + +################################################################# +################################################################# +print ' Running ICAP... ' +sf = ICAP(data, labels, n_select) +if check_result(sf) == True: + print ' ICAP passed!' +else: + print ' ICAP failed!' + + +################################################################# +################################################################# +print ' Running JMI... ' +sf = JMI(data, labels, n_select) +if check_result(sf) == True: + print ' JMI passed!' +else: + print ' JMI failed!' + + +################################################################# +################################################################# +print ' Running mRMR... ' +sf = mRMR(data, labels, n_select) +if check_result(sf) == True: + print ' mRMR passed!' +else: + print ' mRMR failed!' + +print '---> Done unit tests!' + + -selected_features = feast.JMI(data, labels, n_select) -print selected_features diff --git a/python/feast.py b/python/feast.py index c30c405..0d2fee6 100644 --- a/python/feast.py +++ b/python/feast.py @@ -1,6 +1,32 @@ import numpy as np from ctypes import * + +''' + The FEAST module provides an interface between the C-library + for feature selection and Python. + + References: + 1) G. Brown, A. Pocock, M.-J. Zhao, and M. Lujan, "Conditional + likelihood maximization: A unifying framework for information + theoretic feature selection," Journal of Machine Learning + Research, vol. 13, pp. 27-66, 2012. 
+ + + __author__ = "Calvin Morrison" + __copyright__ = "Copyright 2013, EESI Laboratory" + __credits__ = ["Calvin Morrison", "Gregory Ditzler"] + __license__ = "GPL" + __version__ = "0.1.0" + __maintainer__ = "Calvin Morrison" + __email__ = "mutantturkey@gmail.com" + __status__ = "Release" +''' + +# I listed the function definitions in alphabetical order. Let's +# keep this up. + + try: libFSToolbox = CDLL("libFSToolbox.so"); except: @@ -8,9 +34,31 @@ except: exit() - -def BetaGamma(data, labels, n_select, beta=2.0, gamma=2.0): - +def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0): + ''' + BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0) + + This algorithm implements conditional mutual information + feature selection, such that beta and gamma control the + weight attached to the redundant mutual and conditional + mutual information, respectively. + + Input + :data - data in a Numpy array such that len(data) = + n_observations, and len(data.transpose()) = n_features + (REQUIRED) + :labels - labels represented in a numpy list with + n_observations as the number of elements. That is + len(labels) = len(data) = n_observations. + (REQUIRED) + :n_select - number of features to select. (REQUIRED) + :beta - penalty attached to I(X_j;X_k) + :gamma - positive weight attached to the conditional + redundancy term I(X_k;X_j|Y) + Output + :selected_features - returns a list containing the features + in the order they were selected. + ''' # python values n_observations, n_features = data.shape output = np.zeros(n_select) @@ -36,11 +84,37 @@ def BetaGamma(data, labels, n_select, beta=2.0, gamma=2.0): # turn our output into a list selected_features = [] for i in features.contents: + # recall that feast was implemented with Matlab in mind, so the + # authors assumed the indexing started at one; however, in Python + # the indexing starts at zero. 
selected_features.append(i - 1) return selected_features -def JMI(data, labels, n_select): + + +def CMIM(data, labels, n_select): + ''' + CMIM(data, labels, n_select) + + This function implements the conditional mutual information + maximization feature selection algorithm. Note that this + implementation does not allow for the weighting of the + redundancy terms that BetaGamma will allow you to do. + + Input + :data - data in a Numpy array such that len(data) = + n_observations, and len(data.transpose()) = n_features + (REQUIRED) + :labels - labels represented in a numpy list with + n_observations as the number of elements. That is + len(labels) = len(data) = n_observations. + (REQUIRED) + :n_select - number of features to select. (REQUIRED) + Output + :selected_features - returns a list containing the features + in the order they were selected. + ''' # python values n_observations, n_features = data.shape @@ -51,8 +125,8 @@ def JMI(data, labels, n_select): c_n_select = c_int(n_select) c_n_features = c_int(n_features) - libFSToolbox.JMI.restype = POINTER(c_double * n_select) - features = libFSToolbox.JMI(c_n_select, + libFSToolbox.CMIM.restype = POINTER(c_double * n_select) + features = libFSToolbox.CMIM(c_n_select, c_n_observations, c_n_features, data.ctypes.data_as(POINTER(c_double)), @@ -64,12 +138,35 @@ def JMI(data, labels, n_select): # turn our output into a list selected_features = [] for i in features.contents: + # recall that feast was implemented with Matlab in mind, so the + # authors assumed the indexing started at one; however, in Python + # the indexing starts at zero. selected_features.append(i - 1) return selected_features -def mRMR_D(data, labels, n_select): + +def CondMI(data, labels, n_select): + ''' + CondMI(data, labels, n_select) + + This function implements the conditional mutual information + maximization feature selection algorithm. 
+ + Input + :data - data in a Numpy array such that len(data) = + n_observations, and len(data.transpose()) = n_features + (REQUIRED) + :labels - labels represented in a numpy list with + n_observations as the number of elements. That is + len(labels) = len(data) = n_observations. + (REQUIRED) + :n_select - number of features to select. (REQUIRED) + Output + :selected_features - returns a list containing the features + in the order they were selected. + ''' # python values n_observations, n_features = data.shape output = np.zeros(n_select) @@ -79,8 +176,8 @@ def mRMR_D(data, labels, n_select): c_n_select = c_int(n_select) c_n_features = c_int(n_features) - libFSToolbox.mRMR_D.restype = POINTER(c_double * n_select) - features = libFSToolbox.mRMR_D(c_n_select, + libFSToolbox.CondMI.restype = POINTER(c_double * n_select) + features = libFSToolbox.CondMI(c_n_select, c_n_observations, c_n_features, data.ctypes.data_as(POINTER(c_double)), @@ -92,12 +189,38 @@ def mRMR_D(data, labels, n_select): # turn our output into a list selected_features = [] for i in features.contents: + # recall that feast was implemented with Matlab in mind, so the + # authors assumed the indexing started at one; however, in Python + # the indexing starts at zero. selected_features.append(i - 1) return selected_features -def CMIM(data, labels, n_select): + + + + +def DISR(data, labels, n_select): + ''' + DISR(data, labels, n_select) + + This function implements the double input symmetrical relevance + feature selection algorithm. + + Input + :data - data in a Numpy array such that len(data) = + n_observations, and len(data.transpose()) = n_features + (REQUIRED) + :labels - labels represented in a numpy list with + n_observations as the number of elements. That is + len(labels) = len(data) = n_observations. + (REQUIRED) + :n_select - number of features to select. (REQUIRED) + Output + :selected_features - returns a list containing the features + in the order they were selected. 
+ ''' # python values n_observations, n_features = data.shape output = np.zeros(n_select) @@ -107,8 +230,8 @@ def CMIM(data, labels, n_select): c_n_select = c_int(n_select) c_n_features = c_int(n_features) - libFSToolbox.CMIM.restype = POINTER(c_double * n_select) - features = libFSToolbox.CMIM(c_n_select, + libFSToolbox.DISR.restype = POINTER(c_double * n_select) + features = libFSToolbox.DISR(c_n_select, c_n_observations, c_n_features, data.ctypes.data_as(POINTER(c_double)), @@ -120,12 +243,36 @@ def CMIM(data, labels, n_select): # turn our output into a list selected_features = [] for i in features.contents: + # recall that feast was implemented with Matlab in mind, so the + # authors assumed the indexing started at one; however, in Python + # the indexing starts at zero. selected_features.append(i - 1) return selected_features -def DISR(data, labels, n_select): + + +def ICAP(data, labels, n_select): + ''' + ICAP(data, labels, n_select) + + This function implements the interaction capping feature + selection algorithm. + + Input + :data - data in a Numpy array such that len(data) = + n_observations, and len(data.transpose()) = n_features + (REQUIRED) + :labels - labels represented in a numpy list with + n_observations as the number of elements. That is + len(labels) = len(data) = n_observations. + (REQUIRED) + :n_select - number of features to select. (REQUIRED) + Output + :selected_features - returns a list containing the features + in the order they were selected. 
+ ''' # python values n_observations, n_features = data.shape output = np.zeros(n_select) @@ -135,8 +282,8 @@ def DISR(data, labels, n_select): c_n_select = c_int(n_select) c_n_features = c_int(n_features) - libFSToolbox.DISR.restype = POINTER(c_double * n_select) - features = libFSToolbox.DISR(c_n_select, + libFSToolbox.ICAP.restype = POINTER(c_double * n_select) + features = libFSToolbox.ICAP(c_n_select, c_n_observations, c_n_features, data.ctypes.data_as(POINTER(c_double)), @@ -148,11 +295,37 @@ def DISR(data, labels, n_select): # turn our output into a list selected_features = [] for i in features.contents: + # recall that feast was implemented with Matlab in mind, so the + # authors assumed the indexing started at one; however, in Python + # the indexing starts at zero. selected_features.append(i - 1) return selected_features -def ICAP(data, labels, n_select): + + + + +def JMI(data, labels, n_select): + ''' + JMI(data, labels, n_select) + + This function implements the joint mutual information feature + selection algorithm. + + Input + :data - data in a Numpy array such that len(data) = + n_observations, and len(data.transpose()) = n_features + (REQUIRED) + :labels - labels represented in a numpy list with + n_observations as the number of elements. That is + len(labels) = len(data) = n_observations. + (REQUIRED) + :n_select - number of features to select. (REQUIRED) + Output + :selected_features - returns a list containing the features + in the order they were selected. 
+ ''' # python values n_observations, n_features = data.shape @@ -163,8 +336,8 @@ def ICAP(data, labels, n_select): c_n_select = c_int(n_select) c_n_features = c_int(n_features) - libFSToolbox.ICAP.restype = POINTER(c_double * n_select) - features = libFSToolbox.ICAP(c_n_select, + libFSToolbox.JMI.restype = POINTER(c_double * n_select) + features = libFSToolbox.JMI(c_n_select, c_n_observations, c_n_features, data.ctypes.data_as(POINTER(c_double)), @@ -176,11 +349,33 @@ def ICAP(data, labels, n_select): # turn our output into a list selected_features = [] for i in features.contents: + # recall that feast was implemented with Matlab in mind, so the + # authors assumed the indexing started at one; however, in Python + # the indexing starts at zero. selected_features.append(i - 1) return selected_features -def CondMI(data, labels, n_select): +def mRMR(data, labels, n_select): + ''' + mRMR(data, labels, n_select) + + This function implements the max-relevance min-redundancy feature + selection algorithm. + + Input + :data - data in a Numpy array such that len(data) = + n_observations, and len(data.transpose()) = n_features + (REQUIRED) + :labels - labels represented in a numpy list with + n_observations as the number of elements. That is + len(labels) = len(data) = n_observations. + (REQUIRED) + :n_select - number of features to select. (REQUIRED) + Output + :selected_features - returns a list containing the features + in the order they were selected. 
+ ''' # python values n_observations, n_features = data.shape @@ -191,8 +386,8 @@ def CondMI(data, labels, n_select): c_n_select = c_int(n_select) c_n_features = c_int(n_features) - libFSToolbox.CondMI.restype = POINTER(c_double * n_select) - features = libFSToolbox.CondMI(c_n_select, + libFSToolbox.mRMR_D.restype = POINTER(c_double * n_select) + features = libFSToolbox.mRMR_D(c_n_select, c_n_observations, c_n_features, data.ctypes.data_as(POINTER(c_double)), @@ -204,6 +399,14 @@ def CondMI(data, labels, n_select): # turn our output into a list selected_features = [] for i in features.contents: + # recall that feast was implemented with Matlab in mind, so the + # authors assumed the indexing started at one; however, in Python + # the indexing starts at zero. selected_features.append(i - 1) return selected_features + + + + + |