aboutsummaryrefslogtreecommitdiff
path: root/python/feast.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/feast.py')
-rw-r--r--python/feast.py412
1 files changed, 0 insertions, 412 deletions
diff --git a/python/feast.py b/python/feast.py
deleted file mode 100644
index 0d2fee6..0000000
--- a/python/feast.py
+++ /dev/null
@@ -1,412 +0,0 @@
-import numpy as np
-from ctypes import *
-
-
-'''
- The FEAST module provides an interface between the C-library
- for feature selection to Python.
-
- References:
- 1) G. Brown, A. Pocock, M.-J. Zhao, and M. Lujan, "Conditional
- likelihood maximization: A unifying framework for information
- theoretic feature selection," Journal of Machine Learning
- Research, vol. 13, pp. 27-66, 2012.
-
-
- __author__ = "Calvin Morrison"
- __copyright__ = "Copyright 2013, EESI Laboratory"
- __credits__ = ["Calvin Morrison", "Gregory Ditzler"]
- __license__ = "GPL"
- __version__ = "0.1.0"
- __maintainer__ = "Calvin Morrison"
- __email__ = "mutantturkey@gmail.com"
- __status__ = "Release"
-'''
-
-# I listed the function definitions in alphabetical order. Let's
-# keep it that way.
-
-
-try:
- libFSToolbox = CDLL("libFSToolbox.so");
-except:
- print "Error: could not find libFSToolbox"
- exit()
-
-
-def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
- '''
- BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0)
-
- This algotihm implements conditional mutual information
- feature select, such that beta and gamma control the
- weight attached to the redundant mutual and conditional
- mutual information, respectively.
-
- Input
- :data - data in a Numpy array such that len(data) =
- n_observations, and len(data.transpose()) = n_features
- (REQUIRED)
- :labels - labels represented in a numpy list with
- n_observations as the number of elements. That is
- len(labels) = len(data) = n_observations.
- (REQUIRED)
- :n_select - number of features to select. (REQUIRED)
- :beta - penalty attacted to I(X_j;X_k)
- :gamma - positive weight attached to the conditional
- redundancy term I(X_k;X_j|Y)
- Output
- :selected_features - returns a list containing the features
- in the order they were selected.
- '''
- # python values
- n_observations, n_features = data.shape
- output = np.zeros(n_select)
-
- # cast as C types
- c_n_observations = c_int(n_observations)
- c_n_select = c_int(n_select)
- c_n_features = c_int(n_features)
- c_beta = c_double(beta)
- c_gamma = c_double(gamma)
-
- libFSToolbox.BetaGamma.restype = POINTER(c_double * n_select)
- features = libFSToolbox.BetaGamma(c_n_select,
- c_n_observations,
- c_n_features,
- data.ctypes.data_as(POINTER(c_double)),
- labels.ctypes.data_as(POINTER(c_double)),
- output.ctypes.data_as(POINTER(c_double)),
- c_beta,
- c_gamma
- )
-
- # turn our output into a list
- selected_features = []
- for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
-
- return selected_features
-
-
-
-def CMIM(data, labels, n_select):
- '''
- CMIM(data, labels, n_select)
-
- This function implements the conditional mutual information
- maximization feature selection algorithm. Note that this
- implementation does not allow for the weighting of the
- redundancy terms that BetaGamma will allow you to do.
-
- Input
- :data - data in a Numpy array such that len(data) =
- n_observations, and len(data.transpose()) = n_features
- (REQUIRED)
- :labels - labels represented in a numpy list with
- n_observations as the number of elements. That is
- len(labels) = len(data) = n_observations.
- (REQUIRED)
- :n_select - number of features to select. (REQUIRED)
- Output
- :selected_features - returns a list containing the features
- in the order they were selected.
- '''
-
- # python values
- n_observations, n_features = data.shape
- output = np.zeros(n_select)
-
- # cast as C types
- c_n_observations = c_int(n_observations)
- c_n_select = c_int(n_select)
- c_n_features = c_int(n_features)
-
- libFSToolbox.CMIM.restype = POINTER(c_double * n_select)
- features = libFSToolbox.CMIM(c_n_select,
- c_n_observations,
- c_n_features,
- data.ctypes.data_as(POINTER(c_double)),
- labels.ctypes.data_as(POINTER(c_double)),
- output.ctypes.data_as(POINTER(c_double))
- )
-
-
- # turn our output into a list
- selected_features = []
- for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
-
- return selected_features
-
-
-
-def CondMI(data, labels, n_select):
- '''
- CondMI(data, labels, n_select)
-
- This function implements the conditional mutual information
- maximization feature selection algorithm.
-
- Input
- :data - data in a Numpy array such that len(data) =
- n_observations, and len(data.transpose()) = n_features
- (REQUIRED)
- :labels - labels represented in a numpy list with
- n_observations as the number of elements. That is
- len(labels) = len(data) = n_observations.
- (REQUIRED)
- :n_select - number of features to select. (REQUIRED)
- Output
- :selected_features - returns a list containing the features
- in the order they were selected.
- '''
- # python values
- n_observations, n_features = data.shape
- output = np.zeros(n_select)
-
- # cast as C types
- c_n_observations = c_int(n_observations)
- c_n_select = c_int(n_select)
- c_n_features = c_int(n_features)
-
- libFSToolbox.CondMI.restype = POINTER(c_double * n_select)
- features = libFSToolbox.CondMI(c_n_select,
- c_n_observations,
- c_n_features,
- data.ctypes.data_as(POINTER(c_double)),
- labels.ctypes.data_as(POINTER(c_double)),
- output.ctypes.data_as(POINTER(c_double))
- )
-
-
- # turn our output into a list
- selected_features = []
- for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
-
- return selected_features
-
-
-
-
-
-
-def DISR(data, labels, n_select):
- '''
- DISR(data, labels, n_select)
-
- This function implements the double input symmetrical relevance
- feature selection algorithm.
-
- Input
- :data - data in a Numpy array such that len(data) =
- n_observations, and len(data.transpose()) = n_features
- (REQUIRED)
- :labels - labels represented in a numpy list with
- n_observations as the number of elements. That is
- len(labels) = len(data) = n_observations.
- (REQUIRED)
- :n_select - number of features to select. (REQUIRED)
- Output
- :selected_features - returns a list containing the features
- in the order they were selected.
- '''
- # python values
- n_observations, n_features = data.shape
- output = np.zeros(n_select)
-
- # cast as C types
- c_n_observations = c_int(n_observations)
- c_n_select = c_int(n_select)
- c_n_features = c_int(n_features)
-
- libFSToolbox.DISR.restype = POINTER(c_double * n_select)
- features = libFSToolbox.DISR(c_n_select,
- c_n_observations,
- c_n_features,
- data.ctypes.data_as(POINTER(c_double)),
- labels.ctypes.data_as(POINTER(c_double)),
- output.ctypes.data_as(POINTER(c_double))
- )
-
-
- # turn our output into a list
- selected_features = []
- for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
-
- return selected_features
-
-
-
-
-def ICAP(data, labels, n_select):
- '''
- ICAP(data, labels, n_select)
-
- This function implements the interaction capping feature
- selection algorithm.
-
- Input
- :data - data in a Numpy array such that len(data) =
- n_observations, and len(data.transpose()) = n_features
- (REQUIRED)
- :labels - labels represented in a numpy list with
- n_observations as the number of elements. That is
- len(labels) = len(data) = n_observations.
- (REQUIRED)
- :n_select - number of features to select. (REQUIRED)
- Output
- :selected_features - returns a list containing the features
- in the order they were selected.
- '''
- # python values
- n_observations, n_features = data.shape
- output = np.zeros(n_select)
-
- # cast as C types
- c_n_observations = c_int(n_observations)
- c_n_select = c_int(n_select)
- c_n_features = c_int(n_features)
-
- libFSToolbox.ICAP.restype = POINTER(c_double * n_select)
- features = libFSToolbox.ICAP(c_n_select,
- c_n_observations,
- c_n_features,
- data.ctypes.data_as(POINTER(c_double)),
- labels.ctypes.data_as(POINTER(c_double)),
- output.ctypes.data_as(POINTER(c_double))
- )
-
-
- # turn our output into a list
- selected_features = []
- for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
-
- return selected_features
-
-
-
-
-
-def JMI(data, labels, n_select):
- '''
- JMI(data, labels, n_select)
-
- This function implements the joint mutual information feature
- selection algorithm.
-
- Input
- :data - data in a Numpy array such that len(data) =
- n_observations, and len(data.transpose()) = n_features
- (REQUIRED)
- :labels - labels represented in a numpy list with
- n_observations as the number of elements. That is
- len(labels) = len(data) = n_observations.
- (REQUIRED)
- :n_select - number of features to select. (REQUIRED)
- Output
- :selected_features - returns a list containing the features
- in the order they were selected.
- '''
-
- # python values
- n_observations, n_features = data.shape
- output = np.zeros(n_select)
-
- # cast as C types
- c_n_observations = c_int(n_observations)
- c_n_select = c_int(n_select)
- c_n_features = c_int(n_features)
-
- libFSToolbox.JMI.restype = POINTER(c_double * n_select)
- features = libFSToolbox.JMI(c_n_select,
- c_n_observations,
- c_n_features,
- data.ctypes.data_as(POINTER(c_double)),
- labels.ctypes.data_as(POINTER(c_double)),
- output.ctypes.data_as(POINTER(c_double))
- )
-
-
- # turn our output into a list
- selected_features = []
- for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
-
- return selected_features
-
-def mRMR(data, labels, n_select):
- '''
- mRMR(data, labels, n_select)
-
- This funciton implements the max-relevance min-redundancy feature
- selection algorithm.
-
- Input
- :data - data in a Numpy array such that len(data) =
- n_observations, and len(data.transpose()) = n_features
- (REQUIRED)
- :labels - labels represented in a numpy list with
- n_observations as the number of elements. That is
- len(labels) = len(data) = n_observations.
- (REQUIRED)
- :n_select - number of features to select. (REQUIRED)
- Output
- :selected_features - returns a list containing the features
- in the order they were selected.
- '''
-
- # python values
- n_observations, n_features = data.shape
- output = np.zeros(n_select)
-
- # cast as C types
- c_n_observations = c_int(n_observations)
- c_n_select = c_int(n_select)
- c_n_features = c_int(n_features)
-
- libFSToolbox.mRMR_D.restype = POINTER(c_double * n_select)
- features = libFSToolbox.mRMR_D(c_n_select,
- c_n_observations,
- c_n_features,
- data.ctypes.data_as(POINTER(c_double)),
- labels.ctypes.data_as(POINTER(c_double)),
- output.ctypes.data_as(POINTER(c_double))
- )
-
-
- # turn our output into a list
- selected_features = []
- for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
-
- return selected_features
-
-
-
-
-