improved unit tests and documented the scripts.

author: Gregory Ditzler <gditzler@Gregorys-MacBook-Pro.local> 2013-03-26 12:27:20 -0400
committer: Gregory Ditzler <gditzler@Gregorys-MacBook-Pro.local> 2013-03-26 12:27:20 -0400
commit: eee9760003139088685bcf4e69e729ff57cb1d69 (patch)
tree: d2717e31193c67ba7cb27a8d3b0fd4bf735101c4
parent: 0a0fadb1281df594452ac239d5d8362a9e0c5e66 (diff)
2 files changed, 312 insertions, 29 deletions
diff --git a/python/demo_feast_wrapper.py b/python/demo_feast_wrapper.py
index 813c2f5..fd40515 100644
--- a/python/demo_feast_wrapper.py
+++ b/python/demo_feast_wrapper.py
@@ -1,12 +1,21 @@
 #!/usr/bin/env python 
-import feast
+from feast import *
 import numpy as np
 import import_data 
 
 
-print '---> Loading digit data'
+def check_result(selected_features, n_select=None):
+	# Pass iff the n_select smallest selected indices are exactly 0..n_select-1.
+	# n_select defaults to len(selected_features) so check_result(sf) works.
+	ranked = sorted(selected_features)
+	if n_select is None:
+		n_select = len(ranked)
+	return ranked[:n_select] == list(range(n_select))
 
-data_source = 'uniform'
+
+
+
+data_source = 'uniform'    # set the data set we want to test
 
 
 if data_source == 'uniform':
@@ -14,9 +23,6 @@ if data_source == 'uniform':
 elif data_source == 'digits':
 	data, labels = import_data.read_digits('digit.txt')
 
-print data
-
-
 n_observations = len(data)					# number of samples in the data set
 n_features = len(data.transpose())	# number of features in the data set
 n_select = 15												# how many features to select
@@ -28,7 +34,81 @@ print '     :n_observations - ' + str(n_observations)
 print '     :n_features     - ' + str(n_features)
 print '     :n_select       - ' + str(n_select)
 print '     :algorithm      - ' + str(method)
+print ' '
+print '---> Running unit tests on FEAST 4 Python... '
+
+
+#################################################################
+#################################################################
+print '       Running BetaGamma... '
+sf = BetaGamma(data, labels, n_select, beta=0.5, gamma=0.5)
+if check_result(sf) == True:
+	print '          BetaGamma passed!'
+else:
+	print '          BetaGamma failed!'
+
+
+#################################################################
+#################################################################
+print '       Running CMIM... '
+sf = CMIM(data, labels, n_select)
+if check_result(sf) == True:
+	print '          CMIM passed!'
+else:
+	print '          CMIM failed!'
+
+
+#################################################################
+#################################################################
+print '       Running CondMI... '
+sf = CondMI(data, labels, n_select)
+if check_result(sf) == True:
+	print '          CondMI passed!'
+else:
+	print '          CondMI failed!'
+
+
+#################################################################
+#################################################################
+print '       Running DISR... '
+sf = DISR(data, labels, n_select)
+if check_result(sf) == True:
+	print '          DISR passed!'
+else:
+	print '          DISR failed!'
+
+
+#################################################################
+#################################################################
+print '       Running ICAP... '
+sf = ICAP(data, labels, n_select)
+if check_result(sf) == True:
+	print '          ICAP passed!'
+else:
+	print '          ICAP failed!'
+
+
+#################################################################
+#################################################################
+print '       Running JMI... '
+sf = JMI(data, labels, n_select)
+if check_result(sf) == True:
+	print '          JMI passed!'
+else:
+	print '          JMI failed!'
+
+
+#################################################################
+#################################################################
+print '       Running mRMR... '
+sf = mRMR(data, labels, n_select)
+if check_result(sf) == True:
+	print '          mRMR passed!'
+else:
+	print '          mRMR failed!'
+
+print '---> Done unit tests!'
+
+
 
-selected_features = feast.JMI(data, labels, n_select)
 
-print selected_features
diff --git a/python/feast.py b/python/feast.py
index c30c405..0d2fee6 100644
--- a/python/feast.py
+++ b/python/feast.py
@@ -1,6 +1,32 @@
 import numpy as np
 from ctypes import * 
 
+
+'''
+  The FEAST module provides a Python interface to the C library
+  for feature selection.
+
+  References: 
+  1) G. Brown, A. Pocock, M.-J. Zhao, and M. Lujan, "Conditional
+      likelihood maximization: A unifying framework for information
+      theoretic feature selection," Journal of Machine Learning 
+      Research, vol. 13, pp. 27-66, 2012.
+
+
+  __author__ = "Calvin Morrison"
+  __copyright__ = "Copyright 2013, EESI Laboratory"
+  __credits__ = ["Calvin Morrison", "Gregory Ditzler"]
+  __license__ = "GPL"
+  __version__ = "0.1.0"
+  __maintainer__ = "Calvin Morrison"
+  __email__ = "mutantturkey@gmail.com"
+  __status__ = "Release"
+'''
+
+# The function definitions are listed in alphabetical order. Let's
+# keep this up.
+
+
 try:
   libFSToolbox = CDLL("libFSToolbox.so"); 
 except:
@@ -8,9 +34,31 @@ except:
   exit()
 
 
-
-def BetaGamma(data, labels, n_select, beta=2.0, gamma=2.0):
-
+def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
+  '''
+    BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0)
+
+    This algorithm implements conditional mutual information
+    feature selection, such that beta and gamma control the
+    weight attached to the redundant mutual and conditional
+    mutual information, respectively.
+
+    Input 
+      :data - data in a Numpy array such that len(data) = 
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with 
+        n_observations as the number of elements. That is 
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+      :beta - penalty attached to I(X_j;X_k)
+      :gamma - positive weight attached to the conditional
+        redundancy term I(X_k;X_j|Y)
+    Output 
+      :selected_features - returns a list containing the features
+        in the order they were selected. 
+  '''
   # python values
   n_observations, n_features = data.shape
   output = np.zeros(n_select)
@@ -36,11 +84,37 @@ def BetaGamma(data, labels, n_select, beta=2.0, gamma=2.0):
   # turn our output into a list
   selected_features = []
   for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
     selected_features.append(i - 1)
 
   return selected_features
 
-def JMI(data, labels, n_select):
+
+
+def CMIM(data, labels, n_select):
+  '''
+    CMIM(data, labels, n_select)
+
+    This function implements the conditional mutual information
+    maximization feature selection algorithm. Note that this 
+    implementation does not allow for the weighting of the 
+    redundancy terms that BetaGamma will allow you to do. 
+
+    Input 
+      :data - data in a Numpy array such that len(data) = 
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with 
+        n_observations as the number of elements. That is 
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+    Output 
+      :selected_features - returns a list containing the features
+        in the order they were selected. 
+  '''
 
   # python values
   n_observations, n_features = data.shape
@@ -51,8 +125,8 @@ def JMI(data, labels, n_select):
   c_n_select = c_int(n_select)
   c_n_features = c_int(n_features)
 
-  libFSToolbox.JMI.restype = POINTER(c_double * n_select)
-  features = libFSToolbox.JMI(c_n_select,
+  libFSToolbox.CMIM.restype = POINTER(c_double * n_select)
+  features = libFSToolbox.CMIM(c_n_select,
                    c_n_observations,
                    c_n_features, 
                    data.ctypes.data_as(POINTER(c_double)),
@@ -64,12 +138,35 @@ def JMI(data, labels, n_select):
   # turn our output into a list
   selected_features = []
   for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
     selected_features.append(i - 1)
 
   return selected_features
 
-def mRMR_D(data, labels, n_select):
 
+
+def CondMI(data, labels, n_select):
+  '''
+    CondMI(data, labels, n_select)
+
+    This function implements the conditional mutual information
+    feature selection algorithm.
+
+    Input 
+      :data - data in a Numpy array such that len(data) = 
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with 
+        n_observations as the number of elements. That is 
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+    Output 
+      :selected_features - returns a list containing the features
+        in the order they were selected. 
+  '''
   # python values
   n_observations, n_features = data.shape
   output = np.zeros(n_select)
@@ -79,8 +176,8 @@ def mRMR_D(data, labels, n_select):
   c_n_select = c_int(n_select)
   c_n_features = c_int(n_features)
 
-  libFSToolbox.mRMR_D.restype = POINTER(c_double * n_select)
-  features = libFSToolbox.mRMR_D(c_n_select,
+  libFSToolbox.CondMI.restype = POINTER(c_double * n_select)
+  features = libFSToolbox.CondMI(c_n_select,
                    c_n_observations,
                    c_n_features, 
                    data.ctypes.data_as(POINTER(c_double)),
@@ -92,12 +189,38 @@ def mRMR_D(data, labels, n_select):
   # turn our output into a list
   selected_features = []
   for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
     selected_features.append(i - 1)
 
   return selected_features
 
-def CMIM(data, labels, n_select):
 
+
+
+
+
+def DISR(data, labels, n_select):
+  '''
+    DISR(data, labels, n_select)
+
+    This function implements the double input symmetrical relevance
+    feature selection algorithm. 
+
+    Input 
+      :data - data in a Numpy array such that len(data) = 
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with 
+        n_observations as the number of elements. That is 
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+    Output 
+      :selected_features - returns a list containing the features
+        in the order they were selected. 
+  '''
   # python values
   n_observations, n_features = data.shape
   output = np.zeros(n_select)
@@ -107,8 +230,8 @@ def CMIM(data, labels, n_select):
   c_n_select = c_int(n_select)
   c_n_features = c_int(n_features)
 
-  libFSToolbox.CMIM.restype = POINTER(c_double * n_select)
-  features = libFSToolbox.CMIM(c_n_select,
+  libFSToolbox.DISR.restype = POINTER(c_double * n_select)
+  features = libFSToolbox.DISR(c_n_select,
                    c_n_observations,
                    c_n_features, 
                    data.ctypes.data_as(POINTER(c_double)),
@@ -120,12 +243,36 @@ def CMIM(data, labels, n_select):
   # turn our output into a list
   selected_features = []
   for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
     selected_features.append(i - 1)
 
   return selected_features
 
-def DISR(data, labels, n_select):
 
+
+
+def ICAP(data, labels, n_select):
+  '''
+    ICAP(data, labels, n_select)
+
+    This function implements the interaction capping feature 
+    selection algorithm. 
+
+    Input 
+      :data - data in a Numpy array such that len(data) = 
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with 
+        n_observations as the number of elements. That is 
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+    Output 
+      :selected_features - returns a list containing the features
+        in the order they were selected. 
+  '''
   # python values
   n_observations, n_features = data.shape
   output = np.zeros(n_select)
@@ -135,8 +282,8 @@ def DISR(data, labels, n_select):
   c_n_select = c_int(n_select)
   c_n_features = c_int(n_features)
 
-  libFSToolbox.DISR.restype = POINTER(c_double * n_select)
-  features = libFSToolbox.DISR(c_n_select,
+  libFSToolbox.ICAP.restype = POINTER(c_double * n_select)
+  features = libFSToolbox.ICAP(c_n_select,
                    c_n_observations,
                    c_n_features, 
                    data.ctypes.data_as(POINTER(c_double)),
@@ -148,11 +295,37 @@ def DISR(data, labels, n_select):
   # turn our output into a list
   selected_features = []
   for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
     selected_features.append(i - 1)
 
   return selected_features
 
-def ICAP(data, labels, n_select):
+
+
+
+
+def JMI(data, labels, n_select):
+  '''
+    JMI(data, labels, n_select)
+
+    This function implements the joint mutual information feature
+    selection algorithm. 
+
+    Input 
+      :data - data in a Numpy array such that len(data) = 
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with 
+        n_observations as the number of elements. That is 
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+    Output 
+      :selected_features - returns a list containing the features
+        in the order they were selected. 
+  '''
 
   # python values
   n_observations, n_features = data.shape
@@ -163,8 +336,8 @@ def ICAP(data, labels, n_select):
   c_n_select = c_int(n_select)
   c_n_features = c_int(n_features)
 
-  libFSToolbox.ICAP.restype = POINTER(c_double * n_select)
-  features = libFSToolbox.ICAP(c_n_select,
+  libFSToolbox.JMI.restype = POINTER(c_double * n_select)
+  features = libFSToolbox.JMI(c_n_select,
                    c_n_observations,
                    c_n_features, 
                    data.ctypes.data_as(POINTER(c_double)),
@@ -176,11 +349,33 @@ def ICAP(data, labels, n_select):
   # turn our output into a list
   selected_features = []
   for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
     selected_features.append(i - 1)
 
   return selected_features
 
-def CondMI(data, labels, n_select):
+def mRMR(data, labels, n_select):
+  '''
+    mRMR(data, labels, n_select)
+
+    This function implements the max-relevance min-redundancy feature
+    selection algorithm. 
+
+    Input 
+      :data - data in a Numpy array such that len(data) = 
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with 
+        n_observations as the number of elements. That is 
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+    Output 
+      :selected_features - returns a list containing the features
+        in the order they were selected. 
+  '''
 
   # python values
   n_observations, n_features = data.shape
@@ -191,8 +386,8 @@ def CondMI(data, labels, n_select):
   c_n_select = c_int(n_select)
   c_n_features = c_int(n_features)
 
-  libFSToolbox.CondMI.restype = POINTER(c_double * n_select)
-  features = libFSToolbox.CondMI(c_n_select,
+  libFSToolbox.mRMR_D.restype = POINTER(c_double * n_select)
+  features = libFSToolbox.mRMR_D(c_n_select,
                    c_n_observations,
                    c_n_features, 
                    data.ctypes.data_as(POINTER(c_double)),
@@ -204,6 +399,14 @@ def CondMI(data, labels, n_select):
   # turn our output into a list
   selected_features = []
   for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
     selected_features.append(i - 1)
 
   return selected_features
+
+
+
+
+
author	Gregory Ditzler <gditzler@Gregorys-MacBook-Pro.local>	2013-03-26 12:27:20 -0400
committer	Gregory Ditzler <gditzler@Gregorys-MacBook-Pro.local>	2013-03-26 12:27:20 -0400
commit	eee9760003139088685bcf4e69e729ff57cb1d69 (patch)
tree	d2717e31193c67ba7cb27a8d3b0fd4bf735101c4
parent	0a0fadb1281df594452ac239d5d8362a9e0c5e66 (diff)