aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2014-10-27 14:08:54 -0400
committerCalvin Morrison <mutantturkey@gmail.com>2014-10-27 14:08:54 -0400
commit556ae3823ce8105668cf22bb966acdec1ef954e6 (patch)
treecff905d1cec43db71ad4310633e6c2d7fa3bc342
parentba67abd20e413a9672ce26f5cfcfb1251ecfde62 (diff)
parent719281d5f02872ad83bf1a6f206e10622a383976 (diff)
Merge pull request #3 from gditzler/masterHEADmaster
Force Column major
-rw-r--r--README.markdown6
-rw-r--r--feast.py149
-rw-r--r--test/test.py15
3 files changed, 71 insertions, 99 deletions
diff --git a/README.markdown b/README.markdown
index 5916a28..2004fb8 100644
--- a/README.markdown
+++ b/README.markdown
@@ -1,5 +1,5 @@
# PyFeast
-Python bindings to the FEAST Feature Selection Toolbox.
+Python bindings to the FEAST Feature Selection Toolbox.
## Download
@@ -22,8 +22,8 @@ In order to use the feast module, you will need the following dependencies
* Python 2.7
* Numpy
* Linux or OS X
-* [FEAST](https://github.com/Craigacp/FEAST)
* [MIToolbox](https://github.com/Craigacp/MIToolbox)
+* [FEAST](https://github.com/Craigacp/FEAST) v1.1.1 or higher
## Installation
@@ -33,7 +33,7 @@ In order to use the feast module, you will need the following dependencies
## Demonstration
See test/test.py for an example with uniform data and an image
data set. The image data set was collected from the digits example in
-the Scikits-Learn toolbox.
+the Scikits-Learn toolbox. Make sure that if you are loading the data from a file and converting the data to a `numpy` array that you set `order="F"`. This is *very* important.
## Documentation
We have documentation for each of the functions available [here](http://mutantturkey.github.com/PyFeast/feast-module.html)
diff --git a/feast.py b/feast.py
index 5c3c3ee..ca8ce4a 100644
--- a/feast.py
+++ b/feast.py
@@ -1,4 +1,4 @@
-'''
+"""
The FEAST module provides an interface between the C-library
for feature selection to Python.
@@ -8,7 +8,7 @@
theoretic feature selection," Journal of Machine Learning
Research, vol. 13, pp. 27-66, 2012.
-'''
+"""
__author__ = "Calvin Morrison"
__copyright__ = "Copyright 2013, EESI Laboratory"
__credits__ = ["Calvin Morrison", "Gregory Ditzler"]
@@ -21,14 +21,10 @@ __status__ = "Release"
import numpy as np
import ctypes as c
-try:
- libFSToolbox = c.CDLL("libFSToolbox.so");
-except:
- raise Exception("Error: could not load libFSToolbox.so")
-
+libFSToolbox = c.CDLL("libFSToolbox.so");
def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
- '''
+ """
This algorithm implements conditional mutual information
feature select, such that beta and gamma control the
weight attached to the redundant mutual and conditional
@@ -52,7 +48,7 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
@type gamma: float between 0 and 1.0
@return: features in the order they were selected.
@rtype: list
- '''
+ """
data, labels = check_data(data, labels)
# python values
@@ -77,20 +73,14 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
c_gamma
)
- # turn our output into a list
selected_features = []
for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
-
+ selected_features.append(i)
return selected_features
-
def CIFE(data, labels, n_select):
- '''
+ """
This function implements the CIFE feature selection algorithm.
beta = 1; gamma = 1;
@@ -105,15 +95,11 @@ def CIFE(data, labels, n_select):
@type n_select: integer
@return selected_features: features in the order they were selected.
@rtype: list
- '''
-
+ """
return BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0)
-
-
-
def CMIM(data, labels, n_select):
- '''
+ """
This function implements the conditional mutual information
maximization feature selection algorithm. Note that this
implementation does not allow for the weighting of the
@@ -130,7 +116,7 @@ def CMIM(data, labels, n_select):
@type n_select: integer
@return: features in the order that they were selected.
@rtype: list
- '''
+ """
data, labels = check_data(data, labels)
# python values
@@ -151,21 +137,16 @@ def CMIM(data, labels, n_select):
output.ctypes.data_as(c.POINTER(c.c_double))
)
-
- # turn our output into a list
selected_features = []
for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
+ selected_features.append(i)
return selected_features
def CondMI(data, labels, n_select):
- '''
+ """
This function implements the conditional mutual information
maximization feature selection algorithm.
@@ -180,7 +161,7 @@ def CondMI(data, labels, n_select):
@type n_select: integer
@return: features in the order they were selected.
@rtype list
- '''
+ """
data, labels = check_data(data, labels)
# python values
@@ -200,21 +181,16 @@ def CondMI(data, labels, n_select):
labels.ctypes.data_as(c.POINTER(c.c_double)),
output.ctypes.data_as(c.POINTER(c.c_double))
)
-
- # turn our output into a list
selected_features = []
for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
+ selected_features.append(i)
return selected_features
def Condred(data, labels, n_select):
- '''
+ """
This function implements the Condred feature selection algorithm.
beta = 0; gamma = 1;
@@ -229,15 +205,14 @@ def Condred(data, labels, n_select):
@type n_select: integer
@return: the features in the order they were selected.
@rtype: list
- '''
+ """
data, labels = check_data(data, labels)
-
return BetaGamma(data, labels, n_select, beta=0.0, gamma=1.0)
def DISR(data, labels, n_select):
- '''
+ """
This function implements the double input symmetrical relevance
feature selection algorithm.
@@ -252,7 +227,7 @@ def DISR(data, labels, n_select):
@type n_select: integer
@return: the features in the order they were selected.
@rtype: list
- '''
+ """
data, labels = check_data(data, labels)
# python values
@@ -272,23 +247,15 @@ def DISR(data, labels, n_select):
labels.ctypes.data_as(c.POINTER(c.c_double)),
output.ctypes.data_as(c.POINTER(c.c_double))
)
-
- # turn our output into a list
selected_features = []
for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
+ selected_features.append(i)
return selected_features
-
-
-
def ICAP(data, labels, n_select):
- '''
+ """
This function implements the interaction capping feature
selection algorithm.
@@ -303,7 +270,7 @@ def ICAP(data, labels, n_select):
@type n_select: integer
@return: the features in the order they were selected.
@rtype: list
- '''
+ """
data, labels = check_data(data, labels)
# python values
@@ -323,24 +290,15 @@ def ICAP(data, labels, n_select):
labels.ctypes.data_as(c.POINTER(c.c_double)),
output.ctypes.data_as(c.POINTER(c.c_double))
)
-
- # turn our output into a list
selected_features = []
for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
+ selected_features.append(i)
return selected_features
-
-
-
-
def JMI(data, labels, n_select):
- '''
+ """
This function implements the joint mutual information feature
selection algorithm.
@@ -355,7 +313,7 @@ def JMI(data, labels, n_select):
@type n_select: integer
@return: the features in the order they were selected.
@rtype: list
- '''
+ """
data, labels = check_data(data, labels)
# python values
@@ -376,21 +334,15 @@ def JMI(data, labels, n_select):
output.ctypes.data_as(c.POINTER(c.c_double))
)
-
- # turn our output into a list
selected_features = []
for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
-
+ selected_features.append(i)
return selected_features
def MIFS(data, labels, n_select):
- '''
+ """
This function implements the MIFS algorithm.
beta = 1; gamma = 0;
@@ -405,13 +357,12 @@ def MIFS(data, labels, n_select):
@type n_select: integer
@return: the features in the order they were selected.
@rtype: list
- '''
-
+ """
return BetaGamma(data, labels, n_select, beta=0.0, gamma=0.0)
def MIM(data, labels, n_select):
- '''
+ """
This function implements the MIM algorithm.
beta = 0; gamma = 0;
@@ -426,15 +377,35 @@ def MIM(data, labels, n_select):
@type n_select: integer
@return: the features in the order they were selected.
@rtype: list
- '''
+ """
data, labels = check_data(data, labels)
+
+ # python values
+ n_observations, n_features = data.shape
+ output = np.zeros(n_select)
- return BetaGamma(data, labels, n_select, beta=0.0, gamma=0.0)
+ # cast as C types
+ c_n_observations = c.c_int(n_observations)
+ c_n_select = c.c_int(n_select)
+ c_n_features = c.c_int(n_features)
+ libFSToolbox.MIM.restype = c.POINTER(c.c_double * n_select)
+ features = libFSToolbox.MIM(c_n_select,
+ c_n_observations,
+ c_n_features,
+ data.ctypes.data_as(c.POINTER(c.c_double)),
+ labels.ctypes.data_as(c.POINTER(c.c_double)),
+ output.ctypes.data_as(c.POINTER(c.c_double))
+ )
+
+ selected_features = []
+ for i in features.contents:
+ selected_features.append(i)
+ return selected_features
def mRMR(data, labels, n_select):
- '''
+ """
This function implements the max-relevance min-redundancy feature
selection algorithm.
@@ -449,7 +420,7 @@ def mRMR(data, labels, n_select):
@type n_select: integer
@return: the features in the order they were selected.
@rtype: list
- '''
+ """
data, labels = check_data(data, labels)
# python values
@@ -470,19 +441,13 @@ def mRMR(data, labels, n_select):
output.ctypes.data_as(c.POINTER(c.c_double))
)
-
- # turn our output into a list
selected_features = []
for i in features.contents:
- # recall that feast was implemented with Matlab in mind, so the
- # authors assumed the indexing started a one; however, in Python
- # the indexing starts at zero.
- selected_features.append(i - 1)
-
+ selected_features.append(i)
return selected_features
def check_data(data, labels):
- '''
+ """
Check dimensions of the data and the labels. Raise an exception
if there is a problem.
@@ -493,7 +458,7 @@ def check_data(data, labels):
@param labels: the labels
@return (data, labels): ndarray of floats
@rtype: tuple
- '''
+ """
if isinstance(data, np.ndarray) is False:
raise Exception("data must be an numpy ndarray.")
@@ -502,5 +467,5 @@ def check_data(data, labels):
if len(data) != len(labels):
raise Exception("data and labels must be the same length")
-
- return 1.0*data, 1.0*labels
+
+ return 1.0*np.array(data, order="F"), 1.0*np.array(labels, order="F")
diff --git a/test/test.py b/test/test.py
index b5e16d1..7b90b3b 100644
--- a/test/test.py
+++ b/test/test.py
@@ -25,7 +25,7 @@ def read_digits(fname='digit.txt'):
data = []
for line in fw:
data.append( [float(x) for x in line] )
- data = np.array(data)
+ data = np.array(data, order="F")
labels = data[:,len(data.transpose())-1]
data = data[:,:len(data.transpose())-1]
return data, labels
@@ -47,7 +47,6 @@ def uniform_data(n_observations = 1000, n_features = 50, n_relevant = 5):
else:
labels[m] = 2
data = data.transpose()
-
return data, labels
@@ -66,7 +65,7 @@ elif data_source == 'digits':
n_observations = len(data) # number of samples in the data set
n_features = len(data.transpose()) # number of features in the data set
n_select = 15 # how many features to select
-method = 'JMI' # feature selection algorithm
+method = 'MIM' # feature selection algorithm
print '---> Information'
@@ -87,7 +86,6 @@ if check_result(sf, n_relevant) == True:
else:
print ' BetaGamma failed!'
-
#################################################################
#################################################################
print ' Running CMIM... '
@@ -147,6 +145,15 @@ if check_result(sf, n_relevant) == True:
else:
print ' mRMR failed!'
+#################################################################
+#################################################################
+print ' Running MIM...'
+sf = MIM(data, labels, n_select)
+if check_result(sf, n_relevant) == True:
+ print ' MIM passed!'
+else:
+ print ' MIM failed!'
+
print '---> Done unit tests!'