1 files changed, 207 insertions, 271 deletions
diff --git a/feast.py b/feast.py
index 767d664..3d53245 100644
--- a/feast.py
+++ b/feast.py
@@ -1,4 +1,4 @@
-'''
+"""
   The FEAST module provides an interface between the C-library
   for feature selection to Python. 
 
@@ -8,7 +8,7 @@
       theoretic feature selection," Journal of Machine Learning 
       Research, vol. 13, pp. 27-66, 2012.
 
-'''
+"""
 __author__ = "Calvin Morrison"
 __copyright__ = "Copyright 2013, EESI Laboratory"
 __credits__ = ["Calvin Morrison", "Gregory Ditzler"]
@@ -19,39 +19,40 @@ __email__ = "mutantturkey@gmail.com"
 __status__ = "Release"
 
 import numpy as np
-from ctypes import * 
+import ctypes as c
 
 try:
-  libFSToolbox = CDLL("libFSToolbox.so"); 
+  libFSToolbox = c.CDLL("libFSToolbox.so"); 
 except:
   raise Exception("Error: could not load libFSToolbox.so")
 
 
 def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
-  '''
-    BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0)
-
-    This algotihm implements conditional mutual information 
+  """
+    This algorithm implements conditional mutual information 
     feature select, such that beta and gamma control the 
     weight attached to the redundant mutual and conditional
     mutual information, respectively. 
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+      @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
         (REQUIRED)
-      :labels - labels represented in a numpy list with 
+      @type data: ndarray
+      @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
         (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-      :beta - penalty attacted to I(X_j;X_k) 
-      :gamma - positive weight attached to the conditional
+      @type labels: ndarray
+      @param n_select: number of features to select. (REQUIRED)
+      @type n_select: integer
+      @param beta: penalty attached to I(X_j;X_k) 
+      @type beta: float between 0 and 1.0 
+      @param gamma: positive weight attached to the conditional
         redundancy term I(X_k;X_j|Y)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
-  '''
+      @type gamma: float between 0 and 1.0 
+      @return: features in the order they were selected. 
+      @rtype: list
+  """
   data, labels = check_data(data, labels)
 
   # python values
@@ -59,83 +60,67 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
   output = np.zeros(n_select)
 
   # cast as C types
-  c_n_observations = c_int(n_observations)
-  c_n_select = c_int(n_select)
-  c_n_features = c_int(n_features)
-  c_beta = c_double(beta)
-  c_gamma = c_double(gamma)
+  c_n_observations = c.c_int(n_observations)
+  c_n_select = c.c_int(n_select)
+  c_n_features = c.c_int(n_features)
+  c_beta = c.c_double(beta)
+  c_gamma = c.c_double(gamma)
 
-  libFSToolbox.BetaGamma.restype = POINTER(c_double * n_select)
+  libFSToolbox.BetaGamma.restype = c.POINTER(c.c_double * n_select)
   features = libFSToolbox.BetaGamma(c_n_select,
                    c_n_observations,
                    c_n_features, 
-                   data.ctypes.data_as(POINTER(c_double)),
-                   labels.ctypes.data_as(POINTER(c_double)),
-                   output.ctypes.data_as(POINTER(c_double)),
+                   data.ctypes.data_as(c.POINTER(c.c_double)),
+                   labels.ctypes.data_as(c.POINTER(c.c_double)),
+                   output.ctypes.data_as(c.POINTER(c.c_double)),
                    c_beta,
                    c_gamma
                    )
 
-  # turn our output into a list
   selected_features = []
   for i in features.contents:
-    # recall that feast was implemented with Matlab in mind, so the 
-    # authors assumed the indexing started a one; however, in Python 
-    # the indexing starts at zero. 
-    selected_features.append(i - 1)
-
+    selected_features.append(i)
   return selected_features
 
 
-
 def CIFE(data, labels, n_select):
-  '''
-    CIFE(data, labels, n_select)
-
+  """
     This function implements the Condred feature selection algorithm.
     beta = 1; gamma = 1;
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: A Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
-  '''
-
+    @type labels: ndarray
+    @param n_select:  number of features to select.
+    @type n_select: integer
+    @return: features in the order they were selected. 
+    @rtype: list
+  """
   return BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0)
 
-
-
-
 def CMIM(data, labels, n_select):
-  '''
-    CMIM(data, labels, n_select)
-
+  """
     This function implements the conditional mutual information
     maximization feature selection algorithm. Note that this 
     implementation does not allow for the weighting of the 
     redundancy terms that BetaGamma will allow you to do.
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: A Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy array with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
-  '''
+    @type labels: ndarray
+    @param n_select: number of features to select.
+    @type n_select: integer
+    @return: features in the order that they were selected. 
+    @rtype: list
+  """
   data, labels = check_data(data, labels)
 
   # python values
@@ -143,52 +128,44 @@ def CMIM(data, labels, n_select):
   output = np.zeros(n_select)
 
   # cast as C types
-  c_n_observations = c_int(n_observations)
-  c_n_select = c_int(n_select)
-  c_n_features = c_int(n_features)
+  c_n_observations = c.c_int(n_observations)
+  c_n_select = c.c_int(n_select)
+  c_n_features = c.c_int(n_features)
 
-  libFSToolbox.CMIM.restype = POINTER(c_double * n_select)
+  libFSToolbox.CMIM.restype = c.POINTER(c.c_double * n_select)
   features = libFSToolbox.CMIM(c_n_select,
                    c_n_observations,
                    c_n_features, 
-                   data.ctypes.data_as(POINTER(c_double)),
-                   labels.ctypes.data_as(POINTER(c_double)),
-                   output.ctypes.data_as(POINTER(c_double))
+                   data.ctypes.data_as(c.POINTER(c.c_double)),
+                   labels.ctypes.data_as(c.POINTER(c.c_double)),
+                   output.ctypes.data_as(c.POINTER(c.c_double))
                    )
 
-  
-  # turn our output into a list
   selected_features = []
   for i in features.contents:
-    # recall that feast was implemented with Matlab in mind, so the 
-    # authors assumed the indexing started a one; however, in Python 
-    # the indexing starts at zero. 
-    selected_features.append(i - 1)
+    selected_features.append(i)
 
   return selected_features
 
 
 
 def CondMI(data, labels, n_select):
-  '''
-    CondMI(data, labels, n_select)
-
+  """
     This function implements the conditional mutual information
     maximization feature selection algorithm. 
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
-        n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
-        n_observations as the number of elements. That is 
-        len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
-  '''
+    @param data: data in a Numpy array such that len(data) = n_observations,
+       and len(data.transpose()) = n_features
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
+      n_observations as the number of elements. That is 
+      len(labels) = len(data) = n_observations.
+    @type labels: ndarray
+    @param n_select: number of features to select.
+    @type n_select: integer
+    @return: features in the order they were selected. 
+    @rtype: list
+  """
   data, labels = check_data(data, labels)
 
   # python values
@@ -196,77 +173,65 @@ def CondMI(data, labels, n_select):
   output = np.zeros(n_select)
 
   # cast as C types
-  c_n_observations = c_int(n_observations)
-  c_n_select = c_int(n_select)
-  c_n_features = c_int(n_features)
+  c_n_observations = c.c_int(n_observations)
+  c_n_select = c.c_int(n_select)
+  c_n_features = c.c_int(n_features)
 
-  libFSToolbox.CondMI.restype = POINTER(c_double * n_select)
+  libFSToolbox.CondMI.restype = c.POINTER(c.c_double * n_select)
   features = libFSToolbox.CondMI(c_n_select,
                    c_n_observations,
                    c_n_features, 
-                   data.ctypes.data_as(POINTER(c_double)),
-                   labels.ctypes.data_as(POINTER(c_double)),
-                   output.ctypes.data_as(POINTER(c_double))
+                   data.ctypes.data_as(c.POINTER(c.c_double)),
+                   labels.ctypes.data_as(c.POINTER(c.c_double)),
+                   output.ctypes.data_as(c.POINTER(c.c_double))
                    )
-
   
-  # turn our output into a list
   selected_features = []
   for i in features.contents:
-    # recall that feast was implemented with Matlab in mind, so the 
-    # authors assumed the indexing started a one; however, in Python 
-    # the indexing starts at zero. 
-    selected_features.append(i - 1)
+    selected_features.append(i)
 
   return selected_features
 
 
 def Condred(data, labels, n_select):
-  '''
-    Condred(data, labels, n_select)
-
+  """
     This function implements the Condred feature selection algorithm.
     beta = 0; gamma = 1;
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
-  '''
+    @type labels: ndarray
+    @param n_select: number of features to select.
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
+  """
   data, labels = check_data(data, labels)
-
   return BetaGamma(data, labels, n_select, beta=0.0, gamma=1.0)
 
 
 
 def DISR(data, labels, n_select):
-  '''
-    DISR(data, labels, n_select)
-
+  """
     This function implements the double input symmetrical relevance
     feature selection algorithm. 
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
-  '''
+    @type labels: ndarray
+    @param n_select: number of features to select. (REQUIRED)
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
+  """
   data, labels = check_data(data, labels)
 
   # python values
@@ -274,53 +239,42 @@ def DISR(data, labels, n_select):
   output = np.zeros(n_select)
 
   # cast as C types
-  c_n_observations = c_int(n_observations)
-  c_n_select = c_int(n_select)
-  c_n_features = c_int(n_features)
+  c_n_observations = c.c_int(n_observations)
+  c_n_select = c.c_int(n_select)
+  c_n_features = c.c_int(n_features)
 
-  libFSToolbox.DISR.restype = POINTER(c_double * n_select)
+  libFSToolbox.DISR.restype = c.POINTER(c.c_double * n_select)
   features = libFSToolbox.DISR(c_n_select,
                    c_n_observations,
                    c_n_features, 
-                   data.ctypes.data_as(POINTER(c_double)),
-                   labels.ctypes.data_as(POINTER(c_double)),
-                   output.ctypes.data_as(POINTER(c_double))
+                   data.ctypes.data_as(c.POINTER(c.c_double)),
+                   labels.ctypes.data_as(c.POINTER(c.c_double)),
+                   output.ctypes.data_as(c.POINTER(c.c_double))
                    )
-
   
-  # turn our output into a list
   selected_features = []
   for i in features.contents:
-    # recall that feast was implemented with Matlab in mind, so the 
-    # authors assumed the indexing started a one; however, in Python 
-    # the indexing starts at zero. 
-    selected_features.append(i - 1)
+    selected_features.append(i)
 
   return selected_features
 
-
-
-
 def ICAP(data, labels, n_select):
-  '''
-    ICAP(data, labels, n_select)
-
+  """
     This function implements the interaction capping feature 
     selection algorithm. 
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
-  '''
+    @type labels: ndarray
+    @param n_select: number of features to select. (REQUIRED)
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
+  """
   data, labels = check_data(data, labels)
 
   # python values
@@ -328,54 +282,42 @@ def ICAP(data, labels, n_select):
   output = np.zeros(n_select)
 
   # cast as C types
-  c_n_observations = c_int(n_observations)
-  c_n_select = c_int(n_select)
-  c_n_features = c_int(n_features)
+  c_n_observations = c.c_int(n_observations)
+  c_n_select = c.c_int(n_select)
+  c_n_features = c.c_int(n_features)
 
-  libFSToolbox.ICAP.restype = POINTER(c_double * n_select)
+  libFSToolbox.ICAP.restype = c.POINTER(c.c_double * n_select)
   features = libFSToolbox.ICAP(c_n_select,
                    c_n_observations,
                    c_n_features, 
-                   data.ctypes.data_as(POINTER(c_double)),
-                   labels.ctypes.data_as(POINTER(c_double)),
-                   output.ctypes.data_as(POINTER(c_double))
+                   data.ctypes.data_as(c.POINTER(c.c_double)),
+                   labels.ctypes.data_as(c.POINTER(c.c_double)),
+                   output.ctypes.data_as(c.POINTER(c.c_double))
                    )
-
   
-  # turn our output into a list
   selected_features = []
   for i in features.contents:
-    # recall that feast was implemented with Matlab in mind, so the 
-    # authors assumed the indexing started a one; however, in Python 
-    # the indexing starts at zero. 
-    selected_features.append(i - 1)
+    selected_features.append(i)
 
   return selected_features
 
-
-
-
-
 def JMI(data, labels, n_select):
-  '''
-    JMI(data, labels, n_select)
-
+  """
     This function implements the joint mutual information feature
     selection algorithm. 
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
-  '''
+    @type labels: ndarray
+    @param n_select: number of features to select. (REQUIRED)
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
+  """
   data, labels = check_data(data, labels)
 
   # python values
@@ -383,102 +325,106 @@ def JMI(data, labels, n_select):
   output = np.zeros(n_select)
 
   # cast as C types
-  c_n_observations = c_int(n_observations)
-  c_n_select = c_int(n_select)
-  c_n_features = c_int(n_features)
+  c_n_observations = c.c_int(n_observations)
+  c_n_select = c.c_int(n_select)
+  c_n_features = c.c_int(n_features)
 
-  libFSToolbox.JMI.restype = POINTER(c_double * n_select)
+  libFSToolbox.JMI.restype = c.POINTER(c.c_double * n_select)
   features = libFSToolbox.JMI(c_n_select,
                    c_n_observations,
                    c_n_features, 
-                   data.ctypes.data_as(POINTER(c_double)),
-                   labels.ctypes.data_as(POINTER(c_double)),
-                   output.ctypes.data_as(POINTER(c_double))
+                   data.ctypes.data_as(c.POINTER(c.c_double)),
+                   labels.ctypes.data_as(c.POINTER(c.c_double)),
+                   output.ctypes.data_as(c.POINTER(c.c_double))
                    )
 
-  
-  # turn our output into a list
   selected_features = []
   for i in features.contents:
-    # recall that feast was implemented with Matlab in mind, so the 
-    # authors assumed the indexing started a one; however, in Python 
-    # the indexing starts at zero. 
-    selected_features.append(i - 1)
-
+    selected_features.append(i)
   return selected_features
 
 
 
 def MIFS(data, labels, n_select):
-  '''
-    MIFS(data, labels, n_select)
-
+  """
     This function implements the MIFS algorithm.
     beta = 1; gamma = 0;
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
-  '''
-
+    @type labels: ndarray
+    @param n_select: number of features to select. (REQUIRED)
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
+  """
   return BetaGamma(data, labels, n_select, beta=0.0, gamma=0.0)
 
 
 def MIM(data, labels, n_select):
-  '''
-    MIM(data, labels, n_select)
-
+  """
     This function implements the MIM algorithm.
     beta = 0; gamma = 0;
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
-  '''
+    @type labels: ndarray
+    @param n_select: number of features to select. (REQUIRED)
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
+  """
   data, labels = check_data(data, labels)
+  
+  # python values
+  n_observations, n_features = data.shape
+  output = np.zeros(n_select)
 
-  return BetaGamma(data, labels, n_select, beta=0.0, gamma=0.0)
+  # cast as C types
+  c_n_observations = c.c_int(n_observations)
+  c_n_select = c.c_int(n_select)
+  c_n_features = c.c_int(n_features)
 
+  libFSToolbox.MIM.restype = c.POINTER(c.c_double * n_select)
+  features = libFSToolbox.MIM(c_n_select,
+                   c_n_observations,
+                   c_n_features, 
+                   data.ctypes.data_as(c.POINTER(c.c_double)),
+                   labels.ctypes.data_as(c.POINTER(c.c_double)),
+                   output.ctypes.data_as(c.POINTER(c.c_double))
+                   )
+  
+  selected_features = []
+  for i in features.contents:
+    selected_features.append(i)
+  return selected_features
 
 
 def mRMR(data, labels, n_select):
-  '''
-    mRMR(data, labels, n_select)
-
+  """
     This funciton implements the max-relevance min-redundancy feature
     selection algorithm. 
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
-  '''
+    @type labels: ndarray
+    @param n_select: number of features to select. (REQUIRED)
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
+  """
   data, labels = check_data(data, labels)
 
   # python values
@@ -486,47 +432,37 @@ def mRMR(data, labels, n_select):
   output = np.zeros(n_select)
 
   # cast as C types
-  c_n_observations = c_int(n_observations)
-  c_n_select = c_int(n_select)
-  c_n_features = c_int(n_features)
+  c_n_observations = c.c_int(n_observations)
+  c_n_select = c.c_int(n_select)
+  c_n_features = c.c_int(n_features)
 
-  libFSToolbox.mRMR_D.restype = POINTER(c_double * n_select)
+  libFSToolbox.mRMR_D.restype = c.POINTER(c.c_double * n_select)
   features = libFSToolbox.mRMR_D(c_n_select,
                    c_n_observations,
                    c_n_features, 
-                   data.ctypes.data_as(POINTER(c_double)),
-                   labels.ctypes.data_as(POINTER(c_double)),
-                   output.ctypes.data_as(POINTER(c_double))
+                   data.ctypes.data_as(c.POINTER(c.c_double)),
+                   labels.ctypes.data_as(c.POINTER(c.c_double)),
+                   output.ctypes.data_as(c.POINTER(c.c_double))
                    )
 
-  
-  # turn our output into a list
   selected_features = []
   for i in features.contents:
-    # recall that feast was implemented with Matlab in mind, so the 
-    # authors assumed the indexing started a one; however, in Python 
-    # the indexing starts at zero. 
-    selected_features.append(i - 1)
-
+    selected_features.append(i)
   return selected_features
 
 def check_data(data, labels):
-  '''
-    check_data(data, labels)
-
+  """
     Check dimensions of the data and the labels.  Raise and exception
     if there is a problem.
 
     Data and Labels are automatically cast as doubles before calling the 
     feature selection functions
 
-    Input
-      :data
-      :labels
-    Output
-      :data
-      :labels
-  '''
+    @param data: the data 
+    @param labels: the labels
+    @return: (data, labels) as ndarrays of floats
+    @rtype: tuple
+  """
 
   if isinstance(data, np.ndarray) is False:
     raise Exception("data must be an numpy ndarray.")