Merge branch 'master' of https://github.com/mutantturkey/PyFeast

author: Calvin Morrison <mutantturkey@gmail.com> 2014-02-19 16:03:14 -0500
committer: Calvin Morrison <mutantturkey@gmail.com> 2014-02-19 16:03:14 -0500
commit: 49162177c5da0404d41f91f5f41006f0456babcd (patch)
tree: 48a0d506e4ef9a58637429c643ca2b149f392e95
parent: ac86133781d7cd50964579f79522a4e3f8c3f339 (diff)
parent: 0fe82b9151d2a71053659336120e37f3a1124570 (diff)
2 files changed, 105 insertions, 134 deletions
diff --git a/README.markdown b/README.markdown
index b6b4d42..7456ad4 100644
--- a/README.markdown
+++ b/README.markdown
@@ -10,7 +10,8 @@ to enable researchers to utilize these feature selection algorithms in Python
 was only natural.
 
 At Drexel University's [EESI Lab](http://www.ece.drexel.edu/gailr/EESI/), we are using PyFeast to create a feature
-selection tool for the Department of Energy's upcoming KBase platform.
+selection tool for the Department of Energy's upcoming KBase platform. We are also integrating a tool that utilizes
+PyFeast as a script for Qiime users: [Qiime Fizzy Branch](https://github.com/EESI/FizzyQIIME)
 
 ## Requirements
 In order to use the feast module, you will need the following dependencies
@@ -49,6 +50,9 @@ See test/test.py for an example with uniform data and an image
 data set. The image data set was collected from the digits example in 
 the Scikits-Learn toolbox.
 
+## Documentation
+We have documentation for each of the functions available [here](http://mutantturkey.github.com/PyFeast/feast-module.html)
+
 ## References
 * [FEAST](http://www.cs.man.ac.uk/~gbrown/fstoolbox/) - The Feature Selection Toolbox  
 * [Fizzy](http://www.kbase.us/developer-zone/api-documentation/fizzy-feature-selection-service/)  - A KBase Service for Feature Selection
diff --git a/feast.py b/feast.py
index 34d81a3..5c3c3ee 100644
--- a/feast.py
+++ b/feast.py
@@ -29,28 +29,29 @@ except:
 
 def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
   '''
-    BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0)
-
-    This algotihm implements conditional mutual information 
+    This algorithm implements conditional mutual information 
     feature select, such that beta and gamma control the 
     weight attached to the redundant mutual and conditional
     mutual information, respectively. 
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+      @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
         (REQUIRED)
-      :labels - labels represented in a numpy list with 
+      @type data: ndarray
+      @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
         (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-      :beta - penalty attacted to I(X_j;X_k) 
-      :gamma - positive weight attached to the conditional
+      @type labels: ndarray
+      @param n_select: number of features to select. (REQUIRED)
+      @type n_select: integer
+      @param beta: penalty attached to I(X_j;X_k) 
+      @type beta: float between 0 and 1.0 
+      @param gamma: positive weight attached to the conditional
         redundancy term I(X_k;X_j|Y)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
+      @type gamma: float between 0 and 1.0 
+      @return: features in the order they were selected. 
+      @rtype: list
   '''
   data, labels = check_data(data, labels)
 
@@ -90,23 +91,20 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
 
 def CIFE(data, labels, n_select):
   '''
-    CIFE(data, labels, n_select)
-
     This function implements the Condred feature selection algorithm.
     beta = 1; gamma = 1;
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: A Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
+    @type labels: ndarray
+    @param n_select:  number of features to select.
+    @type n_select: integer
+    @return: features in the order they were selected. 
+    @rtype: list
   '''
 
   return BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0)
@@ -116,25 +114,22 @@ def CIFE(data, labels, n_select):
 
 def CMIM(data, labels, n_select):
   '''
-    CMIM(data, labels, n_select)
-
     This function implements the conditional mutual information
     maximization feature selection algorithm. Note that this 
     implementation does not allow for the weighting of the 
     redundancy terms that BetaGamma will allow you to do.
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: A Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy array with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
+    @type labels: ndarray
+    @param n_select: number of features to select.
+    @type n_select: integer
+    @return: features in the order that they were selected. 
+    @rtype: list
   '''
   data, labels = check_data(data, labels)
 
@@ -171,23 +166,20 @@ def CMIM(data, labels, n_select):
 
 def CondMI(data, labels, n_select):
   '''
-    CondMI(data, labels, n_select)
-
     This function implements the conditional mutual information
     maximization feature selection algorithm. 
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
-        n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
-        n_observations as the number of elements. That is 
-        len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
+    @param data: data in a Numpy array such that len(data) = n_observations,
+       and len(data.transpose()) = n_features
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
+      n_observations as the number of elements. That is 
+      len(labels) = len(data) = n_observations.
+    @type labels: ndarray
+    @param n_select: number of features to select.
+    @type n_select: integer
+    @return: features in the order they were selected. 
+    @rtype: list
   '''
   data, labels = check_data(data, labels)
 
@@ -223,23 +215,20 @@ def CondMI(data, labels, n_select):
 
 def Condred(data, labels, n_select):
   '''
-    Condred(data, labels, n_select)
-
     This function implements the Condred feature selection algorithm.
     beta = 0; gamma = 1;
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
+    @type labels: ndarray
+    @param n_select: number of features to select.
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
   '''
   data, labels = check_data(data, labels)
 
@@ -249,23 +238,20 @@ def Condred(data, labels, n_select):
 
 def DISR(data, labels, n_select):
   '''
-    DISR(data, labels, n_select)
-
     This function implements the double input symmetrical relevance
     feature selection algorithm. 
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
+    @type labels: ndarray
+    @param n_select: number of features to select. (REQUIRED)
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
   '''
   data, labels = check_data(data, labels)
 
@@ -303,23 +289,20 @@ def DISR(data, labels, n_select):
 
 def ICAP(data, labels, n_select):
   '''
-    ICAP(data, labels, n_select)
-
     This function implements the interaction capping feature 
     selection algorithm. 
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
+    @type labels: ndarray
+    @param n_select: number of features to select. (REQUIRED)
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
   '''
   data, labels = check_data(data, labels)
 
@@ -358,23 +341,20 @@ def ICAP(data, labels, n_select):
 
 def JMI(data, labels, n_select):
   '''
-    JMI(data, labels, n_select)
-
     This function implements the joint mutual information feature
     selection algorithm. 
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
+    @type labels: ndarray
+    @param n_select: number of features to select. (REQUIRED)
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
   '''
   data, labels = check_data(data, labels)
 
@@ -411,23 +391,20 @@ def JMI(data, labels, n_select):
 
 def MIFS(data, labels, n_select):
   '''
-    MIFS(data, labels, n_select)
-
     This function implements the MIFS algorithm.
     beta = 1; gamma = 0;
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
+    @type labels: ndarray
+    @param n_select: number of features to select. (REQUIRED)
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
   '''
 
   return BetaGamma(data, labels, n_select, beta=0.0, gamma=0.0)
@@ -435,23 +412,20 @@ def MIFS(data, labels, n_select):
 
 def MIM(data, labels, n_select):
   '''
-    MIM(data, labels, n_select)
-
     This function implements the MIM algorithm.
     beta = 0; gamma = 0;
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
+    @type labels: ndarray
+    @param n_select: number of features to select. (REQUIRED)
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
   '''
   data, labels = check_data(data, labels)
 
@@ -461,23 +435,20 @@ def MIM(data, labels, n_select):
 
 def mRMR(data, labels, n_select):
   '''
-    mRMR(data, labels, n_select)
-
     This funciton implements the max-relevance min-redundancy feature
     selection algorithm. 
 
-    Input 
-      :data - data in a Numpy array such that len(data) = 
+    @param data: data in a Numpy array such that len(data) = 
         n_observations, and len(data.transpose()) = n_features
-        (REQUIRED)
-      :labels - labels represented in a numpy list with 
+    @type data: ndarray
+    @param labels: labels represented in a numpy list with 
         n_observations as the number of elements. That is 
         len(labels) = len(data) = n_observations.
-        (REQUIRED)
-      :n_select - number of features to select. (REQUIRED)
-    Output 
-      :selected_features - returns a list containing the features
-        in the order they were selected. 
+    @type labels: ndarray
+    @param n_select: number of features to select. (REQUIRED)
+    @type n_select: integer
+    @return: the features in the order they were selected. 
+    @rtype: list
   '''
   data, labels = check_data(data, labels)
 
@@ -512,20 +483,16 @@ def mRMR(data, labels, n_select):
 
 def check_data(data, labels):
   '''
-    check_data(data, labels)
-
     Check dimensions of the data and the labels.  Raise and exception
     if there is a problem.
 
     Data and Labels are automatically cast as doubles before calling the 
     feature selection functions
 
-    Input
-      :data
-      :labels
-    Output
-      :data
-      :labels
+    @param data: the data 
+    @param labels: the labels
+    @return: the data and labels, each as an ndarray of floats
+    @rtype: tuple
   '''
 
   if isinstance(data, np.ndarray) is False:
author	Calvin Morrison <mutantturkey@gmail.com>	2014-02-19 16:03:14 -0500
committer	Calvin Morrison <mutantturkey@gmail.com>	2014-02-19 16:03:14 -0500
commit	49162177c5da0404d41f91f5f41006f0456babcd (patch)
tree	48a0d506e4ef9a58637429c643ca2b149f392e95
parent	ac86133781d7cd50964579f79522a4e3f8c3f339 (diff)
parent	0fe82b9151d2a71053659336120e37f3a1124570 (diff)