From 2925f1e93b6618af955b1190e100531b9f947ad5 Mon Sep 17 00:00:00 2001 From: Calvin Date: Wed, 3 Apr 2013 17:15:07 -0400 Subject: fix spelling error --- feast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/feast.py b/feast.py index 34d81a3..9bb2b9f 100644 --- a/feast.py +++ b/feast.py @@ -31,7 +31,7 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0): ''' BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0) - This algotihm implements conditional mutual information + This algorithm implements conditional mutual information feature select, such that beta and gamma control the weight attached to the redundant mutual and conditional mutual information, respectively. -- cgit v1.2.3 From 2aa8b73636ad82e1ede0f91da03793c4f61f9f59 Mon Sep 17 00:00:00 2001 From: Calvin Date: Thu, 4 Apr 2013 17:10:14 -0400 Subject: convert beta-gamma to epydoc style docs --- feast.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/feast.py b/feast.py index 9bb2b9f..96cf59d 100644 --- a/feast.py +++ b/feast.py @@ -29,28 +29,30 @@ except: def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0): ''' - BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0) - This algorithm implements conditional mutual information feature select, such that beta and gamma control the weight attached to the redundant mutual and conditional mutual information, respectively. - Input - :data - data in a Numpy array such that len(data) = + @param data: data in a Numpy array such that len(data) = n_observations, and len(data.transpose()) = n_features (REQUIRED) - :labels - labels represented in a numpy list with + @type data: ndarray + @param labels: labels represented in a numpy list with n_observations as the number of elements. That is len(labels) = len(data) = n_observations. (REQUIRED) - :n_select - number of features to select. 
(REQUIRED) - :beta - penalty attacted to I(X_j;X_k) - :gamma - positive weight attached to the conditional + @type labels: ndarray + @param n_select: number of features to select. (REQUIRED) + @type n_select: integer + @param beta: penalty attacted to I(X_j;X_k) + @type beta: float between 0 and 1.0 + @param gamma: positive weight attached to the conditional redundancy term I(X_k;X_j|Y) - Output - :selected_features - returns a list containing the features + @type gamma: float between 0 and 1.0 + @return:selected_features - returns a list containing the features in the order they were selected. + @rtype: ndarray ''' data, labels = check_data(data, labels) -- cgit v1.2.3 From 33e8bd4f7e15ae6038f3e0a83e7be72d945cc4e1 Mon Sep 17 00:00:00 2001 From: Calvin Date: Thu, 4 Apr 2013 17:12:45 -0400 Subject: convert CIFEto epydoc style docs --- feast.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/feast.py b/feast.py index 96cf59d..54c9daf 100644 --- a/feast.py +++ b/feast.py @@ -92,23 +92,21 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0): def CIFE(data, labels, n_select): ''' - CIFE(data, labels, n_select) - This function implements the Condred feature selection algorithm. beta = 1; gamma = 1; - Input - :data - data in a Numpy array such that len(data) = + @param data: A Numpy array such that len(data) = n_observations, and len(data.transpose()) = n_features - (REQUIRED) - :labels - labels represented in a numpy list with + @type data: ndarray + @param labels: labels represented in a numpy list with n_observations as the number of elements. That is len(labels) = len(data) = n_observations. - (REQUIRED) - :n_select - number of features to select. (REQUIRED) - Output - :selected_features - returns a list containing the features + @type labels: ndarray + @param n_select: number of features to select. 
+ @type n_select: integer + @return selected_features: returns a list containing the features in the order they were selected. + @return type: ndarray ''' return BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0) -- cgit v1.2.3 From 88a6c0d6243207a968e82dd607652715ac655816 Mon Sep 17 00:00:00 2001 From: Calvin Date: Thu, 4 Apr 2013 17:15:19 -0400 Subject: convert CMIM to epydoc style docs --- feast.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/feast.py b/feast.py index 54c9daf..28fe6ab 100644 --- a/feast.py +++ b/feast.py @@ -106,7 +106,7 @@ def CIFE(data, labels, n_select): @type n_select: integer @return selected_features: returns a list containing the features in the order they were selected. - @return type: ndarray + @rtype: ndarray ''' return BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0) @@ -116,25 +116,23 @@ def CIFE(data, labels, n_select): def CMIM(data, labels, n_select): ''' - CMIM(data, labels, n_select) - This function implements the conditional mutual information maximization feature selection algorithm. Note that this implementation does not allow for the weighting of the redundancy terms that BetaGamma will allow you to do. - Input - :data - data in a Numpy array such that len(data) = + @param data: A Numpy array such that len(data) = n_observations, and len(data.transpose()) = n_features - (REQUIRED) - :labels - labels represented in a numpy list with + @type data: ndarray + @param labels: labels represented in a numpy array with n_observations as the number of elements. That is len(labels) = len(data) = n_observations. - (REQUIRED) - :n_select - number of features to select. (REQUIRED) - Output - :selected_features - returns a list containing the features - in the order they were selected. + @type labels: ndarray + @param n_select: number of features to select. + @type n_select: integer + @return selected_features: A list containing the features + in the order that they were selected. 
+ @rtype: ndarray ''' data, labels = check_data(data, labels) -- cgit v1.2.3 From b95f7fee2741132eb34382a8868dad0e9cd9c331 Mon Sep 17 00:00:00 2001 From: Calvin Date: Fri, 5 Apr 2013 13:45:24 -0400 Subject: the rest of the documentation --- feast.py | 185 ++++++++++++++++++++++++++------------------------------------- 1 file changed, 77 insertions(+), 108 deletions(-) diff --git a/feast.py b/feast.py index 28fe6ab..5c3c3ee 100644 --- a/feast.py +++ b/feast.py @@ -50,9 +50,8 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0): @param gamma: positive weight attached to the conditional redundancy term I(X_k;X_j|Y) @type gamma: float between 0 and 1.0 - @return:selected_features - returns a list containing the features - in the order they were selected. - @rtype: ndarray + @return: features in the order they were selected. + @rtype: list ''' data, labels = check_data(data, labels) @@ -104,9 +103,8 @@ def CIFE(data, labels, n_select): @type labels: ndarray @param n_select: number of features to select. @type n_select: integer - @return selected_features: returns a list containing the features - in the order they were selected. - @rtype: ndarray + @return: features in the order they were selected. + @rtype: list ''' return BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0) @@ -130,9 +128,8 @@ def CMIM(data, labels, n_select): @type labels: ndarray @param n_select: number of features to select. @type n_select: integer - @return selected_features: A list containing the features - in the order that they were selected. - @rtype: ndarray + @return: features in the order that they were selected. + @rtype: list ''' data, labels = check_data(data, labels) @@ -169,23 +166,20 @@ def CMIM(data, labels, n_select): def CondMI(data, labels, n_select): ''' - CondMI(data, labels, n_select) - This function implements the conditional mutual information maximization feature selection algorithm.
- Input - :data - data in a Numpy array such that len(data) = - n_observations, and len(data.transpose()) = n_features - (REQUIRED) - :labels - labels represented in a numpy list with - n_observations as the number of elements. That is - len(labels) = len(data) = n_observations. - (REQUIRED) - :n_select - number of features to select. (REQUIRED) - Output - :selected_features - returns a list containing the features - in the order they were selected. + @param data: data in a Numpy array such that len(data) = n_observations, + and len(data.transpose()) = n_features + @type data: ndarray + @param labels: represented in a numpy list with + n_observations as the number of elements. That is + len(labels) = len(data) = n_observations. + @type labels: ndarray + @param n_select: number of features to select. + @type n_select: integer + @return: features in the order they were selected. + @rtype: list ''' data, labels = check_data(data, labels) @@ -221,23 +215,20 @@ def CondMI(data, labels, n_select): def Condred(data, labels, n_select): ''' - Condred(data, labels, n_select) - This function implements the Condred feature selection algorithm. beta = 0; gamma = 1; - Input - :data - data in a Numpy array such that len(data) = + @param data: data in a Numpy array such that len(data) = n_observations, and len(data.transpose()) = n_features - (REQUIRED) - :labels - labels represented in a numpy list with + @type data: ndarray + @param labels: labels represented in a numpy list with n_observations as the number of elements. That is len(labels) = len(data) = n_observations. - (REQUIRED) - :n_select - number of features to select. (REQUIRED) - Output - :selected_features - returns a list containing the features - in the order they were selected. + @type labels: ndarray + @param n_select: number of features to select. + @type n_select: integer + @return: the features in the order they were selected.
+ @rtype: list ''' data, labels = check_data(data, labels) @@ -247,23 +238,20 @@ def Condred(data, labels, n_select): def DISR(data, labels, n_select): ''' - DISR(data, labels, n_select) - This function implements the double input symmetrical relevance feature selection algorithm. - Input - :data - data in a Numpy array such that len(data) = + @param data: data in a Numpy array such that len(data) = n_observations, and len(data.transpose()) = n_features - (REQUIRED) - :labels - labels represented in a numpy list with + @type data: ndarray + @param labels: labels represented in a numpy list with n_observations as the number of elements. That is len(labels) = len(data) = n_observations. - (REQUIRED) - :n_select - number of features to select. (REQUIRED) - Output - :selected_features - returns a list containing the features - in the order they were selected. + @type labels: ndarray + @param n_select: number of features to select. (REQUIRED) + @type n_select: integer + @return: the features in the order they were selected. + @rtype: list ''' data, labels = check_data(data, labels) @@ -301,23 +289,20 @@ def DISR(data, labels, n_select): def ICAP(data, labels, n_select): ''' - ICAP(data, labels, n_select) - This function implements the interaction capping feature selection algorithm. - Input - :data - data in a Numpy array such that len(data) = + @param data: data in a Numpy array such that len(data) = n_observations, and len(data.transpose()) = n_features - (REQUIRED) - :labels - labels represented in a numpy list with + @type data: ndarray + @param labels: labels represented in a numpy list with n_observations as the number of elements. That is len(labels) = len(data) = n_observations. - (REQUIRED) - :n_select - number of features to select. (REQUIRED) - Output - :selected_features - returns a list containing the features - in the order they were selected. + @type labels: ndarray + @param n_select: number of features to select. 
(REQUIRED) + @type n_select: integer + @return: the features in the order they were selected. + @rtype: list ''' data, labels = check_data(data, labels) @@ -356,23 +341,20 @@ def ICAP(data, labels, n_select): def JMI(data, labels, n_select): ''' - JMI(data, labels, n_select) - This function implements the joint mutual information feature selection algorithm. - Input - :data - data in a Numpy array such that len(data) = + @param data: data in a Numpy array such that len(data) = n_observations, and len(data.transpose()) = n_features - (REQUIRED) - :labels - labels represented in a numpy list with + @type data: ndarray + @param labels: labels represented in a numpy list with n_observations as the number of elements. That is len(labels) = len(data) = n_observations. - (REQUIRED) - :n_select - number of features to select. (REQUIRED) - Output - :selected_features - returns a list containing the features - in the order they were selected. + @type labels: ndarray + @param n_select: number of features to select. (REQUIRED) + @type n_select: integer + @return: the features in the order they were selected. + @rtype: list ''' data, labels = check_data(data, labels) @@ -409,23 +391,20 @@ def JMI(data, labels, n_select): def MIFS(data, labels, n_select): ''' - MIFS(data, labels, n_select) - This function implements the MIFS algorithm. beta = 1; gamma = 0; - Input - :data - data in a Numpy array such that len(data) = + @param data: data in a Numpy array such that len(data) = n_observations, and len(data.transpose()) = n_features - (REQUIRED) - :labels - labels represented in a numpy list with + @type data: ndarray + @param labels: labels represented in a numpy list with n_observations as the number of elements. That is len(labels) = len(data) = n_observations. - (REQUIRED) - :n_select - number of features to select. (REQUIRED) - Output - :selected_features - returns a list containing the features - in the order they were selected. 
+ @type labels: ndarray + @param n_select: number of features to select. (REQUIRED) + @type n_select: integer + @return: the features in the order they were selected. + @rtype: list ''' return BetaGamma(data, labels, n_select, beta=0.0, gamma=0.0) @@ -433,23 +412,20 @@ def MIFS(data, labels, n_select): def MIM(data, labels, n_select): ''' - MIM(data, labels, n_select) - This function implements the MIM algorithm. beta = 0; gamma = 0; - Input - :data - data in a Numpy array such that len(data) = + @param data: data in a Numpy array such that len(data) = n_observations, and len(data.transpose()) = n_features - (REQUIRED) - :labels - labels represented in a numpy list with + @type data: ndarray + @param labels: labels represented in a numpy list with n_observations as the number of elements. That is len(labels) = len(data) = n_observations. - (REQUIRED) - :n_select - number of features to select. (REQUIRED) - Output - :selected_features - returns a list containing the features - in the order they were selected. + @type labels: ndarray + @param n_select: number of features to select. (REQUIRED) + @type n_select: integer + @return: the features in the order they were selected. + @rtype: list ''' data, labels = check_data(data, labels) @@ -459,23 +435,20 @@ def MIM(data, labels, n_select): def mRMR(data, labels, n_select): ''' - mRMR(data, labels, n_select) - This funciton implements the max-relevance min-redundancy feature selection algorithm. - Input - :data - data in a Numpy array such that len(data) = + @param data: data in a Numpy array such that len(data) = n_observations, and len(data.transpose()) = n_features - (REQUIRED) - :labels - labels represented in a numpy list with + @type data: ndarray + @param labels: labels represented in a numpy list with n_observations as the number of elements. That is len(labels) = len(data) = n_observations. - (REQUIRED) - :n_select - number of features to select. 
(REQUIRED) - Output - :selected_features - returns a list containing the features - in the order they were selected. + @type labels: ndarray + @param n_select: number of features to select. (REQUIRED) + @type n_select: integer + @return: the features in the order they were selected. + @rtype: list ''' data, labels = check_data(data, labels) @@ -510,20 +483,16 @@ def mRMR(data, labels, n_select): def check_data(data, labels): ''' - check_data(data, labels) - Check dimensions of the data and the labels. Raise and exception if there is a problem. Data and Labels are automatically cast as doubles before calling the feature selection functions - Input - :data - :labels - Output - :data - :labels + @param data: the data + @param labels: the labels + @return: (data, labels) as ndarrays of floats + @rtype: tuple ''' if isinstance(data, np.ndarray) is False: -- cgit v1.2.3 From fd4aba177faa389867075bb8fa020ef8a61fe859 Mon Sep 17 00:00:00 2001 From: Calvin Date: Fri, 5 Apr 2013 15:14:27 -0400 Subject: added link to documentation --- README.markdown | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.markdown b/README.markdown index b6b4d42..fd01651 100644 --- a/README.markdown +++ b/README.markdown @@ -49,6 +49,9 @@ See test/test.py for an example with uniform data and an image data set. The image data set was collected from the digits example in the Scikits-Learn toolbox.
+## Documentation +We have documentation for each of the functions setup available [here](http://mutantturkey.github.com/PyFeast/feast-module.html) + ## References * [FEAST](http://www.cs.man.ac.uk/~gbrown/fstoolbox/) - The Feature Selection Toolbox * [Fizzy](http://www.kbase.us/developer-zone/api-documentation/fizzy-feature-selection-service/) - A KBase Service for Feature Selection -- cgit v1.2.3 From 0c3b2c6c7ca3b2bb597bc77a64575ef6e744415f Mon Sep 17 00:00:00 2001 From: Calvin Date: Fri, 5 Apr 2013 15:15:20 -0400 Subject: oops I extra'd a work --- README.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index fd01651..aaa36f2 100644 --- a/README.markdown +++ b/README.markdown @@ -50,7 +50,7 @@ data set. The image data set was collected from the digits example in the Scikits-Learn toolbox. ## Documentation -We have documentation for each of the functions setup available [here](http://mutantturkey.github.com/PyFeast/feast-module.html) +We have documentation for each of the functions available [here](http://mutantturkey.github.com/PyFeast/feast-module.html) ## References * [FEAST](http://www.cs.man.ac.uk/~gbrown/fstoolbox/) - The Feature Selection Toolbox -- cgit v1.2.3 From 0fe82b9151d2a71053659336120e37f3a1124570 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Thu, 22 Aug 2013 10:53:56 -0400 Subject: Add link to fizzy qiime --- README.markdown | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index aaa36f2..7456ad4 100644 --- a/README.markdown +++ b/README.markdown @@ -10,7 +10,8 @@ to enable researchers to utilize these feature selection algorithms in Python was only natural. At Drexel University's [EESI Lab](http://www.ece.drexel.edu/gailr/EESI/), we are using PyFeast to create a feature -selection tool for the Department of Energy's upcoming KBase platform. +selection tool for the Department of Energy's upcoming KBase platform. 
We are also integrating a tool that utilizes +PyFeast as a script for Qiime users: [Qiime Fizzy Branch](https://github.com/EESI/FizzyQIIME) ## Requirements In order to use the feast module, you will need the following dependencies -- cgit v1.2.3