-rw-r--r--  feast.py             219
-rw-r--r--  test/import_data.py   58
2 files changed, 218 insertions, 59 deletions
diff --git a/feast.py b/feast.py
index 0d2fee6..9cf5725 100644
--- a/feast.py
+++ b/feast.py
@@ -43,6 +43,11 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
weight attached to the redundant mutual and conditional
mutual information, respectively.
+ The return value is None if there is an error with the
+ dimensions of the data and/or labels. All data are
+ automatically cast to doubles before calling the feature
+ selection tool.
+
Input
:data - data in a Numpy array such that len(data) =
n_observations, and len(data.transpose()) = n_features
@@ -59,6 +64,11 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
:selected_features - returns a list containing the features
in the order they were selected.
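+ Example (an illustrative sketch; the random data below are
+ placeholders, not a real dataset):
+ >>> import numpy as np
+ >>> data = 1.0*np.random.randint(0, 5, (500, 25))
+ >>> labels = 1.0*np.random.randint(0, 2, 500)
+ >>> sf = BetaGamma(data, labels, 10, beta=0.5, gamma=0.5)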
'''
+ data, labels = check_data(data, labels)
+ if data is None or labels is None:
+ return None
+
# python values
n_observations, n_features = data.shape
output = np.zeros(n_select)
@@ -93,6 +103,39 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
+def CIFE(data, labels, n_select):
+ '''
+ CIFE(data, labels, n_select)
+
+ This function implements the CIFE feature selection algorithm.
+ beta = 1; gamma = 1;
+
+ The return value is None if there is an error with the
+ dimensions of the data and/or labels. All data are
+ automatically cast to doubles before calling the feature
+ selection tool.
+
+ Input
+ :data - data in a Numpy array such that len(data) =
+ n_observations, and len(data.transpose()) = n_features
+ (REQUIRED)
+ :labels - labels represented in a numpy array with
+ n_observations as the number of elements. That is
+ len(labels) = len(data) = n_observations.
+ (REQUIRED)
+ :n_select - number of features to select. (REQUIRED)
+ Output
+ :selected_features - returns a list containing the features
+ in the order they were selected.
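+ Example (illustrative sketch with placeholder random data):
+ >>> import numpy as np
+ >>> data = 1.0*np.random.randint(0, 5, (500, 25))
+ >>> labels = 1.0*np.random.randint(0, 2, 500)
+ >>> sf = CIFE(data, labels, 10)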
+ '''
+ data, labels = check_data(data, labels)
+ if data is None or labels is None:
+ return None
+ return BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0)
+
+
def CMIM(data, labels, n_select):
'''
CMIM(data, labels, n_select)
@@ -100,7 +143,12 @@ def CMIM(data, labels, n_select):
This function implements the conditional mutual information
maximization feature selection algorithm. Note that this
implementation does not allow for the weighting of the
- redundancy terms that BetaGamma will allow you to do.
+ redundancy terms as BetaGamma does.
+
+ The return value is None if there is an error with the
+ dimensions of the data and/or labels. All data are
+ automatically cast to doubles before calling the feature
+ selection tool.
+
Input
:data - data in a Numpy array such that len(data) =
@@ -115,6 +163,9 @@ def CMIM(data, labels, n_select):
:selected_features - returns a list containing the features
in the order they were selected.
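+ Example (illustrative sketch with placeholder random data):
+ >>> import numpy as np
+ >>> data = 1.0*np.random.randint(0, 5, (500, 25))
+ >>> labels = 1.0*np.random.randint(0, 2, 500)
+ >>> sf = CMIM(data, labels, 10)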
'''
+ data, labels = check_data(data, labels)
+ if data is None or labels is None:
+ return None
# python values
n_observations, n_features = data.shape
@@ -154,6 +205,11 @@ def CondMI(data, labels, n_select):
This function implements the conditional mutual information
maximization feature selection algorithm.
+ The return value is None if there is an error with the
+ dimensions of the data and/or labels. All data are
+ automatically cast to doubles before calling the feature
+ selection tool.
+
Input
:data - data in a Numpy array such that len(data) =
n_observations, and len(data.transpose()) = n_features
@@ -167,6 +223,10 @@ def CondMI(data, labels, n_select):
:selected_features - returns a list containing the features
in the order they were selected.
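+ Example (illustrative sketch with placeholder random data):
+ >>> import numpy as np
+ >>> data = 1.0*np.random.randint(0, 5, (500, 25))
+ >>> labels = 1.0*np.random.randint(0, 2, 500)
+ >>> sf = CondMI(data, labels, 10)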
'''
+ data, labels = check_data(data, labels)
+ if data is None or labels is None:
+ return None
+
# python values
n_observations, n_features = data.shape
output = np.zeros(n_select)
@@ -197,7 +257,36 @@ def CondMI(data, labels, n_select):
return selected_features
+def Condred(data, labels, n_select):
+ '''
+ Condred(data, labels, n_select)
+
+ This function implements the Condred feature selection algorithm.
+ beta = 0; gamma = 1;
+
+ The return value is None if there is an error with the
+ dimensions of the data and/or labels. All data are
+ automatically cast to doubles before calling the feature
+ selection tool.
+
+ Input
+ :data - data in a Numpy array such that len(data) =
+ n_observations, and len(data.transpose()) = n_features
+ (REQUIRED)
+ :labels - labels represented in a numpy array with
+ n_observations as the number of elements. That is
+ len(labels) = len(data) = n_observations.
+ (REQUIRED)
+ :n_select - number of features to select. (REQUIRED)
+ Output
+ :selected_features - returns a list containing the features
+ in the order they were selected.
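+ Example (illustrative sketch with placeholder random data):
+ >>> import numpy as np
+ >>> data = 1.0*np.random.randint(0, 5, (500, 25))
+ >>> labels = 1.0*np.random.randint(0, 2, 500)
+ >>> sf = Condred(data, labels, 10)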
+ '''
+ data, labels = check_data(data, labels)
+ if data is None or labels is None:
+ return None
+ return BetaGamma(data, labels, n_select, beta=0.0, gamma=1.0)
@@ -208,6 +297,11 @@ def DISR(data, labels, n_select):
This function implements the double input symmetrical relevance
feature selection algorithm.
+ The return value is None if there is an error with the
+ dimensions of the data and/or labels. All data are
+ automatically cast to doubles before calling the feature
+ selection tool.
+
Input
:data - data in a Numpy array such that len(data) =
n_observations, and len(data.transpose()) = n_features
@@ -221,6 +315,10 @@ def DISR(data, labels, n_select):
:selected_features - returns a list containing the features
in the order they were selected.
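+ Example (illustrative sketch with placeholder random data):
+ >>> import numpy as np
+ >>> data = 1.0*np.random.randint(0, 5, (500, 25))
+ >>> labels = 1.0*np.random.randint(0, 2, 500)
+ >>> sf = DISR(data, labels, 10)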
'''
+ data, labels = check_data(data, labels)
+ if data is None or labels is None:
+ return None
+
# python values
n_observations, n_features = data.shape
output = np.zeros(n_select)
@@ -260,6 +358,11 @@ def ICAP(data, labels, n_select):
This function implements the interaction capping feature
selection algorithm.
+ The return value is None if there is an error with the
+ dimensions of the data and/or labels. All data are
+ automatically cast to doubles before calling the feature
+ selection tool.
+
Input
:data - data in a Numpy array such that len(data) =
n_observations, and len(data.transpose()) = n_features
@@ -273,6 +376,10 @@ def ICAP(data, labels, n_select):
:selected_features - returns a list containing the features
in the order they were selected.
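+ Example (illustrative sketch with placeholder random data):
+ >>> import numpy as np
+ >>> data = 1.0*np.random.randint(0, 5, (500, 25))
+ >>> labels = 1.0*np.random.randint(0, 2, 500)
+ >>> sf = ICAP(data, labels, 10)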
'''
+ data, labels = check_data(data, labels)
+ if data is None or labels is None:
+ return None
+
# python values
n_observations, n_features = data.shape
output = np.zeros(n_select)
@@ -313,6 +420,11 @@ def JMI(data, labels, n_select):
This function implements the joint mutual information feature
selection algorithm.
+ The return value is None if there is an error with the
+ dimensions of the data and/or labels. All data are
+ automatically cast to doubles before calling the feature
+ selection tool.
+
Input
:data - data in a Numpy array such that len(data) =
n_observations, and len(data.transpose()) = n_features
@@ -326,6 +438,9 @@ def JMI(data, labels, n_select):
:selected_features - returns a list containing the features
in the order they were selected.
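+ Example (illustrative sketch with placeholder random data):
+ >>> import numpy as np
+ >>> data = 1.0*np.random.randint(0, 5, (500, 25))
+ >>> labels = 1.0*np.random.randint(0, 2, 500)
+ >>> sf = JMI(data, labels, 10)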
'''
+ data, labels = check_data(data, labels)
+ if data is None or labels is None:
+ return None
# python values
n_observations, n_features = data.shape
@@ -356,6 +471,73 @@ def JMI(data, labels, n_select):
return selected_features
+
+
+def MIFS(data, labels, n_select):
+ '''
+ MIFS(data, labels, n_select)
+
+ This function implements the MIFS algorithm.
+ beta = 1; gamma = 0;
+
+ The return value is None if there is an error with the
+ dimensions of the data and/or labels. All data are
+ automatically cast to doubles before calling the feature
+ selection tool.
+
+ Input
+ :data - data in a Numpy array such that len(data) =
+ n_observations, and len(data.transpose()) = n_features
+ (REQUIRED)
+ :labels - labels represented in a numpy array with
+ n_observations as the number of elements. That is
+ len(labels) = len(data) = n_observations.
+ (REQUIRED)
+ :n_select - number of features to select. (REQUIRED)
+ Output
+ :selected_features - returns a list containing the features
+ in the order they were selected.
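+ Example (illustrative sketch with placeholder random data):
+ >>> import numpy as np
+ >>> data = 1.0*np.random.randint(0, 5, (500, 25))
+ >>> labels = 1.0*np.random.randint(0, 2, 500)
+ >>> sf = MIFS(data, labels, 10)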
+ '''
+ data, labels = check_data(data, labels)
+ if data is None or labels is None:
+ return None
+
+ return BetaGamma(data, labels, n_select, beta=1.0, gamma=0.0)
+
+
+def MIM(data, labels, n_select):
+ '''
+ MIM(data, labels, n_select)
+
+ This function implements the MIM algorithm.
+ beta = 0; gamma = 0;
+
+ The return value is None if there is an error with the
+ dimensions of the data and/or labels. All data are
+ automatically cast to doubles before calling the feature
+ selection tool.
+
+ Input
+ :data - data in a Numpy array such that len(data) =
+ n_observations, and len(data.transpose()) = n_features
+ (REQUIRED)
+ :labels - labels represented in a numpy array with
+ n_observations as the number of elements. That is
+ len(labels) = len(data) = n_observations.
+ (REQUIRED)
+ :n_select - number of features to select. (REQUIRED)
+ Output
+ :selected_features - returns a list containing the features
+ in the order they were selected.
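+ Example (illustrative sketch with placeholder random data):
+ >>> import numpy as np
+ >>> data = 1.0*np.random.randint(0, 5, (500, 25))
+ >>> labels = 1.0*np.random.randint(0, 2, 500)
+ >>> sf = MIM(data, labels, 10)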
+ '''
+ data, labels = check_data(data, labels)
+ if data is None or labels is None:
+ return None
+
+ return BetaGamma(data, labels, n_select, beta=0.0, gamma=0.0)
+
+
def mRMR(data, labels, n_select):
'''
mRMR(data, labels, n_select)
@@ -363,6 +545,11 @@ def mRMR(data, labels, n_select):
This function implements the max-relevance min-redundancy feature
selection algorithm.
+ The return value is None if there is an error with the
+ dimensions of the data and/or labels. All data are
+ automatically cast to doubles before calling the feature
+ selection tool.
+
Input
:data - data in a Numpy array such that len(data) =
n_observations, and len(data.transpose()) = n_features
@@ -376,6 +563,10 @@ def mRMR(data, labels, n_select):
:selected_features - returns a list containing the features
in the order they were selected.
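+ Example (illustrative sketch with placeholder random data):
+ >>> import numpy as np
+ >>> data = 1.0*np.random.randint(0, 5, (500, 25))
+ >>> labels = 1.0*np.random.randint(0, 2, 500)
+ >>> sf = mRMR(data, labels, 10)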
'''
+ data, labels = check_data(data, labels)
+ if data is None or labels is None:
+ return None
+
# python values
n_observations, n_features = data.shape
@@ -410,3 +601,29 @@ def mRMR(data, labels, n_select):
+
+
+def check_data(data, labels):
+ '''
+ check_data(data, labels)
+
+ Check that the dimensions of the data and labels agree, and
+ cast both to doubles before they are handed to the feature
+ selection tool. The return value is (None, None) if
+ len(data) != len(labels).
+
+ Input
+ :data - data in a Numpy array (n_observations x n_features)
+ :labels - labels in a Numpy array with n_observations elements
+ Output
+ :data - the data cast to doubles, or None on error
+ :labels - the labels cast to doubles, or None on error
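+
+ Example (illustrative sketch):
+ >>> import numpy as np
+ >>> d, l = check_data(np.ones((10, 3)), np.ones(10))  # cast to doubles
+ >>> d, l = check_data(np.ones((10, 3)), np.ones(9))   # -> (None, None)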
+ '''
+ if len(data) != len(labels):
+ return None, None
+
+ return 1.0*data, 1.0*labels
diff --git a/test/import_data.py b/test/import_data.py
deleted file mode 100644
index 158e97d..0000000
--- a/test/import_data.py
+++ /dev/null
@@ -1,58 +0,0 @@
-
-
-
-
-##################################################################
-##################################################################
-##################################################################
-def read_digits(fname='digit.txt'):
- '''
- read_digits(fname='digit.txt')
-
- read a data file that contains the features and class labels.
- each row of the file is a feature vector with the class
- label appended.
- '''
- import csv
- import numpy as np
-
- fw = csv.reader(open(fname,'rb'), delimiter='\t')
- data = []
- for line in fw:
- data.append( [float(x) for x in line] )
- data = np.array(data)
- labels = data[:,len(data.transpose())-1]
- data = data[:,:len(data.transpose())-1]
- return data, labels
-##################################################################
-##################################################################
-##################################################################
-
-
-
-##################################################################
-##################################################################
-##################################################################
-def uniform_data(n_observations = 1000, n_features = 50, n_relevant = 5):
- import numpy as np
- xmax = 10
- xmin = 0
- data = 1.0*np.random.randint(xmax + 1, size = (n_features, n_observations))
- labels = np.zeros(n_observations)
- delta = n_relevant * (xmax - xmin) / 2.0
-
- for m in range(n_observations):
- zz = 0.0
- for k in range(n_relevant):
- zz += data[k, m]
- if zz > delta:
- labels[m] = 1
- else:
- labels[m] = 2
- data = data.transpose()
-
- return data, labels
-
-##################################################################
-##################################################################
-##################################################################