-rw-r--r--   feast.py              219
-rw-r--r--   test/import_data.py    58
2 files changed, 218 insertions, 59 deletions
diff --git a/feast.py b/feast.py
--- a/feast.py
+++ b/feast.py
@@ -43,6 +43,11 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
        weight attached to the redundant mutual and conditional
        mutual information, respectively.
+       The return type is None if there is an error with the
+       dimensions of the data and/or labels. All data are
+       automatically cast as doubles before calling the feature
+       selection tool.
+
        Input
            :data - data in a Numpy array such that len(data) =
                n_observations, and len(data.transpose()) = n_features
@@ -59,6 +64,11 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
            :selected_features - returns a list containing the features
                in the order they were selected.
    '''
+   data, labels = check_data(data, labels)
+   if data == None or labels == None:
+       return None
+
+   # python values
    n_observations, n_features = data.shape
    output = np.zeros(n_select)
@@ -93,6 +103,39 @@ def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):



+def CIFE(data, labels, n_select):
+   '''
+   CIFE(data, labels, n_select)
+
+   This function implements the CIFE feature selection algorithm.
+   beta = 1; gamma = 1;
+
+   The return type is None if there is an error with the
+   dimensions of the data and/or labels. All data are
+   automatically cast as doubles before calling the feature
+   selection tool.
+
+   Input
+       :data - data in a Numpy array such that len(data) =
+           n_observations, and len(data.transpose()) = n_features
+           (REQUIRED)
+       :labels - labels represented in a numpy list with
+           n_observations as the number of elements. That is
+           len(labels) = len(data) = n_observations.
+           (REQUIRED)
+       :n_select - number of features to select. (REQUIRED)
+   Output
+       :selected_features - returns a list containing the features
+           in the order they were selected.
+   '''
+   data, labels = check_data(data, labels)
+   if data == None or labels == None:
+       return None
+   return BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0)
+
+
+
def CMIM(data, labels, n_select):
    '''
    CMIM(data, labels, n_select)
@@ -100,7 +143,12 @@ def CMIM(data, labels, n_select):
    This function implements the conditional mutual information
    maximization feature selection algorithm. Note that this
    implementation does not allow for the weighting of the
-   redundancy terms that BetaGamma will allow you to do.
+   redundancy terms that BetaGamma will allow you to do.
+
+   The return type is None if there is an error with the
+   dimensions of the data and/or labels. All data are
+   automatically cast as doubles before calling the feature
+   selection tool.

    Input
        :data - data in a Numpy array such that len(data) =
@@ -115,6 +163,9 @@ def CMIM(data, labels, n_select):
        :selected_features - returns a list containing the features
            in the order they were selected.
    '''
+   data, labels = check_data(data, labels)
+   if data == None or labels == None:
+       return None

    # python values
    n_observations, n_features = data.shape
@@ -154,6 +205,11 @@ def CondMI(data, labels, n_select):
    This function implements the conditional mutual information
    maximization feature selection algorithm.
+   The return type is None if there is an error with the
+   dimensions of the data and/or labels. All data are
+   automatically cast as doubles before calling the feature
+   selection tool.
+
    Input
        :data - data in a Numpy array such that len(data) =
            n_observations, and len(data.transpose()) = n_features
@@ -167,6 +223,10 @@ def CondMI(data, labels, n_select):
        :selected_features - returns a list containing the features
            in the order they were selected.
    '''
+   data, labels = check_data(data, labels)
+   if data == None or labels == None:
+       return None
+
    # python values
    n_observations, n_features = data.shape
    output = np.zeros(n_select)
@@ -197,7 +257,36 @@ def CondMI(data, labels, n_select):
    return selected_features

+def Condred(data, labels, n_select):
+   '''
+   Condred(data, labels, n_select)
+
+   This function implements the Condred feature selection algorithm.
+   beta = 0; gamma = 1;
+
+   The return type is None if there is an error with the
+   dimensions of the data and/or labels. All data are
+   automatically cast as doubles before calling the feature
+   selection tool.
+
+   Input
+       :data - data in a Numpy array such that len(data) =
+           n_observations, and len(data.transpose()) = n_features
+           (REQUIRED)
+       :labels - labels represented in a numpy list with
+           n_observations as the number of elements. That is
+           len(labels) = len(data) = n_observations.
+           (REQUIRED)
+       :n_select - number of features to select. (REQUIRED)
+   Output
+       :selected_features - returns a list containing the features
+           in the order they were selected.
+   '''
+   data, labels = check_data(data, labels)
+   if data == None or labels == None:
+       return None
+   return BetaGamma(data, labels, n_select, beta=0.0, gamma=1.0)
@@ -208,6 +297,11 @@ def DISR(data, labels, n_select):
    This function implements the double input symmetrical relevance
    feature selection algorithm.
+   The return type is None if there is an error with the
+   dimensions of the data and/or labels. All data are
+   automatically cast as doubles before calling the feature
+   selection tool.
+
    Input
        :data - data in a Numpy array such that len(data) =
            n_observations, and len(data.transpose()) = n_features
@@ -221,6 +315,10 @@ def DISR(data, labels, n_select):
        :selected_features - returns a list containing the features
            in the order they were selected.
    '''
+   data, labels = check_data(data, labels)
+   if data == None or labels == None:
+       return None
+
    # python values
    n_observations, n_features = data.shape
    output = np.zeros(n_select)
@@ -260,6 +358,11 @@ def ICAP(data, labels, n_select):
    This function implements the interaction capping feature
    selection algorithm.
+   The return type is None if there is an error with the
+   dimensions of the data and/or labels. All data are
+   automatically cast as doubles before calling the feature
+   selection tool.
+
    Input
        :data - data in a Numpy array such that len(data) =
            n_observations, and len(data.transpose()) = n_features
@@ -273,6 +376,10 @@ def ICAP(data, labels, n_select):
        :selected_features - returns a list containing the features
            in the order they were selected.
    '''
+   data, labels = check_data(data, labels)
+   if data == None or labels == None:
+       return None
+
    # python values
    n_observations, n_features = data.shape
    output = np.zeros(n_select)
@@ -313,6 +420,11 @@ def JMI(data, labels, n_select):
    This function implements the joint mutual information feature
    selection algorithm.
+   The return type is None if there is an error with the
+   dimensions of the data and/or labels. All data are
+   automatically cast as doubles before calling the feature
+   selection tool.
+
    Input
        :data - data in a Numpy array such that len(data) =
            n_observations, and len(data.transpose()) = n_features
@@ -326,6 +438,9 @@ def JMI(data, labels, n_select):
        :selected_features - returns a list containing the features
            in the order they were selected.
    '''
+   data, labels = check_data(data, labels)
+   if data == None or labels == None:
+       return None

    # python values
    n_observations, n_features = data.shape
@@ -356,6 +471,73 @@ def JMI(data, labels, n_select):
    return selected_features

+
+
+def MIFS(data, labels, n_select):
+   '''
+   MIFS(data, labels, n_select)
+
+   This function implements the MIFS algorithm.
+   beta = 1; gamma = 0;
+
+   The return type is None if there is an error with the
+   dimensions of the data and/or labels. All data are
+   automatically cast as doubles before calling the feature
+   selection tool.
+
+   Input
+       :data - data in a Numpy array such that len(data) =
+           n_observations, and len(data.transpose()) = n_features
+           (REQUIRED)
+       :labels - labels represented in a numpy list with
+           n_observations as the number of elements. That is
+           len(labels) = len(data) = n_observations.
+           (REQUIRED)
+       :n_select - number of features to select. (REQUIRED)
+   Output
+       :selected_features - returns a list containing the features
+           in the order they were selected.
+   '''
+   data, labels = check_data(data, labels)
+   if data == None or labels == None:
+       return None
+
+   return BetaGamma(data, labels, n_select, beta=1.0, gamma=0.0)
+
+
+def MIM(data, labels, n_select):
+   '''
+   MIM(data, labels, n_select)
+
+   This function implements the MIM algorithm.
+   beta = 0; gamma = 0;
+
+   The return type is None if there is an error with the
+   dimensions of the data and/or labels. All data are
+   automatically cast as doubles before calling the feature
+   selection tool.
+
+   Input
+       :data - data in a Numpy array such that len(data) =
+           n_observations, and len(data.transpose()) = n_features
+           (REQUIRED)
+       :labels - labels represented in a numpy list with
+           n_observations as the number of elements. That is
+           len(labels) = len(data) = n_observations.
+           (REQUIRED)
+       :n_select - number of features to select. (REQUIRED)
+   Output
+       :selected_features - returns a list containing the features
+           in the order they were selected.
+   '''
+   data, labels = check_data(data, labels)
+   if data == None or labels == None:
+       return None
+
+   return BetaGamma(data, labels, n_select, beta=0.0, gamma=0.0)
+
+
+
def mRMR(data, labels, n_select):
    '''
    mRMR(data, labels, n_select)
@@ -363,6 +545,11 @@ def mRMR(data, labels, n_select):
    This function implements the max-relevance min-redundancy
    feature selection algorithm.
+   The return type is None if there is an error with the
+   dimensions of the data and/or labels. All data are
+   automatically cast as doubles before calling the feature
+   selection tool.
+
    Input
        :data - data in a Numpy array such that len(data) =
            n_observations, and len(data.transpose()) = n_features
@@ -376,6 +563,10 @@ def mRMR(data, labels, n_select):
        :selected_features - returns a list containing the features
            in the order they were selected.
    '''
+   data, labels = check_data(data, labels)
+   if data == None or labels == None:
+       return None
+
    # python values
    n_observations, n_features = data.shape
@@ -410,3 +601,29 @@
+
+
+
+def check_data(data, labels):
+   '''
+   check_data(data, labels)
+
+   The return type is None if there is an error with the
+   dimensions of the data and/or labels. All data are
+   automatically cast as doubles before calling the feature
+   selection tool.
+
+   Input
+       :data
+       :labels
+   Output
+       :data
+       :labels
+   '''
+   if len(data) != len(labels):
+       return None, None
+
+   return 1.0*data, 1.0*labels
+
+
+
diff --git a/test/import_data.py b/test/import_data.py
deleted file mode 100644
index 158e97d..0000000
--- a/test/import_data.py
+++ /dev/null
@@ -1,58 +0,0 @@
-
-
-
-
-##################################################################
-##################################################################
-##################################################################
-def read_digits(fname='digit.txt'):
-    '''
-    read_digits(fname='digit.txt')
-
-    read a data file that contains the features and class labels.
-    each row of the file is a feature vector with the class
-    label appended.
-    '''
-    import csv
-    import numpy as np
-
-    fw = csv.reader(open(fname,'rb'), delimiter='\t')
-    data = []
-    for line in fw:
-        data.append( [float(x) for x in line] )
-    data = np.array(data)
-    labels = data[:,len(data.transpose())-1]
-    data = data[:,:len(data.transpose())-1]
-    return data, labels
-##################################################################
-##################################################################
-##################################################################
-
-
-
-##################################################################
-##################################################################
-##################################################################
-def uniform_data(n_observations = 1000, n_features = 50, n_relevant = 5):
-    import numpy as np
-    xmax = 10
-    xmin = 0
-    data = 1.0*np.random.randint(xmax + 1, size = (n_features, n_observations))
-    labels = np.zeros(n_observations)
-    delta = n_relevant * (xmax - xmin) / 2.0
-
-    for m in range(n_observations):
-        zz = 0.0
-        for k in range(n_relevant):
-            zz += data[k, m]
-        if zz > delta:
-            labels[m] = 1
-        else:
-            labels[m] = 2
-    data = data.transpose()
-
-    return data, labels
-
-##################################################################
-##################################################################
-##################################################################
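
A minimal usage sketch of the wrapper API added above (not part of the commit), assuming the feast module built from feast.py imports cleanly, i.e. the FEAST C library and MIToolbox it binds against are compiled and on the loader path. The synthetic data loosely mirrors the uniform_data() helper removed from test/import_data.py; only the feast.* function names come from the diff, everything else is illustrative.

    import numpy as np
    import feast  # the wrapper defined in feast.py; assumes libFEAST/MIToolbox are built

    # Synthetic discrete data, loosely following the removed uniform_data() helper:
    # integer features in [0, 10]; the label depends only on the first n_relevant features.
    n_observations, n_features, n_relevant = 1000, 50, 5
    data = 1.0 * np.random.randint(0, 11, size=(n_observations, n_features))
    delta = n_relevant * 10 / 2.0
    labels = np.where(data[:, :n_relevant].sum(axis=1) > delta, 1.0, 2.0)

    # Rank 10 features by joint mutual information.
    selected = feast.JMI(data, labels, 10)
    print('JMI selection order:', selected)

    # CIFE, Condred, MIFS and MIM are thin wrappers around BetaGamma with fixed
    # (beta, gamma) settings, so this direct call matches feast.CIFE(data, labels, 10).
    selected_cife = feast.BetaGamma(data, labels, 10, beta=1.0, gamma=1.0)
    print('CIFE / BetaGamma(1,1) order:', selected_cife)

    # check_data() guards every wrapper: mismatched lengths yield None.
    assert feast.MIM(data, labels[:-1], 10) is None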