From 1e857f0420c6423fb7453ed3cbc6a1d062e97bf3 Mon Sep 17 00:00:00 2001 From: Calvin Date: Fri, 5 Apr 2013 13:51:26 -0400 Subject: added basic docs generated with epydocs, and stripped down --- feast-pysrc.html | 620 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 620 insertions(+) create mode 100644 feast-pysrc.html (limited to 'feast-pysrc.html') diff --git a/feast-pysrc.html b/feast-pysrc.html new file mode 100644 index 0000000..d5f5dc1 --- /dev/null +++ b/feast-pysrc.html @@ -0,0 +1,620 @@ + + + + + feast + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Module feast + + + + +
+
+

Source Code for Module feast

+
+  1  ''' 
+  2    The FEAST module provides an interface between the C-library 
+  3    for feature selection and Python.  
+  4   
+  5    References:  
+  6    1) G. Brown, A. Pocock, M.-J. Zhao, and M. Lujan, "Conditional 
+  7        likelihood maximization: A unifying framework for information 
+  8        theoretic feature selection," Journal of Machine Learning  
+  9        Research, vol. 13, pp. 27-66, 2012. 
+ 10   
+ 11  ''' 
+ 12  __author__ = "Calvin Morrison" 
+ 13  __copyright__ = "Copyright 2013, EESI Laboratory" 
+ 14  __credits__ = ["Calvin Morrison", "Gregory Ditzler"] 
+ 15  __license__ = "GPL" 
+ 16  __version__ = "0.2.0" 
+ 17  __maintainer__ = "Calvin Morrison" 
+ 18  __email__ = "mutantturkey@gmail.com" 
+ 19  __status__ = "Release" 
+ 20   
+ 21  import numpy as np 
+ 22  import ctypes as c 
+ 23   
# Load the FEAST C library.  ctypes.CDLL raises OSError when the shared
# object cannot be found or loaded; catch only that (the original bare
# "except:" would also have swallowed KeyboardInterrupt/SystemExit) and
# re-raise with a message pointing at the missing library.
try:
  libFSToolbox = c.CDLL("libFSToolbox.so")
except OSError:
  raise Exception("Error: could not load libFSToolbox.so")
+ 28   
+ 29   
+
def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
  '''
    This algorithm implements conditional mutual information
    feature selection, such that beta and gamma control the
    weight attached to the redundant mutual and conditional
    mutual information, respectively.

    @param data: data in a Numpy array such that len(data) =
      n_observations, and len(data.transpose()) = n_features
      (REQUIRED)
    @type data: ndarray
    @param labels: labels represented in a numpy list with
      n_observations as the number of elements. That is
      len(labels) = len(data) = n_observations.
      (REQUIRED)
    @type labels: ndarray
    @param n_select: number of features to select. (REQUIRED)
    @type n_select: integer
    @param beta: penalty attached to I(X_j;X_k)
    @type beta: float between 0 and 1.0
    @param gamma: positive weight attached to the conditional
      redundancy term I(X_k;X_j|Y)
    @type gamma: float between 0 and 1.0
    @return: features in the order they were selected.
    @rtype: list
  '''
  data, labels = check_data(data, labels)

  # python values
  n_observations, n_features = data.shape
  output = np.zeros(n_select)

  # cast as C types
  c_n_observations = c.c_int(n_observations)
  c_n_select = c.c_int(n_select)
  c_n_features = c.c_int(n_features)
  c_beta = c.c_double(beta)
  c_gamma = c.c_double(gamma)

  libFSToolbox.BetaGamma.restype = c.POINTER(c.c_double * n_select)
  features = libFSToolbox.BetaGamma(c_n_select,
                                    c_n_observations,
                                    c_n_features,
                                    data.ctypes.data_as(c.POINTER(c.c_double)),
                                    labels.ctypes.data_as(c.POINTER(c.c_double)),
                                    output.ctypes.data_as(c.POINTER(c.c_double)),
                                    c_beta,
                                    c_gamma
                                    )

  # FEAST was implemented with Matlab in mind, so the C library returns
  # one-based feature indices; shift each down by one for Python's
  # zero-based indexing.
  # NOTE(review): the array behind `features` is allocated by the C
  # library and is never freed here -- confirm whether libFSToolbox
  # exposes a matching free routine.
  selected_features = [i - 1 for i in features.contents]

  return selected_features
89 + 90 + 91 +
def CIFE(data, labels, n_select):
  '''
    This function implements the CIFE (conditional infomax feature
    extraction) feature selection algorithm: BetaGamma with
    beta = 1; gamma = 1.
    (The original docstring said "Condred", but Condred is the separate
    beta = 0; gamma = 1 variant implemented below.)

    @param data: A Numpy array such that len(data) =
      n_observations, and len(data.transpose()) = n_features
    @type data: ndarray
    @param labels: labels represented in a numpy list with
      n_observations as the number of elements. That is
      len(labels) = len(data) = n_observations.
    @type labels: ndarray
    @param n_select: number of features to select.
    @type n_select: integer
    @return: features in the order they were selected.
    @rtype: list
  '''

  return BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0)
111 +112 +113 +114 +
def CMIM(data, labels, n_select):
  '''
    Conditional mutual information maximization feature selection.
    Note that, unlike BetaGamma, this implementation does not allow
    the redundancy terms to be weighted.

    @param data: A Numpy array such that len(data) =
      n_observations, and len(data.transpose()) = n_features
    @type data: ndarray
    @param labels: labels represented in a numpy array with
      n_observations as the number of elements. That is
      len(labels) = len(data) = n_observations.
    @type labels: ndarray
    @param n_select: number of features to select.
    @type n_select: integer
    @return: features in the order that they were selected.
    @rtype: list
  '''
  data, labels = check_data(data, labels)

  num_obs, num_feat = data.shape
  scratch = np.zeros(n_select)

  # the C function fills and returns an array of n_select doubles
  libFSToolbox.CMIM.restype = c.POINTER(c.c_double * n_select)
  picked = libFSToolbox.CMIM(c.c_int(n_select),
                             c.c_int(num_obs),
                             c.c_int(num_feat),
                             data.ctypes.data_as(c.POINTER(c.c_double)),
                             labels.ctypes.data_as(c.POINTER(c.c_double)),
                             scratch.ctypes.data_as(c.POINTER(c.c_double)))

  # FEAST uses Matlab-style one-based indices; convert to zero-based.
  result = []
  for idx in picked.contents:
    result.append(idx - 1)

  return result
164 +165 +166 +
def CondMI(data, labels, n_select):
  '''
    Conditional mutual information maximization feature selection
    algorithm (CondMI entry point of libFSToolbox).

    @param data: data in a Numpy array such that len(data) = n_observations,
      and len(data.transpose()) = n_features
    @type data: ndarray
    @param labels: represented in a numpy list with
      n_observations as the number of elements. That is
      len(labels) = len(data) = n_observations.
    @type labels: ndarray
    @param n_select: number of features to select.
    @type n_select: integer
    @return: features in the order they were selected.
    @rtype: list
  '''
  data, labels = check_data(data, labels)

  rows, cols = data.shape
  out_buf = np.zeros(n_select)

  dbl_ptr = c.POINTER(c.c_double)
  libFSToolbox.CondMI.restype = c.POINTER(c.c_double * n_select)
  chosen = libFSToolbox.CondMI(c.c_int(n_select),
                               c.c_int(rows),
                               c.c_int(cols),
                               data.ctypes.data_as(dbl_ptr),
                               labels.ctypes.data_as(dbl_ptr),
                               out_buf.ctypes.data_as(dbl_ptr))

  # shift FEAST's Matlab-style one-based indices to Python's zero-based
  return [f - 1 for f in chosen.contents]
214 +215 +
def Condred(data, labels, n_select):
  '''
    This function implements the Condred feature selection algorithm:
    BetaGamma with beta = 0; gamma = 1 (only the conditional
    redundancy term is used).

    @param data: data in a Numpy array such that len(data) =
      n_observations, and len(data.transpose()) = n_features
    @type data: ndarray
    @param labels: labels represented in a numpy list with
      n_observations as the number of elements. That is
      len(labels) = len(data) = n_observations.
    @type labels: ndarray
    @param n_select: number of features to select.
    @type n_select: integer
    @return: the features in the order they were selected.
    @rtype: list
  '''
  # No separate check_data call here: BetaGamma validates (and casts)
  # its inputs itself, so the extra call was redundant work -- this also
  # matches how CIFE and MIFS delegate.
  return BetaGamma(data, labels, n_select, beta=0.0, gamma=1.0)
236 +237 +238 +
def DISR(data, labels, n_select):
  '''
    Double input symmetrical relevance (DISR) feature selection.

    @param data: data in a Numpy array such that len(data) =
      n_observations, and len(data.transpose()) = n_features
    @type data: ndarray
    @param labels: labels represented in a numpy list with
      n_observations as the number of elements. That is
      len(labels) = len(data) = n_observations.
    @type labels: ndarray
    @param n_select: number of features to select. (REQUIRED)
    @type n_select: integer
    @return: the features in the order they were selected.
    @rtype: list
  '''
  data, labels = check_data(data, labels)

  observations, feature_count = data.shape
  workspace = np.zeros(n_select)

  libFSToolbox.DISR.restype = c.POINTER(c.c_double * n_select)
  ranked = libFSToolbox.DISR(
      c.c_int(n_select),
      c.c_int(observations),
      c.c_int(feature_count),
      data.ctypes.data_as(c.POINTER(c.c_double)),
      labels.ctypes.data_as(c.POINTER(c.c_double)),
      workspace.ctypes.data_as(c.POINTER(c.c_double)))

  # convert FEAST's Matlab-style one-based indices to zero-based
  return [value - 1 for value in ranked.contents]
286 +287 +288 +289 +
def ICAP(data, labels, n_select):
  '''
    Interaction capping (ICAP) feature selection.

    @param data: data in a Numpy array such that len(data) =
      n_observations, and len(data.transpose()) = n_features
    @type data: ndarray
    @param labels: labels represented in a numpy list with
      n_observations as the number of elements. That is
      len(labels) = len(data) = n_observations.
    @type labels: ndarray
    @param n_select: number of features to select. (REQUIRED)
    @type n_select: integer
    @return: the features in the order they were selected.
    @rtype: list
  '''
  data, labels = check_data(data, labels)

  sample_count, feature_count = data.shape
  out_array = np.zeros(n_select)

  as_double_ptr = c.POINTER(c.c_double)
  libFSToolbox.ICAP.restype = c.POINTER(c.c_double * n_select)
  selection = libFSToolbox.ICAP(c.c_int(n_select),
                                c.c_int(sample_count),
                                c.c_int(feature_count),
                                data.ctypes.data_as(as_double_ptr),
                                labels.ctypes.data_as(as_double_ptr),
                                out_array.ctypes.data_as(as_double_ptr))

  # FEAST returns Matlab-style one-based indices; re-base to zero.
  zero_based = []
  for entry in selection.contents:
    zero_based.append(entry - 1)
  return zero_based
337 +338 +339 +340 +341 +
def JMI(data, labels, n_select):
  '''
    This function implements the joint mutual information feature
    selection algorithm.

    @param data: data in a Numpy array such that len(data) =
      n_observations, and len(data.transpose()) = n_features
    @type data: ndarray
    @param labels: labels represented in a numpy list with
      n_observations as the number of elements. That is
      len(labels) = len(data) = n_observations.
    @type labels: ndarray
    @param n_select: number of features to select. (REQUIRED)
    @type n_select: integer
    @return: the features in the order they were selected.
    @rtype: list
  '''
  # validate and cast inputs to float64 ndarrays
  data, labels = check_data(data, labels)

  # python values
  n_observations, n_features = data.shape
  output = np.zeros(n_select)

  # cast as C types
  c_n_observations = c.c_int(n_observations)
  c_n_select = c.c_int(n_select)
  c_n_features = c.c_int(n_features)

  # the C function returns a pointer to an array of n_select doubles
  libFSToolbox.JMI.restype = c.POINTER(c.c_double * n_select)
  features = libFSToolbox.JMI(c_n_select,
     c_n_observations,
     c_n_features,
     data.ctypes.data_as(c.POINTER(c.c_double)),
     labels.ctypes.data_as(c.POINTER(c.c_double)),
     output.ctypes.data_as(c.POINTER(c.c_double))
     )


  # turn our output into a list
  selected_features = []
  for i in features.contents:
    # recall that feast was implemented with Matlab in mind, so the
    # authors assumed the indexing started at one; however, in Python
    # the indexing starts at zero.
    selected_features.append(i - 1)

  return selected_features
389 +390 +391 +
def MIFS(data, labels, n_select):
  '''
    This function implements the MIFS (mutual information feature
    selection) algorithm: BetaGamma with beta = 1; gamma = 0.

    @param data: data in a Numpy array such that len(data) =
      n_observations, and len(data.transpose()) = n_features
    @type data: ndarray
    @param labels: labels represented in a numpy list with
      n_observations as the number of elements. That is
      len(labels) = len(data) = n_observations.
    @type labels: ndarray
    @param n_select: number of features to select. (REQUIRED)
    @type n_select: integer
    @return: the features in the order they were selected.
    @rtype: list
  '''

  # BUG FIX: beta was previously passed as 0.0, which made MIFS
  # identical to MIM.  Both this function's documented parameters
  # ("beta = 1; gamma = 0") and the MIFS definition in Brown et al.
  # (JMLR 2012) require a unit redundancy penalty.
  return BetaGamma(data, labels, n_select, beta=1.0, gamma=0.0)
411 +412 +
def MIM(data, labels, n_select):
  '''
    This function implements the MIM (mutual information maximization)
    algorithm: BetaGamma with beta = 0; gamma = 0, i.e. features are
    ranked purely by relevance with no redundancy penalty.

    @param data: data in a Numpy array such that len(data) =
      n_observations, and len(data.transpose()) = n_features
    @type data: ndarray
    @param labels: labels represented in a numpy list with
      n_observations as the number of elements. That is
      len(labels) = len(data) = n_observations.
    @type labels: ndarray
    @param n_select: number of features to select. (REQUIRED)
    @type n_select: integer
    @return: the features in the order they were selected.
    @rtype: list
  '''
  # No separate check_data call here: BetaGamma validates (and casts)
  # its inputs itself, so the extra call was redundant work -- this also
  # matches how CIFE and MIFS delegate.
  return BetaGamma(data, labels, n_select, beta=0.0, gamma=0.0)
433 +434 +435 +
def mRMR(data, labels, n_select):
  '''
    Max-relevance min-redundancy (mRMR) feature selection, backed by
    the mRMR_D entry point of libFSToolbox.

    @param data: data in a Numpy array such that len(data) =
      n_observations, and len(data.transpose()) = n_features
    @type data: ndarray
    @param labels: labels represented in a numpy list with
      n_observations as the number of elements. That is
      len(labels) = len(data) = n_observations.
    @type labels: ndarray
    @param n_select: number of features to select. (REQUIRED)
    @type n_select: integer
    @return: the features in the order they were selected.
    @rtype: list
  '''
  data, labels = check_data(data, labels)

  n_rows, n_cols = data.shape
  result_buf = np.zeros(n_select)

  double_ptr = c.POINTER(c.c_double)
  libFSToolbox.mRMR_D.restype = c.POINTER(c.c_double * n_select)
  ordering = libFSToolbox.mRMR_D(c.c_int(n_select),
                                 c.c_int(n_rows),
                                 c.c_int(n_cols),
                                 data.ctypes.data_as(double_ptr),
                                 labels.ctypes.data_as(double_ptr),
                                 result_buf.ctypes.data_as(double_ptr))

  # re-base FEAST's Matlab-style one-based indices to zero-based
  return [pos - 1 for pos in ordering.contents]
483 +
def check_data(data, labels):
  '''
    Check the dimensions of the data and the labels and raise an
    exception if there is a problem.

    Data and labels are automatically cast as doubles before calling
    the feature selection functions.

    @param data: the data
    @param labels: the labels
    @return: (data, labels) as ndarrays of floats
    @rtype: tuple
    @raise Exception: if either argument is not an ndarray, or their
      lengths differ
  '''

  if not isinstance(data, np.ndarray):
    raise Exception("data must be an numpy ndarray.")
  if not isinstance(labels, np.ndarray):
    raise Exception("labels must be an numpy ndarray.")

  if len(data) != len(labels):
    raise Exception("data and labels must be the same length")

  # multiplying by 1.0 yields float64 copies, which is the element type
  # the libFSToolbox entry points expect (arrays are passed as double*)
  return 1.0 * data, 1.0 * labels
507 +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + -- cgit v1.2.3