From 1e857f0420c6423fb7453ed3cbc6a1d062e97bf3 Mon Sep 17 00:00:00 2001
From: Calvin
Date: Fri, 5 Apr 2013 13:51:26 -0400
Subject: added basic docs generated with epydocs, and stripped down

---
 b/lib/feast.py    | 412 ++++++++++++++++++++++++++
 crarr.png         | Bin 0 -> 340 bytes
 epydoc.css        | 322 ++++++++++++++++++++
 feast-module.html | 864 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 feast-pysrc.html  | 620 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 2218 insertions(+)
 create mode 100644 b/lib/feast.py
 create mode 100644 crarr.png
 create mode 100644 epydoc.css
 create mode 100644 feast-module.html
 create mode 100644 feast-pysrc.html

diff --git a/b/lib/feast.py b/b/lib/feast.py
new file mode 100644
index 0000000..0d2fee6
--- /dev/null
+++ b/b/lib/feast.py
@@ -0,0 +1,412 @@
+import numpy as np
+from ctypes import *
+
+
+'''
+  The FEAST module provides an interface between the C library
+  for feature selection and Python.
+
+  References:
+  1) G. Brown, A. Pocock, M.-J. Zhao, and M. Lujan, "Conditional
+      likelihood maximization: A unifying framework for information
+      theoretic feature selection," Journal of Machine Learning
+      Research, vol. 13, pp. 27-66, 2012.
+
+
+  __author__ = "Calvin Morrison"
+  __copyright__ = "Copyright 2013, EESI Laboratory"
+  __credits__ = ["Calvin Morrison", "Gregory Ditzler"]
+  __license__ = "GPL"
+  __version__ = "0.1.0"
+  __maintainer__ = "Calvin Morrison"
+  __email__ = "mutantturkey@gmail.com"
+  __status__ = "Release"
+'''
+
+# I listed the function definitions in alphabetical order. Let's
+# keep this up.
+
+
+try:
+  libFSToolbox = CDLL("libFSToolbox.so")
+except:
+  raise Exception("Error: could not load libFSToolbox.so")
+
+
+def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0):
+  '''
+    BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0)
+
+    This algorithm implements conditional mutual information
+    feature selection, such that beta and gamma control the
+    weight attached to the redundant mutual and conditional
+    mutual information, respectively.
+
+    Input
+      :data - data in a Numpy array such that len(data) =
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with
+        n_observations as the number of elements. That is
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+      :beta - penalty attached to I(X_j;X_k)
+      :gamma - positive weight attached to the conditional
+        redundancy term I(X_k;X_j|Y)
+    Output
+      :selected_features - returns a list containing the features
+        in the order they were selected.
+  '''
+  # python values
+  n_observations, n_features = data.shape
+  output = np.zeros(n_select)
+
+  # cast as C types
+  c_n_observations = c_int(n_observations)
+  c_n_select = c_int(n_select)
+  c_n_features = c_int(n_features)
+  c_beta = c_double(beta)
+  c_gamma = c_double(gamma)
+
+  libFSToolbox.BetaGamma.restype = POINTER(c_double * n_select)
+  features = libFSToolbox.BetaGamma(c_n_select,
+                 c_n_observations,
+                 c_n_features,
+                 data.ctypes.data_as(POINTER(c_double)),
+                 labels.ctypes.data_as(POINTER(c_double)),
+                 output.ctypes.data_as(POINTER(c_double)),
+                 c_beta,
+                 c_gamma
+                 )
+
+  # turn our output into a list
+  selected_features = []
+  for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
+    selected_features.append(i - 1)
+
+  return selected_features
+
+
+
+def CMIM(data, labels, n_select):
+  '''
+    CMIM(data, labels, n_select)
+
+    This function implements the conditional mutual information
+    maximization feature selection algorithm. Note that this
+    implementation does not allow for the weighting of the
+    redundancy terms that BetaGamma will allow you to do.
+
+    Input
+      :data - data in a Numpy array such that len(data) =
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with
+        n_observations as the number of elements. That is
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+    Output
+      :selected_features - returns a list containing the features
+        in the order they were selected.
+  '''
+
+  # python values
+  n_observations, n_features = data.shape
+  output = np.zeros(n_select)
+
+  # cast as C types
+  c_n_observations = c_int(n_observations)
+  c_n_select = c_int(n_select)
+  c_n_features = c_int(n_features)
+
+  libFSToolbox.CMIM.restype = POINTER(c_double * n_select)
+  features = libFSToolbox.CMIM(c_n_select,
+                 c_n_observations,
+                 c_n_features,
+                 data.ctypes.data_as(POINTER(c_double)),
+                 labels.ctypes.data_as(POINTER(c_double)),
+                 output.ctypes.data_as(POINTER(c_double))
+                 )
+
+
+  # turn our output into a list
+  selected_features = []
+  for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
+    selected_features.append(i - 1)
+
+  return selected_features
+
+
+
+def CondMI(data, labels, n_select):
+  '''
+    CondMI(data, labels, n_select)
+
+    This function implements the conditional mutual information
+    maximization feature selection algorithm.
+
+    Input
+      :data - data in a Numpy array such that len(data) =
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with
+        n_observations as the number of elements. That is
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+    Output
+      :selected_features - returns a list containing the features
+        in the order they were selected.
+  '''
+  # python values
+  n_observations, n_features = data.shape
+  output = np.zeros(n_select)
+
+  # cast as C types
+  c_n_observations = c_int(n_observations)
+  c_n_select = c_int(n_select)
+  c_n_features = c_int(n_features)
+
+  libFSToolbox.CondMI.restype = POINTER(c_double * n_select)
+  features = libFSToolbox.CondMI(c_n_select,
+                 c_n_observations,
+                 c_n_features,
+                 data.ctypes.data_as(POINTER(c_double)),
+                 labels.ctypes.data_as(POINTER(c_double)),
+                 output.ctypes.data_as(POINTER(c_double))
+                 )
+
+
+  # turn our output into a list
+  selected_features = []
+  for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
+    selected_features.append(i - 1)
+
+  return selected_features
+
+
+
+
+
+
+def DISR(data, labels, n_select):
+  '''
+    DISR(data, labels, n_select)
+
+    This function implements the double input symmetrical relevance
+    feature selection algorithm.
+
+    Input
+      :data - data in a Numpy array such that len(data) =
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with
+        n_observations as the number of elements. That is
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+    Output
+      :selected_features - returns a list containing the features
+        in the order they were selected.
+  '''
+  # python values
+  n_observations, n_features = data.shape
+  output = np.zeros(n_select)
+
+  # cast as C types
+  c_n_observations = c_int(n_observations)
+  c_n_select = c_int(n_select)
+  c_n_features = c_int(n_features)
+
+  libFSToolbox.DISR.restype = POINTER(c_double * n_select)
+  features = libFSToolbox.DISR(c_n_select,
+                 c_n_observations,
+                 c_n_features,
+                 data.ctypes.data_as(POINTER(c_double)),
+                 labels.ctypes.data_as(POINTER(c_double)),
+                 output.ctypes.data_as(POINTER(c_double))
+                 )
+
+
+  # turn our output into a list
+  selected_features = []
+  for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
+    selected_features.append(i - 1)
+
+  return selected_features
+
+
+
+
+def ICAP(data, labels, n_select):
+  '''
+    ICAP(data, labels, n_select)
+
+    This function implements the interaction capping feature
+    selection algorithm.
+
+    Input
+      :data - data in a Numpy array such that len(data) =
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with
+        n_observations as the number of elements. That is
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+    Output
+      :selected_features - returns a list containing the features
+        in the order they were selected.
+  '''
+  # python values
+  n_observations, n_features = data.shape
+  output = np.zeros(n_select)
+
+  # cast as C types
+  c_n_observations = c_int(n_observations)
+  c_n_select = c_int(n_select)
+  c_n_features = c_int(n_features)
+
+  libFSToolbox.ICAP.restype = POINTER(c_double * n_select)
+  features = libFSToolbox.ICAP(c_n_select,
+                 c_n_observations,
+                 c_n_features,
+                 data.ctypes.data_as(POINTER(c_double)),
+                 labels.ctypes.data_as(POINTER(c_double)),
+                 output.ctypes.data_as(POINTER(c_double))
+                 )
+
+
+  # turn our output into a list
+  selected_features = []
+  for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
+    selected_features.append(i - 1)
+
+  return selected_features
+
+
+
+
+
+def JMI(data, labels, n_select):
+  '''
+    JMI(data, labels, n_select)
+
+    This function implements the joint mutual information feature
+    selection algorithm.
+
+    Input
+      :data - data in a Numpy array such that len(data) =
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with
+        n_observations as the number of elements. That is
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+    Output
+      :selected_features - returns a list containing the features
+        in the order they were selected.
+  '''
+
+  # python values
+  n_observations, n_features = data.shape
+  output = np.zeros(n_select)
+
+  # cast as C types
+  c_n_observations = c_int(n_observations)
+  c_n_select = c_int(n_select)
+  c_n_features = c_int(n_features)
+
+  libFSToolbox.JMI.restype = POINTER(c_double * n_select)
+  features = libFSToolbox.JMI(c_n_select,
+                 c_n_observations,
+                 c_n_features,
+                 data.ctypes.data_as(POINTER(c_double)),
+                 labels.ctypes.data_as(POINTER(c_double)),
+                 output.ctypes.data_as(POINTER(c_double))
+                 )
+
+
+  # turn our output into a list
+  selected_features = []
+  for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
+    selected_features.append(i - 1)
+
+  return selected_features
+
+def mRMR(data, labels, n_select):
+  '''
+    mRMR(data, labels, n_select)
+
+    This function implements the max-relevance min-redundancy feature
+    selection algorithm.
+
+    Input
+      :data - data in a Numpy array such that len(data) =
+        n_observations, and len(data.transpose()) = n_features
+        (REQUIRED)
+      :labels - labels represented in a numpy list with
+        n_observations as the number of elements. That is
+        len(labels) = len(data) = n_observations.
+        (REQUIRED)
+      :n_select - number of features to select. (REQUIRED)
+    Output
+      :selected_features - returns a list containing the features
+        in the order they were selected.
+  '''
+
+  # python values
+  n_observations, n_features = data.shape
+  output = np.zeros(n_select)
+
+  # cast as C types
+  c_n_observations = c_int(n_observations)
+  c_n_select = c_int(n_select)
+  c_n_features = c_int(n_features)
+
+  libFSToolbox.mRMR_D.restype = POINTER(c_double * n_select)
+  features = libFSToolbox.mRMR_D(c_n_select,
+                 c_n_observations,
+                 c_n_features,
+                 data.ctypes.data_as(POINTER(c_double)),
+                 labels.ctypes.data_as(POINTER(c_double)),
+                 output.ctypes.data_as(POINTER(c_double))
+                 )
+
+
+  # turn our output into a list
+  selected_features = []
+  for i in features.contents:
+    # recall that feast was implemented with Matlab in mind, so the
+    # authors assumed the indexing started at one; however, in Python
+    # the indexing starts at zero.
+    selected_features.append(i - 1)
+
+  return selected_features
+
+
+
+
diff --git a/crarr.png b/crarr.png
new file mode 100644
index 0000000..26b43c5
Binary files /dev/null and b/crarr.png differ
diff --git a/epydoc.css b/epydoc.css
new file mode 100644
index 0000000..86d4170
--- /dev/null
+++ b/epydoc.css
@@ -0,0 +1,322 @@
+
+
+/* Epydoc CSS Stylesheet
+ *
+ * This stylesheet can be used to customize the appearance of epydoc's
+ * HTML output.
+ *
+ */
+
+/* Default Colors & Styles
+ * - Set the default foreground & background color with 'body'; and
+ *   link colors with 'a:link' and 'a:visited'.
+ * - Use bold for definition list terms.
+ * - The heading styles defined here are used for headings *within*
+ *   docstring descriptions. All headings used by epydoc itself use
+ *   either class='epydoc' or class='toc' (CSS styles for both
+ *   defined below).
+ */ +body { background: #ffffff; color: #000000; } +p { margin-top: 0.5em; margin-bottom: 0.5em; } +a:link { color: #0000ff; } +a:visited { color: #204080; } +dt { font-weight: bold; } +h1 { font-size: +140%; font-style: italic; + font-weight: bold; } +h2 { font-size: +125%; font-style: italic; + font-weight: bold; } +h3 { font-size: +110%; font-style: italic; + font-weight: normal; } +code { font-size: 100%; } +/* N.B.: class, not pseudoclass */ +a.link { font-family: monospace; } + +/* Page Header & Footer + * - The standard page header consists of a navigation bar (with + * pointers to standard pages such as 'home' and 'trees'); a + * breadcrumbs list, which can be used to navigate to containing + * classes or modules; options links, to show/hide private + * variables and to show/hide frames; and a page title (using + *
<h1>
). The page title may be followed by a link to the + * corresponding source code (using 'span.codelink'). + * - The footer consists of a navigation bar, a timestamp, and a + * pointer to epydoc's homepage. + */ +h1.epydoc { margin: 0; font-size: +140%; font-weight: bold; } +h2.epydoc { font-size: +130%; font-weight: bold; } +h3.epydoc { font-size: +115%; font-weight: bold; + margin-top: 0.2em; } +td h3.epydoc { font-size: +115%; font-weight: bold; + margin-bottom: 0; } +table.navbar { background: #a0c0ff; color: #000000; + border: 2px groove #c0d0d0; } +table.navbar table { color: #000000; } +th.navbar-select { background: #70b0ff; + color: #000000; } +table.navbar a { text-decoration: none; } +table.navbar a:link { color: #0000ff; } +table.navbar a:visited { color: #204080; } +span.breadcrumbs { font-size: 85%; font-weight: bold; } +span.options { font-size: 70%; } +span.codelink { font-size: 85%; } +td.footer { font-size: 85%; } + +/* Table Headers + * - Each summary table and details section begins with a 'header' + * row. This row contains a section title (marked by + * 'span.table-header') as well as a show/hide private link + * (marked by 'span.options', defined above). + * - Summary tables that contain user-defined groups mark those + * groups using 'group header' rows. + */ +td.table-header { background: #70b0ff; color: #000000; + border: 1px solid #608090; } +td.table-header table { color: #000000; } +td.table-header table a:link { color: #0000ff; } +td.table-header table a:visited { color: #204080; } +span.table-header { font-size: 120%; font-weight: bold; } +th.group-header { background: #c0e0f8; color: #000000; + text-align: left; font-style: italic; + font-size: 115%; + border: 1px solid #608090; } + +/* Summary Tables (functions, variables, etc) + * - Each object is described by a single row of the table with + * two cells. The left cell gives the object's type, and is + * marked with 'code.summary-type'. The right cell gives the + * object's name and a summary description. + * - CSS styles for the table's header and group headers are + * defined above, under 'Table Headers' + */ +table.summary { border-collapse: collapse; + background: #e8f0f8; color: #000000; + border: 1px solid #608090; + margin-bottom: 0.5em; } +td.summary { border: 1px solid #608090; } +code.summary-type { font-size: 85%; } +table.summary a:link { color: #0000ff; } +table.summary a:visited { color: #204080; } + + +/* Details Tables (functions, variables, etc) + * - Each object is described in its own div. + * - A single-row summary table w/ table-header is used as + * a header for each details section (CSS style for table-header + * is defined above, under 'Table Headers'). + */ +table.details { border-collapse: collapse; + background: #e8f0f8; color: #000000; + border: 1px solid #608090; + margin: .2em 0 0 0; } +table.details table { color: #000000; } +table.details a:link { color: #0000ff; } +table.details a:visited { color: #204080; } + +/* Fields */ +dl.fields { margin-left: 2em; margin-top: 1em; + margin-bottom: 1em; } +dl.fields dd ul { margin-left: 0em; padding-left: 0em; } +dl.fields dd ul li ul { margin-left: 2em; padding-left: 0em; } +div.fields { margin-left: 2em; } +div.fields p { margin-bottom: 0.5em; } + +/* Index tables (identifier index, term index, etc) + * - link-index is used for indices containing lists of links + * (namely, the identifier index & term index). + * - index-where is used in link indices for the text indicating + * the container/source for each link. 
+ * - metadata-index is used for indices containing metadata
+ *   extracted from fields (namely, the bug index & todo index).
+ */
+table.link-index { border-collapse: collapse;
+                   background: #e8f0f8; color: #000000;
+                   border: 1px solid #608090; }
+td.link-index { border-width: 0px; }
+table.link-index a:link { color: #0000ff; }
+table.link-index a:visited { color: #204080; }
+span.index-where { font-size: 70%; }
+table.metadata-index { border-collapse: collapse;
+                       background: #e8f0f8; color: #000000;
+                       border: 1px solid #608090;
+                       margin: .2em 0 0 0; }
+td.metadata-index { border-width: 1px; border-style: solid; }
+table.metadata-index a:link { color: #0000ff; }
+table.metadata-index a:visited { color: #204080; }
+
+/* Function signatures
+ * - sig* is used for the signature in the details section.
+ * - .summary-sig* is used for the signature in the summary
+ *   table, and when listing property accessor functions.
+ * */
+.sig-name { color: #006080; }
+.sig-arg { color: #008060; }
+.sig-default { color: #602000; }
+.summary-sig { font-family: monospace; }
+.summary-sig-name { color: #006080; font-weight: bold; }
+table.summary a.summary-sig-name:link
+          { color: #006080; font-weight: bold; }
+table.summary a.summary-sig-name:visited
+          { color: #006080; font-weight: bold; }
+.summary-sig-arg { color: #006040; }
+.summary-sig-default { color: #501800; }
+
+/* Subclass list
+ */
+ul.subclass-list { display: inline; }
+ul.subclass-list li { display: inline; }
+
+/* To render variables, classes etc. like functions */
+table.summary .summary-name { color: #006080; font-weight: bold;
+                              font-family: monospace; }
+table.summary
+     a.summary-name:link { color: #006080; font-weight: bold;
+                           font-family: monospace; }
+table.summary
+     a.summary-name:visited { color: #006080; font-weight: bold;
+                              font-family: monospace; }
+
+/* Variable values
+ * - In the 'variable details' sections, each variable's value is
+ *   listed in a 'pre.variable' box. The width of this box is
+ *   restricted to 80 chars; if the value's repr is longer than
+ *   this it will be wrapped, using a backslash marked with
+ *   class 'variable-linewrap'. If the value's repr is longer
+ *   than 3 lines, the rest will be elided; and an ellipsis
+ *   marker ('...' marked with 'variable-ellipsis') will be used.
+ * - If the value is a string, its quote marks will be marked
+ *   with 'variable-quote'.
+ * - If the variable is a regexp, it is syntax-highlighted using
+ *   the re* CSS classes.
+ */
+pre.variable { padding: .5em; margin: 0;
+               background: #dce4ec; color: #000000;
+               border: 1px solid #708890; }
+.variable-linewrap { color: #604000; font-weight: bold; }
+.variable-ellipsis { color: #604000; font-weight: bold; }
+.variable-quote { color: #604000; font-weight: bold; }
+.variable-group { color: #008000; font-weight: bold; }
+.variable-op { color: #604000; font-weight: bold; }
+.variable-string { color: #006030; }
+.variable-unknown { color: #a00000; font-weight: bold; }
+.re { color: #000000; }
+.re-char { color: #006030; }
+.re-op { color: #600000; }
+.re-group { color: #003060; }
+.re-ref { color: #404040; }
+
+/* Base tree
+ * - Used by class pages to display the base class hierarchy.
+ */
+pre.base-tree { font-size: 80%; margin: 0; }
+
+/* Frames-based table of contents headers
+ * - Consists of two frames: one for selecting modules; and
+ *   the other listing the contents of the selected module.
+ * - h1.toc is used for each frame's heading
+ * - h2.toc is used for subheadings within each frame.
+ */ +h1.toc { text-align: center; font-size: 105%; + margin: 0; font-weight: bold; + padding: 0; } +h2.toc { font-size: 100%; font-weight: bold; + margin: 0.5em 0 0 -0.3em; } + +/* Syntax Highlighting for Source Code + * - doctest examples are displayed in a 'pre.py-doctest' block. + * If the example is in a details table entry, then it will use + * the colors specified by the 'table pre.py-doctest' line. + * - Source code listings are displayed in a 'pre.py-src' block. + * Each line is marked with 'span.py-line' (used to draw a line + * down the left margin, separating the code from the line + * numbers). Line numbers are displayed with 'span.py-lineno'. + * The expand/collapse block toggle button is displayed with + * 'a.py-toggle' (Note: the CSS style for 'a.py-toggle' should not + * modify the font size of the text.) + * - If a source code page is opened with an anchor, then the + * corresponding code block will be highlighted. The code + * block's header is highlighted with 'py-highlight-hdr'; and + * the code block's body is highlighted with 'py-highlight'. + * - The remaining py-* classes are used to perform syntax + * highlighting (py-string for string literals, py-name for names, + * etc.) + */ +pre.py-doctest { padding: .5em; margin: 1em; + background: #e8f0f8; color: #000000; + border: 1px solid #708890; } +table pre.py-doctest { background: #dce4ec; + color: #000000; } +pre.py-src { border: 2px solid #000000; + background: #f0f0f0; color: #000000; } +.py-line { border-left: 2px solid #000000; + margin-left: .2em; padding-left: .4em; } +.py-lineno { font-style: italic; font-size: 90%; + padding-left: .5em; } +a.py-toggle { text-decoration: none; } +div.py-highlight-hdr { border-top: 2px solid #000000; + border-bottom: 2px solid #000000; + background: #d8e8e8; } +div.py-highlight { border-bottom: 2px solid #000000; + background: #d0e0e0; } +.py-prompt { color: #005050; font-weight: bold;} +.py-more { color: #005050; font-weight: bold;} +.py-string { color: #006030; } +.py-comment { color: #003060; } +.py-keyword { color: #600000; } +.py-output { color: #404040; } +.py-name { color: #000050; } +.py-name:link { color: #000050 !important; } +.py-name:visited { color: #000050 !important; } +.py-number { color: #005000; } +.py-defname { color: #000060; font-weight: bold; } +.py-def-name { color: #000060; font-weight: bold; } +.py-base-class { color: #000060; } +.py-param { color: #000060; } +.py-docstring { color: #006030; } +.py-decorator { color: #804020; } +/* Use this if you don't want links to names underlined: */ +/*a.py-name { text-decoration: none; }*/ + +/* Graphs & Diagrams + * - These CSS styles are used for graphs & diagrams generated using + * Graphviz dot. 'img.graph-without-title' is used for bare + * diagrams (to remove the border created by making the image + * clickable). + */ +img.graph-without-title { border: none; } +img.graph-with-title { border: 1px solid #000000; } +span.graph-title { font-weight: bold; } +span.graph-caption { } + +/* General-purpose classes + * - 'p.indent-wrapped-lines' defines a paragraph whose first line + * is not indented, but whose subsequent lines are. + * - The 'nomargin-top' class is used to remove the top margin (e.g. + * from lists). The 'nomargin' class is used to remove both the + * top and bottom margin (but not the left or right margin -- + * for lists, that would cause the bullets to disappear.) 
+ */ +p.indent-wrapped-lines { padding: 0 0 0 7em; text-indent: -7em; + margin: 0; } +.nomargin-top { margin-top: 0; } +.nomargin { margin-top: 0; margin-bottom: 0; } + +/* HTML Log */ +div.log-block { padding: 0; margin: .5em 0 .5em 0; + background: #e8f0f8; color: #000000; + border: 1px solid #000000; } +div.log-error { padding: .1em .3em .1em .3em; margin: 4px; + background: #ffb0b0; color: #000000; + border: 1px solid #000000; } +div.log-warning { padding: .1em .3em .1em .3em; margin: 4px; + background: #ffffb0; color: #000000; + border: 1px solid #000000; } +div.log-info { padding: .1em .3em .1em .3em; margin: 4px; + background: #b0ffb0; color: #000000; + border: 1px solid #000000; } +h2.log-hdr { background: #70b0ff; color: #000000; + margin: 0; padding: 0em 0.5em 0em 0.5em; + border-bottom: 1px solid #000000; font-size: 110%; } +p.log { font-weight: bold; margin: .5em 0 .5em 0; } +tr.opt-changed { color: #000000; font-weight: bold; } +tr.opt-default { color: #606060; } +pre.log { margin: 0; padding: 0; padding-left: 1em; } diff --git a/feast-module.html b/feast-module.html new file mode 100644 index 0000000..557d352 --- /dev/null +++ b/feast-module.html @@ -0,0 +1,864 @@ + + + + + feast + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Module feast + + + + +
+
+ +

Module feast

source code

+
+
+The FEAST module provides an interface between the C library
+for feature selection and Python. 
+
+References: 
+1) G. Brown, A. Pocock, M.-J. Zhao, and M. Lujan, "Conditional
+    likelihood maximization: A unifying framework for information
+    theoretic feature selection," Journal of Machine Learning 
+    Research, vol. 13, pp. 27-66, 2012.
+
+
+ +
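A typical call looks like the sketch below (illustrative only: it assumes libFSToolbox.so is loadable, invents a small random dataset, and any selector documented on this page can stand in for JMI):

    import numpy as np
    import feast

    # hypothetical data: 100 observations of 15 discrete features
    data = np.random.randint(0, 5, size=(100, 15)).astype(np.float64)
    labels = np.random.randint(0, 2, size=100).astype(np.float64)

    # select the 5 best features by joint mutual information
    selected = feast.JMI(data, labels, 5)
    print(selected)  # zero-based feature indices, in selection order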
+

Version: + 0.2.0 +

+

Author: + Calvin Morrison +

+

Copyright: + Copyright 2013, EESI Laboratory +

+

License: + GPL +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Functions
+ list + + + + + + +
BetaGamma(data, + labels, + n_select, + beta=1.0, + gamma=1.0)
+      This algorithm implements conditional mutual information feature
+      selection, such that beta and gamma control the weight attached to the
+      redundant mutual and conditional mutual information, respectively.
+ source code + +
+ +
+ list + + + + + + +
CIFE(data, + labels, + n_select)
+      This function implements the CIFE feature selection algorithm.
+ source code + +
+ +
+ list + + + + + + +
CMIM(data, + labels, + n_select)
+ This function implements the conditional mutual information + maximization feature selection algorithm.
+ source code + +
+ +
+ list
CondMI(data, + labels, + n_select)
+ This function implements the conditional mutual information + maximization feature selection algorithm.
+ source code + +
+ +
+ list + + + + + + +
Condred(data, + labels, + n_select)
+ This function implements the Condred feature selection algorithm.
+ source code + +
+ +
+ list + + + + + + +
DISR(data, + labels, + n_select)
+ This function implements the double input symmetrical relevance + feature selection algorithm.
+ source code + +
+ +
+ list + + + + + + +
ICAP(data, + labels, + n_select)
+ This function implements the interaction capping feature selection + algorithm.
+ source code + +
+ +
+ list + + + + + + +
JMI(data, + labels, + n_select)
+ This function implements the joint mutual information feature + selection algorithm.
+ source code + +
+ +
+ list + + + + + + +
MIFS(data, + labels, + n_select)
+ This function implements the MIFS algorithm.
+ source code + +
+ +
+ list + + + + + + +
MIM(data, + labels, + n_select)
+ This function implements the MIM algorithm.
+ source code + +
+ +
+ list + + + + + + +
mRMR(data, + labels, + n_select)
+      This function implements the max-relevance min-redundancy feature
+      selection algorithm.
+ source code + +
+ +
+ tuple + + + + + + +
check_data(data, + labels)
+ Check dimensions of the data and the labels.
+ source code + +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ Variables
+   + + __credits__ = ['Calvin Morrison', 'Gregory Ditzler'] +
+   + + __maintainer__ = 'Calvin Morrison' +
+   + + __email__ = 'mutantturkey@gmail.com' +
+   + + __status__ = 'Release' +
+   + + libFSToolbox = <CDLL 'libFSToolbox.so', handle 2be1240 at 2b4b... +
+   + + __package__ = None +
+ + + + + + +
+ Function Details
+ +
+ +
+ + +
+

BetaGamma(data, + labels, + n_select, + beta=1.0, + gamma=1.0) +

+
source code  +
+ +

This algorithm implements conditional mutual information feature
+  selection, such that beta and gamma control the weight attached to the
+  redundant mutual and conditional mutual information, respectively.

+
+
Parameters:
+
    +
  • data (ndarray) - data in a Numpy array such that len(data) = n_observations, and + len(data.transpose()) = n_features (REQUIRED)
  • +
  • labels (ndarray) - labels represented in a numpy list with n_observations as the + number of elements. That is len(labels) = len(data) = + n_observations. (REQUIRED)
  • +
  • n_select (integer) - number of features to select. (REQUIRED)
  • +
  • beta (float between 0 and 1.0) - penalty attached to I(X_j;X_k)
  • +
  • gamma (float between 0 and 1.0) - positive weight attached to the conditional redundancy term + I(X_k;X_j|Y)
  • +
+
Returns: list
+
features in the order they were selected.
+
+
+
+ +
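Since beta and gamma weight the redundant and conditional-redundancy terms, the fixed-weight criteria documented on this page are thin wrappers over BetaGamma. A sketch of the equivalences, using the beta/gamma values stated in this module (data and labels are assumed to be prepared NumPy arrays):

    import feast

    feast.BetaGamma(data, labels, 5, beta=1.0, gamma=0.0)  # same criterion as MIFS
    feast.BetaGamma(data, labels, 5, beta=1.0, gamma=1.0)  # same criterion as CIFE
    feast.BetaGamma(data, labels, 5, beta=0.0, gamma=1.0)  # same criterion as Condred
    feast.BetaGamma(data, labels, 5, beta=0.0, gamma=0.0)  # same criterion as MIM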
+ +
+ + +
+

CIFE(data, + labels, + n_select) +

+
source code  +
+ +

This function implements the CIFE feature selection algorithm. beta
+  = 1; gamma = 1;

+
+
Parameters:
+
    +
  • data (ndarray) - A Numpy array such that len(data) = n_observations, and + len(data.transpose()) = n_features
  • +
  • labels (ndarray) - labels represented in a numpy list with n_observations as the + number of elements. That is len(labels) = len(data) = + n_observations.
  • +
  • n_select (integer) - number of features to select.
  • +
+
Returns: list
+
features in the order they were selected.
+
+
+ +
+ +
+ + +
+

CMIM(data, + labels, + n_select) +

+
source code  +
+ +

This function implements the conditional mutual information + maximization feature selection algorithm. Note that this implementation + does not allow for the weighting of the redundancy terms that BetaGamma + will allow you to do.

+
+
Parameters:
+
    +
  • data (ndarray) - A Numpy array such that len(data) = n_observations, and + len(data.transpose()) = n_features
  • +
  • labels (ndarray) - labels represented in a numpy array with n_observations as the + number of elements. That is len(labels) = len(data) = + n_observations.
  • +
  • n_select (integer) - number of features to select.
  • +
+
Returns: list
+
features in the order that they were selected.
+
+
+
+ +
+ +
+ + +
+

CondMI(data, + labels, + n_select) +

+
source code  +
+ +

This function implements the conditional mutual information + maximization feature selection algorithm.

+
+
Parameters:
+
    +
  • data (ndarray) - data in a Numpy array such that len(data) = n_observations, and + len(data.transpose()) = n_features
  • +
  • labels (ndarray) - labels represented in a numpy list with n_observations as the number of
+  elements. That is len(labels) = len(data) = n_observations.
  • +
  • n_select (integer) - number of features to select.
  • +
+
Returns: list
+
features in the order they were selected.
+
+
+
+ +
+ +
+ + +
+

Condred(data, + labels, + n_select) +

+
source code  +
+ +

This function implements the Condred feature selection algorithm. beta + = 0; gamma = 1;

+
+
Parameters:
+
    +
  • data (ndarray) - data in a Numpy array such that len(data) = n_observations, and + len(data.transpose()) = n_features
  • +
  • labels (ndarray) - labels represented in a numpy list with n_observations as the + number of elements. That is len(labels) = len(data) = + n_observations.
  • +
  • n_select (integer) - number of features to select.
  • +
+
Returns: list
+
the features in the order they were selected.
+
+
+
+ +
+ +
+ + +
+

DISR(data, + labels, + n_select) +

+
source code  +
+ +

This function implements the double input symmetrical relevance + feature selection algorithm.

+
+
Parameters:
+
    +
  • data (ndarray) - data in a Numpy array such that len(data) = n_observations, and + len(data.transpose()) = n_features
  • +
  • labels (ndarray) - labels represented in a numpy list with n_observations as the + number of elements. That is len(labels) = len(data) = + n_observations.
  • +
  • n_select (integer) - number of features to select. (REQUIRED)
  • +
+
Returns: list
+
the features in the order they were selected.
+
+
+
+ +
+ +
+ + +
+

ICAP(data, + labels, + n_select) +

+
source code  +
+ +

This function implements the interaction capping feature selection + algorithm.

+
+
Parameters:
+
    +
  • data (ndarray) - data in a Numpy array such that len(data) = n_observations, and + len(data.transpose()) = n_features
  • +
  • labels (ndarray) - labels represented in a numpy list with n_observations as the + number of elements. That is len(labels) = len(data) = + n_observations.
  • +
  • n_select (integer) - number of features to select. (REQUIRED)
  • +
+
Returns: list
+
the features in the order they were selected.
+
+
+
+ +
+ +
+ + +
+

JMI(data, + labels, + n_select) +

+
source code  +
+ +

This function implements the joint mutual information feature + selection algorithm.

+
+
Parameters:
+
    +
  • data (ndarray) - data in a Numpy array such that len(data) = n_observations, and + len(data.transpose()) = n_features
  • +
  • labels (ndarray) - labels represented in a numpy list with n_observations as the + number of elements. That is len(labels) = len(data) = + n_observations.
  • +
  • n_select (integer) - number of features to select. (REQUIRED)
  • +
+
Returns: list
+
the features in the order they were selected.
+
+
+
+ +
+ +
+ + +
+

MIFS(data, + labels, + n_select) +

+
source code  +
+ +

This function implements the MIFS algorithm. beta = 1; gamma = 0;

+
+
Parameters:
+
    +
  • data (ndarray) - data in a Numpy array such that len(data) = n_observations, and + len(data.transpose()) = n_features
  • +
  • labels (ndarray) - labels represented in a numpy list with n_observations as the + number of elements. That is len(labels) = len(data) = + n_observations.
  • +
  • n_select (integer) - number of features to select. (REQUIRED)
  • +
+
Returns: list
+
the features in the order they were selected.
+
+
+
+ +
+ +
+ + +
+

MIM(data, + labels, + n_select) +

+
source code  +
+ +

This function implements the MIM algorithm. beta = 0; gamma = 0;

+
+
Parameters:
+
    +
  • data (ndarray) - data in a Numpy array such that len(data) = n_observations, and + len(data.transpose()) = n_features
  • +
  • labels (ndarray) - labels represented in a numpy list with n_observations as the + number of elements. That is len(labels) = len(data) = + n_observations.
  • +
  • n_select (integer) - number of features to select. (REQUIRED)
  • +
+
Returns: list
+
the features in the order they were selected.
+
+
+
+ +
+ +
+ + +
+

mRMR(data, + labels, + n_select) +

+
source code  +
+ +

This function implements the max-relevance min-redundancy feature
+  selection algorithm.

+
+
Parameters:
+
    +
  • data (ndarray) - data in a Numpy array such that len(data) = n_observations, and + len(data.transpose()) = n_features
  • +
  • labels (ndarray) - labels represented in a numpy list with n_observations as the + number of elements. That is len(labels) = len(data) = + n_observations.
  • +
  • n_select (integer) - number of features to select. (REQUIRED)
  • +
+
Returns: list
+
the features in the order they were selected.
+
+
+
+ +
+ +
+ + +
+

check_data(data, + labels) +

+
source code  +
+ +

Check dimensions of the data and the labels. Raise an exception if
+  there is a problem.

+

Data and labels are automatically cast as doubles before calling the
+  feature selection functions.

+
+
Parameters:
+
    +
  • data - the data
  • +
  • labels - the labels
  • +
+
Returns: tuple
+
+
+
+
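A minimal sketch of the behavior described above (it assumes the feast module imports; the arrays are made up):

    import numpy as np
    import feast

    data = np.array([[1, 0], [0, 1], [1, 1]])  # integer ndarray
    labels = np.array([0, 1, 1])

    d, l = feast.check_data(data, labels)      # both come back as doubles
    assert d.dtype == np.float64 and l.dtype == np.float64

    try:
        feast.check_data(data, np.array([0, 1]))  # length mismatch
    except Exception as err:
        print(err)  # data and labels must be the same length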
+ + + + + + +
+ Variables Details
+ +
+ +
+

libFSToolbox

+ +
+
+
+
Value:
+
+<CDLL 'libFSToolbox.so', handle 2be1240 at 2b4bc10>
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/feast-pysrc.html b/feast-pysrc.html new file mode 100644 index 0000000..d5f5dc1 --- /dev/null +++ b/feast-pysrc.html @@ -0,0 +1,620 @@ + + + + + feast + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Module feast + + + + +
+
+

Source Code for Module feast

+
+  1  ''' 
+  2    The FEAST module provides an interface between the C library 
+  3    for feature selection and Python.  
+  4   
+  5    References:  
+  6    1) G. Brown, A. Pocock, M.-J. Zhao, and M. Lujan, "Conditional 
+  7        likelihood maximization: A unifying framework for information 
+  8        theoretic feature selection," Journal of Machine Learning  
+  9        Research, vol. 13, pp. 27-66, 2012. 
+ 10   
+ 11  ''' 
+ 12  __author__ = "Calvin Morrison" 
+ 13  __copyright__ = "Copyright 2013, EESI Laboratory" 
+ 14  __credits__ = ["Calvin Morrison", "Gregory Ditzler"] 
+ 15  __license__ = "GPL" 
+ 16  __version__ = "0.2.0" 
+ 17  __maintainer__ = "Calvin Morrison" 
+ 18  __email__ = "mutantturkey@gmail.com" 
+ 19  __status__ = "Release" 
+ 20   
+ 21  import numpy as np 
+ 22  import ctypes as c 
+ 23   
+ 24  try: 
+ 25    libFSToolbox = c.CDLL("libFSToolbox.so") 
+ 26  except: 
+ 27    raise Exception("Error: could not load libFSToolbox.so") 
+ 28   
+ 29   
+
30 -def BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0): +
31    ''' 
+ 32    This algorithm implements conditional mutual information 
+ 33    feature selection, such that beta and gamma control the 
+ 34    weight attached to the redundant mutual and conditional 
+ 35    mutual information, respectively. 
+ 36   
+ 37    @param data: data in a Numpy array such that len(data) = 
+ 38      n_observations, and len(data.transpose()) = n_features 
+ 39      (REQUIRED) 
+ 40    @type data: ndarray 
+ 41    @param labels: labels represented in a numpy list with 
+ 42      n_observations as the number of elements. That is 
+ 43      len(labels) = len(data) = n_observations. 
+ 44      (REQUIRED) 
+ 45    @type labels: ndarray 
+ 46    @param n_select: number of features to select. (REQUIRED) 
+ 47    @type n_select: integer 
+ 48    @param beta: penalty attached to I(X_j;X_k) 
+ 49    @type beta: float between 0 and 1.0 
+ 50    @param gamma: positive weight attached to the conditional 
+ 51      redundancy term I(X_k;X_j|Y) 
+ 52    @type gamma: float between 0 and 1.0 
+ 53    @return: features in the order they were selected. 
+ 54    @rtype: list 
+ 55    ''' 
+ 56    data, labels = check_data(data, labels) 
+ 57   
+ 58    # python values 
+ 59    n_observations, n_features = data.shape 
+ 60    output = np.zeros(n_select) 
+ 61   
+ 62    # cast as C types 
+ 63    c_n_observations = c.c_int(n_observations) 
+ 64    c_n_select = c.c_int(n_select) 
+ 65    c_n_features = c.c_int(n_features) 
+ 66    c_beta = c.c_double(beta) 
+ 67    c_gamma = c.c_double(gamma) 
+ 68   
+ 69    libFSToolbox.BetaGamma.restype = c.POINTER(c.c_double * n_select) 
+ 70    features = libFSToolbox.BetaGamma(c_n_select, 
+ 71                        c_n_observations, 
+ 72                        c_n_features, 
+ 73                        data.ctypes.data_as(c.POINTER(c.c_double)), 
+ 74                        labels.ctypes.data_as(c.POINTER(c.c_double)), 
+ 75                        output.ctypes.data_as(c.POINTER(c.c_double)), 
+ 76                        c_beta, 
+ 77                        c_gamma 
+ 78                        ) 
+ 79   
+ 80    # turn our output into a list 
+ 81    selected_features = [] 
+ 82    for i in features.contents: 
+ 83      # recall that feast was implemented with Matlab in mind, so the 
+ 84      # authors assumed the indexing started at one; however, in Python 
+ 85      # the indexing starts at zero. 
+ 86      selected_features.append(i - 1) 
+ 87   
+ 88    return selected_features 
89 + 90 + 91 +
92 -def CIFE(data, labels, n_select): +
93    ''' 
+ 94    This function implements the CIFE feature selection algorithm. 
+ 95    beta = 1; gamma = 1; 
+ 96   
+ 97    @param data: A Numpy array such that len(data) = 
+ 98      n_observations, and len(data.transpose()) = n_features 
+ 99    @type data: ndarray 
+100    @param labels: labels represented in a numpy list with 
+101      n_observations as the number of elements. That is 
+102      len(labels) = len(data) = n_observations. 
+103    @type labels: ndarray 
+104    @param n_select: number of features to select. 
+105    @type n_select: integer 
+106    @return: features in the order they were selected. 
+107    @rtype: list 
+108    ''' 
+109   
+110    return BetaGamma(data, labels, n_select, beta=1.0, gamma=1.0) 
111 +112 +113 +114 +
115 -def CMIM(data, labels, n_select): +
116    ''' 
+117    This function implements the conditional mutual information 
+118    maximization feature selection algorithm. Note that this 
+119    implementation does not allow for the weighting of the 
+120    redundancy terms that BetaGamma will allow you to do. 
+121   
+122    @param data: A Numpy array such that len(data) = 
+123      n_observations, and len(data.transpose()) = n_features 
+124    @type data: ndarray 
+125    @param labels: labels represented in a numpy array with 
+126      n_observations as the number of elements. That is 
+127      len(labels) = len(data) = n_observations. 
+128    @type labels: ndarray 
+129    @param n_select: number of features to select. 
+130    @type n_select: integer 
+131    @return: features in the order that they were selected. 
+132    @rtype: list 
+133    ''' 
+134    data, labels = check_data(data, labels) 
+135   
+136    # python values 
+137    n_observations, n_features = data.shape 
+138    output = np.zeros(n_select) 
+139   
+140    # cast as C types 
+141    c_n_observations = c.c_int(n_observations) 
+142    c_n_select = c.c_int(n_select) 
+143    c_n_features = c.c_int(n_features) 
+144   
+145    libFSToolbox.CMIM.restype = c.POINTER(c.c_double * n_select) 
+146    features = libFSToolbox.CMIM(c_n_select, 
+147                        c_n_observations, 
+148                        c_n_features, 
+149                        data.ctypes.data_as(c.POINTER(c.c_double)), 
+150                        labels.ctypes.data_as(c.POINTER(c.c_double)), 
+151                        output.ctypes.data_as(c.POINTER(c.c_double)) 
+152                        ) 
+153   
+154   
+155    # turn our output into a list 
+156    selected_features = [] 
+157    for i in features.contents: 
+158      # recall that feast was implemented with Matlab in mind, so the 
+159      # authors assumed the indexing started at one; however, in Python 
+160      # the indexing starts at zero. 
+161      selected_features.append(i - 1) 
+162   
+163    return selected_features 
164 +165 +166 +
167 -def CondMI(data, labels, n_select): +
168    ''' 
+169    This function implements the conditional mutual information 
+170    maximization feature selection algorithm. 
+171   
+172    @param data: data in a Numpy array such that len(data) = n_observations, 
+173      and len(data.transpose()) = n_features 
+174    @type data: ndarray 
+175    @param labels: labels represented in a numpy list with 
+176      n_observations as the number of elements. That is 
+177      len(labels) = len(data) = n_observations. 
+178    @type labels: ndarray 
+179    @param n_select: number of features to select. 
+180    @type n_select: integer 
+181    @return: features in the order they were selected. 
+182    @rtype: list 
+183    ''' 
+184    data, labels = check_data(data, labels) 
+185   
+186    # python values 
+187    n_observations, n_features = data.shape 
+188    output = np.zeros(n_select) 
+189   
+190    # cast as C types 
+191    c_n_observations = c.c_int(n_observations) 
+192    c_n_select = c.c_int(n_select) 
+193    c_n_features = c.c_int(n_features) 
+194   
+195    libFSToolbox.CondMI.restype = c.POINTER(c.c_double * n_select) 
+196    features = libFSToolbox.CondMI(c_n_select, 
+197                        c_n_observations, 
+198                        c_n_features, 
+199                        data.ctypes.data_as(c.POINTER(c.c_double)), 
+200                        labels.ctypes.data_as(c.POINTER(c.c_double)), 
+201                        output.ctypes.data_as(c.POINTER(c.c_double)) 
+202                        ) 
+203   
+204   
+205    # turn our output into a list 
+206    selected_features = [] 
+207    for i in features.contents: 
+208      # recall that feast was implemented with Matlab in mind, so the 
+209      # authors assumed the indexing started at one; however, in Python 
+210      # the indexing starts at zero. 
+211      selected_features.append(i - 1) 
+212   
+213    return selected_features 
214 +215 +
216 -def Condred(data, labels, n_select): +
217 ''' +218 This function implements the Condred feature selection algorithm. +219 beta = 0; gamma = 1; +220 +221 @param data: data in a Numpy array such that len(data) = +222 n_observations, and len(data.transpose()) = n_features +223 @type data: ndarray +224 @param labels: labels represented in a numpy list with +225 n_observations as the number of elements. That is +226 len(labels) = len(data) = n_observations. +227 @type labels: ndarray +228 @param n_select: number of features to select. +229 @type n_select: integer +230 @return: the features in the order they were selected. +231 @rtype: list +232 ''' +233 data, labels = check_data(data, labels) +234 +235 return BetaGamma(data, labels, n_select, beta=0.0, gamma=1.0) +
236 +237 +238 +
239 -def DISR(data, labels, n_select): +
240    ''' 
+241    This function implements the double input symmetrical relevance 
+242    feature selection algorithm. 
+243   
+244    @param data: data in a Numpy array such that len(data) = 
+245      n_observations, and len(data.transpose()) = n_features 
+246    @type data: ndarray 
+247    @param labels: labels represented in a numpy list with 
+248      n_observations as the number of elements. That is 
+249      len(labels) = len(data) = n_observations. 
+250    @type labels: ndarray 
+251    @param n_select: number of features to select. (REQUIRED) 
+252    @type n_select: integer 
+253    @return: the features in the order they were selected. 
+254    @rtype: list 
+255    ''' 
+256    data, labels = check_data(data, labels) 
+257   
+258    # python values 
+259    n_observations, n_features = data.shape 
+260    output = np.zeros(n_select) 
+261   
+262    # cast as C types 
+263    c_n_observations = c.c_int(n_observations) 
+264    c_n_select = c.c_int(n_select) 
+265    c_n_features = c.c_int(n_features) 
+266   
+267    libFSToolbox.DISR.restype = c.POINTER(c.c_double * n_select) 
+268    features = libFSToolbox.DISR(c_n_select, 
+269                        c_n_observations, 
+270                        c_n_features, 
+271                        data.ctypes.data_as(c.POINTER(c.c_double)), 
+272                        labels.ctypes.data_as(c.POINTER(c.c_double)), 
+273                        output.ctypes.data_as(c.POINTER(c.c_double)) 
+274                        ) 
+275   
+276   
+277    # turn our output into a list 
+278    selected_features = [] 
+279    for i in features.contents: 
+280      # recall that feast was implemented with Matlab in mind, so the 
+281      # authors assumed the indexing started at one; however, in Python 
+282      # the indexing starts at zero. 
+283      selected_features.append(i - 1) 
+284   
+285    return selected_features 
286 +287 +288 +289 +
290 -def ICAP(data, labels, n_select): +
291    ''' 
+292    This function implements the interaction capping feature 
+293    selection algorithm. 
+294   
+295    @param data: data in a Numpy array such that len(data) = 
+296      n_observations, and len(data.transpose()) = n_features 
+297    @type data: ndarray 
+298    @param labels: labels represented in a numpy list with 
+299      n_observations as the number of elements. That is 
+300      len(labels) = len(data) = n_observations. 
+301    @type labels: ndarray 
+302    @param n_select: number of features to select. (REQUIRED) 
+303    @type n_select: integer 
+304    @return: the features in the order they were selected. 
+305    @rtype: list 
+306    ''' 
+307    data, labels = check_data(data, labels) 
+308   
+309    # python values 
+310    n_observations, n_features = data.shape 
+311    output = np.zeros(n_select) 
+312   
+313    # cast as C types 
+314    c_n_observations = c.c_int(n_observations) 
+315    c_n_select = c.c_int(n_select) 
+316    c_n_features = c.c_int(n_features) 
+317   
+318    libFSToolbox.ICAP.restype = c.POINTER(c.c_double * n_select) 
+319    features = libFSToolbox.ICAP(c_n_select, 
+320                        c_n_observations, 
+321                        c_n_features, 
+322                        data.ctypes.data_as(c.POINTER(c.c_double)), 
+323                        labels.ctypes.data_as(c.POINTER(c.c_double)), 
+324                        output.ctypes.data_as(c.POINTER(c.c_double)) 
+325                        ) 
+326   
+327   
+328    # turn our output into a list 
+329    selected_features = [] 
+330    for i in features.contents: 
+331      # recall that feast was implemented with Matlab in mind, so the 
+332      # authors assumed the indexing started at one; however, in Python 
+333      # the indexing starts at zero. 
+334      selected_features.append(i - 1) 
+335   
+336    return selected_features 
337 +338 +339 +340 +341 +
342 -def JMI(data, labels, n_select): +
343    ''' 
+344    This function implements the joint mutual information feature 
+345    selection algorithm. 
+346   
+347    @param data: data in a Numpy array such that len(data) = 
+348      n_observations, and len(data.transpose()) = n_features 
+349    @type data: ndarray 
+350    @param labels: labels represented in a numpy list with 
+351      n_observations as the number of elements. That is 
+352      len(labels) = len(data) = n_observations. 
+353    @type labels: ndarray 
+354    @param n_select: number of features to select. (REQUIRED) 
+355    @type n_select: integer 
+356    @return: the features in the order they were selected. 
+357    @rtype: list 
+358    ''' 
+359    data, labels = check_data(data, labels) 
+360   
+361    # python values 
+362    n_observations, n_features = data.shape 
+363    output = np.zeros(n_select) 
+364   
+365    # cast as C types 
+366    c_n_observations = c.c_int(n_observations) 
+367    c_n_select = c.c_int(n_select) 
+368    c_n_features = c.c_int(n_features) 
+369   
+370    libFSToolbox.JMI.restype = c.POINTER(c.c_double * n_select) 
+371    features = libFSToolbox.JMI(c_n_select, 
+372                        c_n_observations, 
+373                        c_n_features, 
+374                        data.ctypes.data_as(c.POINTER(c.c_double)), 
+375                        labels.ctypes.data_as(c.POINTER(c.c_double)), 
+376                        output.ctypes.data_as(c.POINTER(c.c_double)) 
+377                        ) 
+378   
+379   
+380    # turn our output into a list 
+381    selected_features = [] 
+382    for i in features.contents: 
+383      # recall that feast was implemented with Matlab in mind, so the 
+384      # authors assumed the indexing started at one; however, in Python 
+385      # the indexing starts at zero. 
+386      selected_features.append(i - 1) 
+387   
+388    return selected_features 
389 +390 +391 +
392 -def MIFS(data, labels, n_select): +
393    ''' 
+394    This function implements the MIFS algorithm. 
+395    beta = 1; gamma = 0; 
+396   
+397    @param data: data in a Numpy array such that len(data) = 
+398      n_observations, and len(data.transpose()) = n_features 
+399    @type data: ndarray 
+400    @param labels: labels represented in a numpy list with 
+401      n_observations as the number of elements. That is 
+402      len(labels) = len(data) = n_observations. 
+403    @type labels: ndarray 
+404    @param n_select: number of features to select. (REQUIRED) 
+405    @type n_select: integer 
+406    @return: the features in the order they were selected. 
+407    @rtype: list 
+408    ''' 
+409   
+410    return BetaGamma(data, labels, n_select, beta=1.0, gamma=0.0) 
411 +412 +
413 -def MIM(data, labels, n_select): +
414 ''' +415 This function implements the MIM algorithm. +416 beta = 0; gamma = 0; +417 +418 @param data: data in a Numpy array such that len(data) = +419 n_observations, and len(data.transpose()) = n_features +420 @type data: ndarray +421 @param labels: labels represented in a numpy list with +422 n_observations as the number of elements. That is +423 len(labels) = len(data) = n_observations. +424 @type labels: ndarray +425 @param n_select: number of features to select. (REQUIRED) +426 @type n_select: integer +427 @return: the features in the order they were selected. +428 @rtype: list +429 ''' +430 data, labels = check_data(data, labels) +431 +432 return BetaGamma(data, labels, n_select, beta=0.0, gamma=0.0) +
433 +434 +435 +
436 -def mRMR(data, labels, n_select): +
437    ''' 
+438    This function implements the max-relevance min-redundancy feature 
+439    selection algorithm. 
+440   
+441    @param data: data in a Numpy array such that len(data) = 
+442      n_observations, and len(data.transpose()) = n_features 
+443    @type data: ndarray 
+444    @param labels: labels represented in a numpy list with 
+445      n_observations as the number of elements. That is 
+446      len(labels) = len(data) = n_observations. 
+447    @type labels: ndarray 
+448    @param n_select: number of features to select. (REQUIRED) 
+449    @type n_select: integer 
+450    @return: the features in the order they were selected. 
+451    @rtype: list 
+452    ''' 
+453    data, labels = check_data(data, labels) 
+454   
+455    # python values 
+456    n_observations, n_features = data.shape 
+457    output = np.zeros(n_select) 
+458   
+459    # cast as C types 
+460    c_n_observations = c.c_int(n_observations) 
+461    c_n_select = c.c_int(n_select) 
+462    c_n_features = c.c_int(n_features) 
+463   
+464    libFSToolbox.mRMR_D.restype = c.POINTER(c.c_double * n_select) 
+465    features = libFSToolbox.mRMR_D(c_n_select, 
+466                        c_n_observations, 
+467                        c_n_features, 
+468                        data.ctypes.data_as(c.POINTER(c.c_double)), 
+469                        labels.ctypes.data_as(c.POINTER(c.c_double)), 
+470                        output.ctypes.data_as(c.POINTER(c.c_double)) 
+471                        ) 
+472   
+473   
+474    # turn our output into a list 
+475    selected_features = [] 
+476    for i in features.contents: 
+477      # recall that feast was implemented with Matlab in mind, so the 
+478      # authors assumed the indexing started at one; however, in Python 
+479      # the indexing starts at zero. 
+480      selected_features.append(i - 1) 
+481   
+482    return selected_features 
483 +
484 -def check_data(data, labels): +
485    ''' 
+486    Check dimensions of the data and the labels. Raise an exception 
+487    if there is a problem. 
+488   
+489    Data and labels are automatically cast as doubles before calling the 
+490    feature selection functions. 
+491   
+492    @param data: the data 
+493    @param labels: the labels 
+494    @return (data, labels): ndarray of floats 
+495    @rtype: tuple 
+496    ''' 
+497   
+498    if isinstance(data, np.ndarray) is False: 
+499      raise Exception("data must be a numpy ndarray.") 
+500    if isinstance(labels, np.ndarray) is False: 
+501      raise Exception("labels must be a numpy ndarray.") 
+502   
+503    if len(data) != len(labels): 
+504      raise Exception("data and labels must be the same length") 
+505   
+506    return 1.0*data, 1.0*labels 
507 +
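Every wrapper in this listing repeats one ctypes pattern: declare the C function's return type as a pointer to n_select doubles, hand the NumPy buffers over as double pointers, and shift FEAST's Matlab-style 1-based indices down by one. A condensed, illustrative sketch of that pattern (the helper name select_with is hypothetical; it assumes a libFSToolbox entry point with the argument order used above and float64 inputs, as check_data produces):

    import numpy as np
    from ctypes import CDLL, POINTER, c_double, c_int

    lib = CDLL("libFSToolbox.so")

    def select_with(c_func, data, labels, n_select):
        n_observations, n_features = data.shape
        output = np.zeros(n_select)

        # the C side returns a pointer to an array of n_select doubles
        c_func.restype = POINTER(c_double * n_select)
        features = c_func(c_int(n_select),
                          c_int(n_observations),
                          c_int(n_features),
                          data.ctypes.data_as(POINTER(c_double)),
                          labels.ctypes.data_as(POINTER(c_double)),
                          output.ctypes.data_as(POINTER(c_double)))

        # FEAST indexes features from 1 (Matlab); Python is zero-based
        return [i - 1 for i in features.contents]

    # e.g. select_with(lib.JMI, data, labels, 5)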
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + -- cgit v1.2.3