Source code for evalml.pipelines.components.transformers.feature_selection.feature_selector

import pandas as pd

from evalml.exceptions import MethodPropertyNotFoundError
from evalml.pipelines.components.transformers import Transformer
from evalml.utils import (
    _retain_custom_types_and_initalize_woodwork,
    infer_feature_types,
)


class FeatureSelector(Transformer):
    """Selects top features based on importance weights.

    Arguments:
        parameters (dict): Dictionary of parameters for the component. Defaults to None.
        component_obj (obj): Third-party objects useful in component implementation. Defaults to None.
        random_seed (int): Seed for the random number generator. Defaults to 0.
    """

    def get_names(self):
        """Get names of selected features.

        Pairs the mask returned by the wrapped component's ``get_support()``
        with ``self.input_feature_names`` (recorded by ``transform``) and
        keeps every name whose mask entry is truthy.

        Returns:
            list[str]: List of the names of features selected
        """
        selected_masks = self._component_obj.get_support()
        return [
            feature_name
            for (selected, feature_name) in zip(
                selected_masks, self.input_feature_names
            )
            if selected
        ]

    def transform(self, X, y=None):
        """Transforms input data by selecting features.

        Arguments:
            X (pd.DataFrame): Data to transform.
            y (pd.Series, optional): Target data. Ignored.

        Returns:
            pd.DataFrame: Transformed X

        Raises:
            MethodPropertyNotFoundError: If the component_obj does not expose
                a ``transform`` attribute.
        """
        X_ww = infer_feature_types(X)
        self.input_feature_names = list(X_ww.columns.values)

        # Guard only the attribute lookup. Wrapping the call itself would turn
        # any AttributeError raised *inside* the selector into a misleading
        # "missing transform method" error.
        try:
            component_transform = self._component_obj.transform
        except AttributeError as err:
            raise MethodPropertyNotFoundError(
                "Feature selector requires a transform method or a component_obj that implements transform"
            ) from err

        # Feed the type-inferred frame so the selector sees the same columns
        # that the feature names, dtype map and index below are derived from.
        X_t = component_transform(X_ww)

        # Restore each surviving column's dtype from the type-inferred input.
        X_dtypes = X_ww.dtypes.to_dict()
        selected_col_names = self.get_names()
        col_types = {key: X_dtypes[key] for key in selected_col_names}
        features = pd.DataFrame(
            X_t, columns=selected_col_names, index=X_ww.index
        ).astype(col_types)
        return _retain_custom_types_and_initalize_woodwork(
            X_ww.ww.logical_types, features
        )

    def fit_transform(self, X, y=None):
        """Fits the feature selector on X and y, then transforms X.

        Arguments:
            X (pd.DataFrame): Data to fit and transform.
            y (pd.Series, optional): Target data. Passed to both ``fit`` and
                ``transform``.

        Returns:
            pd.DataFrame: Transformed X
        """
        return self.fit(X, y).transform(X, y)