Source code for evalml.pipelines.components.transformers.feature_selection.recursive_feature_elimination_selector

"""Components that select top features based on recursive feature elimination with a Random Forest model."""
from abc import abstractmethod

from sklearn.ensemble import RandomForestClassifier as SKRandomForestClassifier
from sklearn.ensemble import RandomForestRegressor as SKRandomForestRegressor
from sklearn.feature_selection import RFECV
from skopt.space import Real

from evalml.pipelines.components.transformers.feature_selection.feature_selector import (
    FeatureSelector,
)


class RecursiveFeatureEliminationSelector(FeatureSelector):
    """Base class for selectors that prune features with recursive feature elimination.

    The elimination itself is delegated to scikit-learn's ``RFECV``, wrapped around
    whatever estimator the concrete subclass returns from ``_get_estimator``.

    Args:
        step (int, float): Forwarded to ``RFECV`` as ``step``. Defaults to 0.2.
        min_features_to_select (int): Forwarded to ``RFECV`` as ``min_features_to_select``. Defaults to 1.
        cv (int or None): Forwarded to ``RFECV`` as ``cv``. Defaults to None.
        scoring (str, callable or None): Forwarded to ``RFECV`` as ``scoring``. Defaults to None.
        n_jobs (int or None): Handed to ``_get_estimator``; it is not passed to ``RFECV`` itself. Defaults to -1.
        n_estimators (int): Handed to ``_get_estimator``. Defaults to 10.
        max_depth (int): Handed to ``_get_estimator``. Defaults to None.
        random_seed (int): Seed handed to ``_get_estimator`` and to the parent constructor. Defaults to 0.
        **kwargs: Additional keyword arguments, recorded in the component parameters and forwarded to ``RFECV``.
    """

    hyperparameter_ranges = {"step": Real(0.05, 0.25)}
    """{
        "step": Real(0.05, 0.25)
    }"""

    def __init__(
        self,
        step=0.2,
        min_features_to_select=1,
        cv=None,
        scoring=None,
        n_jobs=-1,
        n_estimators=10,
        max_depth=None,
        random_seed=0,
        **kwargs,
    ):
        # The subclass decides which estimator drives the elimination.
        base_estimator = self._get_estimator(
            random_seed=random_seed,
            n_estimators=n_estimators,
            max_depth=max_depth,
            n_jobs=n_jobs,
        )

        # Arguments shared by the RFECV call and the recorded parameters.
        rfecv_args = {
            "step": step,
            "min_features_to_select": min_features_to_select,
            "cv": cv,
            "scoring": scoring,
        }
        selector = RFECV(estimator=base_estimator, **rfecv_args, **kwargs)

        # Recorded parameters: RFECV arguments, then the estimator settings,
        # then any extra keyword arguments supplied by the caller.
        parameters = {
            **rfecv_args,
            "n_jobs": n_jobs,
            "n_estimators": n_estimators,
            "max_depth": max_depth,
            **kwargs,
        }

        super().__init__(
            parameters=parameters,
            component_obj=selector,
            random_seed=random_seed,
        )

    @abstractmethod
    def _get_estimator(self, random_seed, n_estimators, max_depth, n_jobs):
        """Return estimator with supplied parameters."""
class RFClassifierRFESelector(RecursiveFeatureEliminationSelector):
    """Recursive feature elimination selector driven by a Random Forest Classifier.

    Args:
        step (int, float): How many features to eliminate per iteration. An integer is
            taken as a feature count; a float is taken as the percentage of features to
            eliminate each iteration. The final iteration may drop fewer features so that
            the min_features_to_select constraint still holds. Defaults to 0.2.
        min_features_to_select (int): The minimum number of features to return. Defaults to 1.
        cv (int or None): Number of folds for the cross-validation splitting strategy.
            Defaults to None which will use 5 folds.
        scoring (str, callable or None): A string or scorer callable object to specify
            the scoring method.
        n_jobs (int or None): Number of jobs to run in parallel. -1 uses all processes.
            Defaults to -1.
        n_estimators (int): The number of trees in the forest. Defaults to 10.
        max_depth (int): Maximum tree depth for base learners. Defaults to None.
        random_seed (int): Seed for the random number generator. Defaults to 0.
    """

    name = "RFE Selector with RF Classifier"

    def _get_estimator(self, random_seed, n_estimators, max_depth, n_jobs):
        """Build the random forest classifier used as the elimination estimator."""
        forest_settings = {
            "random_state": random_seed,
            "n_estimators": n_estimators,
            "max_depth": max_depth,
            "n_jobs": n_jobs,
        }
        return SKRandomForestClassifier(**forest_settings)
class RFRegressorRFESelector(RecursiveFeatureEliminationSelector):
    """Recursive feature elimination selector driven by a Random Forest Regressor.

    Args:
        step (int, float): How many features to eliminate per iteration. An integer is
            taken as a feature count; a float is taken as the percentage of features to
            eliminate each iteration. The final iteration may drop fewer features so that
            the min_features_to_select constraint still holds. Defaults to 0.2.
        min_features_to_select (int): The minimum number of features to return. Defaults to 1.
        cv (int or None): Number of folds for the cross-validation splitting strategy.
            Defaults to None which will use 5 folds.
        scoring (str, callable or None): A string or scorer callable object to specify
            the scoring method.
        n_jobs (int or None): Number of jobs to run in parallel. -1 uses all processes.
            Defaults to -1.
        n_estimators (int): The number of trees in the forest. Defaults to 10.
        max_depth (int): Maximum tree depth for base learners. Defaults to None.
        random_seed (int): Seed for the random number generator. Defaults to 0.
    """

    name = "RFE Selector with RF Regressor"

    def _get_estimator(self, random_seed, n_estimators, max_depth, n_jobs):
        """Build the random forest regressor used as the elimination estimator."""
        forest_settings = {
            "random_state": random_seed,
            "n_estimators": n_estimators,
            "max_depth": max_depth,
            "n_jobs": n_jobs,
        }
        return SKRandomForestRegressor(**forest_settings)