from abc import abstractmethod
from evalml.pipelines.components.transformers import Transformer
from evalml.utils import infer_feature_types
class ColumnSelector(Transformer):
    """Base class for transformers that act on a named subset of input columns.

    Subclasses implement ``_modify_columns`` to decide what happens to the
    configured columns (for example, dropping them or selecting only them).

    Arguments:
        columns (list(string)): List of column names the transformer operates on.
        random_seed (int): Seed for the random number generator. Defaults to 0.

    Raises:
        ValueError: If ``columns`` is truthy but is not a list.
    """

    def __init__(self, columns=None, random_seed=0, **kwargs):
        # Falsy values (None, an empty list, ...) skip the type check; they are
        # later treated as "no columns" by ``_check_input_for_columns``.
        if columns and not isinstance(columns, list):
            raise ValueError(
                f"Parameter columns must be a list. Received {type(columns)}."
            )
        parameters = {"columns": columns}
        parameters.update(kwargs)
        super().__init__(
            parameters=parameters, component_obj=None, random_seed=random_seed
        )

    def _check_input_for_columns(self, X):
        """Verify that every configured column name exists in ``X``.

        Arguments:
            X: Input data exposing a ``columns`` attribute.

        Raises:
            ValueError: If one or more configured columns are absent from ``X``.
                Missing names are listed in the order they were configured.
        """
        cols = self.parameters.get("columns") or []
        available = set(X.columns)
        # dict.fromkeys de-duplicates while keeping the configured order, so
        # the error message is deterministic (iterating a set is not).
        missing_cols = [col for col in dict.fromkeys(cols) if col not in available]
        if missing_cols:
            raise ValueError(
                "Columns {} not found in input data".format(
                    ", ".join(f"'{col_name}'" for col_name in missing_cols)
                )
            )

    @abstractmethod
    def _modify_columns(self, cols, X, y=None):
        """How the transformer modifies the columns of the input data."""

    def fit(self, X, y=None):
        """Fits the transformer by checking if column names are present in the dataset.

        Arguments:
            X (pd.DataFrame): Data to check.
            y (pd.Series, optional): Targets. Not used by this method.

        Returns:
            self

        Raises:
            ValueError: If a configured column is not found in ``X``.
        """
        X = infer_feature_types(X)
        self._check_input_for_columns(X)
        return self
class DropColumns(ColumnSelector):
    """Transformer that removes the configured columns from the input data.

    Arguments:
        columns (list(string)): Names of the columns to drop.
        random_seed (int): Seed for the random number generator. Defaults to 0.
    """

    name = "Drop Columns Transformer"
    needs_fitting = False
    hyperparameter_ranges = {}
    """{}"""

    def _modify_columns(self, cols, X, y=None):
        """Return ``X`` with the columns named in ``cols`` dropped via its ``ww`` accessor."""
        remaining = X.ww.drop(cols)
        return remaining
class SelectColumns(ColumnSelector):
    """Transformer that keeps only the configured columns of the input data.

    Arguments:
        columns (list(string)): Names of the columns to select.
        random_seed (int): Seed for the random number generator. Defaults to 0.
    """

    name = "Select Columns Transformer"
    needs_fitting = False
    hyperparameter_ranges = {}
    """{}"""

    def _modify_columns(self, cols, X, y=None):
        """Return the subset of ``X`` indexed by ``cols`` through its ``ww`` accessor."""
        selected = X.ww[cols]
        return selected