Source code for evalml.pipelines.components.transformers.column_selectors

from abc import abstractmethod

from evalml.pipelines.components.transformers import Transformer
from evalml.utils import infer_feature_types


class ColumnSelector(Transformer):
    """Base class for transformers that act on a user-specified list of columns.

    Subclasses implement ``_modify_columns`` to decide what is done with the
    listed columns.
    """

    def __init__(self, columns=None, random_seed=0, **kwargs):
        """Initializes a transformer that operates on the specified columns of the input data.

        Arguments:
            columns (list(string)): List of column names the transformer acts on.
                Falsy values (such as None or an empty list) skip the type check.
            random_seed (int): Seed passed through to the parent component. Defaults to 0.
            **kwargs: Extra entries merged into the component parameters.

        Raises:
            ValueError: If ``columns`` is truthy and is not a list.
        """
        if columns and not isinstance(columns, list):
            raise ValueError(
                f"Parameter columns must be a list. Received {type(columns)}."
            )

        parameters = {"columns": columns}
        parameters.update(kwargs)
        super().__init__(
            parameters=parameters, component_obj=None, random_seed=random_seed
        )

    def _check_input_for_columns(self, X):
        """Verifies that every configured column name exists in X.

        Arguments:
            X: Data exposing a ``columns`` attribute.

        Raises:
            ValueError: If one or more configured columns are not in ``X.columns``.
        """
        cols = self.parameters.get("columns") or []
        present = set(X.columns)

        # Walk the configured names in order (deduplicated) instead of taking a
        # set difference, so the error message lists the missing columns in a
        # deterministic order.
        missing_cols = [col for col in dict.fromkeys(cols) if col not in present]
        if missing_cols:
            raise ValueError(
                "Columns {} not found in input data".format(
                    ", ".join(f"'{col_name}'" for col_name in missing_cols)
                )
            )

    @abstractmethod
    def _modify_columns(self, cols, X, y=None):
        """How the transformer modifies the columns of the input data.

        Arguments:
            cols (list): Configured column names (an empty list when none were given).
            X: Input data after ``infer_feature_types`` has been applied.
            y (optional): Targets, passed through from ``transform``.

        Returns:
            The modified data.
        """

    def fit(self, X, y=None):
        """Fits the transformer by checking if column names are present in the dataset.

        Arguments:
            X (pd.DataFrame): Data to check.
            y (pd.Series, optional): Targets.

        Returns:
            self

        Raises:
            ValueError: If a configured column is missing from X.
        """
        X = infer_feature_types(X)
        self._check_input_for_columns(X)
        return self

    def transform(self, X, y=None):
        """Transforms data X by applying ``_modify_columns`` to the configured columns.

        Arguments:
            X (pd.DataFrame): Data to transform.
            y (pd.Series, optional): Targets.

        Returns:
            pd.DataFrame: Transformed X.

        Raises:
            ValueError: If a configured column is missing from X.
        """
        X = infer_feature_types(X)
        # Re-validate here: transform may be called without a prior fit.
        self._check_input_for_columns(X)
        cols = self.parameters.get("columns") or []
        modified_cols = self._modify_columns(cols, X, y)
        return infer_feature_types(modified_cols)


class DropColumns(ColumnSelector):
    """Transformer that removes the specified columns from the input data."""

    name = "Drop Columns Transformer"
    hyperparameter_ranges = {}
    needs_fitting = False

    def _modify_columns(self, cols, X, y=None):
        """Returns X without the columns listed in ``cols``."""
        # The drop goes through X's ``ww`` accessor rather than X itself.
        remaining = X.ww.drop(cols)
        return remaining

    def transform(self, X, y=None):
        """Transforms data X by dropping the configured columns.

        Arguments:
            X (pd.DataFrame): Data to transform.
            y (pd.Series, optional): Targets.

        Returns:
            pd.DataFrame: Transformed X.
        """
        # Pure delegation to the base implementation.
        transformed = super().transform(X, y)
        return transformed
class SelectColumns(ColumnSelector):
    """Transformer that keeps only the specified columns of the input data."""

    name = "Select Columns Transformer"
    hyperparameter_ranges = {}
    needs_fitting = False

    def _modify_columns(self, cols, X, y=None):
        """Returns the subset of X made up of the columns listed in ``cols``."""
        # The selection goes through X's ``ww`` accessor rather than X itself.
        selected = X.ww[cols]
        return selected

    def transform(self, X, y=None):
        """Transforms data X by selecting the configured columns.

        Arguments:
            X (pd.DataFrame): Data to transform.
            y (pd.Series, optional): Targets.

        Returns:
            pd.DataFrame: Transformed X.
        """
        # Pure delegation to the base implementation.
        transformed = super().transform(X, y)
        return transformed