Source code for evalml.pipelines.components.transformers.preprocessing.replace_nullable_types

"""Transformer to replace features with the new nullable dtypes with a dtype that is compatible in EvalML."""
from woodwork import init_series
from woodwork.logical_types import BooleanNullable, IntegerNullable

from evalml.pipelines.components.transformers import Transformer
from evalml.utils import infer_feature_types


class ReplaceNullableTypes(Transformer):
    """Transformer to replace features with the new nullable dtypes with a dtype that is compatible in EvalML.

    Nullable-integer features (and a nullable-integer target) are re-typed as
    "double"; nullable-boolean features (and a nullable-boolean target) are
    re-typed as "categorical". Which columns to convert is decided in ``fit``
    and applied in ``transform``.

    Args:
        random_seed (int): Seed forwarded to the ``Transformer`` base class.
            Defaults to 0.
        **kwargs: Stored as the component's parameters.
    """

    name = "Replace Nullable Types Transformer"
    hyperparameter_ranges = {}
    """{}"""
    modifies_target = True

    def __init__(self, random_seed=0, **kwargs):
        parameters = dict(kwargs)

        # State learned in fit(): which feature columns, and whether the
        # target, carried a nullable logical type.
        self._nullable_int_cols = []
        self._nullable_bool_cols = []
        self._nullable_target = None

        super().__init__(
            parameters=parameters, component_obj=None, random_seed=random_seed
        )

    def fit(self, X, y=None):
        """Fits component to data.

        Records the names of the feature columns typed as IntegerNullable,
        AgeNullable or BooleanNullable, and whether the target is typed as
        IntegerNullable or BooleanNullable.

        Args:
            X (pd.DataFrame): The input training data of shape [n_samples, n_features].
            y (pd.Series, optional): The target training data of length [n_samples].

        Returns:
            self
        """
        X_t = infer_feature_types(X)
        self._nullable_int_cols = list(
            X_t.ww.select(
                ["IntegerNullable", "AgeNullable"], return_schema=True
            ).columns
        )
        self._nullable_bool_cols = list(
            X_t.ww.select(["BooleanNullable"], return_schema=True).columns
        )

        # Always start from a clean slate so that refitting on a target that
        # is not nullable does not inherit the previous fit's decision.
        self._nullable_target = None
        if y is not None:
            y = infer_feature_types(y)
            # NOTE(review): features also treat AgeNullable as a nullable
            # integer, but the target does not -- confirm this is intended.
            if isinstance(y.ww.logical_type, IntegerNullable):
                self._nullable_target = "nullable_int"
            elif isinstance(y.ww.logical_type, BooleanNullable):
                self._nullable_target = "nullable_bool"
        return self

    def transform(self, X, y=None):
        """Transforms data by replacing columns that contain nullable types with the appropriate replacement type.

        "float64" for nullable integers and "category" for nullable booleans.

        Args:
            X (pd.DataFrame): Data to transform
            y (pd.Series, optional): Target data to transform

        Returns:
            pd.DataFrame: Transformed X
            pd.Series: Transformed y, or None when no target was passed
        """
        X_t = infer_feature_types(X)
        for col in self._nullable_int_cols:
            X_t.ww[col] = init_series(X_t[col], logical_type="double")
        for col in self._nullable_bool_cols:
            X_t.ww[col] = init_series(X_t[col], logical_type="categorical")

        if y is None:
            return X_t, None

        y_t = infer_feature_types(y)
        # Only re-type the target when fit() flagged it as nullable.
        if self._nullable_target == "nullable_int":
            y_t = init_series(y_t, logical_type="double")
        elif self._nullable_target == "nullable_bool":
            y_t = init_series(y_t, logical_type="categorical")
        return X_t, y_t

    def fit_transform(self, X, y=None):
        """Substitutes non-nullable types for the new pandas nullable types in the data and target data.

        Args:
            X (pd.DataFrame): Input features.
            y (pd.Series, optional): Target data.

        Returns:
            tuple of pd.DataFrame, pd.Series: The input features and target data with the non-nullable types set.
        """
        X_ww = infer_feature_types(X)
        y_ww = infer_feature_types(y) if y is not None else y
        return self.fit(X_ww, y_ww).transform(X_ww, y_ww)