Source code for evalml.pipelines.components.transformers.encoders.label_encoder

"""A transformer that encodes target labels using values between 0 and num_classes - 1."""
import woodwork as ww

from evalml.pipelines.components.transformers.transformer import Transformer
from evalml.utils import infer_feature_types


class LabelEncoder(Transformer):
    """A transformer that encodes target labels using values between 0 and num_classes - 1.

    Args:
        positive_label (int, str): The label for the class that should be treated as positive (1)
            for binary classification problems. Must be left as None for targets that do not have
            exactly two unique values; `fit` raises a ValueError otherwise. Defaults to None.
        random_seed (int): Seed for the random number generator. Defaults to 0. Ignored.
    """

    name = "Label Encoder"
    hyperparameter_ranges = {}
    """{}"""
    modifies_features = False
    modifies_target = True

    def __init__(self, positive_label=None, random_seed=0, **kwargs):
        parameters = {"positive_label": positive_label}
        parameters.update(kwargs)
        # Populated by `fit` with the string name of the target's woodwork logical type.
        self.original_typing = ""
        super().__init__(
            parameters=parameters,
            component_obj=None,
            random_seed=random_seed,
        )

    def fit(self, X, y):
        """Fits the label encoder.

        Args:
            X (pd.DataFrame): The input training data of shape [n_samples, n_features]. Ignored.
            y (pd.Series): The target training data of length [n_samples].

        Returns:
            self

        Raises:
            ValueError: If input `y` is None.
            ValueError: If `positive_label` is set and `y` does not have exactly two unique values.
            ValueError: If `positive_label` is set but does not appear in `y`.
        """
        if y is None:
            raise ValueError("y cannot be None!")
        y_ww = infer_feature_types(y)

        # Default encoding: labels in sorted order map to 0..num_classes - 1.
        mapping = {val: i for i, val in enumerate(sorted(y_ww.unique()))}

        positive_label = self.parameters["positive_label"]
        if positive_label is not None:
            if len(mapping) != 2:
                raise ValueError(
                    "positive_label should only be set for binary classification targets. Otherwise, positive_label should be None.",
                )
            if positive_label not in mapping:
                raise ValueError(
                    f"positive_label was set to `{positive_label}` but was not found in the input target data.",
                )
            # Binary override: the positive label becomes 1, the other label 0.
            mapping = {val: int(val == positive_label) for val in mapping}

        # Commit fitted state only after validation has passed, so a failed
        # (re-)fit never leaves `mapping` and `inverse_mapping` out of sync.
        self.original_typing = str(y_ww.ww.logical_type)
        self.mapping = mapping
        self.inverse_mapping = {i: val for val, i in mapping.items()}
        return self

    def transform(self, X, y=None):
        """Transform the target using the fitted label encoder.

        Args:
            X (pd.DataFrame): The input training data of shape [n_samples, n_features]. Ignored.
            y (pd.Series): The target training data of length [n_samples].

        Returns:
            pd.DataFrame, pd.Series: The original features and an encoded version of the target.
                If `y` is None, `X` and `y` are returned unchanged.

        Raises:
            ValueError: If `y` contains labels that are not in the fitted mapping.
        """
        if y is None:
            return X, y
        y_ww = infer_feature_types(y)
        unseen_labels = set(y_ww.unique()).difference(self.mapping.keys())
        if unseen_labels:
            raise ValueError(
                f"y contains previously unseen labels: {unseen_labels}",
            )
        y_t = y_ww.map(self.mapping)
        return X, ww.init_series(y_t, logical_type="integer")

    def fit_transform(self, X, y):
        """Fit and transform data using the label encoder.

        Args:
            X (pd.DataFrame): The input training data of shape [n_samples, n_features].
            y (pd.Series): The target training data of length [n_samples].

        Returns:
            pd.DataFrame, pd.Series: The original features and an encoded version of the target.

        Raises:
            ValueError: If input `y` is None, or if `positive_label` fails the checks done in `fit`.
        """
        return self.fit(X, y).transform(X, y)

    def inverse_transform(self, y):
        """Decodes the target data.

        Args:
            y (pd.Series): Target data.

        Returns:
            pd.Series: The decoded version of the target.

        Raises:
            ValueError: If input `y` is None.
        """
        if y is None:
            raise ValueError("y cannot be None!")
        y_ww = infer_feature_types(y)
        # Map encoded values back to labels, then re-apply the logical type recorded during fit.
        y_it = infer_feature_types(y_ww.map(self.inverse_mapping), self.original_typing)
        return y_it