Source code for evalml.pipelines.components.transformers.scalers.standard_scaler

"""A transformer that standardizes input features by removing the mean and scaling to unit variance."""
from sklearn.preprocessing import StandardScaler as SkScaler

from evalml.pipelines.components.transformers import Transformer
from evalml.utils import infer_feature_types


[docs]class StandardScaler(Transformer): """A transformer that standardizes input features by removing the mean and scaling to unit variance. Args: random_seed (int): Seed for the random number generator. Defaults to 0. """ name = "Standard Scaler" hyperparameter_ranges = {} """{}""" def __init__(self, random_seed=0, **kwargs): parameters = {} parameters.update(kwargs) self._supported_types = [ "Age", "AgeNullable", "Double", "Integer", "IntegerNullable", ] scaler = SkScaler(**parameters) super().__init__( parameters=parameters, component_obj=scaler, random_seed=random_seed, )
[docs] def fit(self, X, y=None): """Fits the standard scalar on the given data. Args: X (pd.DataFrame): The input training data of shape [n_samples, n_features]. y (pd.Series, optional): The target training data of length [n_samples]. Returns: self """ X = infer_feature_types(X) X_scalable = X.ww.select(self._supported_types) self.scaled_columns = list(X_scalable.columns) if X_scalable.empty: return self self._component_obj.fit(X_scalable) return self
[docs] def transform(self, X, y=None): """Transform data using the fitted standard scaler. Args: X (pd.DataFrame): The input training data of shape [n_samples, n_features]. y (pd.Series, optional): The target training data of length [n_samples]. Returns: pd.DataFrame: Transformed data. """ X = infer_feature_types(X) X = X.ww.select(exclude=["datetime"]) if not self.scaled_columns: return X X_scaled_columns = X.ww[self.scaled_columns] scaled = self._component_obj.transform(X_scaled_columns) X[self.scaled_columns] = scaled X.ww.set_types(logical_types={col: "Double" for col in self.scaled_columns}) return X
[docs] def fit_transform(self, X, y=None): """Fit and transform data using the standard scaler component. Args: X (pd.DataFrame): The input training data of shape [n_samples, n_features]. y (pd.Series, optional): The target training data of length [n_samples]. Returns: pd.DataFrame: Transformed data. """ X = infer_feature_types(X) return self.fit(X, y).transform(X, y)