Source code for evalml.pipelines.components.transformers.dimensionality_reduction.lda

"""Component that reduces the number of features by using Linear Discriminant Analysis."""
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as SkLDA

from evalml.pipelines.components.transformers import Transformer
from evalml.utils import infer_feature_types, is_all_numeric


class LinearDiscriminantAnalysis(Transformer):
    """Reduces the number of features by using Linear Discriminant Analysis.

    Args:
        n_components (int): The number of features to maintain after computation. Defaults to None.
        random_seed (int): Seed for the random number generator. Defaults to 0.

    Raises:
        ValueError: If n_components is given and is smaller than 1.
    """

    name = "Linear Discriminant Analysis Transformer"
    hyperparameter_ranges = {}
    """{}"""

    def __init__(self, n_components=None, random_seed=0, **kwargs):
        # Compare against None explicitly: a plain truthiness test would let
        # n_components=0 slip past the "< 1" guard because 0 is falsy.
        if n_components is not None and n_components < 1:
            raise ValueError(
                "Invalid number of compponents for Linear Discriminant Analysis"
            )
        parameters = {"n_components": n_components}
        parameters.update(kwargs)
        lda = SkLDA(n_components=n_components, **kwargs)
        super().__init__(
            parameters=parameters, component_obj=lda, random_seed=random_seed
        )

    def fit(self, X, y):
        """Fits the LDA component.

        Args:
            X (pd.DataFrame): The input training data of shape [n_samples, n_features].
            y (pd.Series, optional): The target training data of length [n_samples].

        Returns:
            self

        Raises:
            ValueError: If input data is not all numeric, or if n_components is
                larger than min(number of unique target values, number of features).
        """
        X = infer_feature_types(X)
        if not is_all_numeric(X):
            raise ValueError("LDA input must be all numeric")
        y = infer_feature_types(y)

        # Reject an oversized n_components up front, before delegating to the
        # wrapped estimator.
        n_features = X.shape[1]
        n_classes = y.nunique()
        n_components = self.parameters["n_components"]
        if n_components is not None and n_components > min(n_classes, n_features):
            raise ValueError(f"n_components value {n_components} is too large")

        self._component_obj.fit(X, y)
        return self

    def transform(self, X, y=None):
        """Transform data using the fitted LDA component.

        Args:
            X (pd.DataFrame): The input training data of shape [n_samples, n_features].
            y (pd.Series, optional): The target training data of length [n_samples].
                Not used by this method.

        Returns:
            pd.DataFrame: Transformed data, with columns named ``component_<i>``
                and the same index as the input.

        Raises:
            ValueError: If input data is not all numeric.
        """
        X_ww = infer_feature_types(X)
        if not is_all_numeric(X_ww):
            raise ValueError("LDA input must be all numeric")

        # The typed copy is used for validation and for its index; the wrapped
        # estimator receives the caller's original X.
        X_t = self._component_obj.transform(X)
        X_t = pd.DataFrame(
            X_t,
            index=X_ww.index,
            columns=[f"component_{i}" for i in range(X_t.shape[1])],
        )
        X_t.ww.init()
        return X_t

    def fit_transform(self, X, y=None):
        """Fit and transform data using the LDA component.

        Args:
            X (pd.DataFrame): The input training data of shape [n_samples, n_features].
            y (pd.Series, optional): The target training data of length [n_samples].

        Returns:
            pd.DataFrame: Transformed data, with columns named ``component_<i>``
                and the same index as the input.

        Raises:
            ValueError: If input data is not all numeric.
        """
        X_ww = infer_feature_types(X)
        if not is_all_numeric(X_ww):
            raise ValueError("LDA input must be all numeric")
        y = infer_feature_types(y)

        # As in transform, the wrapped estimator receives the caller's original X.
        X_t = self._component_obj.fit_transform(X, y)
        X_t = pd.DataFrame(
            X_t,
            index=X_ww.index,
            columns=[f"component_{i}" for i in range(X_t.shape[1])],
        )
        X_t.ww.init()
        return X_t