import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as SkLDA
from evalml.pipelines.components.transformers import Transformer
from evalml.utils import (
_retain_custom_types_and_initalize_woodwork,
infer_feature_types,
is_all_numeric,
)
[docs]class LinearDiscriminantAnalysis(Transformer):
"""
Reduces the number of features by using Linear Discriminant Analysis.
Arguments:
n_components (int): The number of features to maintain after computation. Defaults to None.
random_seed (int): Seed for the random number generator. Defaults to 0.
"""
name = "Linear Discriminant Analysis Transformer"
hyperparameter_ranges = {}
"""{}"""
def __init__(self, n_components=None, random_seed=0, **kwargs):
    """Create the transformer and the scikit-learn LDA estimator it wraps.

    Arguments:
        n_components (int): The number of features to maintain after computation.
            Must be at least 1 when provided. Defaults to None.
        random_seed (int): Seed for the random number generator. Defaults to 0.
        **kwargs: Additional keyword arguments; they are recorded in the
            component parameters and forwarded to the scikit-learn estimator.

    Raises:
        ValueError: If n_components is provided and is less than 1.
    """
    # Compare against None explicitly: a bare truthiness test treats
    # n_components=0 as "not provided" and lets it skip the lower-bound check.
    if n_components is not None and n_components < 1:
        raise ValueError(
            "Invalid number of compponents for Linear Discriminant Analysis"
        )
    parameters = {"n_components": n_components}
    parameters.update(kwargs)
    lda = SkLDA(n_components=n_components, **kwargs)
    super().__init__(
        parameters=parameters, component_obj=lda, random_seed=random_seed
    )
def fit(self, X, y):
    """Fit the wrapped LDA estimator on the given features and target.

    Arguments:
        X: Input features. Every column must be numeric.
        y: Target values; the number of distinct values is used as the
            class count when validating n_components.

    Returns:
        self

    Raises:
        ValueError: If X is not all numeric, or if n_components is set and
            exceeds min(n_classes - 1, n_features).
    """
    X = infer_feature_types(X)
    if not is_all_numeric(X):
        raise ValueError("LDA input must be all numeric")
    y = infer_feature_types(y)
    n_features = X.shape[1]
    n_classes = y.nunique()
    n_components = self.parameters["n_components"]
    # scikit-learn's LDA accepts at most min(n_classes - 1, n_features)
    # components, so the bound uses n_classes - 1 rather than n_classes.
    max_components = min(n_classes - 1, n_features)
    if n_components is not None and n_components > max_components:
        raise ValueError(f"n_components value {n_components} is too large")
    self._component_obj.fit(X, y)
    return self