Source code for evalml.pipelines.components.estimators.regressors.baseline_regressor
"""Baseline regressor that uses a simple strategy to make predictions. This is useful as a simple baseline regressor to compare with other regressors."""
import numpy as np
import pandas as pd
from evalml.model_family import ModelFamily
from evalml.pipelines.components.estimators import Estimator
from evalml.problem_types import ProblemTypes
from evalml.utils import infer_feature_types
class BaselineRegressor(Estimator):
    """Baseline regressor that uses a simple strategy to make predictions.

    This is useful as a simple baseline regressor to compare with other regressors.

    Args:
        strategy (str): Method used to predict. Valid options are "mean", "median". Defaults to "mean".
        random_seed (int): Seed for the random number generator. Defaults to 0.
        **kwargs: Extra entries merged into the component's ``parameters`` dict.

    Raises:
        ValueError: If ``strategy`` is neither "mean" nor "median".
    """

    name = "Baseline Regressor"
    hyperparameter_ranges = {}
    """{}"""
    model_family = ModelFamily.BASELINE
    """ModelFamily.BASELINE"""
    supported_problem_types = [
        ProblemTypes.REGRESSION,
        ProblemTypes.TIME_SERIES_REGRESSION,
    ]
    """[
        ProblemTypes.REGRESSION,
        ProblemTypes.TIME_SERIES_REGRESSION,
    ]"""

    def __init__(self, strategy="mean", random_seed=0, **kwargs):
        # Reject unknown strategies up front so fit() only ever has to
        # handle the two supported branches.
        if strategy not in ["mean", "median"]:
            raise ValueError(
                "'strategy' parameter must equal either 'mean' or 'median'",
            )
        parameters = {"strategy": strategy}
        parameters.update(kwargs)
        # Learned state; both stay None until fit() has been called.
        self._prediction_value = None
        self._num_features = None
        super().__init__(
            parameters=parameters,
            component_obj=None,
            random_seed=random_seed,
        )

    def fit(self, X, y=None):
        """Fits baseline regression component to data.

        Stores the mean or median of ``y`` (according to the ``strategy``
        parameter) as the constant prediction, and records the number of
        columns in ``X``.

        Args:
            X (pd.DataFrame): The input training data of shape [n_samples, n_features].
            y (pd.Series): The target training data of length [n_samples].

        Returns:
            self

        Raises:
            ValueError: If input y is None.
        """
        if y is None:
            raise ValueError("Cannot fit Baseline regressor if y is None")
        X = infer_feature_types(X)
        y = infer_feature_types(y)

        strategy = self.parameters["strategy"]
        if strategy == "mean":
            self._prediction_value = y.mean()
        elif strategy == "median":
            self._prediction_value = y.median()
        # X contributes nothing to the prediction; only its column count is
        # kept so feature_importance can return an array of matching length.
        self._num_features = X.shape[1]
        return self

    def predict(self, X):
        """Make predictions using the baseline regression strategy.

        Every row receives the same value: the statistic stored by ``fit``.

        Args:
            X (pd.DataFrame): Data of shape [n_samples, n_features].

        Returns:
            pd.Series: Predicted values.
        """
        X = infer_feature_types(X)
        # Only the number of rows of X is used. The Series is built from a
        # plain list, so it is constructed with a default integer index
        # rather than X's index.
        predictions = pd.Series([self._prediction_value] * len(X))
        return infer_feature_types(predictions)

    @property
    def feature_importance(self):
        """Returns importance associated with each feature.

        Since baseline regressors do not use input features to calculate
        predictions, returns an array of zeroes.

        Returns:
            np.ndarray (float): An array of zeroes, one per column seen in ``fit``.
        """
        return np.zeros(self._num_features)