Source code for evalml.pipelines.components.estimators.regressors.time_series_baseline_estimator

"""Time series estimator that predicts using the naive forecasting approach."""
import numpy as np

from evalml.model_family import ModelFamily
from evalml.pipelines.components.estimators import Estimator
from evalml.pipelines.components.transformers import TimeSeriesFeaturizer
from evalml.problem_types import ProblemTypes
from evalml.utils import infer_feature_types


class TimeSeriesBaselineEstimator(Estimator):
    """Time series estimator that predicts using the naive forecasting approach.

    This is useful as a simple baseline estimator for time series problems.
    Predictions are read directly from the lagged-target column that a
    Time Series Featurizer adds to ``X``; no model is trained.

    Args:
        gap (int): Gap between prediction date and target date. Must be
            non-negative; a negative value raises ``ValueError``. If gap is 0,
            target date will be shifted ahead by 1 time period. Defaults to 1.
        forecast_horizon (int): Number of time steps the model is expected to
            predict. Defaults to 1.
        random_seed (int): Seed for the random number generator. Defaults to 0.

    Raises:
        ValueError: If ``gap`` is negative.
    """

    name = "Time Series Baseline Estimator"
    hyperparameter_ranges = {}
    """{}"""
    model_family = ModelFamily.BASELINE
    """ModelFamily.BASELINE"""
    supported_problem_types = [
        ProblemTypes.TIME_SERIES_REGRESSION,
        ProblemTypes.TIME_SERIES_BINARY,
        ProblemTypes.TIME_SERIES_MULTICLASS,
    ]
    """[
        ProblemTypes.TIME_SERIES_REGRESSION,
        ProblemTypes.TIME_SERIES_BINARY,
        ProblemTypes.TIME_SERIES_MULTICLASS,
    ]"""

    def __init__(self, gap=1, forecast_horizon=1, random_seed=0, **kwargs):
        # Validate before touching any instance state. Note that 0 is accepted
        # even though the (unchanged) message says "positive".
        if gap < 0:
            raise ValueError(
                f"gap value must be a positive integer. {gap} was provided."
            )

        self._prediction_value = None
        # Lag, in time steps, of the target column that predict() reads.
        self.start_delay = forecast_horizon + gap
        self._classes = None
        # Both of these are populated by predict() and consumed by
        # feature_importance.
        self._num_features = None
        self._delay_index = None

        parameters = {"gap": gap, "forecast_horizon": forecast_horizon}
        parameters.update(kwargs)
        super().__init__(
            parameters=parameters, component_obj=None, random_seed=random_seed
        )

    def fit(self, X, y=None):
        """Fits time series baseline estimator to data.

        Only the distinct target values are recorded; they determine the number
        of columns returned by ``predict_proba``.

        Args:
            X (pd.DataFrame): The input training data of shape [n_samples, n_features].
            y (pd.Series): The target training data of length [n_samples].

        Returns:
            self

        Raises:
            ValueError: If input y is None.
        """
        # The converted frame is not used; the call is kept so that any error
        # infer_feature_types raises for X still surfaces during fit.
        X = infer_feature_types(X)
        if y is None:
            raise ValueError("Cannot fit Time Series Baseline Classifier if y is None")
        # np.unique returns the sorted distinct values; the counts were never used.
        self._classes = list(np.unique(y))
        return self

    def predict(self, X):
        """Make predictions using fitted time series baseline estimator.

        The prediction is the column of ``X`` whose name is
        ``TimeSeriesFeaturizer.target_colname_prefix`` formatted with
        ``start_delay``. The number of columns in ``X`` and the position of
        that column are remembered for ``feature_importance``.

        Args:
            X (pd.DataFrame): Data of shape [n_samples, n_features].

        Returns:
            pd.Series: Predicted values.

        Raises:
            ValueError: If the expected lagged-target column is not present in X.
        """
        X = infer_feature_types(X)
        feature_name = TimeSeriesFeaturizer.target_colname_prefix.format(
            self.start_delay
        )
        if feature_name not in X.columns:
            raise ValueError(
                "Time Series Baseline Estimator is meant to be used in a pipeline with "
                "a Time Series Featurizer"
            )
        self._num_features = X.shape[1]
        self._delay_index = X.columns.tolist().index(feature_name)
        return X.ww[feature_name]

    def predict_proba(self, X):
        """Make prediction probabilities using fitted time series baseline estimator.

        Each row receives probability 1 in the column selected by its predicted
        value and 0 in every other column.

        Args:
            X (pd.DataFrame): Data of shape [n_samples, n_features].

        Returns:
            pd.DataFrame: Predicted probability values, with one column per
            distinct target value seen during ``fit``.

        Raises:
            ValueError: If the expected lagged-target column is not present in X.
        """
        preds = self.predict(X).astype("int")
        proba_arr = np.zeros((len(preds), len(self._classes)))
        # The integer predictions are used directly as column positions.
        # NOTE(review): this assumes the target is encoded as 0..n_classes-1
        # before reaching this estimator - confirm against the calling pipeline.
        proba_arr[np.arange(len(preds)), preds] = 1
        return infer_feature_types(proba_arr)

    @property
    def feature_importance(self):
        """Returns importance associated with each feature.

        The lagged-target column used by ``predict`` gets an importance of 1;
        every other feature gets 0, since no other input is used to compute
        predictions.

        Returns:
            np.ndarray (int): Array with one entry per column of the data last
            passed to ``predict``.

        Raises:
            ValueError: If ``predict`` has not been called yet, so the feature
                layout is unknown.
        """
        if self._num_features is None or self._delay_index is None:
            # Without this guard the failure is an opaque
            # "can't multiply sequence by non-int of type 'NoneType'".
            raise ValueError(
                "feature_importance is only available after predict has been called"
            )
        importance = np.zeros(self._num_features, dtype=int)
        importance[self._delay_index] = 1
        return importance