Source code for evalml.pipelines.components.transformers.preprocessing.datetime_featurizer

from evalml.pipelines.components.transformers import Transformer
from evalml.utils.gen_utils import (
    _convert_to_woodwork_structure,
    _convert_woodwork_types_wrapper,
    datetime_dtypes
)


def _extract_year(col):
    """Return the year component of every entry in a datetime column.

    Arguments:
        col: Datetime-like column exposing the ``.dt`` accessor.

    Returns:
        The ``.dt.year`` values of ``col``.
    """
    datetime_accessor = col.dt
    return datetime_accessor.year


def _extract_month(col):
    """Return the month name of every entry in a datetime column.

    Arguments:
        col: Datetime-like column exposing the ``.dt`` accessor.

    Returns:
        The result of ``.dt.month_name()`` cast to the ``category`` dtype.
    """
    month_names = col.dt.month_name()
    return month_names.astype('category')


def _extract_day_of_week(col):
    """Return the weekday name of every entry in a datetime column.

    Arguments:
        col: Datetime-like column exposing the ``.dt`` accessor.

    Returns:
        The result of ``.dt.day_name()`` cast to the ``category`` dtype.
    """
    weekday_names = col.dt.day_name()
    return weekday_names.astype('category')


def _extract_hour(col):
    """Return the hour component of every entry in a datetime column.

    Arguments:
        col: Datetime-like column exposing the ``.dt`` accessor.

    Returns:
        The ``.dt.hour`` values of ``col``.
    """
    datetime_accessor = col.dt
    return datetime_accessor.hour


class DateTimeFeaturizer(Transformer):
    """Transformer that can automatically featurize DateTime columns."""
    name = "DateTime Featurization Component"
    hyperparameter_ranges = {}
    # Maps every supported feature name to the helper that derives it from a
    # single DateTime column.
    _function_mappings = {"year": _extract_year,
                          "month": _extract_month,
                          "day_of_week": _extract_day_of_week,
                          "hour": _extract_hour}

    def __init__(self, features_to_extract=None, random_state=0, **kwargs):
        """Extracts features from DateTime columns

        Arguments:
            features_to_extract (list): List of features to extract. Valid options include "year", "month", "day_of_week", "hour".
                Defaults to all four when None.
            random_state (int, np.random.RandomState): Seed for the random number generator.
            **kwargs: Extra entries merged into the component's parameters dict.

        Raises:
            ValueError: If features_to_extract contains a name that is not a key of _function_mappings.
        """
        if features_to_extract is None:
            features_to_extract = ["year", "month", "day_of_week", "hour"]
        invalid_features = set(features_to_extract) - set(self._function_mappings)
        if invalid_features:
            # Set iteration order is arbitrary (and varies between runs for
            # strings), so sort to keep the error message reproducible.
            # key=str keeps the sort safe if the entries are of mixed types.
            quoted = [f"'{feature}'" for feature in sorted(invalid_features, key=str)]
            raise ValueError("{} are not valid options for features_to_extract".format(", ".join(quoted)))

        parameters = {"features_to_extract": features_to_extract}
        parameters.update(kwargs)
        # Populated by fit(); holds the names of the DateTime columns seen there.
        self._date_time_col_names = None
        super().__init__(parameters=parameters,
                         component_obj=None,
                         random_state=random_state)

    def fit(self, X, y=None):
        """Records which columns of X have a DateTime dtype.

        Arguments:
            X (pd.DataFrame): Data to inspect
            y (pd.Series, optional): Ignored.

        Returns:
            self
        """
        X = _convert_to_woodwork_structure(X)
        X = _convert_woodwork_types_wrapper(X.to_dataframe())
        self._date_time_col_names = X.select_dtypes(include=datetime_dtypes).columns
        return self

    def transform(self, X, y=None):
        """Transforms data X by creating new features using existing DateTime columns, and then dropping those DateTime columns

        For every DateTime column recorded by fit() and every requested feature,
        a new column named "<column>_<feature>" is added. If features_to_extract
        is empty, the converted data is returned as-is and the DateTime columns
        are NOT dropped.

        Arguments:
            X (pd.DataFrame): Data to transform
            y (pd.Series, optional): Ignored.

        Returns:
            pd.DataFrame: Transformed X
        """
        X = _convert_to_woodwork_structure(X)
        X_t = _convert_woodwork_types_wrapper(X.to_dataframe())
        features_to_extract = self.parameters["features_to_extract"]
        # len() rather than truthiness so array-like inputs keep working.
        if len(features_to_extract) == 0:
            return X_t
        for col_name in self._date_time_col_names:
            source_col = X_t[col_name]
            for feature in features_to_extract:
                X_t[f"{col_name}_{feature}"] = self._function_mappings[feature](source_col)
        return X_t.drop(self._date_time_col_names, axis=1)