Source code for evalml.problem_types.utils

"""Utility methods for the ProblemTypes enum in EvalML."""
import pandas as pd
from pandas.api.types import is_numeric_dtype

from .problem_types import ProblemTypes


def handle_problem_types(problem_type):
    """Normalize a problem type given as a string or a ProblemTypes member.

    Args:
        problem_type (str or ProblemTypes): Value to normalize. Strings are
            upper-cased before being looked up, so the match is
            case-insensitive.

    Returns:
        ProblemTypes: The matching enum member. A ProblemTypes input is
        returned unchanged.

    Raises:
        KeyError: If a string is given that does not name a known problem type.
        ValueError: If the input is neither a str nor a ProblemTypes object.
    """
    if isinstance(problem_type, str):
        lookup_key = problem_type.upper()
        try:
            return ProblemTypes._all_values[lookup_key]
        except KeyError:
            # Re-raise with a message that echoes the caller's original spelling.
            raise KeyError("Problem type '{}' does not exist".format(problem_type))

    if isinstance(problem_type, ProblemTypes):
        return problem_type

    raise ValueError(
        "`handle_problem_types` was not passed a str or ProblemTypes object"
    )
def detect_problem_type(y):
    """Infer the problem type from the target values.

    Missing and null entries are dropped first. The decision is then based on
    the number of distinct remaining values:

    * exactly two distinct values -> binary classification;
    * more than ten distinct values with a numeric dtype -> regression;
    * anything else with at least three distinct values -> multiclass
      classification.

    Args:
        y (pd.Series): The target labels to predict.

    Returns:
        ProblemTypes: The detected problem type.

    Raises:
        ValueError: If the input has fewer than two distinct non-null values.

    Example:
        >>> y = pd.Series([0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1])
        >>> problem_type = detect_problem_type(y)
        >>> assert problem_type == ProblemTypes.BINARY
    """
    target = pd.Series(y).dropna()
    distinct_count = target.nunique()

    if distinct_count < 2:
        raise ValueError("Less than 2 classes detected! Target unusable for modeling")
    if distinct_count == 2:
        return ProblemTypes.BINARY
    # Only numeric targets with many distinct values are treated as regression;
    # non-numeric targets stay multiclass however many values they have.
    if distinct_count > 10 and is_numeric_dtype(target.dtype):
        return ProblemTypes.REGRESSION
    return ProblemTypes.MULTICLASS
def is_regression(problem_type):
    """Report whether the given problem type is a regression problem type.

    Args:
        problem_type (str or ProblemTypes): Type of supervised learning problem.
            It is normalized with handle_problem_types before the check.

    Returns:
        bool: True for the regression and time series regression problem
        types, False otherwise.
    """
    resolved = handle_problem_types(problem_type)
    regression_types = (
        ProblemTypes.REGRESSION,
        ProblemTypes.TIME_SERIES_REGRESSION,
    )
    return resolved in regression_types
def is_binary(problem_type):
    """Report whether the given problem type is a binary classification type.

    Args:
        problem_type (str or ProblemTypes): Type of supervised learning problem.
            It is normalized with handle_problem_types before the check.

    Returns:
        bool: True for the binary and time series binary problem types,
        False otherwise.
    """
    resolved = handle_problem_types(problem_type)
    binary_types = (
        ProblemTypes.BINARY,
        ProblemTypes.TIME_SERIES_BINARY,
    )
    return resolved in binary_types
def is_multiclass(problem_type):
    """Report whether the given problem type is a multiclass classification type.

    Args:
        problem_type (str or ProblemTypes): Type of supervised learning problem.
            It is normalized with handle_problem_types before the check.

    Returns:
        bool: True for the multiclass and time series multiclass problem
        types, False otherwise.
    """
    resolved = handle_problem_types(problem_type)
    multiclass_types = (
        ProblemTypes.MULTICLASS,
        ProblemTypes.TIME_SERIES_MULTICLASS,
    )
    return resolved in multiclass_types
def is_classification(problem_type):
    """Report whether the given problem type is a classification problem type.

    A problem type counts as classification when either is_binary or
    is_multiclass accepts it.

    Args:
        problem_type (str or ProblemTypes): Type of supervised learning problem.

    Returns:
        bool: True if the problem type is a binary or multiclass
        classification type, False otherwise.
    """
    if is_binary(problem_type):
        return True
    return is_multiclass(problem_type)
def is_time_series(problem_type):
    """Report whether the given problem type is a time series problem type.

    Args:
        problem_type (str or ProblemTypes): Type of supervised learning problem.
            It is normalized with handle_problem_types before the check.

    Returns:
        bool: True for the time series binary, time series multiclass and
        time series regression problem types, False otherwise.
    """
    resolved = handle_problem_types(problem_type)
    time_series_types = (
        ProblemTypes.TIME_SERIES_BINARY,
        ProblemTypes.TIME_SERIES_MULTICLASS,
        ProblemTypes.TIME_SERIES_REGRESSION,
    )
    return resolved in time_series_types