Source code for evalml.problem_types.utils

import pandas as pd
from pandas.api.types import is_numeric_dtype

from .problem_types import ProblemTypes


def handle_problem_types(problem_type):
    """Resolve ``problem_type`` to a ``ProblemTypes`` value.

    A ``ProblemTypes`` value is handed back unchanged. A string is
    upper-cased and looked up in ``ProblemTypes._all_values``.

    Arguments:
        problem_type (str or ProblemTypes): Problem type that needs to be handled

    Returns:
        ProblemTypes

    Raises:
        KeyError: If ``problem_type`` is a str whose upper-cased form is not a
            key of ``ProblemTypes._all_values``.
        ValueError: If ``problem_type`` is neither a str nor a ``ProblemTypes``.
    """
    # Deal with the non-string inputs first so the lookup below only ever
    # sees a str.
    if not isinstance(problem_type, str):
        if isinstance(problem_type, ProblemTypes):
            return problem_type
        raise ValueError(
            "`handle_problem_types` was not passed a str or ProblemTypes object"
        )

    try:
        return ProblemTypes._all_values[problem_type.upper()]
    except KeyError:
        # Re-raise with a message that echoes the caller's original spelling.
        raise KeyError("Problem type '{}' does not exist".format(problem_type))
def detect_problem_type(y):
    """Determine the type of problem being solved based on the targets.

    Missing and null values are dropped before inspection. The decision is
    made from the number of distinct remaining values:

    * fewer than 2 -> ``ValueError``
    * exactly 2 -> binary classification
    * more than 10 and a numeric dtype -> regression
    * anything else -> multiclass classification

    Arguments:
        y (pd.Series): the target labels to predict

    Returns:
        ProblemType: ProblemType Enum

    Raises:
        ValueError: If fewer than 2 distinct non-null values are present.

    Example:
        >>> y = pd.Series([0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1])
        >>> problem_type = detect_problem_type(y)
        >>> assert problem_type == ProblemTypes.BINARY
    """
    target = pd.Series(y).dropna()
    distinct_count = target.nunique()

    if distinct_count < 2:
        raise ValueError("Less than 2 classes detected! Target unusable for modeling")
    if distinct_count == 2:
        return ProblemTypes.BINARY

    # Numeric targets with many distinct values are treated as continuous;
    # everything else (including non-numeric labels) is multiclass.
    looks_continuous = is_numeric_dtype(target.dtype) and distinct_count > 10
    return ProblemTypes.REGRESSION if looks_continuous else ProblemTypes.MULTICLASS
def is_regression(problem_type):
    """Determines if the provided problem_type is a regression problem type

    Arguments:
        problem_type (str or ProblemTypes): type of supervised learning problem.
            See evalml.problem_types.ProblemType.all_problem_types for a full list.

    Returns:
        bool: Whether or not the provided problem_type is a regression problem type.
    """
    # Normalize first so both str and ProblemTypes inputs compare the same way.
    resolved = handle_problem_types(problem_type)
    regression_types = (
        ProblemTypes.REGRESSION,
        ProblemTypes.TIME_SERIES_REGRESSION,
    )
    return resolved in regression_types
def is_binary(problem_type):
    """Determines if the provided problem_type is a binary classification problem type

    Arguments:
        problem_type (str or ProblemTypes): type of supervised learning problem.
            See evalml.problem_types.ProblemType.all_problem_types for a full list.

    Returns:
        bool: Whether or not the provided problem_type is a binary classification
            problem type.
    """
    # Normalize first so both str and ProblemTypes inputs compare the same way.
    resolved = handle_problem_types(problem_type)
    binary_types = (
        ProblemTypes.BINARY,
        ProblemTypes.TIME_SERIES_BINARY,
    )
    return resolved in binary_types
def is_multiclass(problem_type):
    """Determines if the provided problem_type is a multiclass classification problem type

    Arguments:
        problem_type (str or ProblemTypes): type of supervised learning problem.
            See evalml.problem_types.ProblemType.all_problem_types for a full list.

    Returns:
        bool: Whether or not the provided problem_type is a multiclass
            classification problem type.
    """
    # Normalize first so both str and ProblemTypes inputs compare the same way.
    resolved = handle_problem_types(problem_type)
    multiclass_types = (
        ProblemTypes.MULTICLASS,
        ProblemTypes.TIME_SERIES_MULTICLASS,
    )
    return resolved in multiclass_types
def is_classification(problem_type):
    """Determines if the provided problem_type is a classification problem type

    A problem type counts as classification when it is either a binary or a
    multiclass problem type.

    Arguments:
        problem_type (str or ProblemTypes): type of supervised learning problem.
            See evalml.problem_types.ProblemType.all_problem_types for a full list.

    Returns:
        bool: Whether or not the provided problem_type is a classification
            problem type.
    """
    # Binary is checked first; multiclass is only consulted when that fails.
    if is_binary(problem_type):
        return True
    return is_multiclass(problem_type)
def is_time_series(problem_type):
    """Determines if the provided problem_type is a time series problem type

    Arguments:
        problem_type (str or ProblemTypes): type of supervised learning problem.
            See evalml.problem_types.ProblemType.all_problem_types for a full list.

    Returns:
        bool: Whether or not the provided problem_type is a time series problem type.
    """
    # Normalize first so both str and ProblemTypes inputs compare the same way.
    resolved = handle_problem_types(problem_type)
    time_series_types = (
        ProblemTypes.TIME_SERIES_BINARY,
        ProblemTypes.TIME_SERIES_MULTICLASS,
        ProblemTypes.TIME_SERIES_REGRESSION,
    )
    return resolved in time_series_types