"""Utility methods for the ProblemTypes enum in EvalML."""importpandasaspdfrompandas.api.typesimportis_numeric_dtypefromevalml.problem_types.problem_typesimportProblemTypes
def handle_problem_types(problem_type):
    """Resolve a problem type given as a string or enum member to a ProblemTypes member.

    String input is upper-cased and then looked up in ``ProblemTypes._all_values``,
    so the match does not depend on the caller's capitalization. A value that is
    already a ProblemTypes member is returned unchanged.

    Args:
        problem_type (str or ProblemTypes): Problem type that needs to be handled.

    Returns:
        ProblemTypes: The enum member corresponding to the input.

    Raises:
        KeyError: If a string input is not a valid ProblemTypes enum value.
        ValueError: If input is not a string or ProblemTypes object.

    Examples:
        >>> assert handle_problem_types("regression") == ProblemTypes.REGRESSION
        >>> assert handle_problem_types("TIME SERIES BINARY") == ProblemTypes.TIME_SERIES_BINARY
        >>> assert handle_problem_types("Multiclass") == ProblemTypes.MULTICLASS
    """
    if isinstance(problem_type, str):
        try:
            return ProblemTypes._all_values[problem_type.upper()]
        except KeyError as err:
            # Re-raise with the caller's original spelling in the message, and
            # chain explicitly so the traceback marks the failed lookup as the
            # direct cause rather than as an error raised while handling another.
            raise KeyError(
                f"Problem type '{problem_type}' does not exist",
            ) from err
    if isinstance(problem_type, ProblemTypes):
        return problem_type
    raise ValueError(
        "`handle_problem_types` was not passed a str or ProblemTypes object",
    )
def detect_problem_type(y):
    """Determine the type of problem being solved based on the targets.

    Missing and null values are dropped before the distinct target values are
    counted. The decision is then:

    - fewer than two distinct values: the target is rejected with a ValueError;
    - exactly two distinct values: binary classification;
    - numeric dtype with more than ten distinct values: regression;
    - anything else: multiclass classification.

    Args:
        y (pd.Series): The target labels to predict.

    Returns:
        ProblemTypes: ProblemTypes enum member for the detected problem type.

    Raises:
        ValueError: If the input has less than two classes.

    Examples:
        >>> y = pd.Series([0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1])
        >>> assert detect_problem_type(y) == ProblemTypes.BINARY
        ...
        >>> y = pd.Series([1, 2, 3, 2, 1, 1, 1, 2, 2, 3, 3])
        >>> assert detect_problem_type(y) == ProblemTypes.MULTICLASS
        ...
        >>> y = pd.Series([1.6, 4.2, 3.3, 2.9, 4, 1, 5.5, 2, -2, -3.2, 3])
        >>> assert detect_problem_type(y) == ProblemTypes.REGRESSION
    """
    target = pd.Series(y).dropna()
    distinct_count = target.nunique()

    if distinct_count < 2:
        raise ValueError("Less than 2 classes detected! Target unusable for modeling")
    if distinct_count == 2:
        return ProblemTypes.BINARY

    # A numeric target with many distinct values is treated as continuous;
    # non-numeric targets, or numeric ones with at most ten values, are classes.
    looks_continuous = is_numeric_dtype(target.dtype) and distinct_count > 10
    return ProblemTypes.REGRESSION if looks_continuous else ProblemTypes.MULTICLASS
def is_regression(problem_type):
    """Determine whether the provided problem_type is a regression problem type.

    Args:
        problem_type (str or ProblemTypes): Type of supervised learning problem.
            See evalml.problem_types.ProblemTypes.all_problem_types for a full list.

    Returns:
        bool: Whether or not the provided problem_type is a regression problem type.

    Examples:
        >>> assert is_regression("Regression")
        >>> assert is_regression(ProblemTypes.REGRESSION)
        >>> assert is_regression(ProblemTypes.TIME_SERIES_REGRESSION)
    """
    resolved = handle_problem_types(problem_type)
    regression_types = (
        ProblemTypes.REGRESSION,
        ProblemTypes.TIME_SERIES_REGRESSION,
        ProblemTypes.MULTISERIES_TIME_SERIES_REGRESSION,
    )
    return resolved in regression_types
def is_binary(problem_type):
    """Determine whether the provided problem_type is a binary classification problem type.

    Args:
        problem_type (str or ProblemTypes): Type of supervised learning problem.
            See evalml.problem_types.ProblemTypes.all_problem_types for a full list.

    Returns:
        bool: Whether or not the provided problem_type is a binary classification
            problem type.

    Examples:
        >>> assert is_binary("Binary")
        >>> assert is_binary(ProblemTypes.BINARY)
        >>> assert is_binary(ProblemTypes.TIME_SERIES_BINARY)
    """
    resolved = handle_problem_types(problem_type)
    return (
        resolved == ProblemTypes.BINARY
        or resolved == ProblemTypes.TIME_SERIES_BINARY
    )
def is_multiclass(problem_type):
    """Determine whether the provided problem_type is a multiclass classification problem type.

    Args:
        problem_type (str or ProblemTypes): Type of supervised learning problem.
            See evalml.problem_types.ProblemTypes.all_problem_types for a full list.

    Returns:
        bool: Whether or not the provided problem_type is a multiclass
            classification problem type.

    Examples:
        >>> assert is_multiclass("Multiclass")
        >>> assert is_multiclass(ProblemTypes.MULTICLASS)
        >>> assert is_multiclass(ProblemTypes.TIME_SERIES_MULTICLASS)
    """
    resolved = handle_problem_types(problem_type)
    multiclass_types = (
        ProblemTypes.MULTICLASS,
        ProblemTypes.TIME_SERIES_MULTICLASS,
    )
    return resolved in multiclass_types
def is_classification(problem_type):
    """Determine whether the provided problem_type is a classification problem type.

    A problem type counts as classification when it is either a binary or a
    multiclass problem type.

    Args:
        problem_type (str or ProblemTypes): Type of supervised learning problem.
            See evalml.problem_types.ProblemTypes.all_problem_types for a full list.

    Returns:
        bool: Whether or not the provided problem_type is a classification problem type.

    Examples:
        >>> assert is_classification("Multiclass")
        >>> assert is_classification(ProblemTypes.TIME_SERIES_BINARY)
        >>> assert not is_classification(ProblemTypes.REGRESSION)
    """
    if is_binary(problem_type):
        return True
    return is_multiclass(problem_type)
def is_time_series(problem_type):
    """Determine whether the provided problem_type is a time series problem type.

    Args:
        problem_type (str or ProblemTypes): Type of supervised learning problem.
            See evalml.problem_types.ProblemTypes.all_problem_types for a full list.

    Returns:
        bool: Whether or not the provided problem_type is a time series problem type.

    Examples:
        >>> assert is_time_series("time series regression")
        >>> assert is_time_series(ProblemTypes.TIME_SERIES_BINARY)
        >>> assert not is_time_series(ProblemTypes.REGRESSION)
    """
    resolved = handle_problem_types(problem_type)
    time_series_types = (
        ProblemTypes.TIME_SERIES_BINARY,
        ProblemTypes.TIME_SERIES_MULTICLASS,
        ProblemTypes.TIME_SERIES_REGRESSION,
        ProblemTypes.MULTISERIES_TIME_SERIES_REGRESSION,
    )
    return resolved in time_series_types
def is_multiseries(problem_type):
    """Determine whether the provided problem_type is a multiseries time series problem type.

    Args:
        problem_type (str or ProblemTypes): Type of supervised learning problem.
            See evalml.problem_types.ProblemTypes.all_problem_types for a full list.

    Returns:
        bool: Whether or not the provided problem_type is a multiseries time series
            problem type.

    Examples:
        >>> assert is_multiseries(ProblemTypes.MULTISERIES_TIME_SERIES_REGRESSION)
        >>> assert not is_multiseries(ProblemTypes.TIME_SERIES_REGRESSION)
    """
    resolved = handle_problem_types(problem_type)
    return resolved == ProblemTypes.MULTISERIES_TIME_SERIES_REGRESSION