# Source code for evalml.objectives.binary_classification_objective
"""Base class for all binary classification objectives."""
import numpy as np
from scipy.optimize import differential_evolution

from evalml.objectives.objective_base import ObjectiveBase
from evalml.problem_types import ProblemTypes
class BinaryClassificationObjective(ObjectiveBase):
    """Base class for all binary classification objectives."""

    problem_types = [ProblemTypes.BINARY, ProblemTypes.TIME_SERIES_BINARY]
    """[ProblemTypes.BINARY, ProblemTypes.TIME_SERIES_BINARY]"""

    @property
    def can_optimize_threshold(self):
        """Returns a boolean determining if we can optimize the binary classification objective threshold.

        This will be false for any objective that works directly with predicted
        probabilities, like log loss and AUC. Otherwise, it will be true.

        Returns:
            bool: Whether or not an objective can be optimized.
        """
        return not self.score_needs_proba

    def optimize_threshold(self, ypred_proba, y_true, X=None):
        """Learn a binary classification threshold which optimizes the current objective.

        Args:
            ypred_proba (pd.Series): The classifier's predicted probabilities
            y_true (pd.Series): The ground truth for the predictions.
            X (pd.DataFrame, optional): Any extra columns that are needed from training data.

        Returns:
            Optimal threshold for this objective.

        Raises:
            RuntimeError: If objective cannot be optimized.
        """
        # Fail fast: reject non-optimizable objectives before doing any input
        # conversion work.
        if not self.can_optimize_threshold:
            raise RuntimeError("Trying to optimize objective that can't be optimized!")

        ypred_proba = self._standardize_input_type(ypred_proba)
        y_true = self._standardize_input_type(y_true)
        if X is not None:
            X = self._standardize_input_type(X)

        def cost(threshold):
            # The optimizer hands over a parameter vector; with a single
            # (0, 1) bound the candidate threshold is its first element.
            y_predicted = self.decision_function(
                ypred_proba=ypred_proba,
                threshold=threshold[0],
                X=X,
            )
            score = self.objective_function(y_true, y_predicted, X=X)
            # differential_evolution minimizes, so negate scores where larger
            # values are better.
            return -score if self.greater_is_better else score

        # Fixed seed keeps the search reproducible between calls.
        optimal = differential_evolution(cost, bounds=[(0, 1)], seed=0, maxiter=250)
        return optimal.x[0]

    def decision_function(self, ypred_proba, threshold=0.5, X=None):
        """Apply a learned threshold to predicted probabilities to get predicted classes.

        Args:
            ypred_proba (pd.Series, np.ndarray): The classifier's predicted probabilities
            threshold (float, optional): Threshold used to make a prediction. Defaults to 0.5.
            X (pd.DataFrame, optional): Any extra columns that are needed from training data.
                Not used by this base implementation.

        Returns:
            predictions: Element-wise result of ``ypred_proba > threshold`` (strictly greater).
        """
        ypred_proba = self._standardize_input_type(ypred_proba)
        return ypred_proba > threshold

    def validate_inputs(self, y_true, y_predicted):
        """Validate inputs for scoring.

        Runs the parent class validation first, then checks that the inputs are binary.

        Args:
            y_true: The ground truth labels.
            y_predicted: The predictions (or predicted probabilities) to score.

        Raises:
            ValueError: If ``y_true`` contains more than two unique values, or if
                ``y_predicted`` does and this objective does not score on probabilities.
        """
        super().validate_inputs(y_true, y_predicted)
        if len(np.unique(y_true)) > 2:
            raise ValueError("y_true contains more than two unique values")
        # Check the cheap flag first so probability-based objectives skip the
        # unique-value computation over y_predicted entirely.
        if not self.score_needs_proba and len(np.unique(y_predicted)) > 2:
            raise ValueError("y_predicted contains more than two unique values")