# Source code for evalml.objectives.binary_classification_objective

import pandas as pd
from scipy.optimize import minimize_scalar

from .objective_base import ObjectiveBase

from evalml.problem_types import ProblemTypes


class BinaryClassificationObjective(ObjectiveBase):
    """Base class for all binary classification objectives.

    Attributes:
        problem_type (ProblemTypes): Specifies the type of problem this objective is defined for (binary classification).
        can_optimize_threshold (bool): Determines if threshold used by objective can be optimized or not.
    """
    problem_type = ProblemTypes.BINARY

    @property
    def can_optimize_threshold(self):
        """Returns a boolean determining if we can optimize the binary classification objective threshold.

        This will be false for any objective that works directly with predicted probabilities,
        like log loss and AUC. Otherwise, it will be true.

        Returns:
            bool: The negation of ``score_needs_proba`` (not defined in this class; presumably
            supplied by ObjectiveBase or the concrete objective).
        """
        return not self.score_needs_proba

    def optimize_threshold(self, ypred_proba, y_true, X=None):
        """Learn a binary classification threshold which optimizes the current objective.

        The search is restricted to the interval [0, 1], since the threshold is compared
        against predicted probabilities.

        Arguments:
            ypred_proba (list): The classifier's predicted probabilities

            y_true (list): The ground truth for the predictions.

            X (pd.DataFrame, optional): Any extra columns that are needed from training data.

        Returns:
            Optimal threshold for this objective, a value in [0, 1]

        Raises:
            RuntimeError: If ``can_optimize_threshold`` is False for this objective.
        """
        if not self.can_optimize_threshold:
            raise RuntimeError("Trying to optimize objective that can't be optimized!")

        def cost(threshold):
            y_predicted = self.decision_function(ypred_proba=ypred_proba, threshold=threshold, X=X)
            score = self.objective_function(y_true, y_predicted, X=X)
            # minimize_scalar always minimizes, so flip the sign when a larger score is better.
            return -score if self.greater_is_better else score

        # A bounded search keeps the threshold inside the valid probability range;
        # an unconstrained search may wander outside [0, 1].
        optimal = minimize_scalar(cost, bounds=(0, 1), method='bounded', options={"maxiter": 100})
        return optimal.x

    def decision_function(self, ypred_proba, threshold=0.5, X=None):
        """Apply a learned threshold to predicted probabilities to get predicted classes.

        Arguments:
            ypred_proba (list): The classifier's predicted probabilities

            threshold (float, optional): Threshold used to make a prediction. Defaults to 0.5.

            X (pd.DataFrame, optional): Any extra columns that are needed from training data.
                Not used by this base implementation.

        Returns:
            pd.Series: Boolean predictions, True where the predicted probability is strictly
            greater than the threshold.
        """
        if not isinstance(ypred_proba, pd.Series):
            ypred_proba = pd.Series(ypred_proba)
        return ypred_proba > threshold