# Source code for evalml.pipelines.binary_classification_pipeline_mixin

"""Binary classification pipeline mix-in class."""


class BinaryClassificationPipelineMixin:
    """Binary classification pipeline mix-in class.

    Adds a tunable decision ``threshold`` plus helpers that turn predicted
    probabilities into predictions using that threshold.

    The host class is expected to supply ``predict_proba``, ``_predict``,
    ``_encode_targets`` and ``can_tune_threshold_with_objective``; they are
    called here but not defined in this mix-in.
    """

    # Class-level default; the setter shadows it with an instance attribute.
    _threshold = None

    @property
    def threshold(self):
        """Threshold used to make a prediction. Defaults to None."""
        return self._threshold

    @threshold.setter
    def threshold(self, value):
        self._threshold = value

    def _predict_with_objective(self, X, ypred_proba, objective):
        """Turn predicted probabilities into predictions using the threshold.

        Args:
            X (pd.DataFrame): Input features, forwarded to the objective's
                decision function.
            ypred_proba (pd.DataFrame): Predicted probabilities. Only the
                column at position 1 is used (presumably the positive
                class -- confirm against the caller).
            objective (ObjectiveBase or None): Objective whose
                ``decision_function`` is applied. If None, probabilities are
                compared directly against ``self.threshold``.

        Returns:
            The result of ``ypred_proba > self.threshold`` when ``objective``
            is None, otherwise whatever ``objective.decision_function``
            returns.
        """
        # NOTE(review): callers are expected to have set a threshold; with
        # objective=None and threshold=None the comparison below will fail.
        ypred_proba = ypred_proba.iloc[:, 1]
        if objective is None:
            return ypred_proba > self.threshold
        return objective.decision_function(ypred_proba, threshold=self.threshold, X=X)

    def _compute_predictions(self, X, y, objectives, time_series=False):
        """Compute predictions/probabilities based on objectives.

        Probabilities are computed when any objective needs them or when a
        threshold is set. Plain predictions are computed only when some
        objective does not need probabilities and no threshold is set.

        Args:
            X (pd.DataFrame): Input features.
            y (pd.Series): Input target values. Only passed on to the
                prediction methods when ``time_series`` is True.
            objectives (iterable): Objectives exposing ``score_needs_proba``.
            time_series (bool): If True, ``y`` is passed to ``predict_proba``
                and ``_predict`` is called with ``pad=True``. Defaults to
                False.

        Returns:
            tuple: ``(y_predicted, y_predicted_proba)``; either entry is None
            if it was not needed.
        """
        y_predicted = None
        y_predicted_proba = None
        if any(o.score_needs_proba for o in objectives) or self.threshold is not None:
            y_predicted_proba = (
                self.predict_proba(X, y) if time_series else self.predict_proba(X)
            )
        if any(not o.score_needs_proba for o in objectives) and self.threshold is None:
            y_predicted = (
                self._predict(X, y, pad=True) if time_series else self._predict(X)
            )
        return y_predicted, y_predicted_proba

    def _select_y_pred_for_score(self, X, y, y_pred, y_pred_proba, objective):
        """Pick the predictions that should be scored for ``objective``.

        Args:
            X (pd.DataFrame): Input features.
            y (pd.Series): Input target values (unused here).
            y_pred: Precomputed predictions; may be None.
            y_pred_proba: Precomputed probabilities; may be None.
            objective (ObjectiveBase): Objective about to be scored.

        Returns:
            Predictions derived from ``y_pred_proba`` via the threshold when a
            threshold is set and the objective does not score on
            probabilities; otherwise ``y_pred`` unchanged.
        """
        y_pred_to_use = y_pred
        if self.threshold is not None and not objective.score_needs_proba:
            y_pred_to_use = self._predict_with_objective(X, y_pred_proba, objective)
        return y_pred_to_use

    def optimize_threshold(self, X, y, y_pred_proba, objective):
        """Optimize the pipeline threshold given the objective to use.

        Only used for binary problems with objectives whose thresholds can be
        tuned.

        Args:
            X (pd.DataFrame): Input features.
            y (pd.Series): Input target values.
            y_pred_proba (pd.Series): The predicted probabilities of the
                target outputted by the pipeline.
            objective (ObjectiveBase): The objective to threshold with. Must
                have a tunable threshold.

        Raises:
            ValueError: If objective is not optimizable.
        """
        if not self.can_tune_threshold_with_objective(objective):
            raise ValueError(
                "Problem type must be binary and objective must be optimizable."
            )
        targets = self._encode_targets(y)
        self.threshold = objective.optimize_threshold(y_pred_proba, targets, X)