Source code for evalml.objectives.fraud_cost

"""Score the percentage of money lost of the total transaction amount process due to fraud."""
from evalml.objectives.binary_classification_objective import (
    BinaryClassificationObjective,
)


[docs]class FraudCost(BinaryClassificationObjective): """Score the percentage of money lost of the total transaction amount process due to fraud. Args: retry_percentage (float): What percentage of customers that will retry a transaction if it is declined. Between 0 and 1. Defaults to 0.5. interchange_fee (float): How much of each successful transaction you pay. Between 0 and 1. Defaults to 0.02. fraud_payout_percentage (float): Percentage of fraud you will not be able to collect. Between 0 and 1. Defaults to 1.0. amount_col (str): Name of column in data that contains the amount. Defaults to "amount". """ name = "Fraud Cost" greater_is_better = False score_needs_proba = False perfect_score = 0.0 is_bounded_like_percentage = True expected_range = [0, float("inf")] def __init__( self, retry_percentage=0.5, interchange_fee=0.02, fraud_payout_percentage=1.0, amount_col="amount", ): self.retry_percentage = retry_percentage self.interchange_fee = interchange_fee self.fraud_payout_percentage = fraud_payout_percentage self.amount_col = amount_col
[docs] def objective_function( self, y_true, y_predicted, X, y_train=None, sample_weight=None, ): """Calculate amount lost to fraud per transaction given predictions, true values, and dataframe with transaction amount. Args: y_predicted (pd.Series): Predicted fraud labels. y_true (pd.Series): True fraud labels. y_train (pd.Series): Ignored. X (pd.DataFrame): Data with transaction amounts. sample_weight (pd.DataFrame): Ignored. Returns: float: Amount lost to fraud per transaction. Raises: ValueError: If amount_col is not a valid column in the input data. """ X = self._standardize_input_type(X) y_true = self._standardize_input_type(y_true) y_predicted = self._standardize_input_type(y_predicted) self.validate_inputs(y_true, y_predicted) # extract transaction using the amount columns in users data try: transaction_amount = X[self.amount_col] except KeyError: raise ValueError("`{}` is not a valid column in X.".format(self.amount_col)) # amount paid if transaction is fraud fraud_cost = transaction_amount * self.fraud_payout_percentage # money paid from interchange fees on transaction interchange_cost = ( transaction_amount * (1 - self.retry_percentage) * self.interchange_fee ) # calculate cost of missing fraudulent transactions false_negatives = (y_true & ~y_predicted) * fraud_cost # calculate money lost from fees false_positives = (~y_true & y_predicted) * interchange_cost # add a penalty if we output naive predictions all_one_prediction_cost = (2 - len(set(y_predicted))) * fraud_cost.sum() loss = false_negatives.sum() + false_positives.sum() + all_one_prediction_cost loss_per_total_processed = loss / transaction_amount.sum() return loss_per_total_processed