Source code for evalml.objectives.fraud_cost

import pandas as pd

from .binary_classification_objective import BinaryClassificationObjective


class FraudCost(BinaryClassificationObjective):
    """Score the percentage of money lost of the total transaction amount processed due to fraud."""

    name = "Fraud Cost"
    # These flags are not read inside this class; presumably they are consumed
    # by the base class / surrounding framework -- confirm against the base.
    greater_is_better = False
    score_needs_proba = False

    def __init__(self, retry_percentage=.5, interchange_fee=.02,
                 fraud_payout_percentage=1.0, amount_col='amount'):
        """Create instance of FraudCost.

        Arguments:
            retry_percentage (float): Percentage of customers that will retry
                a transaction if it is declined. Between 0 and 1.
                Defaults to .5

            interchange_fee (float): How much of each successful transaction
                you can collect. Between 0 and 1. Defaults to .02

            fraud_payout_percentage (float): Percentage of fraud you will not
                be able to collect. Between 0 and 1. Defaults to 1.0

            amount_col (str): Name of column in data that contains the
                amount. Defaults to "amount"
        """
        self.retry_percentage = retry_percentage
        self.interchange_fee = interchange_fee
        self.fraud_payout_percentage = fraud_payout_percentage
        self.amount_col = amount_col

    def decision_function(self, ypred_proba, threshold=0.0, X=None):
        """Determine if a transaction is fraud given predicted probabilities,
        threshold, and dataframe with transaction amount.

        Arguments:
            ypred_proba (pd.Series): Predicted probabilities

            threshold (float): Dollar threshold to determine if transaction
                is fraud

            X (pd.DataFrame): Dataframe containing transaction amount

        Returns:
            pd.Series: Series of predicted fraud labels using X and threshold

        Raises:
            KeyError: If the amount column is not present in X.
        """
        if not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X)

        if not isinstance(ypred_proba, pd.Series):
            ypred_proba = pd.Series(ypred_proba)

        # `.values` discards the index of the probabilities, so they are
        # matched to the rows of X by position and the result carries the
        # index of X.
        transformed_probs = ypred_proba.values * X[self.amount_col]
        return transformed_probs > threshold

    def objective_function(self, y_true, y_predicted, X):
        """Calculate the share of the total transaction amount lost to fraud
        given predictions, true values, and dataframe with transaction amount.

        Labels passed as ``pd.Series`` are aligned with X by index label.
        Labels passed as any other array-like are matched to the rows of X
        by position.

        Arguments:
            y_true (pd.Series): true fraud labels

            y_predicted (pd.Series): predicted fraud labels

            X (pd.DataFrame): dataframe with transaction amounts

        Returns:
            float: total loss (missed fraud plus lost interchange fees)
                divided by the sum of all transaction amounts

        Raises:
            ValueError: If the amount column is not present in X.
        """
        if not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X)

        # extract transaction using the amount columns in users data
        try:
            transaction_amount = X[self.amount_col]
        except KeyError as err:
            raise ValueError("`{}` is not a valid column in X.".format(self.amount_col)) from err

        # Array-like labels carry no index of their own. Give them the index
        # of X so the element-wise products below line up row by row; a fresh
        # RangeIndex would misalign against a non-default index on X and the
        # resulting NaNs would be silently dropped by `.sum()`.
        row_index = transaction_amount.index
        if not isinstance(y_predicted, pd.Series):
            y_predicted = pd.Series(y_predicted, index=row_index)

        if not isinstance(y_true, pd.Series):
            y_true = pd.Series(y_true, index=row_index)

        # amount paid if transaction is fraud
        fraud_cost = transaction_amount * self.fraud_payout_percentage

        # money made from interchange fees on transaction
        interchange_cost = transaction_amount * (1 - self.retry_percentage) * self.interchange_fee

        # calculate cost of missing fraudulent transactions
        false_negatives = (y_true & ~y_predicted) * fraud_cost

        # calculate money lost from fees
        false_positives = (~y_true & y_predicted) * interchange_cost

        loss = false_negatives.sum() + false_positives.sum()

        loss_per_total_processed = loss / transaction_amount.sum()

        return loss_per_total_processed