Source code for evalml.objectives.fraud_cost

import pandas as pd

from .objective_base import ObjectiveBase

from evalml.problem_types import ProblemTypes


class FraudCost(ObjectiveBase):
    """Score the percentage of money lost of the total transaction amount processed due to fraud"""

    name = "Fraud Cost"
    problem_types = [ProblemTypes.BINARY]
    needs_fitting = True
    greater_is_better = False
    uses_extra_columns = True
    fit_needs_proba = True
    score_needs_proba = False

    def __init__(self, retry_percentage=.5, interchange_fee=.02,
                 fraud_payout_percentage=1.0, amount_col='amount', verbose=False):
        """Create instance of FraudCost

        Arguments:
            retry_percentage (float): what percentage of customers will retry a transaction if it
                is declined? Between 0 and 1. Defaults to .5

            interchange_fee (float): how much of each successful transaction do you collect?
                Between 0 and 1. Defaults to .02

            fraud_payout_percentage (float): what percentage of fraud will you be unable to
                collect? Between 0 and 1. Defaults to 1.0

            amount_col (str): name of column in data that contains the amount. Defaults to "amount"

            verbose (bool): passed through unchanged to ``ObjectiveBase.__init__``.
                Defaults to False
        """
        self.retry_percentage = retry_percentage
        self.interchange_fee = interchange_fee
        self.fraud_payout_percentage = fraud_payout_percentage
        self.amount_col = amount_col
        super().__init__(verbose=verbose)

    def decision_function(self, y_predicted, extra_cols, threshold):
        """Determine if transaction is fraud given predicted probabilities,
            dataframe with transaction amount, and threshold

        Arguments:
            y_predicted (pd.Series): predicted fraud probabilities

            extra_cols (pd.DataFrame): extra data needed; must contain the
                ``amount_col`` column

            threshold (float): dollar threshold to determine if transaction is fraud

        Returns:
            pd.Series: series of predicted fraud label using extra cols and threshold
                (boolean, carrying the index of ``extra_cols``)
        """
        if not isinstance(extra_cols, pd.DataFrame):
            extra_cols = pd.DataFrame(extra_cols)

        if not isinstance(y_predicted, pd.Series):
            y_predicted = pd.Series(y_predicted)

        # `.values` drops the prediction index so the product is positional:
        # row i of the predictions is paired with row i of the amounts.
        transformed_probs = (y_predicted.values * extra_cols[self.amount_col])
        return transformed_probs > threshold

    def objective_function(self, y_predicted, y_true, extra_cols):
        """Calculate amount lost to fraud per transaction given predictions,
            true values, and dataframe with transaction amount

        Arguments:
            y_predicted (pd.Series): predicted fraud labels

            y_true (pd.Series): true fraud labels

            extra_cols (pd.DataFrame): extra data needed; must contain the
                ``amount_col`` column

        Returns:
            float: amount lost to fraud per transaction (total loss divided by the
                total transaction amount; no guard is applied for a zero total)
        """
        if not isinstance(extra_cols, pd.DataFrame):
            extra_cols = pd.DataFrame(extra_cols)

        # extract transaction using the amount columns in users data
        transaction_amount = extra_cols[self.amount_col]

        # Array-like inputs carry no index of their own. Build them on the
        # amounts' index so the element-wise arithmetic below pairs row i with
        # row i, instead of aligning a fresh RangeIndex against whatever index
        # `extra_cols` has (which would yield NaNs that `.sum()` silently skips).
        if not isinstance(y_predicted, pd.Series):
            y_predicted = pd.Series(y_predicted, index=transaction_amount.index)

        if not isinstance(y_true, pd.Series):
            y_true = pd.Series(y_true, index=transaction_amount.index)

        # `~` is a logical NOT only on boolean data; on integers it is a bitwise
        # NOT and on floats it raises. Normalise both label series to bool first.
        y_predicted = y_predicted.astype(bool)
        y_true = y_true.astype(bool)

        # amount paid if transaction is fraud
        fraud_cost = transaction_amount * self.fraud_payout_percentage

        # money made from interchange fees on transaction
        interchange_cost = transaction_amount * (1 - self.retry_percentage) * self.interchange_fee

        # calculate cost of missing fraudulent transactions
        false_negatives = (y_true & ~y_predicted) * fraud_cost

        # calculate money lost from fees
        false_positives = (~y_true & y_predicted) * interchange_cost

        loss = false_negatives.sum() + false_positives.sum()

        loss_per_total_processed = loss / transaction_amount.sum()

        return loss_per_total_processed