Source code for evalml.objectives.fraud_cost


from .binary_classification_objective import BinaryClassificationObjective


[docs]class FraudCost(BinaryClassificationObjective):
    """Score the percentage of money lost of the total transaction amount process due to fraud."""
    name = "Fraud Cost"
    greater_is_better = False
    score_needs_proba = False
    perfect_score = 0.0

[docs]    def __init__(self, retry_percentage=.5, interchange_fee=.02,
                 fraud_payout_percentage=1.0, amount_col='amount'):
        """Create instance of FraudCost

        Arguments:
            retry_percentage (float): What percentage of customers that will retry a transaction if it
                is declined. Between 0 and 1. Defaults to .5

            interchange_fee (float): How much of each successful transaction you can collect.
                Between 0 and 1. Defaults to .02

            fraud_payout_percentage (float): Percentage of fraud you will not be able to collect.
                Between 0 and 1. Defaults to 1.0

            amount_col (str): Name of column in data that contains the amount. Defaults to "amount"
        """
        self.retry_percentage = retry_percentage
        self.interchange_fee = interchange_fee
        self.fraud_payout_percentage = fraud_payout_percentage
        self.amount_col = amount_col

[docs]    def decision_function(self, ypred_proba, threshold=0.0, X=None):
        """Determine if a transaction is fraud given predicted probabilities, threshold, and dataframe with transaction amount.

        Arguments:
            ypred_proba (ww.DataColumn, pd.Series): Predicted probablities
            threshold (float): Dollar threshold to determine if transaction is fraud
            X (ww.DataTable, pd.DataFrame): Data containing transaction amounts

        Returns:
            pd.Series: pd.Series of predicted fraud labels using X and threshold
        """
        if X is not None:
            X = self._standardize_input_type(X)
        ypred_proba = self._standardize_input_type(ypred_proba)
        transformed_probs = (ypred_proba.values * X[self.amount_col])
        return transformed_probs > threshold

[docs]    def objective_function(self, y_true, y_predicted, X):
        """Calculate amount lost to fraud per transaction given predictions, true values, and dataframe with transaction amount.

        Arguments:
            y_predicted (ww.DataColumn, pd.Series): Predicted fraud labels
            y_true (ww.DataColumn, pd.Series): True fraud labels
            X (ww.DataTable, pd.DataFrame): Data with transaction amounts

        Returns:
            float: Amount lost to fraud per transaction
        """
        X = self._standardize_input_type(X)
        y_true = self._standardize_input_type(y_true)
        y_predicted = self._standardize_input_type(y_predicted)
        self.validate_inputs(y_true, y_predicted)

        # extract transaction using the amount columns in users data
        try:
            transaction_amount = X[self.amount_col]
        except KeyError:
            raise ValueError("`{}` is not a valid column in X.".format(self.amount_col))

        # amount paid if transaction is fraud
        fraud_cost = transaction_amount * self.fraud_payout_percentage

        # money made from interchange fees on transaction
        interchange_cost = transaction_amount * (1 - self.retry_percentage) * self.interchange_fee

        # calculate cost of missing fraudulent transactions
        false_negatives = (y_true & ~y_predicted) * fraud_cost

        # calculate money lost from fees
        false_positives = (~y_true & y_predicted) * interchange_cost

        loss = false_negatives.sum() + false_positives.sum()

        loss_per_total_processed = loss / transaction_amount.sum()

        return loss_per_total_processed