""" Title: Imbalanced classification: credit card fraud detection Author: fchollet Date created: 2019/05/28 Last modified: 2020/04/17 Description: Demonstration of how to handle highly imbalanced classification problems. Accelerator: GPU """
"""
This example looks at the Kaggle Credit Card Fraud Detection dataset to demonstrate how to train a classification model on data with highly imbalanced classes. """
"""
"""
import csv

import numpy as np

fname = "/Users/fchollet/Downloads/creditcard.csv"


def _load_csv(path):
    """Parse the Kaggle credit-card CSV into feature and target arrays.

    Every column except the last is read as a float feature; the last
    column is the integer class label (1 = fraud).  Uses ``csv.reader``
    (rather than a manual ``split(",")``) so quoted fields are handled
    correctly.  Returns ``(features, targets)`` as float32 / uint8
    NumPy arrays, with targets shaped ``(n, 1)``.
    """
    all_features = []
    all_targets = []
    with open(path) as f:
        reader = csv.reader(f)  # strips the quoting for us
        for i, fields in enumerate(reader):
            if i == 0:
                print("HEADER:", ",".join(fields))
                continue  # Skip header
            all_features.append([float(v) for v in fields[:-1]])
            all_targets.append([int(fields[-1])])
            if i == 1:
                print("EXAMPLE FEATURES:", all_features[-1])
    return (
        np.array(all_features, dtype="float32"),
        np.array(all_targets, dtype="uint8"),
    )


features, targets = _load_csv(fname)
print("features.shape:", features.shape)
print("targets.shape:", targets.shape)
"""
"""
# Hold out the last 20% of the rows as a validation set.
val_count = int(len(features) * 0.2)

train_features = features[:-val_count]
train_targets = targets[:-val_count]
val_features = features[-val_count:]
val_targets = targets[-val_count:]

print("Number of training samples:", len(train_features))
print("Number of validation samples:", len(val_features))
"""
"""
# How unbalanced is the training data?
counts = np.bincount(train_targets[:, 0])
n_pos = counts[1]
print(
    f"Number of positive samples in training data: {n_pos} "
    f"({100 * float(n_pos) / len(train_targets):.2f}% of total)"
)

# Weight each class by the inverse of its frequency, so the rare
# positive class contributes as much to the loss as the majority class.
weight_for_0 = 1.0 / counts[0]
weight_for_1 = 1.0 / counts[1]
"""
"""
mean = np.mean(train_features, axis=0) train_features -= mean val_features -= mean std = np.std(train_features, axis=0) train_features /= std val_features /= std
"""
"""
import keras

# A simple stack of densely connected layers with dropout, ending in a
# single sigmoid unit for binary classification.
model = keras.Sequential()
model.add(keras.Input(shape=train_features.shape[1:]))
model.add(keras.layers.Dense(256, activation="relu"))
model.add(keras.layers.Dense(256, activation="relu"))
model.add(keras.layers.Dropout(0.3))
model.add(keras.layers.Dense(256, activation="relu"))
model.add(keras.layers.Dropout(0.3))
model.add(keras.layers.Dense(1, activation="sigmoid"))
model.summary()
"""
class_weight
argument"""
metrics = [ keras.metrics.FalseNegatives(name="fn"), keras.metrics.FalsePositives(name="fp"), keras.metrics.TrueNegatives(name="tn"), keras.metrics.TruePositives(name="tp"), keras.metrics.Precision(name="precision"), keras.metrics.Recall(name="recall"), ]
optimizer = keras.optimizers.Adam(1e-2)
model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=metrics)

# Save a checkpoint after every epoch; weight the loss by inverse class
# frequency so the rare fraud class is not drowned out.
callbacks = [keras.callbacks.ModelCheckpoint("fraud_model_at_epoch_{epoch}.keras")]
class_weight = {0: weight_for_0, 1: weight_for_1}

fit_kwargs = dict(
    batch_size=2048,
    epochs=30,
    verbose=2,
    callbacks=callbacks,
    validation_data=(val_features, val_targets),
    class_weight=class_weight,
)
model.fit(train_features, train_targets, **fit_kwargs)
"""
At the end of training, out of 56,961 validation transactions, we correctly identify most of the fraudulent transactions, at the cost of incorrectly flagging a number of legitimate transactions as fraudulent.
In the real world, one would put an even higher weight on class 1, so as to reflect that False Negatives are more costly than False Positives.
Next time your credit card gets declined in an online purchase -- this is why.
"""
# NOTE(review): the two lines below were website-embedding boilerplate picked up
# by the page scrape, not part of the example; commented out so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.