Precision-Recall Tradeoff

Implementation Example

# Precision-Recall Tradeoff: Threshold optimization

from sklearn.metrics import precision_recall_curve, f1_score
import numpy as np

# Score the test set and keep the second probability column
# (presumably the positive class for a binary classifier -- confirm
# against model.classes_).
class_proba = model.predict_proba(X_test)
y_proba = class_proba[:, 1]

# Sweep the decision threshold over the scores to get the
# precision/recall value at each candidate cutoff.
pr_curve = precision_recall_curve(y_test, y_proba)
precisions, recalls, thresholds = pr_curve

# Find optimal threshold based on business costs
# Cost = FP_cost * FP + FN_cost * FN
def total_cost(y_true, y_pred, fp_cost=1, fn_cost=10):
    """Return the total misclassification cost of binary predictions.

    The cost is ``fp_cost * FP + fn_cost * FN``, where FP counts samples
    predicted 1 whose true label is 0 and FN counts samples predicted 0
    whose true label is 1.

    Args:
        y_true: Array-like of ground-truth labels (0 or 1).
        y_pred: Array-like of predicted labels (0 or 1), same length
            as ``y_true``.
        fp_cost: Cost charged for each false positive.
        fn_cost: Cost charged for each false negative.

    Returns:
        The combined cost of all false positives and false negatives.
    """
    # Coerce to arrays so plain lists/tuples are compared element-wise;
    # without this, ``[1, 0] == 1`` evaluates to a single ``False``.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    fp = ((y_pred == 1) & (y_true == 0)).sum()
    fn = ((y_pred == 0) & (y_true == 1)).sum()
    return fp_cost * fp + fn_cost * fn

# Business cost incurred at each candidate cutoff: samples scoring at or
# above the cutoff are labelled 1, the rest 0.
costs = [
    total_cost(y_test, (y_proba >= cutoff).astype(int), fp_cost=1, fn_cost=10)
    for cutoff in thresholds
]

# The cheapest cutoff wins (first one on ties, per np.argmin).
cheapest_idx = np.argmin(costs)
optimal_threshold = thresholds[cheapest_idx]
print(f"Optimal threshold: {optimal_threshold:.3f}")

# Alternative: Maximize F1-score
# precision_recall_curve returns one more precision/recall value than
# thresholds: the final (precision=1, recall=0) point has no threshold.
# Drop it so f1_scores[i] corresponds to thresholds[i].
pr_sum = precisions[:-1] + recalls[:-1]
# Guard the division: where precision + recall == 0 the F1-score is defined
# as 0. An unguarded 0/0 yields NaN, which np.argmax would treat as the
# maximum and so select the wrong threshold.
f1_scores = np.divide(
    2 * precisions[:-1] * recalls[:-1],
    pr_sum,
    out=np.zeros_like(pr_sum, dtype=float),
    where=pr_sum > 0,
)
optimal_threshold_f1 = thresholds[np.argmax(f1_scores)]
print(f"F1-optimal threshold: {optimal_threshold_f1:.3f}")

Intent & Description

🎯 Intent

📋 Context

💡 Solution

Real-world Use Case

Source

📌 TL;DR

Advantages

Disadvantages