Batch vs. Online Learning

Implementation Example

# Batch vs. Online Learning

import numpy as np
from river import drift, linear_model
from sklearn.linear_model import LogisticRegression, SGDClassifier

# Batch Learning: Train on full dataset periodically
def batch_learning(X_train, y_train, X_test, y_test):
    """Fit a logistic regression on the full training set in one pass.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Currently unused; kept so existing callers keep working.
        y_test: Currently unused; kept so existing callers keep working.

    Returns:
        The ``LogisticRegression`` estimator after ``fit`` has been called.
    """
    # LogisticRegression is scikit-learn's estimator, imported at file top.
    model = LogisticRegression()
    model.fit(X_train, y_train)
    return model

# Online Learning: Update continuously with new data
def online_learning(stream=None):
    """Train a river logistic regression one sample at a time.

    Each sample is predicted first and learned afterwards, so the error fed
    to the drift detector is measured on data the model has not yet seen.
    When the detector flags drift, the model is replaced by a fresh one.

    Args:
        stream: Iterable yielding ``(features, label)`` pairs. When omitted,
            the module-level ``data_stream`` is used, which matches the
            behaviour of calling this function without arguments.
            NOTE(review): river models usually expect ``features`` to be a
            dict -- confirm against the caller.

    Returns:
        The model in use once the stream is exhausted.
    """
    if stream is None:
        # Backward-compatible default: read the module-level stream.
        stream = data_stream

    model = linear_model.LogisticRegression()
    drift_detector = drift.ADWIN()

    for X_new, y_new in stream:
        # Predict before learning; predicting after would score the model
        # on a sample it has just been trained on.
        y_pred = model.predict_one(X_new)

        # Update model with new sample
        model.learn_one(X_new, y_new)

        # The detector tracks a single numeric series, so feed it the 0/1
        # prediction error rather than the raw label/prediction pair.
        drift_detector.update(int(y_pred != y_new))

        # Check for concept drift via the detector's flag, the same way
        # detect_concept_drift does.
        if drift_detector.drift_detected:
            print("Concept drift detected!")
            model = linear_model.LogisticRegression()  # Reset

    return model

# Mini-batch Learning: Pragmatic middle ground
def mini_batch_learning(X_train, y_train, batch_size=32):
    """Train an ``SGDClassifier`` incrementally over consecutive slices.

    Args:
        X_train: Training features; sliced as ``X_train[i:i + batch_size]``.
        y_train: Training labels; sliced the same way as ``X_train``.
        batch_size: Number of rows per ``partial_fit`` call. The last batch
            may be shorter.

    Returns:
        The classifier after all batches have been passed to
        ``partial_fit``. If ``X_train`` is empty no batch is processed and
        the returned classifier has not been fitted.
    """
    # The 'adaptive' schedule needs a positive initial learning rate, so set
    # eta0 explicitly instead of relying on the library default.
    model = SGDClassifier(loss='log_loss', learning_rate='adaptive', eta0=0.01)

    # The label set is the same for every batch; compute it once.
    classes = np.unique(y_train)

    for start in range(0, len(X_train), batch_size):
        stop = start + batch_size
        model.partial_fit(X_train[start:stop], y_train[start:stop], classes=classes)

    return model

# Concept Drift Detection
def detect_concept_drift(predictions, true_values, window_size=100):
    """Report whether a Page-Hinkley detector flags drift in the error stream.

    Predictions and true values are paired in order; each pair contributes
    a 1 to the detector when they differ and a 0 when they match.

    Args:
        predictions: Iterable of predicted values.
        true_values: Iterable of actual values, aligned with ``predictions``.
        window_size: Not read by this function at present.

    Returns:
        ``True`` as soon as the detector reports drift (retraining needed),
        ``False`` if every pair is consumed without a drift signal.
    """
    page_hinkley = drift.PageHinkley()

    for predicted, actual in zip(predictions, true_values):
        if predicted != actual:
            miss = 1
        else:
            miss = 0
        page_hinkley.update(miss)

        # Stop at the first drift signal; later pairs are not inspected.
        if page_hinkley.drift_detected:
            return True

    return False

Intent & Description

🎯 Intent

📋 Context

💡 Solution

Real-world Use Case

📌 TL;DR

Advantages

Disadvantages