Latency vs. Throughput

Implementation Example

// Latency vs. Throughput Optimization Strategies

/**
 * Illustrates strategies for trading latency against throughput: request
 * batching, immediate processing, latency-aware batch sizing, cache lookups
 * and pooled database connections.
 *
 * The class calls several collaborators that it does not define itself. They
 * must be provided by a subclass or assigned on the instance before the
 * corresponding method is used:
 *   - `processBatch(requests)`             used by `flushBatch`
 *   - `processSingle(request)`             used by `processImmediately`
 *   - `processWithDynamicBatching(r, n)`   used by `dynamicBatchMLInference`
 *   - `getCurrentLatency()`                used by `calculateOptimalBatchSize`
 *   - `cache` (object with `get(key)`)     used by `getCachedResult`
 *   - `pool` (object with `getConnection()`) used by `queryWithPooling`
 */
class PerformanceOptimizer {
  /**
   * @param {object} [options] - Optional overrides; omit for the defaults.
   * @param {number} [options.batchSize=32] - Pending requests that trigger an immediate flush.
   * @param {number} [options.batchTimeout=10] - Milliseconds to wait before flushing a partial batch.
   * @param {number} [options.latencySLO] - Latency target read by `calculateOptimalBatchSize`.
   * @param {object} [options.cache] - Cache read by `getCachedResult`.
   * @param {object} [options.pool] - Connection pool used by `queryWithPooling`.
   */
  constructor(options) {
    const {
      batchSize = 32,
      batchTimeout = 10, // ms
      latencySLO,
      cache,
      pool,
    } = options || {};

    this.requestQueue = [];
    this.batchSize = batchSize;
    this.batchTimeout = batchTimeout;
    this.currentBatch = [];
    this.batchTimer = null;

    // Assign optional collaborators only when supplied, so values provided by
    // a subclass or prototype are never shadowed with `undefined`.
    if (latencySLO !== undefined) {
      this.latencySLO = latencySLO;
    }
    if (cache !== undefined) {
      this.cache = cache;
    }
    if (pool !== undefined) {
      this.pool = pool;
    }
  }

  /**
   * Batching: high throughput, higher latency.
   *
   * Queues the request in the current batch. The batch is flushed as soon as
   * it holds `batchSize` entries; otherwise a timer flushes it after
   * `batchTimeout` milliseconds.
   *
   * @param {*} request - Passed through to `processBatch` as one array element.
   * @returns {Promise<*>} Settles with this request's entry of the batch result,
   *   or rejects with the error that failed the batch.
   */
  async processWithBatching(request) {
    return new Promise((resolve, reject) => {
      this.currentBatch.push({ request, resolve, reject });

      if (this.currentBatch.length >= this.batchSize) {
        // flushBatch routes every failure to the queued promises, so the
        // promise it returns is intentionally not awaited here.
        this.flushBatch();
      } else if (!this.batchTimer) {
        this.batchTimer = setTimeout(() => this.flushBatch(), this.batchTimeout);
      }
    });
  }

  /**
   * Sends all pending requests to `processBatch` in one call and settles each
   * queued promise with the result at the same index.
   *
   * Does nothing (beyond cancelling the timer) when no requests are pending.
   * If `processBatch` throws, or does not return exactly one result per
   * request, every promise in the batch is rejected.
   *
   * @returns {Promise<void>}
   */
  async flushBatch() {
    if (this.batchTimer) {
      clearTimeout(this.batchTimer);
      this.batchTimer = null;
    }

    const batch = this.currentBatch;
    if (batch.length === 0) {
      return; // Nothing pending: avoid a pointless processBatch([]) call.
    }
    // Swap in a fresh array first so requests arriving while the batch is
    // being processed start a new batch.
    this.currentBatch = [];

    try {
      // Process entire batch at once (high throughput)
      const results = await this.processBatch(batch.map((entry) => entry.request));

      // Results are matched to requests by index, so a count mismatch would
      // otherwise silently resolve some callers with `undefined`.
      const received = results == null ? 0 : results.length;
      if (received !== batch.length) {
        throw new Error(
          `processBatch returned ${received} results for ${batch.length} requests`,
        );
      }

      batch.forEach((entry, index) => entry.resolve(results[index]));
    } catch (error) {
      batch.forEach((entry) => entry.reject(error));
    }
  }

  /**
   * Immediate processing: low latency, lower throughput.
   *
   * @param {*} request - Forwarded unchanged to `processSingle`.
   * @returns {Promise<*>} Whatever `processSingle` produces.
   */
  async processImmediately(request) {
    return await this.processSingle(request);
  }

  /**
   * Dynamic batching: balance both for ML serving.
   *
   * Picks a batch size via `calculateOptimalBatchSize` and delegates to
   * `processWithDynamicBatching`.
   *
   * @param {*} request - Forwarded unchanged.
   * @returns {Promise<*>} Whatever `processWithDynamicBatching` produces.
   */
  async dynamicBatchMLInference(request) {
    // For GPU inference, balance batch size vs. latency
    const optimalBatchSize = this.calculateOptimalBatchSize();
    return await this.processWithDynamicBatching(request, optimalBatchSize);
  }

  /**
   * Chooses a batch size from the current latency and the latency SLO.
   *
   * @returns {number} `batchSize` when no SLO is configured or latency is at
   *   most 80% of the SLO; otherwise half of `batchSize`, rounded down and
   *   never below 1.
   */
  calculateOptimalBatchSize() {
    const targetLatency = this.latencySLO;
    if (targetLatency == null) {
      return this.batchSize; // No SLO configured: maximize batch for throughput
    }

    // Based on current load and latency SLO
    const currentLatency = this.getCurrentLatency();

    if (currentLatency > targetLatency * 0.8) {
      // Reduce batch for latency; floor keeps the size a whole number.
      return Math.max(1, Math.floor(this.batchSize / 2));
    }
    return this.batchSize; // Maximize batch for throughput
  }

  /**
   * Caching: improves both latency and throughput.
   *
   * @param {*} key - Key passed to `this.cache.get`.
   * @returns {Promise<*>} The cached value (including falsy values such as
   *   `0`, `''` or `false`), or `null` when the cache yields `null`/`undefined`.
   */
  async getCachedResult(key) {
    const cached = await this.cache.get(key);
    if (cached === undefined || cached === null) {
      return null; // Cache miss
    }
    return cached; // Fast path (low latency)
  }

  /**
   * Connection pooling: reduces connection overhead.
   *
   * @param {string} sql - Statement passed to `connection.query`.
   * @param {*} params - Parameters passed to `connection.query`.
   * @returns {Promise<*>} The value resolved by `connection.query`.
   */
  async queryWithPooling(sql, params) {
    const connection = await this.pool.getConnection();
    try {
      const result = await connection.query(sql, params);
      return result;
    } finally {
      connection.release(); // Return to pool even when the query throws
    }
  }
}

// Little's Law in practice
/**
 * Computes the concurrency needed to sustain a target throughput at a given
 * latency, using Little's Law (Concurrency = Throughput × Latency).
 *
 * Both arguments must use the same time unit (e.g. latency in seconds with
 * throughput in requests per second); the function performs no conversion.
 *
 * @param {number} currentLatency - Time spent per request.
 * @param {number} targetThroughput - Desired requests per unit of time.
 * @returns {number} The product rounded up to a whole number of concurrent
 *   requests. Non-finite products (`NaN`, `Infinity`) are returned as-is.
 */
function applyLittlesLaw(currentLatency, targetThroughput) {
  // Throughput = Concurrency / Latency
  // Concurrency = Throughput × Latency
  const requiredConcurrency = targetThroughput * currentLatency;

  // Binary floating point can land a hair above an integer
  // (100 * 0.07 === 7.000000000000001); a plain ceil would then add a whole
  // extra unit. Snap to the nearest integer when within a few ULPs of it.
  const nearestInteger = Math.round(requiredConcurrency);
  const tolerance = 4 * Number.EPSILON * Math.max(1, Math.abs(requiredConcurrency));
  if (Math.abs(requiredConcurrency - nearestInteger) <= tolerance) {
    return nearestInteger;
  }

  return Math.ceil(requiredConcurrency);
}

Intent & Description

🎯 Intent

📋 Context

💡 Solution

Real-world Use Case

📌 TL;DR

Advantages

Disadvantages