Here's the code that actually works in production. Not the pretty examples from tutorials, but the battle-tested implementations that survive traffic spikes, Redis failures, and all the other shit that goes wrong at 3am.
Node.js Implementation (The One That Won't Shit The Bed)
Prerequisites and Pain Points
First, the dependencies. Use ioredis, not the old redis package. The old one has connection leaks that will slowly murder your server:
npm install express ioredis
# DON'T use express-rate-limit - it's garbage for distributed systems
Don't use Node.js 18.12.0 - it has a nasty memory leak with Redis connections. I found this out the hard way after our staging server crashed every 8 hours for a week. Use 18.15.0 or you'll be restarting containers all day.
Token Bucket That Actually Works in Production
Here's the implementation that survived multiple production incidents:
const express = require('express');
const Redis = require('ioredis');
const app = express();
class TokenBucketRateLimiter {
constructor(redis, capacity = 10, refillRate = 1, windowMs = 60000) {
this.redis = redis;
this.capacity = capacity;
this.refillRate = refillRate;
this.windowMs = windowMs;
this.redisDown = false; // Track Redis health
}
async isAllowed(key) {
try {
const now = Date.now();
const bucketKey = `rate_limit:${key}`;
// Pipelined read - note the read-modify-write below still isn't strictly atomic
// (see the Lua script sketch after this example if you need that guarantee)
const pipeline = this.redis.pipeline();
pipeline.hmget(bucketKey, 'tokens', 'lastRefill');
const [[, result]] = await pipeline.exec();
if (!result) {
// Redis error - fail open (allow request)
console.error('Redis HMGET failed - allowing request');
this.redisDown = true;
return { allowed: true, remaining: 0, failedOpen: true };
}
const [tokens, lastRefill] = result;
// Careful: 0 is a legitimate stored value, so don't use || here (it would reset the bucket to full)
let currentTokens = parseInt(tokens, 10);
if (Number.isNaN(currentTokens)) currentTokens = this.capacity;
let lastRefillTime = parseInt(lastRefill, 10);
if (Number.isNaN(lastRefillTime)) lastRefillTime = now;
// Token bucket refill logic
const timePassed = now - lastRefillTime;
const tokensToAdd = Math.floor(timePassed / this.windowMs * this.refillRate);
currentTokens = Math.min(this.capacity, currentTokens + tokensToAdd);
if (currentTokens >= 1) {
currentTokens -= 1;
// Persist the new state, then refresh the TTL so idle buckets don't linger in Redis
await this.redis.hset(bucketKey, 'tokens', currentTokens, 'lastRefill', now);
await this.redis.expire(bucketKey, Math.ceil(this.windowMs / 1000 * 2)); // 2x window for safety
this.redisDown = false;
return { allowed: true, remaining: currentTokens };
} else {
return { allowed: false, remaining: 0, retryAfter: Math.ceil(this.windowMs / 1000) };
}
} catch (error) {
// Redis is fucked - fail open and log
console.error('Rate limiter Redis error:', error.message);
this.redisDown = true;
return { allowed: true, remaining: 0, failedOpen: true };
}
}
}
// Redis setup with proper error handling
const redis = new Redis(process.env.REDIS_URL || 'redis://localhost:6379', {
retryStrategy: (times) => Math.min(times * 100, 2000), // back off between reconnects, cap at 2s
enableReadyCheck: false,
maxRetriesPerRequest: 1, // Don't retry forever
lazyConnect: true,
connectTimeout: 5000,
commandTimeout: 3000, // Fail fast on slow Redis
});
// Redis event handlers (CRITICAL - without these your app will crash)
redis.on('error', (error) => {
console.error('Redis connection error:', error.message);
// Don't crash the app just because Redis is down
});
redis.on('connect', () => {
console.log('Redis connected successfully');
});
const rateLimiter = new TokenBucketRateLimiter(redis);
// Production middleware with all the edge cases
const rateLimit = async (req, res, next) => {
// Get client IP - handling all the proxy fuckery
const clientId = req.headers['x-forwarded-for']?.split(',')[0] ||
req.headers['x-real-ip'] ||
req.connection.remoteAddress ||
req.socket.remoteAddress ||
'unknown';
// Skip rate limiting for health checks (trust me on this)
if (req.path === '/health' || req.path === '/ping') {
return next();
}
try {
const result = await rateLimiter.isAllowed(clientId);
// Always set these headers - clients need them
res.set({
'X-RateLimit-Limit': rateLimiter.capacity,
'X-RateLimit-Remaining': result.remaining || 0,
'X-RateLimit-Reset': Math.floor(Date.now() / 1000) + Math.ceil(rateLimiter.windowMs / 1000)
});
// Add warning when Redis is failing open
if (result.failedOpen) {
res.set('X-RateLimit-Status', 'DEGRADED');
}
if (!result.allowed) {
res.set('Retry-After', result.retryAfter);
return res.status(429).json({
error: 'Too Many Requests',
message: `Rate limit exceeded. Try again in ${result.retryAfter} seconds.`,
retryAfter: result.retryAfter
});
}
next();
} catch (error) {
// Something went really wrong - log and fail open
console.error('Rate limiting middleware error:', error);
next();
}
};
// Apply rate limiting globally (but not to health checks)
app.use(rateLimit);
app.get('/health', (req, res) => {
res.json({ status: 'ok', timestamp: Date.now() });
});
app.get('/api/data', (req, res) => {
res.json({
message: 'Data retrieved successfully',
timestamp: new Date().toISOString(),
served_by: process.env.HOSTNAME || 'unknown' // Helps with debugging in k8s
});
});
const PORT = process.env.PORT || 3000;
app.listen(PORT, () => {
console.log(`Server running on port ${PORT}`);
});
// Graceful shutdown (because Docker will SIGTERM your ass)
process.on('SIGTERM', async () => {
console.log('SIGTERM received, closing Redis connection...');
await redis.quit();
process.exit(0);
});
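One caveat on the bucket class above: the pipelined HMGET keeps round trips down, but the read-modify-write is still not strictly atomic, so two requests for the same key can race between the read and the HSET. If that matters for your limits, push the whole check into Redis with a Lua script. Here's a rough sketch using ioredis's defineCommand (the takeToken name and argument order are my own choices, adapt to taste):

// Sketch: atomic token bucket check, executed entirely inside Redis.
redis.defineCommand('takeToken', {
  numberOfKeys: 1,
  lua: `
    local capacity = tonumber(ARGV[1])
    local refillRate = tonumber(ARGV[2])
    local windowMs = tonumber(ARGV[3])
    local now = tonumber(ARGV[4])
    local bucket = redis.call('HMGET', KEYS[1], 'tokens', 'lastRefill')
    local tokens = tonumber(bucket[1]) or capacity
    local lastRefill = tonumber(bucket[2]) or now
    tokens = math.min(capacity, tokens + math.floor((now - lastRefill) / windowMs * refillRate))
    local allowed = 0
    if tokens >= 1 then
      tokens = tokens - 1
      allowed = 1
      redis.call('HSET', KEYS[1], 'tokens', tokens, 'lastRefill', now)
      redis.call('EXPIRE', KEYS[1], math.ceil(windowMs / 1000 * 2))
    end
    return {allowed, tokens}
  `,
});

// Usage inside isAllowed(), replacing the pipeline + hset dance:
// const [allowed, remaining] = await this.redis.takeToken(bucketKey, this.capacity, this.refillRate, this.windowMs, Date.now());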
Python FastAPI Version (If You Must Use Python)
Look, Node.js is faster for this stuff, but if you're stuck with Python:
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import redis.asyncio as redis
import time

app = FastAPI()

# The sliding window approach - eats RAM but more accurate
class SlidingWindowRateLimiter:
    def __init__(self, redis_client, limit: int = 100, window_seconds: int = 60):
        self.redis = redis_client
        self.limit = limit
        self.window = window_seconds

    async def is_allowed(self, key: str) -> bool:
        now = time.time()
        pipe = self.redis.pipeline()
        # Clean old entries and count current ones
        pipe.zremrangebyscore(f"rl:{key}", 0, now - self.window)
        pipe.zcard(f"rl:{key}")
        pipe.zadd(f"rl:{key}", {str(now): now})
        pipe.expire(f"rl:{key}", self.window)
        results = await pipe.execute()
        count = results[1]
        if count >= self.limit:
            # Remove the entry we just added
            await self.redis.zrem(f"rl:{key}", str(now))
            return False
        return True

# Don't use localhost in production, you know this
redis_client = redis.Redis.from_url("redis://redis:6379")
limiter = SlidingWindowRateLimiter(redis_client)

@app.middleware("http")
async def rate_limit_middleware(request: Request, call_next):
    if request.url.path in ["/health", "/docs"]:
        return await call_next(request)
    client_ip = request.client.host
    try:
        allowed = await limiter.is_allowed(client_ip)
    except Exception:
        # Redis down? Fail open
        allowed = True
    if not allowed:
        # Raising HTTPException inside middleware gets swallowed - return the 429 directly
        return JSONResponse(status_code=429, content={"detail": "Rate limit exceeded"})
    return await call_next(request)

@app.get("/api/data")
async def get_data():
    return {"message": "It works", "timestamp": time.time()}
Python 3.8 is fucked - the redis.asyncio module randomly drops connections. Spent 2 days debugging "connection pool exhausted" errors before I realized it was a Python version issue. Use 3.9+ or hate your life.
Go Version (For Serious Traffic)
If you're getting serious traffic, Go is the way to go. This is a minimal fixed-window implementation:
package main

import (
    "context"
    "fmt"
    "net/http"
    "time"

    "github.com/gin-gonic/gin"
    "github.com/redis/go-redis/v9" // Use v9, not the old go-redis
)

func main() {
    rdb := redis.NewClient(&redis.Options{
        Addr:         "redis:6379",
        PoolSize:     10,
        PoolTimeout:  30 * time.Second,
        DialTimeout:  5 * time.Second,
        ReadTimeout:  3 * time.Second,
        WriteTimeout: 3 * time.Second,
    })

    r := gin.Default()

    r.Use(func(c *gin.Context) {
        if c.Request.URL.Path == "/health" {
            c.Next()
            return
        }

        // Fixed window: one counter per client per minute
        key := fmt.Sprintf("rl:%s:%d", c.ClientIP(), time.Now().Unix()/60)
        count, err := rdb.Incr(context.Background(), key).Result()
        if err != nil {
            // Redis down - fail open
            c.Next()
            return
        }
        if count == 1 {
            // First hit in this window - set the TTL so the key expires
            rdb.Expire(context.Background(), key, time.Minute)
        }
        if count > 100 {
            c.JSON(http.StatusTooManyRequests, gin.H{"error": "Rate limited"})
            c.Abort()
            return
        }
        c.Next()
    })

    r.GET("/api/data", func(c *gin.Context) {
        c.JSON(http.StatusOK, gin.H{"message": "success"})
    })

    r.Run(":8080")
}
Don't use go-redis v8 - it leaks connections like a sieve. Our production Go service went from 50MB to 2GB memory usage over 48 hours. Upgrade to v9 or watch htop in horror as your memory disappears.
The Production Deployment Reality Check
Here's where your beautiful rate limiting implementation meets the harsh reality of production. Everything that can go wrong, will go wrong.
Docker: Where Simple Becomes Complicated
Your Dockerfile needs to handle the fact that Redis won't be ready when your app starts:
FROM node:18.15-alpine
# 18.12 will leak memory with Redis - learned this the hard way

WORKDIR /app
COPY package*.json ./
RUN npm ci --only=production && npm cache clean --force
COPY . .

# Create non-root user (security theater but required)
RUN addgroup -g 1001 -S nodejs && adduser -S nodejs -u 1001
USER nodejs

EXPOSE 3000

# Wait for Redis before starting
CMD ["sh", "-c", "sleep 5 && node server.js"]
Docker-compose with Redis persistence (because losing rate limit data on restart sucks):
version: '3.8'

services:
  app:
    build: .
    ports:
      - "3000:3000"
    environment:
      - REDIS_URL=redis://redis:6379
    depends_on:
      - redis
    restart: unless-stopped

  redis:
    image: redis:7-alpine
    command: redis-server --appendonly yes --maxmemory 100mb --maxmemory-policy allkeys-lru
    volumes:
      - redis_data:/data
    restart: unless-stopped

volumes:
  redis_data:
Monitoring That Actually Helps at 3am
Forget fancy metrics. Here's what you need to know when shit hits the fan:
// Dead simple monitoring that actually helps
let stats = {
requests_allowed: 0,
requests_blocked: 0,
redis_errors: 0,
last_reset: Date.now()
};
// In your rate limiting middleware, after each decision:
if (result.allowed) stats.requests_allowed++;
else stats.requests_blocked++;
// ...and in the middleware's catch block when Redis blows up: stats.redis_errors++;
// Log every minute with actionable info
setInterval(() => {
const total = stats.requests_allowed + stats.requests_blocked;
const block_rate = total ? (stats.requests_blocked / total * 100).toFixed(1) : 0;
console.log(`RATE_LIMIT_STATS allowed=${stats.requests_allowed} blocked=${stats.requests_blocked} redis_errors=${stats.redis_errors} block_rate=${block_rate}%`);
// Alert thresholds that actually matter
if (block_rate > 20) console.error('HIGH_BLOCK_RATE - Possible attack or limits too strict');
if (stats.redis_errors > 0) console.error('REDIS_ERRORS - Rate limiting degraded');
// Reset counters
stats = { requests_allowed: 0, requests_blocked: 0, redis_errors: 0, last_reset: Date.now() };
}, 60000);
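If you'd rather not grep logs during an incident, you can also hang those counters off an endpoint and curl it. A tiny sketch (the /stats path is my own choice; keep it off the public internet):

// Sketch: expose the counters over HTTP for quick checks during an incident.
app.get('/stats', (req, res) => {
  res.json({ ...stats, uptime_seconds: Math.round(process.uptime()) });
});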
The Things That Will Break (And How to Fix Them)
Redis Memory Explosion: Set maxmemory and maxmemory-policy allkeys-lru or your rate limiter will eat all available RAM.
Clock Skew in Kubernetes: When your pods have different times, sliding window algorithms break. Use NTP sync or stick with fixed windows.
Load Balancer Fuckery: Your LB might strip or modify the X-Forwarded-For header. Test with real traffic, not just curl.
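If hand-parsing that header behind your particular LB gets flaky, another option is to let Express resolve the client address itself via its trust proxy setting. A sketch assuming exactly one trusted proxy in front of the app:

// Sketch: let Express parse X-Forwarded-For instead of doing it by hand.
app.set('trust proxy', 1); // assumes exactly one reverse proxy / load balancer

// With that set, req.ip is derived from X-Forwarded-For, so the middleware can use:
const clientId = req.ip || 'unknown';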
Redis Sentinel Failover: During Redis master failover, expect 10-30 seconds where rate limiting is inconsistent. Plan for it.
The key is failing gracefully. Better to allow some extra traffic than to block all your users because Redis hiccupped.