Memory Leaks: The Silent Killers
Your app starts at 200MB of RAM. Six hours later it's at 1.8GB and climbing. V8's old-space limit is roughly 2GB by default on most 64-bit setups (newer Node versions scale it with available memory, and you can raise it with --max-old-space-size) - hit it and your app dies with FATAL ERROR: Reached heap limit Allocation failed - JavaScript heap out of memory. Use heap profiling tools and Chrome DevTools to track down leaks before they kill your production server.
Most Common Culprits:
Global variables that never get cleared:
// WRONG - creates a memory leak
const userCache = new Map();
app.get('/users/:id', async (req, res) => {
  const userData = await fetchUser(req.params.id); // fetchUser stands in for your own data-access call
  userCache.set(req.params.id, userData); // Never cleaned up - the Map grows forever
  res.json(userData);
});

// RIGHT - use a TTL cache that evicts stale entries
const NodeCache = require('node-cache');
const userCache = new NodeCache({ stdTTL: 600 }); // entries expire after 10 minutes
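For completeness, here's how the handler side might look with node-cache's get/set API (a sketch; fetchUser again stands in for your own data-access call):
app.get('/users/:id', async (req, res) => {
  const cached = userCache.get(req.params.id);
  if (cached) return res.json(cached);

  const userData = await fetchUser(req.params.id);
  userCache.set(req.params.id, userData); // evicted automatically after stdTTL
  res.json(userData);
});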
Event listeners that pile up:
// WRONG - attaches a listener to a long-lived socket on every request
const handleClose = () => console.log('connection closed');
app.get('/data', (req, res) => {
  req.socket.on('close', handleClose); // keep-alive reuses the socket, so listeners accumulate
});

// RIGHT - remove the listener once the response is done
app.get('/data', (req, res) => {
  req.socket.on('close', handleClose);
  res.on('finish', () => {
    req.socket.removeListener('close', handleClose);
  });
});
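Node actually tells you when this is happening: once more than 10 listeners are attached to a single emitter, it emits a MaxListenersExceededWarning. You can surface those warnings loudly instead of letting them scroll past in the logs (a minimal sketch):
process.on('warning', (warning) => {
  if (warning.name === 'MaxListenersExceededWarning') {
    console.error('Possible listener leak:', warning.message);
  }
});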
Debugging Memory Leaks - Tools That Actually Work:
**Clinic.js Doctor** - free, and flags memory growth and event-loop stalls under load:
npm install -g clinic
clinic doctor -- node app.js
# Let it run for 10+ minutes under load
# Kill with Ctrl+C and check the generated report
**0x Profiler** - shows exactly where CPU time goes (flame graphs):
npm install -g 0x
0x app.js
# Generate load, then kill the process with Ctrl+C
# Opens a flame graph of CPU hotspots - for what's actually sitting on the heap, take heap snapshots (see below)
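For the heap itself, Chrome DevTools heap snapshots are still the most direct tool. Attach with node --inspect and use the Memory tab, or trigger snapshots programmatically with the built-in v8 module and diff two of them in DevTools. A minimal sketch; using SIGUSR2 as the trigger is just one convenient convention:
// Write a heap snapshot on demand (Node 11.13+)
const v8 = require('v8');

process.on('SIGUSR2', () => {
  const file = v8.writeHeapSnapshot(); // writes a .heapsnapshot file in the cwd and returns its name
  console.log('Heap snapshot written to', file);
});
// Load two snapshots taken a few minutes apart into Chrome DevTools (Memory tab)
// and use the Comparison view to see which objects are accumulating.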
Production Memory Monitoring:
// Real production memory monitoring
const memoryUsage = () => {
  const usage = process.memoryUsage();
  console.log({
    rss: Math.round(usage.rss / 1024 / 1024) + 'MB',
    heapUsed: Math.round(usage.heapUsed / 1024 / 1024) + 'MB',
    heapTotal: Math.round(usage.heapTotal / 1024 / 1024) + 'MB',
    external: Math.round(usage.external / 1024 / 1024) + 'MB'
  });

  // Exit if heap usage > 1.5GB (before hitting the ~2GB limit).
  // This assumes a process manager (PM2, systemd, Kubernetes) restarts the process.
  if (usage.heapUsed > 1.5 * 1024 * 1024 * 1024) {
    console.error('Memory usage too high, restarting...');
    process.exit(1);
  }
};

setInterval(memoryUsage, 30000); // Check every 30 seconds
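Rather than hardcoding 1.5GB, you can derive the threshold from the heap limit V8 is actually running with (which varies by Node version and --max-old-space-size). A sketch using the built-in v8 module:
const v8 = require('v8');

// heap_size_limit is the real ceiling for this process, in bytes
const heapLimit = v8.getHeapStatistics().heap_size_limit;
const threshold = heapLimit * 0.75; // restart once 75% of it is used

setInterval(() => {
  const { heapUsed } = process.memoryUsage();
  if (heapUsed > threshold) {
    console.error(`Heap at ${Math.round(heapUsed / 1024 / 1024)}MB of ` +
      `${Math.round(heapLimit / 1024 / 1024)}MB limit, restarting...`);
    process.exit(1);
  }
}, 30000);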
Event Loop Blocking - When Everything Stops
The event loop is single-threaded. Block it and your entire API becomes unresponsive. I've seen 2-second API responses turn into 30-second timeouts because someone processed a CSV file synchronously.
Event Loop Lag Detection:
const { performance } = require('perf_hooks');

let previousNow = performance.now();
setInterval(() => {
  const now = performance.now();
  const lag = now - previousNow - 1000; // interval was scheduled for 1000ms
  if (lag > 100) {
    console.warn(`Event loop lag: ${lag.toFixed(2)}ms`);
    // Caveat: the blocking code has already finished by the time this runs,
    // so this trace points at the monitor, not at the culprit - use a CPU
    // profiler (0x, clinic flame) to find the actual blocker.
    console.trace('Event loop was blocked during the last interval');
  }
  previousNow = now;
}, 1000);
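Node also ships a built-in lag histogram, perf_hooks.monitorEventLoopDelay(), which samples delay continuously and gives you percentiles instead of a single interval measurement (values are in nanoseconds):
const { monitorEventLoopDelay } = require('perf_hooks');

const histogram = monitorEventLoopDelay({ resolution: 20 }); // sample every 20ms
histogram.enable();

setInterval(() => {
  console.log({
    mean: (histogram.mean / 1e6).toFixed(2) + 'ms',
    p99: (histogram.percentile(99) / 1e6).toFixed(2) + 'ms',
    max: (histogram.max / 1e6).toFixed(2) + 'ms'
  });
  histogram.reset(); // start a fresh window
}, 60000);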
Common Event Loop Blockers:
Synchronous file operations - Never use these in production:
// WRONG - blocks the event loop completely
const fs = require('fs');
const data = fs.readFileSync('./large-file.json'); // BLOCKS EVERYTHING

// RIGHT - async file operations (inside an async function, or with top-level await in ESM)
const fs = require('fs').promises;
const data = await fs.readFile('./large-file.json'); // Non-blocking
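For genuinely large files, even the async read pulls the whole file into memory at once. A stream keeps memory flat and yields to the event loop between chunks - a sketch, piping a large file out as a response:
const fs = require('fs');

app.get('/export', (req, res) => {
  const stream = fs.createReadStream('./large-file.json');
  stream.on('error', (err) => {
    console.error('Stream error:', err);
    res.destroy(err);
  });
  stream.pipe(res); // backpressure-aware, never holds the whole file in memory
});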
JSON.parse() on large payloads:
// WRONG - blocks on large JSON (assumes a raw/text body parser, so req.body is a string)
app.post('/upload', (req, res) => {
  const data = JSON.parse(req.body); // Can block for seconds on multi-MB payloads
});

// RIGHT - hand the parse off to a worker thread
// (this spawns a worker per request for clarity; reuse one worker or a pool
// in production - see the sketch after this example)
const { Worker } = require('worker_threads');

app.post('/upload', (req, res) => {
  const worker = new Worker(`
    const { parentPort } = require('worker_threads');
    parentPort.on('message', (data) => {
      try {
        const parsed = JSON.parse(data);
        parentPort.postMessage({ success: true, data: parsed });
      } catch (error) {
        parentPort.postMessage({ success: false, error: error.message });
      }
    });
  `, { eval: true });

  worker.postMessage(req.body); // req.body must be the raw JSON string
  worker.on('message', (result) => {
    res.json(result);
    worker.terminate();
  });
  worker.on('error', (err) => {
    res.status(500).json({ success: false, error: err.message });
    worker.terminate();
  });
});
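Spawning a worker per request pays the worker startup cost every time and puts no ceiling on concurrency; also note that the parsed object is structured-cloned back to the main thread, which is not free for very large results. One option is a single long-lived worker created at startup, with requests multiplexed over it by id. This is a sketch, not a library API: parse-worker.js is a hypothetical file containing the parentPort handler above, extended to echo back the id it was given. Dedicated pool libraries (piscina, for example) do the same job with less code.
const { Worker } = require('worker_threads');
const path = require('path');

// One worker for the whole process, created at startup
const parseWorker = new Worker(path.join(__dirname, 'parse-worker.js')); // hypothetical worker file

let nextId = 0;
const pending = new Map(); // id -> { resolve, reject }

parseWorker.on('message', ({ id, success, data, error }) => {
  const job = pending.get(id);
  if (!job) return;
  pending.delete(id);
  success ? job.resolve(data) : job.reject(new Error(error));
});

const parseInWorker = (raw) =>
  new Promise((resolve, reject) => {
    const id = nextId++;
    pending.set(id, { resolve, reject });
    parseWorker.postMessage({ id, raw });
  });

app.post('/upload', async (req, res) => {
  try {
    const data = await parseInWorker(req.body);
    res.json({ success: true, data });
  } catch (err) {
    res.status(400).json({ success: false, error: err.message });
  }
});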
Database Connection Hell
Database connections are where most production Node.js apps die. Connection pools run out, queries hang forever, and suddenly your API returns 500 errors.
Connection Pool Debugging:
// Most apps get this wrong: hardcoded credentials, the default pool size,
// and options copied from the old mysql driver that mysql2 ignores
const mysql = require('mysql2');

const pool = mysql.createPool({
  host: 'localhost',
  user: 'app',
  password: 'secret',
  database: 'production',
  connectionLimit: 10, // the default - often too low for production load
  acquireTimeout: 60000, // not a mysql2 option - ignored with a warning
  timeout: 60000, // same - these belong to the older mysql driver
  reconnect: true // same
});

// RIGHT - production-ready pool using options mysql2 actually supports
const pool = mysql.createPool({
  host: process.env.DB_HOST,
  user: process.env.DB_USER,
  password: process.env.DB_PASSWORD,
  database: process.env.DB_NAME,
  connectionLimit: 50, // size for your real concurrency, not the default 10
  waitForConnections: true, // queue requests instead of failing immediately
  queueLimit: 100, // fail fast once the wait queue backs up this far
  connectTimeout: 10000, // 10s to establish a connection
  multipleStatements: false // Security
});
// Monitor pool health
// Note: the underscore-prefixed properties are mysql2 internals - they work today
// but aren't part of the public API, so pin your driver version or prefer the
// event-based approach shown below.
setInterval(() => {
  console.log('DB Pool Stats:', {
    connectionLimit: pool.config.connectionLimit,
    openConnections: pool._allConnections.length, // free + in-use
    freeConnections: pool._freeConnections.length,
    queuedRequests: pool._connectionQueue.length // callers waiting for a connection
  });

  // Alert if the pool is running low
  const freeConnections = pool._freeConnections.length;
  const totalConnections = pool.config.connectionLimit;
  if (freeConnections / totalConnections < 0.2) {
    console.error('Database connection pool running low!');
  }
}, 30000);
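mysql2 mirrors the original mysql driver's pool events ('connection', 'acquire', 'release', 'enqueue'), so you can track pool pressure without touching internals. A minimal sketch, assuming those events behave as in the mysql driver:
let inUse = 0;  // connections currently checked out
let queued = 0; // times a request had to wait for a connection

pool.on('acquire', () => { inUse++; });
pool.on('release', () => { inUse--; });
pool.on('enqueue', () => {
  queued++;
  console.warn('Request is waiting for a free DB connection');
});

setInterval(() => {
  console.log('DB Pool Stats:', { inUse, queuedSinceLastReport: queued });
  queued = 0;
}, 30000);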
Query Timeout Hell:
// Production query with proper timeout handling
// Note: rejecting here only abandons the promise - the query keeps running on the
// MySQL server. Pair this with a server-side limit (see the MAX_EXECUTION_TIME
// example below) if runaway queries are a problem.
const executeQuery = (query, params) => {
  return new Promise((resolve, reject) => {
    const timeout = setTimeout(() => {
      reject(new Error('Query timeout'));
    }, 15000); // 15 second timeout

    pool.execute(query, params, (error, results) => {
      clearTimeout(timeout);
      if (error) {
        console.error('Query failed:', {
          query: query.substring(0, 100) + '...',
          error: error.message,
          code: error.code,
          errno: error.errno
        });
        reject(error);
      } else {
        resolve(results);
      }
    });
  });
};
// Usage with error handling
app.get('/users/:id', async (req, res) => {
  try {
    const results = await executeQuery(
      'SELECT * FROM users WHERE id = ?',
      [req.params.id]
    );
    if (results.length === 0) {
      return res.status(404).json({ error: 'User not found' });
    }
    res.json(results[0]);
  } catch (error) {
    console.error('Database error:', error);
    if (error.message === 'Query timeout') {
      res.status(504).json({ error: 'Database timeout' });
    } else {
      res.status(500).json({ error: 'Database error' });
    }
  }
});
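As noted above, the client-side timeout doesn't stop the query on the server. For SELECTs, MySQL 5.7+ can enforce the limit itself via the MAX_EXECUTION_TIME optimizer hint (in milliseconds), which makes a useful backstop. The same call from the handler above, with the hint added:
// MySQL aborts the statement server-side after 15s as well (SELECT only)
const results = await executeQuery(
  'SELECT /*+ MAX_EXECUTION_TIME(15000) */ * FROM users WHERE id = ?',
  [req.params.id]
);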
Process Crashes and Recovery
Your Node.js process will crash. The question is whether you'll recover gracefully or leave users staring at error pages.
Graceful Shutdown Handling:
// Production-ready graceful shutdown
let shuttingDown = false;

const gracefulShutdown = (signal) => {
  if (shuttingDown) return; // ignore repeated signals
  shuttingDown = true;
  console.log(`Received ${signal}, starting graceful shutdown...`);

  // Stop accepting new connections, let in-flight requests finish
  server.close((err) => {
    if (err) {
      console.error('Error during server close:', err);
      process.exit(1);
    }
    console.log('HTTP server closed');

    // Close database connections
    if (pool) {
      pool.end(() => {
        console.log('Database pool closed');
        process.exit(0);
      });
    } else {
      process.exit(0);
    }
  });

  // Force exit after 30 seconds; unref() so the timer itself
  // doesn't keep an otherwise-finished process alive
  setTimeout(() => {
    console.error('Forced exit after timeout');
    process.exit(1);
  }, 30000).unref();
};

// Handle shutdown signals
process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
process.on('SIGINT', () => gracefulShutdown('SIGINT'));

// Handle uncaught exceptions - the process is in an unknown state,
// so log, then exit and let the process manager restart it
process.on('uncaughtException', (error) => {
  console.error('Uncaught Exception:', error);
  // Log to external service (Sentry, LogRocket, etc.)
  if (typeof logError === 'function') {
    logError(error);
  }
  // Give the logger a moment to flush, then exit
  setTimeout(() => {
    process.exit(1);
  }, 1000);
});

// Handle unhandled promise rejections
process.on('unhandledRejection', (reason, promise) => {
  console.error('Unhandled Promise Rejection at:', promise, 'reason:', reason);
  // Log the error but don't exit immediately
  if (typeof logError === 'function') {
    logError(reason);
  }
});
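If you run behind a load balancer or orchestrator, it also helps to flip a readiness endpoint to failing as soon as shutdown starts, so traffic drains before the listener closes. A sketch, assuming your health-check probe hits a /healthz route:
app.get('/healthz', (req, res) => {
  if (shuttingDown) {
    return res.status(503).json({ status: 'shutting down' });
  }
  res.json({ status: 'ok' });
});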
The key to production Node.js troubleshooting is preparation. Set up monitoring before things break, because when your app crashes at 3AM, you need data immediately - not time to install debugging tools.