Quick Start
Create a new Express.js LLM project:
mkdir express-llm-api && cd express-llm-api
npm init -y
npm install express axios dotenv cors helmet
npm install -D @types/express typescript nodemon ts-node

# Packages used later in this guide (install as you reach each section)
npm install openai @anthropic-ai/sdk @google/generative-ai socket.io ioredis rate-limiter-flexible jsonwebtoken joi multer uuid
1. Project Setup & Structure
TypeScript Configuration
// tsconfig.json
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "commonjs",
    "lib": ["ES2022"],
    "outDir": "./dist",
    "rootDir": "./src",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "resolveJsonModule": true,
    "moduleResolution": "node",
    "allowSyntheticDefaultImports": true,
    "experimentalDecorators": true,
    "emitDecoratorMetadata": true
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules", "dist"]
}
Environment Configuration
# .env
NODE_ENV=development
PORT=3000

# LLM API Keys
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GOOGLE_API_KEY=...

# Security
JWT_SECRET=your-secret-key
API_KEY=your-api-key

# Redis (for rate limiting and sessions)
REDIS_URL=redis://localhost:6379

# Monitoring
SENTRY_DSN=https://...
Project Structure
src/
├── config/
│   ├── database.ts              # Database configuration
│   ├── llm.ts                   # LLM provider configuration
│   └── redis.ts                 # Redis client setup
├── controllers/
│   ├── chat.controller.ts
│   ├── completion.controller.ts
│   └── stream.controller.ts
├── middleware/
│   ├── auth.middleware.ts
│   ├── error.middleware.ts
│   ├── rateLimiter.middleware.ts
│   └── validation.middleware.ts
├── services/
│   ├── llm.service.ts           # LLM provider abstraction
│   ├── cache.service.ts         # Response caching
│   └── session.service.ts
├── routes/
│   ├── chat.routes.ts
│   └── index.ts
├── types/
│   └── index.ts
├── utils/
│   ├── logger.ts
│   └── validators.ts
├── websocket/
│   └── chat.handler.ts
└── index.ts                     # Main server file
2. Basic LLM Endpoints
Main Server Setup
// src/index.ts
import express from 'express'
import cors from 'cors'
import helmet from 'helmet'
import dotenv from 'dotenv'
import { createServer } from 'http'
import { Server } from 'socket.io'
import { errorHandler } from './middleware/error.middleware'
import { logger } from './utils/logger'
import routes from './routes'
import { initWebSocket } from './websocket/chat.handler'

dotenv.config()

const app = express()
const httpServer = createServer(app)
const io = new Server(httpServer, {
  cors: {
    origin: process.env.CLIENT_URL || 'http://localhost:3001',
    credentials: true,
  },
})

// Middleware
app.use(helmet())
app.use(cors())
app.use(express.json({ limit: '10mb' }))
app.use(express.urlencoded({ extended: true }))

// Routes
app.use('/api', routes)

// Error handling
app.use(errorHandler)

// Initialize WebSocket
initWebSocket(io)

const PORT = process.env.PORT || 3000
httpServer.listen(PORT, () => {
  logger.info(`Server running on port ${PORT}`)
})
LLM Service Abstraction
// src/services/llm.service.ts
import OpenAI from 'openai'
import Anthropic from '@anthropic-ai/sdk'
import { GoogleGenerativeAI } from '@google/generative-ai'

export interface LLMProvider {
  name: string
  generateCompletion(prompt: string, options?: any): Promise<string>
  generateStream(prompt: string, options?: any): AsyncGenerator<string>
}

export class OpenAIProvider implements LLMProvider {
  name = 'openai'
  private client: OpenAI

  constructor() {
    this.client = new OpenAI({
      apiKey: process.env.OPENAI_API_KEY,
    })
  }

  async generateCompletion(prompt: string, options: any = {}): Promise<string> {
    const completion = await this.client.chat.completions.create({
      model: options.model || 'gpt-3.5-turbo',
      messages: [{ role: 'user', content: prompt }],
      // ?? rather than || so an explicit temperature of 0 is respected
      temperature: options.temperature ?? 0.7,
      max_tokens: options.maxTokens ?? 1000,
    })
    return completion.choices[0]?.message?.content || ''
  }

  async *generateStream(prompt: string, options: any = {}): AsyncGenerator<string> {
    const stream = await this.client.chat.completions.create({
      model: options.model || 'gpt-3.5-turbo',
      messages: [{ role: 'user', content: prompt }],
      temperature: options.temperature ?? 0.7,
      max_tokens: options.maxTokens ?? 1000,
      stream: true,
    })

    for await (const chunk of stream) {
      yield chunk.choices[0]?.delta?.content || ''
    }
  }
}

export class AnthropicProvider implements LLMProvider {
  name = 'anthropic'
  private client: Anthropic

  constructor() {
    this.client = new Anthropic({
      apiKey: process.env.ANTHROPIC_API_KEY,
    })
  }

  async generateCompletion(prompt: string, options: any = {}): Promise<string> {
    const message = await this.client.messages.create({
      model: options.model || 'claude-3-opus-20240229',
      max_tokens: options.maxTokens ?? 1000,
      messages: [{ role: 'user', content: prompt }],
    })
    return message.content[0].type === 'text' ? message.content[0].text : ''
  }

  async *generateStream(prompt: string, options: any = {}): AsyncGenerator<string> {
    const stream = await this.client.messages.create({
      model: options.model || 'claude-3-opus-20240229',
      max_tokens: options.maxTokens ?? 1000,
      messages: [{ role: 'user', content: prompt }],
      stream: true,
    })

    for await (const event of stream) {
      if (event.type === 'content_block_delta') {
        yield event.delta.text
      }
    }
  }
}

// Provider factory
export function createLLMProvider(provider: string): LLMProvider {
  switch (provider) {
    case 'openai':
      return new OpenAIProvider()
    case 'anthropic':
      return new AnthropicProvider()
    default:
      throw new Error(`Unsupported provider: ${provider}`)
  }
}
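The GoogleGenerativeAI import above is never used, and the factory only handles OpenAI and Anthropic even though the validation schema in section 7 accepts provider: 'google'. A minimal Gemini provider could fill that gap; the sketch below is written against the @google/generative-ai SDK, and the default model name is an assumption, not something prescribed by this guide.

// Hypothetical Google provider (same file) — default model name is an assumption
export class GoogleProvider implements LLMProvider {
  name = 'google'
  private client: GoogleGenerativeAI

  constructor() {
    this.client = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY!)
  }

  async generateCompletion(prompt: string, options: any = {}): Promise<string> {
    const model = this.client.getGenerativeModel({
      model: options.model || 'gemini-1.5-flash', // assumed default
    })
    const result = await model.generateContent(prompt)
    return result.response.text()
  }

  async *generateStream(prompt: string, options: any = {}): AsyncGenerator<string> {
    const model = this.client.getGenerativeModel({
      model: options.model || 'gemini-1.5-flash',
    })
    const result = await model.generateContentStream(prompt)
    for await (const chunk of result.stream) {
      yield chunk.text()
    }
  }
}

With a provider like this in place, the factory would also need a case 'google': return new GoogleProvider() branch.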
Chat Controller
// src/controllers/chat.controller.ts
import { Request, Response, NextFunction } from 'express'
import { createLLMProvider } from '../services/llm.service'
import { logger } from '../utils/logger'

export async function chatCompletion(
  req: Request,
  res: Response,
  next: NextFunction
) {
  try {
    const { message, provider = 'openai', options = {} } = req.body

    const llmProvider = createLLMProvider(provider)
    const response = await llmProvider.generateCompletion(message, options)

    res.json({
      success: true,
      response,
      provider: llmProvider.name,
      timestamp: new Date().toISOString(),
    })
  } catch (error) {
    logger.error('Chat completion error:', error)
    next(error)
  }
}
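To exercise the endpoint from a test script, a request might look like the sketch below. The /api/chat/completion path and the x-api-key header are assumptions based on how the routes and API-key auth are wired up later in this guide; adjust them to your actual mount points.

// Hypothetical client call against the completion endpoint (Node 18+ global fetch)
async function askLLM(message: string): Promise<string> {
  const res = await fetch('http://localhost:3000/api/chat/completion', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': process.env.API_KEY ?? 'your-api-key', // assumed API-key auth
    },
    body: JSON.stringify({
      message,
      provider: 'openai',
      options: { temperature: 0.7, maxTokens: 500 },
    }),
  })

  if (!res.ok) {
    throw new Error(`Request failed: ${res.status}`)
  }
  const data = await res.json()
  return data.response
}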
3. Authentication & Rate Limiting
JWT Authentication Middleware
// src/middleware/auth.middleware.ts
import { Request, Response, NextFunction } from 'express'
import jwt from 'jsonwebtoken'

interface AuthRequest extends Request {
  user?: { id: string; email: string }
}

export function authenticateToken(
  req: AuthRequest,
  res: Response,
  next: NextFunction
) {
  const authHeader = req.headers['authorization']
  const token = authHeader && authHeader.split(' ')[1]

  if (!token) {
    return res.status(401).json({ error: 'Access token required' })
  }

  jwt.verify(token, process.env.JWT_SECRET!, (err, user) => {
    if (err) {
      return res.status(403).json({ error: 'Invalid token' })
    }
    req.user = user as { id: string; email: string }
    next()
  })
}

// API Key authentication for simpler use cases
export function authenticateApiKey(
  req: Request,
  res: Response,
  next: NextFunction
) {
  const apiKey = req.headers['x-api-key']

  if (!apiKey || apiKey !== process.env.API_KEY) {
    return res.status(401).json({ error: 'Invalid API key' })
  }

  next()
}
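The middleware verifies tokens but the guide does not show where they come from. A minimal, hypothetical token-issuing route that signs a JWT with the same JWT_SECRET might look like this; the credential check is a stub and would be replaced with a real user lookup.

// Hypothetical login route (sketch only — swap the stubbed lookup for real user auth)
import { Router } from 'express'
import jwt from 'jsonwebtoken'

const authRouter = Router()

authRouter.post('/login', (req, res) => {
  const { email, password } = req.body

  // Stub: validate credentials against your user store here
  if (!email || !password) {
    return res.status(400).json({ error: 'Email and password required' })
  }

  const user = { id: 'user-123', email } // placeholder user record
  const token = jwt.sign(user, process.env.JWT_SECRET!, { expiresIn: '1h' })

  res.json({ token })
})

export default authRouter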
Rate Limiting with Redis
// src/middleware/rateLimiter.middleware.ts
import { Request, Response, NextFunction } from 'express'
import { RateLimiterRedis } from 'rate-limiter-flexible'
import Redis from 'ioredis'

const redis = new Redis(process.env.REDIS_URL!)

const rateLimiter = new RateLimiterRedis({
  storeClient: redis,
  keyPrefix: 'rl',
  points: 100,       // Number of requests
  duration: 60,      // Per minute
  blockDuration: 60, // Block for 1 minute
})

export async function rateLimitMiddleware(
  req: Request,
  res: Response,
  next: NextFunction
) {
  try {
    const key = req.ip || 'unknown'
    await rateLimiter.consume(key)
    next()
  } catch (rejRes: any) {
    res.status(429).json({
      error: 'Too many requests',
      retryAfter: Math.round(rejRes.msBeforeNext / 1000) || 60,
    })
  }
}

// Token-based rate limiting for LLM usage
export class TokenRateLimiter {
  private limits = new Map<string, { tokens: number; reset: Date }>()
  private readonly maxTokens = 100000 // per day

  async checkTokenLimit(userId: string, tokensRequested: number): Promise<boolean> {
    const userLimit = this.limits.get(userId)
    const now = new Date()

    if (!userLimit || userLimit.reset < now) {
      this.limits.set(userId, {
        tokens: this.maxTokens - tokensRequested,
        reset: new Date(now.getTime() + 24 * 60 * 60 * 1000),
      })
      return true
    }

    if (userLimit.tokens >= tokensRequested) {
      userLimit.tokens -= tokensRequested
      return true
    }

    return false
  }
}
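TokenRateLimiter is defined above but never wired in. One way to use it, if placed in the same rateLimiter.middleware.ts file so the existing imports and class are in scope, is a middleware that runs after authentication. The characters-to-tokens estimate below is a rough assumption, not a real tokenizer.

// Hypothetical middleware using TokenRateLimiter (rough token estimate)
const tokenLimiter = new TokenRateLimiter()

export async function tokenLimitMiddleware(
  req: Request & { user?: { id: string } },
  res: Response,
  next: NextFunction
) {
  const userId = req.user?.id || req.ip || 'anonymous'

  // Very rough budget: ~4 characters per token for the prompt,
  // plus the requested completion size.
  const promptTokens = Math.ceil((req.body?.message?.length || 0) / 4)
  const requested = promptTokens + (req.body?.options?.maxTokens || 1000)

  const allowed = await tokenLimiter.checkTokenLimit(userId, requested)
  if (!allowed) {
    return res.status(429).json({ error: 'Daily token budget exceeded' })
  }
  next()
}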
4. Streaming with Server-Sent Events
SSE Streaming Controller
// src/controllers/stream.controller.ts
import { Request, Response } from 'express'
import { createLLMProvider } from '../services/llm.service'

export async function streamChat(req: Request, res: Response) {
  // Set SSE headers
  res.setHeader('Content-Type', 'text/event-stream')
  res.setHeader('Cache-Control', 'no-cache')
  res.setHeader('Connection', 'keep-alive')
  res.setHeader('X-Accel-Buffering', 'no') // Disable nginx buffering

  const { message, provider = 'openai', options = {} } = req.body

  try {
    const llmProvider = createLLMProvider(provider)
    const stream = llmProvider.generateStream(message, options)

    // Send initial connection message (SSE events are delimited by a blank line)
    res.write('event: connected\ndata: {"status": "connected"}\n\n')

    // Stream tokens
    for await (const chunk of stream) {
      const data = JSON.stringify({ content: chunk })
      res.write(`data: ${data}\n\n`)

      // Check if client disconnected
      if (res.writableEnded) {
        break
      }
    }

    // Send completion message
    res.write('event: complete\ndata: {"status": "complete"}\n\n')
    res.end()
  } catch (error: any) {
    const errorData = JSON.stringify({
      error: error.message || 'Stream error occurred'
    })
    res.write(`event: error\ndata: ${errorData}\n\n`)
    res.end()
  }
}

// Client-side example
// Note: EventSource only issues GET requests, so this pattern fits a GET
// streaming endpoint; for the POST /api/stream route above, use the
// fetch-based reader shown after this block.
const clientExample = `
const eventSource = new EventSource('/api/stream');

eventSource.addEventListener('message', (event) => {
  const data = JSON.parse(event.data);
  console.log('Received:', data.content);
});

eventSource.addEventListener('complete', () => {
  eventSource.close();
});

eventSource.addEventListener('error', (event) => {
  console.error('Stream error:', event);
  eventSource.close();
});
`
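Because EventSource cannot send a request body, a fetch-based reader is the usual way to consume a POST streaming endpoint. The sketch below parses the raw SSE lines by hand and makes no attempt at full spec compliance (for example, it does not handle JSON payloads split across chunk boundaries).

// Hypothetical fetch-based consumer for the POST /api/stream endpoint
async function streamChatClient(message: string, onToken: (t: string) => void) {
  const res = await fetch('/api/stream', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ message }),
  })
  if (!res.body) throw new Error('No response body')

  const reader = res.body.getReader()
  const decoder = new TextDecoder()

  while (true) {
    const { done, value } = await reader.read()
    if (done) break

    // Minimal SSE parsing: pick out "data: {...}" lines
    for (const line of decoder.decode(value, { stream: true }).split('\n')) {
      if (!line.startsWith('data: ')) continue
      try {
        const payload = JSON.parse(line.slice(6))
        if (payload.content) onToken(payload.content)
      } catch {
        // ignore non-JSON payloads such as status messages
      }
    }
  }
}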
Performance Tip
For better streaming performance, disable proxy buffering in nginx with proxy_buffering off; and have the application set the X-Accel-Buffering: no response header, as the stream controller above does. Both are shown in the nginx configuration in section 10.
5. WebSocket Real-time Chat
WebSocket Handler
// src/websocket/chat.handler.ts
import { Server, Socket } from 'socket.io'
import { createLLMProvider } from '../services/llm.service'
import { logger } from '../utils/logger'
import { SessionService } from '../services/session.service'

interface ChatMessage {
  id: string
  content: string
  role: 'user' | 'assistant'
  timestamp: Date
}

export function initWebSocket(io: Server) {
  const sessionService = new SessionService()

  io.on('connection', (socket: Socket) => {
    logger.info(`WebSocket connected: ${socket.id}`)

    // Join user to their room
    socket.on('join', (userId: string) => {
      socket.join(`user:${userId}`)
      socket.emit('joined', { room: `user:${userId}` })
    })

    // Handle chat messages
    socket.on('chat:message', async (data: {
      message: string
      provider?: string
      sessionId: string
    }) => {
      try {
        const { message, provider = 'openai', sessionId } = data

        // Add user message to session
        const userMessage: ChatMessage = {
          id: generateId(),
          content: message,
          role: 'user',
          timestamp: new Date(),
        }
        await sessionService.addMessage(sessionId, userMessage)
        socket.emit('chat:message', userMessage)

        // Generate AI response
        const llmProvider = createLLMProvider(provider)
        const stream = llmProvider.generateStream(message)

        const assistantMessage: ChatMessage = {
          id: generateId(),
          content: '',
          role: 'assistant',
          timestamp: new Date(),
        }

        // Send initial assistant message
        socket.emit('chat:start', assistantMessage)

        // Stream response
        for await (const chunk of stream) {
          assistantMessage.content += chunk
          socket.emit('chat:token', {
            messageId: assistantMessage.id,
            token: chunk,
          })
        }

        // Save complete message
        await sessionService.addMessage(sessionId, assistantMessage)
        socket.emit('chat:complete', assistantMessage)
      } catch (error: any) {
        logger.error('WebSocket chat error:', error)
        socket.emit('chat:error', {
          error: error.message || 'Chat error occurred',
        })
      }
    })

    // Handle typing indicators
    socket.on('typing:start', (data) => {
      socket.to(`user:${data.userId}`).emit('typing:start', {
        userId: socket.id,
      })
    })

    socket.on('typing:stop', (data) => {
      socket.to(`user:${data.userId}`).emit('typing:stop', {
        userId: socket.id,
      })
    })

    socket.on('disconnect', () => {
      logger.info(`WebSocket disconnected: ${socket.id}`)
    })
  })
}

function generateId(): string {
  return Math.random().toString(36).substring(2, 15)
}
Client-Side WebSocket Integration
// Client example
import { io, Socket } from 'socket.io-client'

class ChatClient {
  private socket: Socket
  private messages: ChatMessage[] = []

  constructor(serverUrl: string) {
    this.socket = io(serverUrl, {
      transports: ['websocket'],
      auth: {
        token: localStorage.getItem('authToken'),
      },
    })

    this.setupListeners()
  }

  private setupListeners() {
    this.socket.on('connect', () => {
      console.log('Connected to chat server')
      this.socket.emit('join', getUserId())
    })

    this.socket.on('chat:message', (message: ChatMessage) => {
      this.messages.push(message)
      this.onMessage(message)
    })

    this.socket.on('chat:token', ({ messageId, token }) => {
      const message = this.messages.find(m => m.id === messageId)
      if (message) {
        message.content += token
        this.onUpdate(message)
      }
    })

    this.socket.on('chat:error', ({ error }) => {
      console.error('Chat error:', error)
      this.onError(error)
    })
  }

  sendMessage(content: string) {
    this.socket.emit('chat:message', {
      message: content,
      provider: 'openai',
      sessionId: getSessionId(),
    })
  }

  // Override these methods
  onMessage(message: ChatMessage) {}
  onUpdate(message: ChatMessage) {}
  onError(error: string) {}
}
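Wiring the client into a UI is then a matter of overriding the hooks. A minimal usage sketch — renderMessage and updateMessage are placeholder UI functions, not part of this guide:

// Hypothetical usage of ChatClient
const chat = new ChatClient('http://localhost:3000')

chat.onMessage = (message) => renderMessage(message)
chat.onUpdate = (message) => updateMessage(message) // re-render as tokens stream in
chat.onError = (error) => console.error('Chat failed:', error)

chat.sendMessage('Summarize the last meeting notes.')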
6. Error Handling & Retry Logic
Comprehensive Error Handler
// src/middleware/error.middleware.ts
import { Request, Response, NextFunction } from 'express'
import { logger } from '../utils/logger'

export class AppError extends Error {
  statusCode: number
  isOperational: boolean

  constructor(message: string, statusCode: number) {
    super(message)
    this.statusCode = statusCode
    this.isOperational = true
    Error.captureStackTrace(this, this.constructor)
  }
}

export function errorHandler(
  err: Error | AppError,
  req: Request,
  res: Response,
  next: NextFunction
) {
  let error = err as AppError

  // Default to 500 server error
  if (!error.statusCode) {
    error.statusCode = 500
  }

  // Log error
  logger.error({
    error: {
      message: error.message,
      stack: error.stack,
      statusCode: error.statusCode,
    },
    request: {
      method: req.method,
      url: req.url,
      ip: req.ip,
      userAgent: req.get('user-agent'),
    },
  })

  // Send error response
  res.status(error.statusCode).json({
    success: false,
    error: {
      message: error.message,
      ...(process.env.NODE_ENV === 'development' && { stack: error.stack }),
    },
  })
}
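Controllers can then signal expected failures by throwing AppError with an explicit status code; anything else falls through to the handler as a 500. A small hypothetical guard, just to show the pattern:

// Hypothetical controller guard using AppError
import { AppError } from '../middleware/error.middleware'

export function assertPromptLength(prompt: string, maxChars = 4000): void {
  if (!prompt || prompt.trim().length === 0) {
    throw new AppError('Prompt must not be empty', 400)
  }
  if (prompt.length > maxChars) {
    throw new AppError(`Prompt exceeds ${maxChars} characters`, 413)
  }
}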
Retry Logic with Exponential Backoff
// src/utils/retry.ts
import { LLMProvider } from '../services/llm.service'
import { logger } from './logger'

interface RetryOptions {
  maxRetries?: number
  initialDelay?: number
  maxDelay?: number
  factor?: number
  onRetry?: (error: Error, attempt: number) => void
}

export async function withRetry<T>(
  fn: () => Promise<T>,
  options: RetryOptions = {}
): Promise<T> {
  const {
    maxRetries = 3,
    initialDelay = 1000,
    maxDelay = 10000,
    factor = 2,
    onRetry,
  } = options

  let lastError: Error | undefined

  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      return await fn()
    } catch (error: any) {
      lastError = error

      // Don't retry on non-retryable errors
      if (
        error.statusCode === 401 || // Unauthorized
        error.statusCode === 403 || // Forbidden
        error.statusCode === 404    // Not found
      ) {
        throw error
      }

      if (attempt < maxRetries - 1) {
        const delay = Math.min(
          initialDelay * Math.pow(factor, attempt),
          maxDelay
        )

        if (onRetry) {
          onRetry(error, attempt + 1)
        }

        await new Promise(resolve => setTimeout(resolve, delay))
      }
    }
  }

  throw lastError!
}

// Usage in LLM service
export async function callLLMWithRetry(
  provider: LLMProvider,
  prompt: string,
  options: any
): Promise<string> {
  return withRetry(
    () => provider.generateCompletion(prompt, options),
    {
      maxRetries: 3,
      onRetry: (error, attempt) => {
        logger.warn(`LLM call failed, retry ${attempt}/3`, { error })
      },
    }
  )
}
7. Request Validation
Validation Middleware with Joi
// src/middleware/validation.middleware.ts
import { Request, Response, NextFunction } from 'express'
import Joi from 'joi'

export function validate(schema: Joi.ObjectSchema) {
  return (req: Request, res: Response, next: NextFunction) => {
    const { error, value } = schema.validate(req.body, {
      abortEarly: false,
      stripUnknown: true,
    })

    if (error) {
      const errors = error.details.map(detail => ({
        field: detail.path.join('.'),
        message: detail.message,
      }))

      return res.status(400).json({
        success: false,
        error: 'Validation failed',
        details: errors,
      })
    }

    // Replace body with validated value
    req.body = value
    next()
  }
}

// Validation schemas
export const chatSchema = Joi.object({
  message: Joi.string().min(1).max(4000).required(),
  provider: Joi.string().valid('openai', 'anthropic', 'google').optional(),
  options: Joi.object({
    model: Joi.string().optional(),
    temperature: Joi.number().min(0).max(2).optional(),
    maxTokens: Joi.number().positive().max(4000).optional(),
  }).optional(),
})

export const streamSchema = chatSchema.keys({
  stream: Joi.boolean().default(true),
})

// Input sanitization
export function sanitizeInput(input: string): string {
  return input
    .trim()
    .replace(/[<>]/g, '')   // Remove potential HTML
    .replace(/\u0000/g, '') // Remove null bytes
    .substring(0, 4000)     // Enforce max length
}
Route Implementation with Validation
// src/routes/chat.routes.ts
import { Router } from 'express'
import { chatCompletion } from '../controllers/chat.controller'
import { streamChat } from '../controllers/stream.controller'
import { authenticateApiKey } from '../middleware/auth.middleware'
import { rateLimitMiddleware } from '../middleware/rateLimiter.middleware'
import { validate, chatSchema, streamSchema } from '../middleware/validation.middleware'

const router = Router()

// Apply common middleware
router.use(authenticateApiKey)
router.use(rateLimitMiddleware)

// Chat completion endpoint
router.post(
  '/completion',
  validate(chatSchema),
  chatCompletion
)

// Streaming endpoint
router.post(
  '/stream',
  validate(streamSchema),
  streamChat
)

export default router
8. Session Management
Redis-Based Session Service
// src/services/session.service.ts
import Redis from 'ioredis'
import { v4 as uuidv4 } from 'uuid'
import { ChatMessage } from '../types' // assumes ChatMessage is exported from src/types/index.ts

interface Session {
  id: string
  userId: string
  messages: ChatMessage[]
  metadata: Record<string, any>
  createdAt: Date
  updatedAt: Date
}

export class SessionService {
  private redis: Redis
  private ttl = 24 * 60 * 60 // 24 hours

  constructor() {
    this.redis = new Redis(process.env.REDIS_URL!)
  }

  async createSession(userId: string): Promise<Session> {
    const session: Session = {
      id: uuidv4(),
      userId,
      messages: [],
      metadata: {},
      createdAt: new Date(),
      updatedAt: new Date(),
    }

    await this.saveSession(session)
    return session
  }

  async getSession(sessionId: string): Promise<Session | null> {
    const data = await this.redis.get(`session:${sessionId}`)
    if (!data) return null
    return JSON.parse(data)
  }

  async saveSession(session: Session): Promise<void> {
    session.updatedAt = new Date()
    await this.redis.setex(
      `session:${session.id}`,
      this.ttl,
      JSON.stringify(session)
    )
  }

  async addMessage(sessionId: string, message: ChatMessage): Promise<void> {
    const session = await this.getSession(sessionId)
    if (!session) {
      throw new Error('Session not found')
    }

    session.messages.push(message)
    await this.saveSession(session)
  }

  async getUserSessions(userId: string): Promise<Session[]> {
    // Note: KEYS scans the whole keyspace and blocks Redis; prefer SCAN or a
    // per-user index in production.
    const keys = await this.redis.keys(`session:*`)
    const sessions: Session[] = []

    for (const key of keys) {
      const data = await this.redis.get(key)
      if (data) {
        const session = JSON.parse(data)
        if (session.userId === userId) {
          sessions.push(session)
        }
      }
    }

    return sessions.sort((a, b) =>
      new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime()
    )
  }

  // Context management for conversations
  async getConversationContext(sessionId: string, limit = 10): Promise<ChatMessage[]> {
    const session = await this.getSession(sessionId)
    if (!session) return []

    // Return last N messages for context
    return session.messages.slice(-limit)
  }
}
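getConversationContext makes it easy to send recent history along with a new prompt. The sketch below shows one way a handler could assemble that context; flattening history into a single prompt string is a simplification for illustration — the provider SDKs also accept structured message arrays, which is usually preferable.

// Hypothetical helper: include recent conversation history in the prompt
import { SessionService } from '../services/session.service'
import { createLLMProvider } from '../services/llm.service'

const sessions = new SessionService()

export async function replyWithContext(sessionId: string, userMessage: string): Promise<string> {
  const history = await sessions.getConversationContext(sessionId, 10)

  // Flatten the last few turns into a transcript (simplified prompt format)
  const transcript = history
    .map(m => `${m.role}: ${m.content}`)
    .join('\n')

  const prompt = `${transcript}\nuser: ${userMessage}\nassistant:`
  const provider = createLLMProvider('openai')
  return provider.generateCompletion(prompt)
}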
Session Middleware
// src/middleware/session.middleware.ts
import { Request, Response, NextFunction } from 'express'
import { SessionService } from '../services/session.service'

interface SessionRequest extends Request {
  session?: any
  sessionId?: string
  user?: { id: string; email: string } // populated by the auth middleware
}

const sessionService = new SessionService()

export async function sessionMiddleware(
  req: SessionRequest,
  res: Response,
  next: NextFunction
) {
  const sessionId = req.headers['x-session-id'] as string

  if (sessionId) {
    const session = await sessionService.getSession(sessionId)
    if (session) {
      req.session = session
      req.sessionId = sessionId
    }
  }

  // Create new session if needed
  if (!req.session && req.user) {
    const session = await sessionService.createSession(req.user.id)
    req.session = session
    req.sessionId = session.id
    res.setHeader('X-Session-Id', session.id)
  }

  next()
}
9. File Uploads for Multimodal Requests
Multer Configuration
// src/config/multer.ts
import multer from 'multer'
import path from 'path'
import { v4 as uuidv4 } from 'uuid'

const storage = multer.diskStorage({
  destination: (req, file, cb) => {
    cb(null, 'uploads/')
  },
  filename: (req, file, cb) => {
    const uniqueName = `${uuidv4()}${path.extname(file.originalname)}`
    cb(null, uniqueName)
  },
})

const fileFilter = (req: any, file: any, cb: any) => {
  const allowedTypes = ['image/jpeg', 'image/png', 'image/gif', 'image/webp']
  if (allowedTypes.includes(file.mimetype)) {
    cb(null, true)
  } else {
    cb(new Error('Invalid file type. Only JPEG, PNG, GIF, and WebP are allowed.'))
  }
}

export const upload = multer({
  storage,
  fileFilter,
  limits: {
    fileSize: 10 * 1024 * 1024, // 10MB
  },
})
Multimodal Endpoint
// src/controllers/multimodal.controller.ts
import { Request, Response, NextFunction } from 'express'
import fs from 'fs/promises'
import OpenAI from 'openai'
import { AppError } from '../middleware/error.middleware'

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
})

export async function analyzeImage(
  req: Request,
  res: Response,
  next: NextFunction
) {
  try {
    if (!req.file) {
      throw new AppError('No image file provided', 400)
    }

    const { prompt = 'What is in this image?' } = req.body

    // Read image and convert to base64
    const imageBuffer = await fs.readFile(req.file.path)
    const base64Image = imageBuffer.toString('base64')

    // Call vision model
    const response = await openai.chat.completions.create({
      model: 'gpt-4-vision-preview',
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: prompt },
            {
              type: 'image_url',
              image_url: {
                url: `data:image/jpeg;base64,${base64Image}`,
              },
            },
          ],
        },
      ],
      max_tokens: 300,
    })

    // Clean up uploaded file
    await fs.unlink(req.file.path)

    res.json({
      success: true,
      analysis: response.choices[0]?.message?.content,
      imageInfo: {
        originalName: req.file.originalname,
        size: req.file.size,
        mimetype: req.file.mimetype,
      },
    })
  } catch (error) {
    // Clean up file on error
    if (req.file) {
      await fs.unlink(req.file.path).catch(() => {})
    }
    next(error)
  }
}

// Route setup (e.g. in src/routes/chat.routes.ts, where router, authenticateApiKey
// and upload are already imported)
router.post(
  '/analyze-image',
  authenticateApiKey,
  upload.single('image'),
  analyzeImage
)
10. Production Deployment
PM2 Configuration
// ecosystem.config.js
module.exports = {
  apps: [{
    name: 'llm-api',
    script: './dist/index.js',
    instances: 'max',
    exec_mode: 'cluster',
    env: {
      NODE_ENV: 'production',
      PORT: 3000,
    },
    error_file: './logs/pm2-error.log',
    out_file: './logs/pm2-out.log',
    log_date_format: 'YYYY-MM-DD HH:mm:ss Z',
    max_memory_restart: '1G',
  }],
}
Nginx Configuration
# /etc/nginx/sites-available/llm-api
upstream llm_api {
    server localhost:3000;
    keepalive 64;
}

server {
    listen 80;
    server_name api.example.com;

    # Redirect to HTTPS
    return 301 https://$server_name$request_uri;
}

server {
    listen 443 ssl http2;
    server_name api.example.com;

    ssl_certificate /etc/letsencrypt/live/api.example.com/fullchain.pem;
    ssl_certificate_key /etc/letsencrypt/live/api.example.com/privkey.pem;

    # Security headers
    add_header X-Frame-Options "SAMEORIGIN" always;
    add_header X-Content-Type-Options "nosniff" always;
    add_header X-XSS-Protection "1; mode=block" always;

    location / {
        proxy_pass http://llm_api;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_cache_bypass $http_upgrade;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Timeouts
        proxy_connect_timeout 60s;
        proxy_send_timeout 60s;
        proxy_read_timeout 60s;
    }

    # WebSocket support
    location /socket.io/ {
        proxy_pass http://llm_api;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
    }

    # SSE endpoint - disable buffering
    location /api/stream {
        proxy_pass http://llm_api;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Disable buffering for SSE (the app also sends the X-Accel-Buffering: no
        # response header, which disables buffering per response)
        proxy_buffering off;
        proxy_cache off;
    }
}
Docker Configuration
# Dockerfile
FROM node:18-alpine AS builder

WORKDIR /app

# Copy package files
COPY package*.json ./
COPY tsconfig.json ./

# Install dependencies
RUN npm ci

# Copy source code
COPY src ./src

# Build application
RUN npm run build

# Production image
FROM node:18-alpine

WORKDIR /app

# Install dumb-init for proper signal handling
RUN apk add --no-cache dumb-init

# Copy package files
COPY package*.json ./

# Install production dependencies only
RUN npm ci --production && npm cache clean --force

# Copy built application
COPY --from=builder /app/dist ./dist

# Create non-root user
RUN addgroup -g 1001 -S nodejs && adduser -S nodejs -u 1001

# Create uploads directory
RUN mkdir -p uploads && chown -R nodejs:nodejs uploads

USER nodejs

EXPOSE 3000

# Use dumb-init to handle signals properly
ENTRYPOINT ["dumb-init", "--"]
CMD ["node", "dist/index.js"]
Monitoring Setup
// src/utils/monitoring.ts
import { Request, Response, NextFunction, Router } from 'express'
import * as Sentry from '@sentry/node'
import { ProfilingIntegration } from '@sentry/profiling-node'
import promClient from 'prom-client'

// Initialize Sentry
Sentry.init({
  dsn: process.env.SENTRY_DSN,
  integrations: [
    new ProfilingIntegration(),
  ],
  tracesSampleRate: 0.1,
  profilesSampleRate: 0.1,
})

// Prometheus metrics
const httpRequestDuration = new promClient.Histogram({
  name: 'http_request_duration_seconds',
  help: 'Duration of HTTP requests in seconds',
  labelNames: ['method', 'route', 'status_code'],
})

const llmRequestDuration = new promClient.Histogram({
  name: 'llm_request_duration_seconds',
  help: 'Duration of LLM API requests in seconds',
  labelNames: ['provider', 'model'],
})

const llmTokenUsage = new promClient.Counter({
  name: 'llm_token_usage_total',
  help: 'Total number of tokens used',
  labelNames: ['provider', 'model'],
})

// Middleware for metrics
export function metricsMiddleware(req: Request, res: Response, next: NextFunction) {
  const start = Date.now()

  res.on('finish', () => {
    const duration = (Date.now() - start) / 1000
    httpRequestDuration
      .labels(req.method, req.route?.path || req.path, res.statusCode.toString())
      .observe(duration)
  })

  next()
}

// Metrics endpoint (mount this router in src/routes/index.ts)
export const metricsRouter = Router()

metricsRouter.get('/metrics', async (req, res) => {
  res.set('Content-Type', promClient.register.contentType)
  // register.metrics() returns a Promise in recent prom-client versions
  res.end(await promClient.register.metrics())
})
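llmRequestDuration and llmTokenUsage are declared above but never recorded. A hedged sketch of how they could be used, appended to the same monitoring.ts file so the metric objects are in scope; the ~4-characters-per-token accounting is an assumption to keep the example self-contained — swap in real usage numbers if the provider SDK exposes them.

// Hypothetical instrumentation around a completion call (same file as the metrics above)
export async function timedCompletion(
  provider: { name: string; generateCompletion: (p: string, o?: any) => Promise<string> },
  prompt: string,
  options: { model?: string } = {}
): Promise<string> {
  const model = options.model || 'default'
  const end = llmRequestDuration.labels(provider.name, model).startTimer()
  try {
    const result = await provider.generateCompletion(prompt, options)
    // Rough token accounting (~4 chars per token); replace with real usage data when available
    llmTokenUsage.labels(provider.name, model).inc(Math.ceil((prompt.length + result.length) / 4))
    return result
  } finally {
    end() // observes the elapsed time on llmRequestDuration
  }
}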
✓ Production Checklist
- ☐ Environment variables configured securely
- ☐ SSL/TLS certificates installed
- ☐ Rate limiting configured
- ☐ Error monitoring (Sentry) set up
- ☐ Metrics collection (Prometheus) configured
- ☐ Log aggregation implemented
- ☐ Health check endpoints created (a minimal readiness sketch follows this list)
- ☐ Backup strategy for Redis data
- ☐ Auto-scaling configured
- ☐ Security headers implemented
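For the health-check item, a liveness endpoint is shown in the complete example below; a readiness probe that also verifies the Redis connection is a common addition. A minimal sketch — the mount point and the dedicated Redis connection here are assumptions:

// Hypothetical readiness endpoint (e.g. mounted alongside the other routes)
import { Router } from 'express'
import Redis from 'ioredis'

const healthRouter = Router()
const redis = new Redis(process.env.REDIS_URL!)

healthRouter.get('/ready', async (req, res) => {
  try {
    await redis.ping() // fails fast if Redis is unreachable
    res.json({ status: 'ready' })
  } catch {
    res.status(503).json({ status: 'degraded', reason: 'redis unavailable' })
  }
})

export default healthRouter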
Complete Example Application
Here's a complete example that combines all the concepts:
// Complete server setup with all features
import express from 'express'
import cors from 'cors'
import helmet from 'helmet'
import compression from 'compression'
import { createServer } from 'http'
import { Server } from 'socket.io'
import Redis from 'ioredis'
import session from 'express-session'
import RedisStore from 'connect-redis'
import * as Sentry from '@sentry/node'
import { config } from './config'
import { logger } from './utils/logger'
import { errorHandler } from './middleware/error.middleware'
import { metricsMiddleware } from './utils/monitoring'
import routes from './routes'
import { initWebSocket } from './websocket/chat.handler'

// Initialize services
const app = express()
const httpServer = createServer(app)
const io = new Server(httpServer, {
  cors: { origin: config.clientUrl },
})
const redis = new Redis(config.redisUrl)

// Sentry initialization
Sentry.init({ dsn: config.sentryDsn })

// Global middleware
app.use(Sentry.Handlers.requestHandler())
app.use(helmet())
app.use(cors({ origin: config.clientUrl, credentials: true }))
app.use(compression())
app.use(express.json({ limit: '10mb' }))
app.use(metricsMiddleware)

// Session middleware
app.use(session({
  store: new RedisStore({ client: redis }),
  secret: config.sessionSecret,
  resave: false,
  saveUninitialized: false,
  cookie: {
    secure: config.isProduction,
    httpOnly: true,
    maxAge: 24 * 60 * 60 * 1000, // 24 hours
  },
}))

// Health check
app.get('/health', (req, res) => {
  res.json({ status: 'healthy', timestamp: new Date().toISOString() })
})

// API routes
app.use('/api', routes)

// Error handling
app.use(Sentry.Handlers.errorHandler())
app.use(errorHandler)

// Initialize WebSocket
initWebSocket(io)

// Graceful shutdown
process.on('SIGTERM', async () => {
  logger.info('SIGTERM received, shutting down gracefully')
  httpServer.close(() => {
    logger.info('HTTP server closed')
  })
  await redis.quit()
  process.exit(0)
})

// Start server
const PORT = config.port || 3000
httpServer.listen(PORT, () => {
  logger.info(`Server running on port ${PORT} in ${config.nodeEnv} mode`)
})