Quick Start
Create a new Express.js LLM project:
mkdir express-llm-api && cd express-llm-api
npm init -y
npm install express axios dotenv cors helmet
npm install -D @types/express typescript nodemon ts-node
# Additional runtime packages used in later sections
npm install openai @anthropic-ai/sdk @google/generative-ai socket.io ioredis rate-limiter-flexible jsonwebtoken joi multer uuid
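The dev dependencies (typescript, ts-node, nodemon) suggest npm scripts along these lines; the exact script names are an assumption, since the original does not show package.json:
// package.json (scripts excerpt, sketch)
{
  "scripts": {
    "dev": "nodemon --exec ts-node src/index.ts",
    "build": "tsc",
    "start": "node dist/index.js"
  }
}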
1. Project Setup & Structure
TypeScript Configuration
// tsconfig.json
{
"compilerOptions": {
"target": "ES2022",
"module": "commonjs",
"lib": ["ES2022"],
"outDir": "./dist",
"rootDir": "./src",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true,
"moduleResolution": "node",
"allowSyntheticDefaultImports": true,
"experimentalDecorators": true,
"emitDecoratorMetadata": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist"]
}
Environment Configuration
# .env
NODE_ENV=development
PORT=3000
# LLM API Keys
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GOOGLE_API_KEY=...
# Security
JWT_SECRET=your-secret-key
API_KEY=your-api-key
# CORS origin for the web client
CLIENT_URL=http://localhost:3001
# Redis (for rate limiting and sessions)
REDIS_URL=redis://localhost:6379
# Monitoring
SENTRY_DSN=https://...
Project Structure
src/
├── config/
│   ├── database.ts              # Database configuration
│   ├── llm.ts                   # LLM provider configuration
│   └── redis.ts                 # Redis client setup
├── controllers/
│   ├── chat.controller.ts
│   ├── completion.controller.ts
│   └── stream.controller.ts
├── middleware/
│   ├── auth.middleware.ts
│   ├── error.middleware.ts
│   ├── rateLimiter.middleware.ts
│   └── validation.middleware.ts
├── services/
│   ├── llm.service.ts           # LLM provider abstraction
│   ├── cache.service.ts         # Response caching
│   └── session.service.ts
├── routes/
│   ├── chat.routes.ts
│   └── index.ts
├── types/
│   └── index.ts
├── utils/
│   ├── logger.ts
│   └── validators.ts
├── websocket/
│   └── chat.handler.ts
└── index.ts                     # Main server file
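Most snippets below import a shared logger from src/utils/logger.ts, which the original does not show. A minimal sketch using winston (the choice of library, and installing it with npm install winston, are assumptions):
// src/utils/logger.ts (sketch)
import winston from 'winston'

export const logger = winston.createLogger({
  level: process.env.LOG_LEVEL || 'info',
  format: winston.format.combine(
    winston.format.timestamp(),
    winston.format.errors({ stack: true }),
    winston.format.json()
  ),
  transports: [new winston.transports.Console()],
})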
2. Basic LLM Endpoints
Main Server Setup
// src/index.ts
import express from 'express'
import cors from 'cors'
import helmet from 'helmet'
import dotenv from 'dotenv'
import { createServer } from 'http'
import { Server } from 'socket.io'
import { errorHandler } from './middleware/error.middleware'
import { logger } from './utils/logger'
import routes from './routes'
import { initWebSocket } from './websocket/chat.handler'
dotenv.config()
const app = express()
const httpServer = createServer(app)
const io = new Server(httpServer, {
cors: {
origin: process.env.CLIENT_URL || 'http://localhost:3001',
credentials: true,
},
})
// Middleware
app.use(helmet())
app.use(cors())
app.use(express.json({ limit: '10mb' }))
app.use(express.urlencoded({ extended: true }))
// Routes
app.use('/api', routes)
// Error handling
app.use(errorHandler)
// Initialize WebSocket
initWebSocket(io)
const PORT = process.env.PORT || 3000
httpServer.listen(PORT, () => {
logger.info(`Server running on port ${PORT}`)
})
LLM Service Abstraction
// src/services/llm.service.ts
import OpenAI from 'openai'
import Anthropic from '@anthropic-ai/sdk'
import { GoogleGenerativeAI } from '@google/generative-ai'
export interface LLMProvider {
name: string
generateCompletion(prompt: string, options?: any): Promise<string>
generateStream(prompt: string, options?: any): AsyncGenerator<string>
}
export class OpenAIProvider implements LLMProvider {
name = 'openai'
private client: OpenAI
constructor() {
this.client = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
})
}
async generateCompletion(prompt: string, options: any = {}): Promise<string> {
const completion = await this.client.chat.completions.create({
model: options.model || 'gpt-3.5-turbo',
messages: [{ role: 'user', content: prompt }],
temperature: options.temperature || 0.7,
max_tokens: options.maxTokens || 1000,
})
return completion.choices[0]?.message?.content || ''
}
async *generateStream(prompt: string, options: any = {}): AsyncGenerator<string> {
const stream = await this.client.chat.completions.create({
model: options.model || 'gpt-3.5-turbo',
messages: [{ role: 'user', content: prompt }],
temperature: options.temperature || 0.7,
max_tokens: options.maxTokens || 1000,
stream: true,
})
for await (const chunk of stream) {
yield chunk.choices[0]?.delta?.content || ''
}
}
}
export class AnthropicProvider implements LLMProvider {
name = 'anthropic'
private client: Anthropic
constructor() {
this.client = new Anthropic({
apiKey: process.env.ANTHROPIC_API_KEY,
})
}
async generateCompletion(prompt: string, options: any = {}): Promise<string> {
const message = await this.client.messages.create({
model: options.model || 'claude-3-opus-20240229',
max_tokens: options.maxTokens || 1000,
messages: [{ role: 'user', content: prompt }],
})
return message.content[0].type === 'text' ? message.content[0].text : ''
}
async *generateStream(prompt: string, options: any = {}): AsyncGenerator<string> {
const stream = await this.client.messages.create({
model: options.model || 'claude-3-opus-20240229',
max_tokens: options.maxTokens || 1000,
messages: [{ role: 'user', content: prompt }],
stream: true,
})
for await (const event of stream) {
if (event.type === 'content_block_delta') {
yield event.delta.text
}
}
}
}
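// The GoogleGenerativeAI import above is otherwise unused; a provider for it
// could look like this (a sketch based on the @google/generative-ai SDK —
// the default model name and option mapping are assumptions). To enable it,
// add a 'google' case to createLLMProvider below.
export class GoogleProvider implements LLMProvider {
  name = 'google'
  private client: GoogleGenerativeAI
  constructor() {
    this.client = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY!)
  }
  private getModel(options: any = {}) {
    return this.client.getGenerativeModel({
      model: options.model || 'gemini-pro',
      generationConfig: {
        temperature: options.temperature ?? 0.7,
        maxOutputTokens: options.maxTokens || 1000,
      },
    })
  }
  async generateCompletion(prompt: string, options: any = {}): Promise<string> {
    const result = await this.getModel(options).generateContent(prompt)
    return result.response.text()
  }
  async *generateStream(prompt: string, options: any = {}): AsyncGenerator<string> {
    const result = await this.getModel(options).generateContentStream(prompt)
    for await (const chunk of result.stream) {
      yield chunk.text()
    }
  }
}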
// Provider factory
export function createLLMProvider(provider: string): LLMProvider {
switch (provider) {
case 'openai':
return new OpenAIProvider()
case 'anthropic':
return new AnthropicProvider()
default:
throw new Error(`Unsupported provider: ${provider}`)
}
}
Chat Controller
// src/controllers/chat.controller.ts
import { Request, Response, NextFunction } from 'express'
import { createLLMProvider } from '../services/llm.service'
import { logger } from '../utils/logger'
export async function chatCompletion(
req: Request,
res: Response,
next: NextFunction
) {
try {
const { message, provider = 'openai', options = {} } = req.body
const llmProvider = createLLMProvider(provider)
const response = await llmProvider.generateCompletion(message, options)
res.json({
success: true,
response,
provider: llmProvider.name,
timestamp: new Date().toISOString(),
})
} catch (error) {
logger.error('Chat completion error:', error)
next(error)
}
}
3. Authentication & Rate Limiting
JWT Authentication Middleware
// src/middleware/auth.middleware.ts
import { Request, Response, NextFunction } from 'express'
import jwt from 'jsonwebtoken'
interface AuthRequest extends Request {
user?: { id: string; email: string }
}
export function authenticateToken(
req: AuthRequest,
res: Response,
next: NextFunction
) {
const authHeader = req.headers['authorization']
const token = authHeader && authHeader.split(' ')[1]
if (!token) {
return res.status(401).json({ error: 'Access token required' })
}
jwt.verify(token, process.env.JWT_SECRET!, (err, user) => {
if (err) {
return res.status(403).json({ error: 'Invalid token' })
}
req.user = user as { id: string; email: string }
next()
})
}
// API Key authentication for simpler use cases
export function authenticateApiKey(
req: Request,
res: Response,
next: NextFunction
) {
const apiKey = req.headers['x-api-key']
if (!apiKey || apiKey !== process.env.API_KEY) {
return res.status(401).json({ error: 'Invalid API key' })
}
next()
}
Rate Limiting with Redis
// src/middleware/rateLimiter.middleware.ts
import { Request, Response, NextFunction } from 'express'
import { RateLimiterRedis } from 'rate-limiter-flexible'
import Redis from 'ioredis'
const redis = new Redis(process.env.REDIS_URL!)
const rateLimiter = new RateLimiterRedis({
storeClient: redis,
keyPrefix: 'rl',
points: 100, // Number of requests
duration: 60, // Per minute
blockDuration: 60, // Block for 1 minute
})
export async function rateLimitMiddleware(
req: Request,
res: Response,
next: NextFunction
) {
try {
const key = req.ip || 'unknown'
await rateLimiter.consume(key)
next()
} catch (rejRes: any) {
res.status(429).json({
error: 'Too many requests',
retryAfter: Math.round(rejRes.msBeforeNext / 1000) || 60,
})
}
}
// Token-based rate limiting for LLM usage (in-memory Map, so limits are per process
// and reset on restart; back this with Redis when running under PM2 cluster mode)
export class TokenRateLimiter {
private limits = new Map<string, { tokens: number; reset: Date }>()
private readonly maxTokens = 100000 // per day
async checkTokenLimit(userId: string, tokensRequested: number): Promise<boolean> {
const userLimit = this.limits.get(userId)
const now = new Date()
if (!userLimit || userLimit.reset < now) {
this.limits.set(userId, {
tokens: this.maxTokens - tokensRequested,
reset: new Date(now.getTime() + 24 * 60 * 60 * 1000),
})
return true
}
if (userLimit.tokens >= tokensRequested) {
userLimit.tokens -= tokensRequested
return true
}
return false
}
}
4. Streaming with Server-Sent Events
SSE Streaming Controller
// src/controllers/stream.controller.ts
import { Request, Response } from 'express'
import { createLLMProvider } from '../services/llm.service'
export async function streamChat(req: Request, res: Response) {
// Set SSE headers
res.setHeader('Content-Type', 'text/event-stream')
res.setHeader('Cache-Control', 'no-cache')
res.setHeader('Connection', 'keep-alive')
res.setHeader('X-Accel-Buffering', 'no') // Disable nginx buffering
const { message, provider = 'openai', options = {} } = req.body
try {
const llmProvider = createLLMProvider(provider)
const stream = llmProvider.generateStream(message, options)
// Send initial connection message
res.write('event: connected\ndata: {"status": "connected"}\n\n')
// Stream tokens
for await (const chunk of stream) {
const data = JSON.stringify({ content: chunk })
res.write(`data: ${data}\n\n`)
// Check if client disconnected
if (res.writableEnded) {
break
}
}
// Send completion message
res.write('event: complete\ndata: {"status": "complete"}\n\n')
res.end()
} catch (error: any) {
const errorData = JSON.stringify({
error: error.message || 'Stream error occurred'
})
res.write(`event: error\ndata: ${errorData}\n\n`)
res.end()
}
}
// Client-side example
const clientExample = `
const eventSource = new EventSource('/api/stream');
eventSource.addEventListener('message', (event) => {
const data = JSON.parse(event.data);
console.log('Received:', data.content);
});
eventSource.addEventListener('complete', () => {
eventSource.close();
});
eventSource.addEventListener('error', (event) => {
console.error('Stream error:', event);
eventSource.close();
});
`
Performance Tip
For better streaming performance, disable proxy buffering in nginx with proxy_buffering off; and set the X-Accel-Buffering: no response header. Also note that the browser EventSource API in the client example only issues GET requests, so a POST endpoint like /api/stream is usually consumed with fetch() and a stream reader instead, as sketched below.
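Because EventSource cannot send a POST body, a fetch-based reader is the usual alternative for this endpoint. A minimal sketch, assuming the /api/stream path and x-api-key header from the routes above (it also glosses over chunks that split an SSE line in half):
// Hypothetical fetch-based streaming client for the POST /api/stream endpoint
async function streamCompletion(message: string): Promise<void> {
  const response = await fetch('/api/stream', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': 'your-api-key',
    },
    body: JSON.stringify({ message }),
  })
  if (!response.ok || !response.body) {
    throw new Error(`Stream request failed: ${response.status}`)
  }
  const reader = response.body.getReader()
  const decoder = new TextDecoder()
  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    // Naive parse: each chunk holds one or more "data: {...}" SSE lines
    for (const line of decoder.decode(value).split('\n')) {
      if (line.startsWith('data: ')) {
        const payload = JSON.parse(line.slice(6))
        if (payload.content) console.log(payload.content)
      }
    }
  }
}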
5. WebSocket Real-time Chat
WebSocket Handler
// src/websocket/chat.handler.ts
import { Server, Socket } from 'socket.io'
import { createLLMProvider } from '../services/llm.service'
import { logger } from '../utils/logger'
import { SessionService } from '../services/session.service'
interface ChatMessage {
id: string
content: string
role: 'user' | 'assistant'
timestamp: Date
}
export function initWebSocket(io: Server) {
const sessionService = new SessionService()
io.on('connection', (socket: Socket) => {
logger.info(`WebSocket connected: ${socket.id}`)
// Join user to their room
socket.on('join', (userId: string) => {
socket.join(`user:${userId}`)
socket.emit('joined', { room: `user:${userId}` })
})
// Handle chat messages
socket.on('chat:message', async (data: {
message: string
provider?: string
sessionId: string
}) => {
try {
const { message, provider = 'openai', sessionId } = data
// Add user message to session
const userMessage: ChatMessage = {
id: generateId(),
content: message,
role: 'user',
timestamp: new Date(),
}
await sessionService.addMessage(sessionId, userMessage)
socket.emit('chat:message', userMessage)
// Generate AI response
const llmProvider = createLLMProvider(provider)
const stream = llmProvider.generateStream(message)
const assistantMessage: ChatMessage = {
id: generateId(),
content: '',
role: 'assistant',
timestamp: new Date(),
}
// Send initial assistant message
socket.emit('chat:start', assistantMessage)
// Stream response
for await (const chunk of stream) {
assistantMessage.content += chunk
socket.emit('chat:token', {
messageId: assistantMessage.id,
token: chunk
})
}
// Save complete message
await sessionService.addMessage(sessionId, assistantMessage)
socket.emit('chat:complete', assistantMessage)
} catch (error: any) {
logger.error('WebSocket chat error:', error)
socket.emit('chat:error', {
error: error.message || 'Chat error occurred'
})
}
})
// Handle typing indicators
socket.on('typing:start', (data) => {
socket.to(`user:${data.userId}`).emit('typing:start', {
userId: socket.id,
})
})
socket.on('typing:stop', (data) => {
socket.to(`user:${data.userId}`).emit('typing:stop', {
userId: socket.id,
})
})
socket.on('disconnect', () => {
logger.info(`WebSocket disconnected: ${socket.id}`)
})
})
}
function generateId(): string {
return Math.random().toString(36).substring(2, 15)
}
Client-Side WebSocket Integration
// Client example
import { io, Socket } from 'socket.io-client'
class ChatClient {
private socket: Socket
private messages: ChatMessage[] = []
constructor(serverUrl: string) {
this.socket = io(serverUrl, {
transports: ['websocket'],
auth: {
token: localStorage.getItem('authToken'),
},
})
this.setupListeners()
}
private setupListeners() {
this.socket.on('connect', () => {
console.log('Connected to chat server')
this.socket.emit('join', getUserId())
})
this.socket.on('chat:message', (message: ChatMessage) => {
this.messages.push(message)
this.onMessage(message)
})
this.socket.on('chat:token', ({ messageId, token }) => {
const message = this.messages.find(m => m.id === messageId)
if (message) {
message.content += token
this.onUpdate(message)
}
})
this.socket.on('chat:error', ({ error }) => {
console.error('Chat error:', error)
this.onError(error)
})
}
sendMessage(content: string) {
this.socket.emit('chat:message', {
message: content,
provider: 'openai',
sessionId: getSessionId(),
})
}
// Override these methods
onMessage(message: ChatMessage) {}
onUpdate(message: ChatMessage) {}
onError(error: string) {}
}
6. Error Handling & Retry Logic
Comprehensive Error Handler
// src/middleware/error.middleware.ts
import { Request, Response, NextFunction } from 'express'
import { logger } from '../utils/logger'
export class AppError extends Error {
statusCode: number
isOperational: boolean
constructor(message: string, statusCode: number) {
super(message)
this.statusCode = statusCode
this.isOperational = true
Error.captureStackTrace(this, this.constructor)
}
}
export function errorHandler(
err: Error | AppError,
req: Request,
res: Response,
next: NextFunction
) {
let error = err as AppError
// Default to 500 server error
if (!error.statusCode) {
error.statusCode = 500
}
// Log error
logger.error({
error: {
message: error.message,
stack: error.stack,
statusCode: error.statusCode,
},
request: {
method: req.method,
url: req.url,
ip: req.ip,
userAgent: req.get('user-agent'),
},
})
// Send error response
res.status(error.statusCode).json({
success: false,
error: {
message: error.message,
...(process.env.NODE_ENV === 'development' && {
stack: error.stack
}),
},
})
}
Retry Logic with Exponential Backoff
// src/utils/retry.ts
import { LLMProvider } from '../services/llm.service'
import { logger } from './logger'
interface RetryOptions {
maxRetries?: number
initialDelay?: number
maxDelay?: number
factor?: number
onRetry?: (error: Error, attempt: number) => void
}
export async function withRetry<T>(
fn: () => Promise<T>,
options: RetryOptions = {}
): Promise<T> {
const {
maxRetries = 3,
initialDelay = 1000,
maxDelay = 10000,
factor = 2,
onRetry,
} = options
let lastError: Error | undefined
for (let attempt = 0; attempt < maxRetries; attempt++) {
try {
return await fn()
} catch (error: any) {
lastError = error
// Don't retry on non-retryable errors
if (
error.statusCode === 401 || // Unauthorized
error.statusCode === 403 || // Forbidden
error.statusCode === 404 // Not found
) {
throw error
}
if (attempt < maxRetries - 1) {
const delay = Math.min(
initialDelay * Math.pow(factor, attempt),
maxDelay
)
if (onRetry) {
onRetry(error, attempt + 1)
}
await new Promise(resolve => setTimeout(resolve, delay))
}
}
}
throw lastError!
}
// Usage in LLM service
export async function callLLMWithRetry(
provider: LLMProvider,
prompt: string,
options: any
): Promise<string> {
return withRetry(
() => provider.generateCompletion(prompt, options),
{
maxRetries: 3,
onRetry: (error, attempt) => {
logger.warn(`LLM call failed, retry ${attempt}/3`, { error })
},
}
)
}
7. Request Validation
Validation Middleware with Joi
// src/middleware/validation.middleware.ts
import { Request, Response, NextFunction } from 'express'
import Joi from 'joi'
export function validate(schema: Joi.ObjectSchema) {
return (req: Request, res: Response, next: NextFunction) => {
const { error, value } = schema.validate(req.body, {
abortEarly: false,
stripUnknown: true,
})
if (error) {
const errors = error.details.map(detail => ({
field: detail.path.join('.'),
message: detail.message,
}))
return res.status(400).json({
success: false,
error: 'Validation failed',
details: errors,
})
}
// Replace body with validated value
req.body = value
next()
}
}
// Validation schemas
export const chatSchema = Joi.object({
message: Joi.string().min(1).max(4000).required(),
provider: Joi.string().valid('openai', 'anthropic', 'google').optional(),
options: Joi.object({
model: Joi.string().optional(),
temperature: Joi.number().min(0).max(2).optional(),
maxTokens: Joi.number().positive().max(4000).optional(),
}).optional(),
})
export const streamSchema = chatSchema.keys({
stream: Joi.boolean().default(true),
})
// Input sanitization
export function sanitizeInput(input: string): string {
return input
.trim()
.replace(/[<>]/g, '') // Remove potential HTML
.replace(/\u0000/g, '') // Remove null bytes
.substring(0, 4000) // Enforce max length
}
Route Implementation with Validation
// src/routes/chat.routes.ts
import { Router } from 'express'
import { chatCompletion } from '../controllers/chat.controller'
import { streamChat } from '../controllers/stream.controller'
import { authenticateApiKey } from '../middleware/auth.middleware'
import { rateLimitMiddleware } from '../middleware/rateLimiter.middleware'
import { validate, chatSchema, streamSchema } from '../middleware/validation.middleware'
const router = Router()
// Apply common middleware
router.use(authenticateApiKey)
router.use(rateLimitMiddleware)
// Chat completion endpoint
router.post(
'/completion',
validate(chatSchema),
chatCompletion
)
// Streaming endpoint
router.post(
'/stream',
validate(streamSchema),
streamChat
)
export default router
8. Session Management
Redis-Based Session Service
// src/services/session.service.ts
import Redis from 'ioredis'
import { v4 as uuidv4 } from 'uuid'
// ChatMessage is assumed to be exported from src/types (see project structure)
import { ChatMessage } from '../types'
interface Session {
id: string
userId: string
messages: ChatMessage[]
metadata: Record<string, any>
createdAt: Date
updatedAt: Date
}
export class SessionService {
private redis: Redis
private ttl = 24 * 60 * 60 // 24 hours
constructor() {
this.redis = new Redis(process.env.REDIS_URL!)
}
async createSession(userId: string): Promise<Session> {
const session: Session = {
id: uuidv4(),
userId,
messages: [],
metadata: {},
createdAt: new Date(),
updatedAt: new Date(),
}
await this.saveSession(session)
return session
}
async getSession(sessionId: string): Promise<Session | null> {
const data = await this.redis.get(`session:${sessionId}`)
if (!data) return null
return JSON.parse(data)
}
async saveSession(session: Session): Promise<void> {
session.updatedAt = new Date()
await this.redis.setex(
`session:${session.id}`,
this.ttl,
JSON.stringify(session)
)
}
async addMessage(sessionId: string, message: ChatMessage): Promise<void> {
const session = await this.getSession(sessionId)
if (!session) {
throw new Error('Session not found')
}
session.messages.push(message)
await this.saveSession(session)
}
async getUserSessions(userId: string): Promise<Session[]> {
const keys = await this.redis.keys(`session:*`)
const sessions: Session[] = []
for (const key of keys) {
const data = await this.redis.get(key)
if (data) {
const session = JSON.parse(data)
if (session.userId === userId) {
sessions.push(session)
}
}
}
return sessions.sort((a, b) =>
new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime()
)
}
// Context management for conversations
async getConversationContext(sessionId: string, limit = 10): Promise<ChatMessage[]> {
const session = await this.getSession(sessionId)
if (!session) return []
// Return last N messages for context
return session.messages.slice(-limit)
}
}
Session Middleware
// src/middleware/session.middleware.ts
import { Request, Response, NextFunction } from 'express'
import { SessionService } from '../services/session.service'
interface SessionRequest extends Request {
session?: any
sessionId?: string
user?: { id: string; email: string } // populated by the JWT auth middleware
}
const sessionService = new SessionService()
export async function sessionMiddleware(
req: SessionRequest,
res: Response,
next: NextFunction
) {
const sessionId = req.headers['x-session-id'] as string
if (sessionId) {
const session = await sessionService.getSession(sessionId)
if (session) {
req.session = session
req.sessionId = sessionId
}
}
// Create new session if needed
if (!req.session && req.user) {
const session = await sessionService.createSession(req.user.id)
req.session = session
req.sessionId = session.id
res.setHeader('X-Session-Id', session.id)
}
next()
}
9. File Uploads for Multimodal
Multer Configuration
// src/config/multer.ts
import multer from 'multer'
import path from 'path'
import { v4 as uuidv4 } from 'uuid'
const storage = multer.diskStorage({
destination: (req, file, cb) => {
cb(null, 'uploads/')
},
filename: (req, file, cb) => {
const uniqueName = `${uuidv4()}${path.extname(file.originalname)}`
cb(null, uniqueName)
},
})
const fileFilter = (req: any, file: any, cb: any) => {
const allowedTypes = ['image/jpeg', 'image/png', 'image/gif', 'image/webp']
if (allowedTypes.includes(file.mimetype)) {
cb(null, true)
} else {
cb(new Error('Invalid file type. Only JPEG, PNG, GIF, and WebP are allowed.'))
}
}
export const upload = multer({
storage,
fileFilter,
limits: {
fileSize: 10 * 1024 * 1024, // 10MB
},
})
Multimodal Endpoint
// src/controllers/multimodal.controller.ts
import { Request, Response, NextFunction } from 'express'
import fs from 'fs/promises'
import OpenAI from 'openai'
import { AppError } from '../middleware/error.middleware'
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
})
export async function analyzeImage(
req: Request,
res: Response,
next: NextFunction
) {
try {
if (!req.file) {
throw new AppError('No image file provided', 400)
}
const { prompt = 'What is in this image?' } = req.body
// Read image and convert to base64
const imageBuffer = await fs.readFile(req.file.path)
const base64Image = imageBuffer.toString('base64')
// Call vision model
const response = await openai.chat.completions.create({
model: 'gpt-4-vision-preview',
messages: [
{
role: 'user',
content: [
{ type: 'text', text: prompt },
{
type: 'image_url',
image_url: {
url: `data:image/jpeg;base64,${base64Image}`,
},
},
],
},
],
max_tokens: 300,
})
// Clean up uploaded file
await fs.unlink(req.file.path)
res.json({
success: true,
analysis: response.choices[0]?.message?.content,
imageInfo: {
originalName: req.file.originalname,
size: req.file.size,
mimetype: req.file.mimetype,
},
})
} catch (error) {
// Clean up file on error
if (req.file) {
await fs.unlink(req.file.path).catch(() => {})
}
next(error)
}
}
// Route setup (in a routes file where router, upload, and authenticateApiKey are in scope)
router.post(
'/analyze-image',
authenticateApiKey,
upload.single('image'),
analyzeImage
)
10. Production Deployment
PM2 Configuration
// ecosystem.config.js
module.exports = {
apps: [{
name: 'llm-api',
script: './dist/index.js',
instances: 'max',
exec_mode: 'cluster',
env: {
NODE_ENV: 'production',
PORT: 3000,
},
error_file: './logs/pm2-error.log',
out_file: './logs/pm2-out.log',
log_date_format: 'YYYY-MM-DD HH:mm:ss Z',
max_memory_restart: '1G',
}],
}
Nginx Configuration
# /etc/nginx/sites-available/llm-api
upstream llm_api {
server localhost:3000;
keepalive 64;
}
server {
listen 80;
server_name api.example.com;
# Redirect to HTTPS
return 301 https://$server_name$request_uri;
}
server {
listen 443 ssl http2;
server_name api.example.com;
ssl_certificate /etc/letsencrypt/live/api.example.com/fullchain.pem;
ssl_certificate_key /etc/letsencrypt/live/api.example.com/privkey.pem;
# Security headers
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
location / {
proxy_pass http://llm_api;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection 'upgrade';
proxy_set_header Host $host;
proxy_cache_bypass $http_upgrade;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Timeouts
proxy_connect_timeout 60s;
proxy_send_timeout 60s;
proxy_read_timeout 60s;
}
# WebSocket support
location /socket.io/ {
proxy_pass http://llm_api;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
# SSE endpoint - disable buffering
location /api/stream {
proxy_pass http://llm_api;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Disable buffering for SSE
proxy_buffering off;
proxy_cache off;
proxy_set_header X-Accel-Buffering no;
}
}Docker Configuration
# Dockerfile
FROM node:18-alpine AS builder
WORKDIR /app

# Copy package files
COPY package*.json ./
COPY tsconfig.json ./

# Install dependencies
RUN npm ci

# Copy source code
COPY src ./src

# Build application
RUN npm run build

# Production image
FROM node:18-alpine
WORKDIR /app

# Install dumb-init for proper signal handling
RUN apk add --no-cache dumb-init

# Copy package files
COPY package*.json ./

# Install production dependencies only
RUN npm ci --production && npm cache clean --force

# Copy built application
COPY --from=builder /app/dist ./dist

# Create non-root user
RUN addgroup -g 1001 -S nodejs && adduser -S nodejs -u 1001

# Create uploads directory
RUN mkdir -p uploads && chown -R nodejs:nodejs uploads

USER nodejs
EXPOSE 3000

# Use dumb-init to handle signals properly
ENTRYPOINT ["dumb-init", "--"]
CMD ["node", "dist/index.js"]
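Since the API expects Redis at REDIS_URL, a docker-compose file can run both containers together; this sketch is not in the original, and the service names are assumptions:
# docker-compose.yml (sketch)
version: '3.8'
services:
  api:
    build: .
    ports:
      - '3000:3000'
    environment:
      - NODE_ENV=production
      - REDIS_URL=redis://redis:6379
    env_file:
      - .env
    depends_on:
      - redis
  redis:
    image: redis:7-alpine
    volumes:
      - redis-data:/data
volumes:
  redis-data: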
Monitoring Setup
// src/utils/monitoring.ts
import { Request, Response, NextFunction } from 'express'
import * as Sentry from '@sentry/node'
import { ProfilingIntegration } from '@sentry/profiling-node'
import promClient from 'prom-client'
// Initialize Sentry
Sentry.init({
dsn: process.env.SENTRY_DSN,
integrations: [
new ProfilingIntegration(),
],
tracesSampleRate: 0.1,
profilesSampleRate: 0.1,
})
// Prometheus metrics
const httpRequestDuration = new promClient.Histogram({
name: 'http_request_duration_seconds',
help: 'Duration of HTTP requests in seconds',
labelNames: ['method', 'route', 'status_code'],
})
const llmRequestDuration = new promClient.Histogram({
name: 'llm_request_duration_seconds',
help: 'Duration of LLM API requests in seconds',
labelNames: ['provider', 'model'],
})
const llmTokenUsage = new promClient.Counter({
name: 'llm_token_usage_total',
help: 'Total number of tokens used',
labelNames: ['provider', 'model'],
})
// Middleware for metrics
export function metricsMiddleware(req: Request, res: Response, next: NextFunction) {
const start = Date.now()
res.on('finish', () => {
const duration = (Date.now() - start) / 1000
httpRequestDuration
.labels(req.method, req.route?.path || req.path, res.statusCode.toString())
.observe(duration)
})
next()
}
// Metrics endpoint (register this on your Express app or router)
router.get('/metrics', async (req, res) => {
res.set('Content-Type', promClient.register.contentType)
res.end(await promClient.register.metrics())
})
✓ Production Checklist
- ☐ Environment variables configured securely
- ☐ SSL/TLS certificates installed
- ☐ Rate limiting configured
- ☐ Error monitoring (Sentry) set up
- ☐ Metrics collection (Prometheus) configured
- ☐ Log aggregation implemented
- ☐ Health check endpoints created (see the readiness sketch after this list)
- ☐ Backup strategy for Redis data
- ☐ Auto-scaling configured
- ☐ Security headers implemented
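For the health-check item above, a readiness probe that also verifies the Redis connection might look like this; it is a sketch, and in practice you would reuse the shared ioredis client rather than creating a new one:
// Health and readiness endpoints (sketch)
import { Router, Request, Response } from 'express'
import Redis from 'ioredis'

const healthRouter = Router()
const redis = new Redis(process.env.REDIS_URL!)

// Liveness: the process is up
healthRouter.get('/health', (req: Request, res: Response) => {
  res.json({ status: 'healthy', timestamp: new Date().toISOString() })
})

// Readiness: dependencies (Redis) are reachable
healthRouter.get('/ready', async (req: Request, res: Response) => {
  try {
    await redis.ping()
    res.json({ status: 'ready' })
  } catch {
    res.status(503).json({ status: 'unavailable', reason: 'redis unreachable' })
  }
})

export default healthRouter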
Complete Example Application
Here's a complete example that combines all the concepts:
// Complete server setup with all features
import express from 'express'
import cors from 'cors'
import helmet from 'helmet'
import compression from 'compression'
import { createServer } from 'http'
import { Server } from 'socket.io'
import Redis from 'ioredis'
import session from 'express-session'
import RedisStore from 'connect-redis'
import * as Sentry from '@sentry/node'
import { config } from './config'
import { logger } from './utils/logger'
import { errorHandler } from './middleware/error.middleware'
import { metricsMiddleware } from './utils/monitoring'
import routes from './routes'
import { initWebSocket } from './websocket/chat.handler'
// Initialize services
const app = express()
const httpServer = createServer(app)
const io = new Server(httpServer, { cors: { origin: config.clientUrl } })
const redis = new Redis(config.redisUrl)
// Sentry initialization
Sentry.init({ dsn: config.sentryDsn })
// Global middleware
app.use(Sentry.Handlers.requestHandler())
app.use(helmet())
app.use(cors({ origin: config.clientUrl, credentials: true }))
app.use(compression())
app.use(express.json({ limit: '10mb' }))
app.use(metricsMiddleware)
// Session middleware
app.use(session({
store: new RedisStore({ client: redis }),
secret: config.sessionSecret,
resave: false,
saveUninitialized: false,
cookie: {
secure: config.isProduction,
httpOnly: true,
maxAge: 24 * 60 * 60 * 1000, // 24 hours
},
}))
// Health check
app.get('/health', (req, res) => {
res.json({ status: 'healthy', timestamp: new Date().toISOString() })
})
// API routes
app.use('/api', routes)
// Error handling
app.use(Sentry.Handlers.errorHandler())
app.use(errorHandler)
// Initialize WebSocket
initWebSocket(io)
// Graceful shutdown
process.on('SIGTERM', async () => {
logger.info('SIGTERM received, shutting down gracefully')
httpServer.close(() => {
logger.info('HTTP server closed')
})
await redis.quit()
process.exit(0)
})
// Start server
const PORT = config.port || 3000
httpServer.listen(PORT, () => {
logger.info(`Server running on port ${PORT} in ${config.nodeEnv} mode`)
})
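The complete example imports a config module that the original never shows; a minimal sketch, assuming these property names map onto the environment variables defined earlier:
// src/config/index.ts (sketch)
import dotenv from 'dotenv'

dotenv.config()

export const config = {
  nodeEnv: process.env.NODE_ENV || 'development',
  isProduction: process.env.NODE_ENV === 'production',
  port: Number(process.env.PORT) || 3000,
  clientUrl: process.env.CLIENT_URL || 'http://localhost:3001',
  redisUrl: process.env.REDIS_URL || 'redis://localhost:6379',
  sentryDsn: process.env.SENTRY_DSN,
  // SESSION_SECRET is not in the sample .env; falling back to JWT_SECRET is an assumption
  sessionSecret: process.env.SESSION_SECRET || process.env.JWT_SECRET!,
}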