Architecture Overview
Building a production chatbot requires careful consideration of architecture patterns. Let's weigh the key trade-offs; a short sketch contrasting the two request shapes follows the lists[1]:
Stateless Architecture
- ✓ Simple to scale horizontally
- ✓ No server-side session state
- ✓ Easy to deploy and maintain
- ✗ Limited conversation context
- ✗ Client manages all state
Stateful Architecture
- ✓ Rich conversation context
- ✓ User personalization
- ✓ Complex multi-turn dialogs
- ✗ Requires session storage
- ✗ More complex scaling
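In practice, the decision shows up directly in the request contract. The sketch below is illustrative (these type names are not part of the implementation that follows): a stateless API forces the client to resend the full history on every call, while a stateful API only needs a session handle.

// Hypothetical request shapes contrasting the two patterns
interface StatelessChatRequest {
  message: string;
  // The client owns and resends the entire conversation every time
  history: { role: 'user' | 'assistant'; content: string }[];
}

interface StatefulChatRequest {
  message: string;
  sessionId: string; // The server resolves history from session storage
}

The implementation below takes the stateful route, using Redis for session storage.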
Complete Implementation
Let's build a full-featured chatbot from scratch with all production essentials[2]:
import express from 'express';
import { WebSocket, WebSocketServer } from 'ws';
import { OpenAI } from 'openai';
import Redis from 'ioredis';
import { v4 as uuidv4 } from 'uuid';
import jwt from 'jsonwebtoken';
import cors from 'cors';
// Initialize services
const app = express();
const redis = new Redis(process.env.REDIS_URL);
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
// Types
interface Message {
role: 'user' | 'assistant' | 'system';
content: string;
timestamp: Date;
}
interface Session {
id: string;
userId: string;
messages: Message[];
metadata: {
created: Date;
lastActive: Date;
messageCount: number;
};
}
interface ChatRequest {
message: string;
sessionId?: string;
stream?: boolean;
}
// Session Manager
class SessionManager {
private readonly TTL = 3600 * 24; // 24 hours
async createSession(userId: string): Promise<Session> {
const session: Session = {
id: uuidv4(),
userId,
messages: [],
metadata: {
created: new Date(),
lastActive: new Date(),
messageCount: 0
}
};
await this.saveSession(session);
return session;
}
async getSession(sessionId: string): Promise<Session | null> {
const data = await redis.get(`session:${sessionId}`);
if (!data) return null;
const session: Session = JSON.parse(data);
// Revive Date fields lost in JSON serialization, then update last active
session.metadata.created = new Date(session.metadata.created);
session.metadata.lastActive = new Date();
await this.saveSession(session);
return session;
}
async saveSession(session: Session): Promise<void> {
await redis.setex(
`session:${session.id}`,
this.TTL,
JSON.stringify(session)
);
}
async addMessage(sessionId: string, message: Message): Promise<void> {
const session = await this.getSession(sessionId);
if (!session) throw new Error('Session not found');
session.messages.push(message);
session.metadata.messageCount++;
session.metadata.lastActive = new Date();
// Implement sliding window to manage context size
const MAX_MESSAGES = 50;
if (session.messages.length > MAX_MESSAGES) {
// Keep the system message plus the most recent non-system messages
const systemMessage = session.messages.find(m => m.role === 'system');
const recentMessages = session.messages
.filter(m => m.role !== 'system')
.slice(-(MAX_MESSAGES - 1));
session.messages = systemMessage ? [systemMessage, ...recentMessages] : recentMessages;
}
await this.saveSession(session);
}
async getSummary(sessionId: string): Promise<string> {
const session = await this.getSession(sessionId);
if (!session || session.messages.length === 0) return '';
// Generate summary of older messages for context
const oldMessages = session.messages.slice(0, -10);
if (oldMessages.length < 5) return '';
const summaryPrompt = `Summarize this conversation concisely: ${
oldMessages.map(m => `${m.role}: ${m.content}`).join('\n')
}`;
const summary = await openai.chat.completions.create({
model: 'gpt-3.5-turbo',
messages: [{ role: 'user', content: summaryPrompt }],
max_tokens: 150
});
return summary.choices[0].message.content || '';
}
}
// Conversation Handler
class ConversationHandler {
constructor(private sessionManager: SessionManager) {}
async processMessage(
sessionId: string,
userMessage: string,
stream: boolean = false
): Promise<string | AsyncGenerator<string>> {
const session = await this.sessionManager.getSession(sessionId);
if (!session) throw new Error('Invalid session');
// Add user message
await this.sessionManager.addMessage(sessionId, {
role: 'user',
content: userMessage,
timestamp: new Date()
});
// Build context with optimization
const context = await this.buildContext(session);
if (stream) {
return this.streamResponse(sessionId, context);
} else {
return this.getResponse(sessionId, context);
}
}
private async buildContext(session: Session): Promise<any[]> {
const messages: any[] = [];
// Add system prompt
messages.push({
role: 'system',
content: process.env.SYSTEM_PROMPT ||
'You are a helpful assistant. Be concise and friendly.'
});
// Add conversation summary if needed
if (session.messages.length > 20) {
const summary = await this.sessionManager.getSummary(session.id);
if (summary) {
messages.push({
role: 'system',
content: `Previous conversation summary: ${summary}`
});
}
}
// Add recent messages
const recentMessages = session.messages.slice(-10);
messages.push(...recentMessages.map(m => ({
role: m.role,
content: m.content
})));
return messages;
}
private async getResponse(
sessionId: string,
messages: any[]
): Promise<string> {
try {
const completion = await openai.chat.completions.create({
model: 'gpt-3.5-turbo',
messages,
temperature: 0.7,
max_tokens: 500
});
const assistantMessage = completion.choices[0].message.content || '';
// Save assistant response
await this.sessionManager.addMessage(sessionId, {
role: 'assistant',
content: assistantMessage,
timestamp: new Date()
});
return assistantMessage;
} catch (error) {
console.error('LLM Error:', error);
return 'I apologize, but I encountered an error. Please try again.';
}
}
private async *streamResponse(
sessionId: string,
messages: any[]
): AsyncGenerator<string> {
try {
const stream = await openai.chat.completions.create({
model: 'gpt-3.5-turbo',
messages,
temperature: 0.7,
max_tokens: 500,
stream: true
});
let fullResponse = '';
for await (const chunk of stream) {
const content = chunk.choices[0]?.delta?.content || '';
if (content) {
fullResponse += content;
yield content;
}
}
// Save complete response
await this.sessionManager.addMessage(sessionId, {
role: 'assistant',
content: fullResponse,
timestamp: new Date()
});
} catch (error) {
console.error('Stream Error:', error);
yield 'Error: Unable to generate response';
}
}
}
// Initialize handlers
const sessionManager = new SessionManager();
const conversationHandler = new ConversationHandler(sessionManager);
// Middleware
app.use(express.json());
app.use(cors());
// Authentication middleware
const authenticateUser = (req: any, res: any, next: any) => {
const token = req.headers.authorization?.split(' ')[1];
if (!token) {
return res.status(401).json({ error: 'No token provided' });
}
try {
const decoded = jwt.verify(token, process.env.JWT_SECRET!) as { userId: string };
req.userId = decoded.userId;
next();
} catch (error) {
return res.status(401).json({ error: 'Invalid token' });
}
};
// REST Endpoints
app.post('/api/sessions', authenticateUser, async (req, res) => {
try {
const session = await sessionManager.createSession(req.userId);
res.json({ sessionId: session.id });
} catch (error) {
res.status(500).json({ error: 'Failed to create session' });
}
});
app.post('/api/chat', authenticateUser, async (req, res) => {
try {
const { message, sessionId } = req.body;
const response = await conversationHandler.processMessage(
sessionId,
message,
false
);
res.json({ response });
} catch (error) {
res.status(500).json({ error: 'Failed to process message' });
}
});
app.get('/api/sessions/:sessionId/history', authenticateUser, async (req, res) => {
try {
const session = await sessionManager.getSession(req.params.sessionId);
if (!session || session.userId !== req.userId) {
return res.status(404).json({ error: 'Session not found' });
}
res.json({ messages: session.messages });
} catch (error) {
res.status(500).json({ error: 'Failed to retrieve history' });
}
});
// WebSocket for streaming
const wss = new WebSocketServer({ port: 8080 });
wss.on('connection', (ws: WebSocket) => {
ws.on('message', async (data: string) => {
try {
const { type, sessionId, message, token } = JSON.parse(data);
// Verify token (jwt.verify throws on an invalid token)
jwt.verify(token, process.env.JWT_SECRET!);
if (type === 'chat') {
const stream = await conversationHandler.processMessage(
sessionId,
message,
true
) as AsyncGenerator<string>;
for await (const chunk of stream) {
ws.send(JSON.stringify({
type: 'stream',
content: chunk
}));
}
ws.send(JSON.stringify({ type: 'done' }));
}
} catch (error) {
ws.send(JSON.stringify({
type: 'error',
message: 'Failed to process request'
}));
}
});
});
// Error recovery middleware
app.use((error: any, req: any, res: any, next: any) => {
console.error('Unhandled error:', error);
res.status(500).json({
error: 'An unexpected error occurred',
message: process.env.NODE_ENV === 'development' ? error.message : undefined
});
});
// Start server
const PORT = process.env.PORT || 3000;
app.listen(PORT, () => {
console.log(`Server running on port ${PORT}`);
});
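To exercise the streaming path end to end, a minimal Node client could look like the sketch below. The session id and JWT are placeholders: you would obtain them from POST /api/sessions and your own auth flow.

import WebSocket from 'ws';

const ws = new WebSocket('ws://localhost:8080');

ws.on('open', () => {
  // Placeholders: obtain a real sessionId via POST /api/sessions
  ws.send(JSON.stringify({
    type: 'chat',
    sessionId: '<session-id>',
    message: 'Hello!',
    token: '<jwt>'
  }));
});

ws.on('message', (raw) => {
  const event = JSON.parse(raw.toString());
  if (event.type === 'stream') process.stdout.write(event.content);
  if (event.type === 'done') ws.close();
  if (event.type === 'error') console.error(event.message);
});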
Advanced Features
Let's implement sophisticated features that make your chatbot production-ready[3]:
class ContextOptimizer {
private readonly MAX_TOKENS = 4000;
private readonly SUMMARY_THRESHOLD = 2000;
async optimizeContext(
messages: Message[],
systemPrompt: string
): Promise<any[]> {
const tokenCount = this.estimateTokens(messages);
if (tokenCount < this.MAX_TOKENS) {
// Context fits, return as-is
return this.formatMessages(messages, systemPrompt);
}
// Need optimization
const strategy = this.selectStrategy(messages, tokenCount);
switch (strategy) {
case 'summarize':
return this.summarizeContext(messages, systemPrompt);
case 'sliding-window':
return this.slidingWindow(messages, systemPrompt);
case 'importance-based':
return this.importanceBasedSelection(messages, systemPrompt);
default:
return this.slidingWindow(messages, systemPrompt);
}
}
private async summarizeContext(
messages: Message[],
systemPrompt: string
): Promise<any[]> {
// Split messages into chunks
const oldMessages = messages.slice(0, -10);
const recentMessages = messages.slice(-10);
// Generate summary of old messages
const summary = await this.generateSummary(oldMessages);
return [
{ role: 'system', content: systemPrompt },
{ role: 'system', content: `Previous conversation summary: ${summary}` },
...recentMessages.map(m => ({
role: m.role,
content: m.content
}))
];
}
private slidingWindow(
messages: Message[],
systemPrompt: string
): any[] {
const result = [{ role: 'system', content: systemPrompt }];
let tokenCount = this.estimateTokens([result[0]]);
// Add messages from most recent, backwards
for (let i = messages.length - 1; i >= 0; i--) {
const messageTokens = this.estimateTokens([messages[i]]);
if (tokenCount + messageTokens > this.MAX_TOKENS) {
break;
}
result.splice(1, 0, {
role: messages[i].role,
content: messages[i].content
});
tokenCount += messageTokens;
}
return result;
}
private async importanceBasedSelection(
messages: Message[],
systemPrompt: string
): Promise<any[]> {
// Score messages by importance
const scoredMessages = await this.scoreMessages(messages);
// Sort by importance
scoredMessages.sort((a, b) => b.score - a.score);
const result = [{ role: 'system', content: systemPrompt }];
let tokenCount = this.estimateTokens([result[0]]);
// Add most important messages first
for (const { message } of scoredMessages) {
const messageTokens = this.estimateTokens([message]);
if (tokenCount + messageTokens > this.MAX_TOKENS) {
break;
}
result.push({
role: message.role,
content: message.content
});
tokenCount += messageTokens;
}
// Restore chronological order (the system prompt maps to -1 and stays first)
return result.sort((a, b) => {
const aIndex = messages.findIndex(m => m.content === a.content);
const bIndex = messages.findIndex(m => m.content === b.content);
return aIndex - bIndex;
});
}
private estimateTokens(messages: { content: string }[]): number {
// Rough estimation: 1 token ≈ 4 characters
return messages.reduce((total, msg) => {
return total + Math.ceil(msg.content.length / 4);
}, 0);
}
private async generateSummary(messages: Message[]): Promise<string> {
const conversation = messages
.map(m => `${m.role}: ${m.content}`)
.join('\n');
const summaryPrompt = `Summarize this conversation concisely, preserving key information and context:\n\n${conversation}`;
// Use a smaller model for efficiency
const response = await openai.chat.completions.create({
model: 'gpt-3.5-turbo',
messages: [{ role: 'user', content: summaryPrompt }],
max_tokens: 200,
temperature: 0.3
});
return response.choices[0].message.content || '';
}
private async scoreMessages(
messages: Message[]
): Promise<{ message: Message; score: number }[]> {
// Simple scoring based on recency and content
return messages.map((message, index) => {
let score = 0;
// Recency score
score += (index / messages.length) * 50;
// Length score (longer = more important)
score += Math.min(message.content.length / 100, 20);
// Question score (questions are important)
if (message.content.includes('?')) score += 10;
// Keywords score
const importantKeywords = ['important', 'remember', 'note', 'key'];
importantKeywords.forEach(keyword => {
if (message.content.toLowerCase().includes(keyword)) {
score += 15;
}
});
return { message, score };
});
}
private selectStrategy(
messages: Message[],
tokenCount: number
): 'summarize' | 'sliding-window' | 'importance-based' {
// Use summarization for very long conversations
if (tokenCount > this.MAX_TOKENS * 3) {
return 'summarize';
}
// Use importance-based for medium conversations
if (tokenCount > this.MAX_TOKENS * 1.5) {
return 'importance-based';
}
// Default to sliding window
return 'sliding-window';
}
private formatMessages(
messages: Message[],
systemPrompt: string
): any[] {
return [
{ role: 'system', content: systemPrompt },
...messages.map(m => ({
role: m.role,
content: m.content
}))
];
}
}
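To plug the optimizer into the server above, buildContext in ConversationHandler could delegate to it. The wiring below is a sketch of one way to integrate it, not code from the handler itself:

const optimizer = new ContextOptimizer();

// Hypothetical replacement for ConversationHandler.buildContext
async function buildOptimizedContext(session: Session): Promise<any[]> {
  const systemPrompt = process.env.SYSTEM_PROMPT ||
    'You are a helpful assistant. Be concise and friendly.';
  // optimizeContext picks summarize / sliding-window / importance-based
  // automatically from the estimated token count
  return optimizer.optimizeContext(session.messages, systemPrompt);
}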
interface PersonalityConfig {
name: string;
role: string;
traits: string[];
tone: 'professional' | 'friendly' | 'casual' | 'formal';
knowledge: string[];
constraints: string[];
}
class PersonalityManager {
private personalities: Map<string, PersonalityConfig> = new Map();
constructor() {
this.initializeDefaultPersonalities();
}
private initializeDefaultPersonalities() {
this.personalities.set('customer-support', {
name: 'Alex',
role: 'Customer Support Specialist',
traits: ['helpful', 'patient', 'empathetic', 'solution-oriented'],
tone: 'friendly',
knowledge: ['product features', 'troubleshooting', 'policies'],
constraints: [
'Always be polite and professional',
'Never make promises about refunds without manager approval',
'Escalate complex technical issues to engineering'
]
});
this.personalities.set('technical-assistant', {
name: 'Dev',
role: 'Technical Assistant',
traits: ['precise', 'knowledgeable', 'detail-oriented'],
tone: 'professional',
knowledge: ['programming', 'debugging', 'best practices'],
constraints: [
'Provide code examples when relevant',
'Cite sources for technical claims',
'Warn about deprecated practices'
]
});
this.personalities.set('creative-writer', {
name: 'Sage',
role: 'Creative Writing Assistant',
traits: ['imaginative', 'eloquent', 'inspiring'],
tone: 'casual',
knowledge: ['storytelling', 'grammar', 'literary devices'],
constraints: [
'Encourage creativity',
'Provide constructive feedback',
'Respect the writer\'s voice'
]
});
}
generateSystemPrompt(personalityKey: string): string {
const personality = this.personalities.get(personalityKey);
if (!personality) {
return 'You are a helpful assistant.';
}
return `You are ${personality.name}, a ${personality.role}.
Your personality traits: ${personality.traits.join(', ')}.
Communication style: ${personality.tone}
Your areas of expertise: ${personality.knowledge.join(', ')}.
Important guidelines:
${personality.constraints.map(c => `- ${c}`).join('\n')}
Maintain this personality consistently throughout the conversation.`;
}
adjustResponseForPersonality(
response: string,
personalityKey: string
): string {
const personality = this.personalities.get(personalityKey);
if (!personality) return response;
// Add personality-specific adjustments
switch (personality.tone) {
case 'friendly':
// Add warmth
if (!response.includes('!') && Math.random() > 0.7) {
response = response.replace(/\.$/, '!');
}
break;
case 'professional':
// Ensure formal language
response = response
.replace(/\byou're\b/g, 'you are')
.replace(/\bcan't\b/g, 'cannot')
.replace(/\bwon't\b/g, 'will not');
break;
case 'casual':
// Add conversational elements
if (Math.random() > 0.8) {
response = `So, ${response}`;
}
break;
}
return response;
}
validateResponseConsistency(
response: string,
personalityKey: string,
conversationHistory: Message[]
): {
isConsistent: boolean;
issues: string[];
suggestions: string[];
} {
const personality = this.personalities.get(personalityKey);
if (!personality) {
return { isConsistent: true, issues: [], suggestions: [] };
}
const issues: string[] = [];
const suggestions: string[] = [];
// Check tone consistency
if (personality.tone === 'professional' && response.includes('lol')) {
issues.push('Informal language detected');
suggestions.push('Remove casual expressions');
}
// Check constraint violations (a simple keyword heuristic)
personality.constraints.forEach(constraint => {
if (constraint.includes('Never') &&
response.toLowerCase().includes(constraint.split(' ')[2].toLowerCase())) {
issues.push(`Potential constraint violation: ${constraint}`);
}
});
// Check name consistency
if (personality.name && !response.includes(personality.name) &&
conversationHistory.length === 0) {
suggestions.push(`Consider introducing yourself as ${personality.name}`);
}
return {
isConsistent: issues.length === 0,
issues,
suggestions
};
}
}
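The personality layer is not yet wired into ConversationHandler. One way to connect them, sketched under the assumption that you track a personality key per session (a field the Session interface above does not have):

const personalityManager = new PersonalityManager();

// Hypothetical glue between the handler and the personality layer
async function respondWithPersonality(
  sessionId: string,
  userMessage: string,
  personalityKey: string,
  history: Message[]
): Promise<string> {
  const raw = await conversationHandler.processMessage(sessionId, userMessage) as string;
  // Post-process tone, then check for drift against the configured persona
  const adjusted = personalityManager.adjustResponseForPersonality(raw, personalityKey);
  const check = personalityManager.validateResponseConsistency(adjusted, personalityKey, history);
  if (!check.isConsistent) console.warn('Personality drift:', check.issues);
  return adjusted;
}

For the system prompt itself, generateSystemPrompt(personalityKey) would take the place of the SYSTEM_PROMPT fallback inside buildContext.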
Platform Integration
Integrate your chatbot with popular messaging platforms[4]:
import { App } from '@slack/bolt';
const slackApp = new App({
token: process.env.SLACK_BOT_TOKEN,
signingSecret: process.env.SLACK_SIGNING_SECRET,
});
// Handle direct messages
slackApp.message(async ({ message, say }) => {
const session = await getOrCreateSlackSession(message.user);
const response = await conversationHandler.processMessage(
session.id,
message.text
);
await say({
text: response,
thread_ts: message.ts // Thread replies
});
});
// Handle slash commands
slackApp.command('/ai', async ({ command, ack, respond }) => {
await ack();
const session = await getOrCreateSlackSession(command.user_id);
const response = await conversationHandler.processMessage(
session.id,
command.text
);
await respond({
text: response,
response_type: 'ephemeral' // Only visible to user
});
});
import twilio from 'twilio';
const twilioClient = twilio(
process.env.TWILIO_ACCOUNT_SID,
process.env.TWILIO_AUTH_TOKEN
);
// Twilio sends form-encoded webhooks, so urlencoded parsing is required
app.use(express.urlencoded({ extended: false }));
app.post('/webhooks/whatsapp', async (req, res) => {
const { From, Body } = req.body;
const session = await getOrCreateWhatsAppSession(From);
const response = await conversationHandler.processMessage(
session.id,
Body
);
await twilioClient.messages.create({
body: response,
from: `whatsapp:${process.env.TWILIO_WHATSAPP_NUMBER}`,
to: From
});
res.status(200).send('OK');
});
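Both integrations call session helpers that are not defined in this section. A minimal sketch of what they might look like, assuming one Redis key per platform user that maps to a session id:

// Assumed helpers: map a platform user to a persistent chatbot session
async function getOrCreatePlatformSession(
  platform: string,
  platformUserId: string
): Promise<Session> {
  const key = `platform-session:${platform}:${platformUserId}`;
  const existingId = await redis.get(key);
  if (existingId) {
    const existing = await sessionManager.getSession(existingId);
    if (existing) return existing;
  }
  const session = await sessionManager.createSession(platformUserId);
  await redis.set(key, session.id);
  return session;
}

const getOrCreateSlackSession = (userId: string) =>
  getOrCreatePlatformSession('slack', userId);
const getOrCreateWhatsAppSession = (from: string) =>
  getOrCreatePlatformSession('whatsapp', from);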
Production Deployment
Deploy your chatbot with enterprise-grade reliability and monitoring[2][4]:
# Dockerfile
FROM node:18-alpine AS builder
WORKDIR /app
COPY package*.json ./
# Build requires devDependencies; prune them after compiling
RUN npm ci
COPY . .
RUN npm run build
RUN npm prune --omit=dev
FROM node:18-alpine
RUN apk add --no-cache tini
ENTRYPOINT ["/sbin/tini", "--"]
WORKDIR /app
COPY --from=builder /app/dist ./dist
COPY --from=builder /app/node_modules ./node_modules
COPY package*.json ./
EXPOSE 3000 8080
USER node
CMD ["node", "dist/server.js"]
# docker-compose.yml
version: '3.8'
services:
  chatbot-api:
    build: .
    ports:
      - "3000:3000"
      - "8080:8080"
    environment:
      - NODE_ENV=production
      - REDIS_URL=redis://redis:6379
      - DATABASE_URL=postgresql://user:pass@postgres:5432/chatbot
    depends_on:
      - redis
      - postgres
    restart: unless-stopped
    healthcheck:
      # node:18-alpine ships busybox wget but not curl
      test: ["CMD", "wget", "-qO-", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
  redis:
    image: redis:7-alpine
    volumes:
      - redis-data:/data
    restart: unless-stopped
  postgres:
    image: postgres:15-alpine
    environment:
      - POSTGRES_DB=chatbot
      - POSTGRES_USER=user
      - POSTGRES_PASSWORD=pass
    volumes:
      - postgres-data:/var/lib/postgresql/data
    restart: unless-stopped
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
      - ./ssl:/etc/nginx/ssl
    depends_on:
      - chatbot-api
    restart: unless-stopped
volumes:
  redis-data:
  postgres-data:
import * as Sentry from '@sentry/node';
import { StatsD } from 'node-statsd';
import winston from 'winston';
// Initialize monitoring
Sentry.init({
dsn: process.env.SENTRY_DSN,
environment: process.env.NODE_ENV,
tracesSampleRate: 0.1,
});
const statsd = new StatsD({
host: process.env.STATSD_HOST,
port: 8125,
});
const logger = winston.createLogger({
level: 'info',
format: winston.format.json(),
transports: [
new winston.transports.File({ filename: 'error.log', level: 'error' }),
new winston.transports.File({ filename: 'combined.log' }),
new winston.transports.Console({
format: winston.format.simple(),
}),
],
});
// Metrics middleware
app.use((req: any, res, next) => {
const start = Date.now();
res.on('finish', () => {
const duration = Date.now() - start;
// Track request metrics
statsd.timing('api.request.duration', duration);
statsd.increment('api.request.count');
statsd.increment(`api.request.status.${res.statusCode}`);
logger.info('API Request', {
method: req.method,
path: req.path,
status: res.statusCode,
duration,
userId: req.userId,
});
});
next();
});
// Track chatbot metrics
class ChatbotMetrics {
trackMessage(sessionId: string, role: string, tokenCount: number) {
statsd.increment(`chatbot.messages.${role}`);
statsd.gauge('chatbot.tokens.used', tokenCount);
}
trackError(error: string, sessionId: string) {
statsd.increment('chatbot.errors');
logger.error('Chatbot Error', { error, sessionId });
Sentry.captureException(new Error(error), {
tags: { sessionId },
});
}
trackLatency(operation: string, duration: number) {
statsd.timing(`chatbot.${operation}.latency`, duration);
}
trackSessionMetrics(session: Session) {
statsd.gauge('chatbot.session.message_count', session.messages.length);
statsd.gauge('chatbot.session.duration',
Date.now() - session.metadata.created.getTime()
);
}
}
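ChatbotMetrics is defined but never invoked above. One plausible integration point is around the completion call; the wrapper below is a sketch, not the handler's actual code:

const metrics = new ChatbotMetrics();

// Hypothetical wrapper showing where the metric hooks could live
async function timedCompletion(sessionId: string, messages: any[]): Promise<string> {
  const start = Date.now();
  try {
    const completion = await openai.chat.completions.create({
      model: 'gpt-3.5-turbo',
      messages,
    });
    metrics.trackLatency('completion', Date.now() - start);
    metrics.trackMessage(sessionId, 'assistant', completion.usage?.total_tokens ?? 0);
    return completion.choices[0].message.content ?? '';
  } catch (error) {
    metrics.trackError((error as Error).message, sessionId);
    throw error;
  }
}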
// Health check endpoint
app.get('/health', async (req, res) => {
const checks = {
api: 'healthy',
redis: 'unknown',
database: 'unknown',
openai: 'unknown',
};
// Check Redis
try {
await redis.ping();
checks.redis = 'healthy';
} catch (error) {
checks.redis = 'unhealthy';
}
// Check Database (`db` is assumed to be a configured client, e.g. a pg Pool)
try {
await db.query('SELECT 1');
checks.database = 'healthy';
} catch (error) {
checks.database = 'unhealthy';
}
// Check OpenAI
try {
await openai.models.list();
checks.openai = 'healthy';
} catch (error) {
checks.openai = 'unhealthy';
}
const isHealthy = Object.values(checks).every(status => status === 'healthy');
res.status(isHealthy ? 200 : 503).json({
status: isHealthy ? 'healthy' : 'degraded',
checks,
timestamp: new Date().toISOString(),
});
});
Best Practices Checklist
✅ Core Features
- Conversation history persistence
- Real-time streaming responses
- Context window optimization
- Error handling & recovery
- User session management
🔒 Security & Reliability
- Authentication & authorization
- Input validation & sanitization
- Rate limiting per user (see the sketch after this checklist)
- Automated backups
- SSL/TLS encryption
📊 Monitoring
- Error tracking (Sentry)
- Performance metrics
- Usage analytics
- Health checks
- Alerting system
🚀 Deployment
- Containerized (Docker)
- Auto-scaling configured
- Load balancing
- CI/CD pipeline
- Rollback strategy
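The checklist calls out per-user rate limiting, which the implementation above does not include. A minimal sketch using the Redis connection already in place (the fixed window and the 20-requests-per-minute limit are illustrative choices, not recommendations):

// Fixed-window limiter; assumes authenticateUser has already set req.userId
const rateLimit = async (req: any, res: any, next: any) => {
  const windowKey = `ratelimit:${req.userId}:${Math.floor(Date.now() / 60_000)}`;
  const count = await redis.incr(windowKey);
  if (count === 1) await redis.expire(windowKey, 60);
  if (count > 20) {
    return res.status(429).json({ error: 'Too many requests' });
  }
  next();
};

// Usage: app.post('/api/chat', authenticateUser, rateLimit, handler);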
Conclusion
You now have a complete, production-ready AI chatbot with all the essential features: streaming responses, conversation persistence, error handling, and scalable architecture. This foundation can be extended with additional features like voice input, multi-language support, or custom integrations[4].
Next Steps
- Deploy to your preferred cloud provider
- Integrate with your existing authentication system
- Customize the personality and knowledge base
- Add analytics to understand user interactions
- Consider using ParrotRouter for simplified multi-provider support
References
- [1] LangChain. "Build a Chatbot Tutorial" (2024)
- [2] Real Python. "Build an LLM RAG Chatbot with LangChain" (2024)
- [3] OpenAI. "Chat Completions API Guide" (2024)
- [4] Streamlit. "Build Conversational Apps" (2024)
- [5] Anthropic. "Conversations and Threads" (2024)
- [6] Vercel AI SDK. "AI SDK Documentation" (2024)
- [7] Google Cloud. "Dialogflow Documentation" (2024)
- [8] Botpress. "Open Source Chatbots Guide" (2024)
- [9] Chatbase. "Chatbot Development Tools" (2024)
- [10] MobiDev. "Chatbot Development Guide" (2024)
- [11] Octal Software. "Chatbot Development Frameworks" (2024)
- [12] Next.js. "Route Handlers Documentation" (2024)