Quick Start
Get up and running in minutes:
npx create-next-app@latest my-llm-app --typescript --tailwind --app
cd my-llm-app
npm install ai openai @anthropic-ai/sdk
1. Project Setup & Configuration
Environment Setup
Create a .env.local file for your API keys:
# .env.local
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GOOGLE_API_KEY=...
Package Installation
# Core dependencies
npm install ai openai @anthropic-ai/sdk @google/generative-ai

# Additional utilities
npm install zod uuid rate-limiter-flexible
npm install @upstash/ratelimit @upstash/redis   # For serverless rate limiting
TypeScript Configuration
// tsconfig.json
{
  "compilerOptions": {
    "target": "ES2022",
    "lib": ["dom", "dom.iterable", "esnext"],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "forceConsistentCasingInFileNames": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "esnext",
    "moduleResolution": "bundler",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "preserve",
    "incremental": true,
    "plugins": [{ "name": "next" }],
    "paths": {
      "@/*": ["./src/*"]
    }
  }
}
2. App Router Architecture for AI Apps
Next.js 14's App Router provides the ideal architecture for LLM-powered applications:
Directory Structure
app/
├── api/
│   ├── chat/
│   │   └── route.ts            # Streaming chat endpoint
│   ├── completion/
│   │   └── route.ts            # Single completion endpoint
│   └── embeddings/
│       └── route.ts            # Vector embeddings endpoint
├── chat/
│   ├── page.tsx                # Chat interface (Client Component)
│   └── layout.tsx              # Chat layout
├── lib/
│   ├── llm-providers.ts        # LLM provider configuration
│   ├── rate-limit.ts           # Rate limiting utilities
│   └── validators.ts           # Request/response validation
└── components/
    ├── chat-interface.tsx      # Chat UI component
    └── message-list.tsx        # Message display component
Server vs Client Components
Best Practice
Always make LLM API calls from Server Components or API Routes. Never expose API keys to client-side code.
// app/api/chat/route.ts - Server-side API route
import { NextResponse } from 'next/server'
import { OpenAI } from 'openai'

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
})

export async function POST(req: Request) {
  try {
    const { messages } = await req.json()

    const completion = await openai.chat.completions.create({
      model: 'gpt-4-turbo-preview',
      messages,
      temperature: 0.7,
    })

    return NextResponse.json(completion)
  } catch (error) {
    console.error('OpenAI API error:', error)
    return NextResponse.json(
      { error: 'Failed to generate response' },
      { status: 500 }
    )
  }
}
3. Creating LLM API Routes
Multi-Provider Support
Create a unified interface for multiple LLM providers:
// app/lib/llm-providers.ts
import { OpenAI } from 'openai'
import Anthropic from '@anthropic-ai/sdk'
import { GoogleGenerativeAI } from '@google/generative-ai'

export interface LLMProvider {
  name: string
  generateCompletion: (prompt: string) => Promise<string>
  generateStream: (prompt: string) => AsyncGenerator<string>
}

class OpenAIProvider implements LLMProvider {
  name = 'openai'
  private client: OpenAI

  constructor(apiKey: string) {
    this.client = new OpenAI({ apiKey })
  }

  async generateCompletion(prompt: string): Promise<string> {
    const completion = await this.client.chat.completions.create({
      model: 'gpt-4-turbo-preview',
      messages: [{ role: 'user', content: prompt }],
    })
    return completion.choices[0].message.content || ''
  }

  async *generateStream(prompt: string): AsyncGenerator<string> {
    const stream = await this.client.chat.completions.create({
      model: 'gpt-4-turbo-preview',
      messages: [{ role: 'user', content: prompt }],
      stream: true,
    })
    for await (const chunk of stream) {
      yield chunk.choices[0]?.delta?.content || ''
    }
  }
}

class AnthropicProvider implements LLMProvider {
  name = 'anthropic'
  private client: Anthropic

  constructor(apiKey: string) {
    this.client = new Anthropic({ apiKey })
  }

  async generateCompletion(prompt: string): Promise<string> {
    const message = await this.client.messages.create({
      model: 'claude-3-opus-20240229',
      max_tokens: 1000,
      messages: [{ role: 'user', content: prompt }],
    })
    return message.content[0].type === 'text' ? message.content[0].text : ''
  }

  async *generateStream(prompt: string): AsyncGenerator<string> {
    const stream = await this.client.messages.create({
      model: 'claude-3-opus-20240229',
      max_tokens: 1000,
      messages: [{ role: 'user', content: prompt }],
      stream: true,
    })
    for await (const event of stream) {
      // Only text deltas carry a .text payload
      if (event.type === 'content_block_delta' && event.delta.type === 'text_delta') {
        yield event.delta.text
      }
    }
  }
}

// Provider factory
export function createProvider(provider: string): LLMProvider {
  switch (provider) {
    case 'openai':
      return new OpenAIProvider(process.env.OPENAI_API_KEY!)
    case 'anthropic':
      return new AnthropicProvider(process.env.ANTHROPIC_API_KEY!)
    default:
      throw new Error(`Unsupported provider: ${provider}`)
  }
}
Request Validation
// app/lib/validators.ts
import { z } from 'zod'

export const chatRequestSchema = z.object({
  messages: z.array(
    z.object({
      role: z.enum(['system', 'user', 'assistant']),
      content: z.string().min(1).max(10000),
    })
  ),
  provider: z.enum(['openai', 'anthropic', 'google']).optional(),
  temperature: z.number().min(0).max(2).optional(),
  maxTokens: z.number().positive().optional(),
})

export type ChatRequest = z.infer<typeof chatRequestSchema>
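To see where the schema fits, here is a minimal sketch of validating a request body inside a route handler before touching a provider. The route path and the error payload shape are illustrative assumptions, not part of the project layout above:

// Hypothetical route handler showing the schema in use
import { NextResponse } from 'next/server'
import { chatRequestSchema } from '@/lib/validators'

export async function POST(req: Request) {
  const body = await req.json()

  // safeParse returns a result object instead of throwing on invalid input
  const parsed = chatRequestSchema.safeParse(body)
  if (!parsed.success) {
    return NextResponse.json(
      { error: 'Invalid request', issues: parsed.error.flatten() },
      { status: 400 }
    )
  }

  const { messages, provider = 'openai' } = parsed.data
  // ...hand the validated messages to the selected provider here
  return NextResponse.json({ received: messages.length, provider })
}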
4. Implementing Streaming Responses
Streaming is essential for responsive AI interfaces. Here's how to implement it:
Basic Streaming API Route
// app/api/stream/route.ts
import { NextRequest } from 'next/server'
import { createProvider } from '@/lib/llm-providers'

export async function POST(req: NextRequest) {
  const { prompt, provider = 'openai' } = await req.json()

  const llmProvider = createProvider(provider)
  const stream = llmProvider.generateStream(prompt)

  // Wrap the async generator in a ReadableStream of server-sent events (SSE)
  const encoder = new TextEncoder()
  const customStream = new ReadableStream({
    async start(controller) {
      for await (const chunk of stream) {
        controller.enqueue(
          encoder.encode(`data: ${JSON.stringify({ content: chunk })}\n\n`)
        )
      }
      controller.enqueue(encoder.encode('data: [DONE]\n\n'))
      controller.close()
    },
  })

  return new Response(customStream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
    },
  })
}
Client-Side Streaming Consumer
// app/components/chat-interface.tsx
'use client'

import { useState } from 'react'

export function ChatInterface() {
  const [messages, setMessages] = useState<string[]>([])
  const [input, setInput] = useState('')
  const [isStreaming, setIsStreaming] = useState(false)

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault()
    if (!input.trim() || isStreaming) return

    setIsStreaming(true)
    setMessages(prev => [...prev, `User: ${input}`, 'Assistant: '])

    try {
      const response = await fetch('/api/stream', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ prompt: input }),
      })

      const reader = response.body?.getReader()
      const decoder = new TextDecoder()
      let assistantMessage = ''

      while (true) {
        const { done, value } = await reader!.read()
        if (done) break

        const chunk = decoder.decode(value)
        const lines = chunk.split('\n')

        for (const line of lines) {
          if (line.startsWith('data: ')) {
            const data = line.slice(6)
            if (data === '[DONE]') continue

            try {
              const parsed = JSON.parse(data)
              assistantMessage += parsed.content
              setMessages(prev => {
                const newMessages = [...prev]
                newMessages[newMessages.length - 1] = `Assistant: ${assistantMessage}`
                return newMessages
              })
            } catch (e) {
              console.error('Failed to parse SSE data:', e)
            }
          }
        }
      }
    } catch (error) {
      console.error('Streaming error:', error)
      setMessages(prev => [...prev.slice(0, -1), 'Assistant: Error occurred'])
    } finally {
      setIsStreaming(false)
      setInput('')
    }
  }

  return (
    <div className="max-w-2xl mx-auto p-4">
      <div className="mb-4 h-96 overflow-y-auto border rounded p-4">
        {messages.map((msg, idx) => (
          <div key={idx} className="mb-2">{msg}</div>
        ))}
      </div>
      <form onSubmit={handleSubmit} className="flex gap-2">
        <input
          type="text"
          value={input}
          onChange={(e) => setInput(e.target.value)}
          className="flex-1 p-2 border rounded"
          placeholder="Type your message..."
          disabled={isStreaming}
        />
        <button
          type="submit"
          disabled={isStreaming}
          className="px-4 py-2 bg-blue-500 text-white rounded disabled:opacity-50"
        >
          {isStreaming ? 'Generating...' : 'Send'}
        </button>
      </form>
    </div>
  )
}
5. Edge Runtime Optimization
Deploy your API routes to the edge for lower latency:
// app/api/edge-chat/route.ts
import { NextRequest } from 'next/server'

export const runtime = 'edge' // Enable the Edge runtime

export async function POST(req: NextRequest) {
  const { prompt } = await req.json()

  // The Edge runtime has limitations - use fetch-based APIs
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
    },
    body: JSON.stringify({
      model: 'gpt-3.5-turbo',
      messages: [{ role: 'user', content: prompt }],
      stream: true,
    }),
  })

  // Forward the streaming response
  return new Response(response.body, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
    },
  })
}
Edge Runtime Benefits
- Faster cold starts (roughly 10-50ms vs 200-500ms for Node.js serverless functions)
- Global deployment closer to users
- Automatic scaling
- Lower costs for high-traffic apps
6. Using Vercel AI SDK
The Vercel AI SDK simplifies LLM integration with built-in streaming support:
Installation
npm install ai @ai-sdk/openai @ai-sdk/anthropic
Streaming Chat with AI SDK
// app/api/ai-sdk-chat/route.ts
import { openai } from '@ai-sdk/openai'
import { anthropic } from '@ai-sdk/anthropic'
import { streamText } from 'ai'

export async function POST(req: Request) {
  const { messages, provider = 'openai' } = await req.json()

  const model =
    provider === 'anthropic'
      ? anthropic('claude-3-opus-20240229')
      : openai('gpt-4-turbo-preview')

  const result = await streamText({
    model,
    messages,
    temperature: 0.7,
    maxTokens: 1000,
  })

  return result.toAIStreamResponse()
}
Client Component with useChat Hook
// app/chat/page.tsx
'use client'

import { useChat } from 'ai/react'

export default function ChatPage() {
  const { messages, input, handleInputChange, handleSubmit, isLoading } = useChat({
    api: '/api/ai-sdk-chat',
  })

  return (
    <div className="max-w-2xl mx-auto p-4">
      <div className="mb-4 h-96 overflow-y-auto">
        {messages.map((m) => (
          <div key={m.id} className="mb-2">
            <strong>{m.role === 'user' ? 'You: ' : 'AI: '}</strong>
            {m.content}
          </div>
        ))}
      </div>
      <form onSubmit={handleSubmit} className="flex gap-2">
        <input
          value={input}
          onChange={handleInputChange}
          placeholder="Say something..."
          className="flex-1 p-2 border rounded"
          disabled={isLoading}
        />
        <button
          type="submit"
          disabled={isLoading}
          className="px-4 py-2 bg-blue-500 text-white rounded"
        >
          Send
        </button>
      </form>
    </div>
  )
}
7. Security & API Key Management
Environment Variables Best Practices
// app/lib/config.ts
const requiredEnvVars = [
  'OPENAI_API_KEY',
  'ANTHROPIC_API_KEY',
  'NEXTAUTH_SECRET',
] as const

type EnvVarKey = typeof requiredEnvVars[number]

// Validate environment variables at startup
function validateEnv(): Record<EnvVarKey, string> {
  const env: Partial<Record<EnvVarKey, string>> = {}
  const missing: string[] = []

  for (const key of requiredEnvVars) {
    const value = process.env[key]
    if (!value) {
      missing.push(key)
    } else {
      env[key] = value
    }
  }

  if (missing.length > 0) {
    throw new Error(`Missing environment variables: ${missing.join(', ')}`)
  }

  return env as Record<EnvVarKey, string>
}

export const config = validateEnv()
API Route Authentication
// app/lib/auth.ts
import { NextRequest } from 'next/server'
import { getToken } from 'next-auth/jwt'

export async function authenticateRequest(req: NextRequest) {
  // For NextAuth.js
  const token = await getToken({ req })
  if (!token) {
    return { authenticated: false, user: null }
  }
  return { authenticated: true, user: token }
}

// Usage in an API route
export async function POST(req: NextRequest) {
  const { authenticated, user } = await authenticateRequest(req)
  if (!authenticated) {
    return new Response('Unauthorized', { status: 401 })
  }
  // Process the authenticated request
}
Request Sanitization
// app/lib/sanitize.ts
import DOMPurify from 'isomorphic-dompurify'

export function sanitizeInput(input: string): string {
  // Strip all HTML to remove potential injection attempts
  const cleaned = DOMPurify.sanitize(input, { ALLOWED_TAGS: [], ALLOWED_ATTR: [] })

  // Additional LLM-specific sanitization
  return cleaned
    .replace(/\[INST\]/g, '')   // Remove instruction markers
    .replace(/\[\/INST\]/g, '')
    .trim()
}
8. Rate Limiting Implementation
Upstash Rate Limiting (Serverless)
// app/lib/rate-limit.ts
import { NextRequest } from 'next/server'
import { Ratelimit } from '@upstash/ratelimit'
import { Redis } from '@upstash/redis'

const redis = new Redis({
  url: process.env.UPSTASH_REDIS_URL!,
  token: process.env.UPSTASH_REDIS_TOKEN!,
})

export const rateLimiter = new Ratelimit({
  redis,
  limiter: Ratelimit.slidingWindow(10, '1 m'), // 10 requests per minute
  analytics: true,
})

// Example API route with rate limiting
export async function POST(req: NextRequest) {
  // req.ip is populated on Vercel; fall back to localhost elsewhere
  const ip = req.ip ?? '127.0.0.1'
  const { success, limit, reset, remaining } = await rateLimiter.limit(ip)

  if (!success) {
    return new Response('Rate limit exceeded', {
      status: 429,
      headers: {
        'X-RateLimit-Limit': limit.toString(),
        'X-RateLimit-Remaining': remaining.toString(),
        'X-RateLimit-Reset': new Date(reset).toISOString(),
      },
    })
  }

  // Process the request
}
Token-Based Rate Limiting
// app/lib/token-limiter.ts
interface TokenBucket {
  tokens: number
  lastRefill: number
  userId: string
}

const TOKEN_LIMIT = 100000 // tokens per day
const REFILL_RATE = TOKEN_LIMIT / (24 * 60 * 60 * 1000) // tokens per ms

export class TokenLimiter {
  private buckets = new Map<string, TokenBucket>()

  async checkLimit(userId: string, tokensRequested: number): Promise<boolean> {
    let bucket = this.buckets.get(userId)
    const now = Date.now()

    if (!bucket) {
      bucket = {
        tokens: TOKEN_LIMIT,
        lastRefill: now,
        userId,
      }
    } else {
      // Refill tokens based on the time passed since the last request
      const timePassed = now - bucket.lastRefill
      const tokensToAdd = timePassed * REFILL_RATE
      bucket.tokens = Math.min(TOKEN_LIMIT, bucket.tokens + tokensToAdd)
      bucket.lastRefill = now
    }

    if (bucket.tokens >= tokensRequested) {
      bucket.tokens -= tokensRequested
      this.buckets.set(userId, bucket)
      return true
    }

    return false
  }
}
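Here is a hedged sketch of how the limiter might be wired into a route, using a rough characters-to-tokens estimate. Keep in mind that the Map above lives in process memory, so in a serverless or multi-instance deployment each instance has its own buckets; a shared store such as Redis would be needed for a truly global budget. The request shape and the 4-characters-per-token heuristic are assumptions:

// Hypothetical wiring of TokenLimiter into a route
import { NextRequest, NextResponse } from 'next/server'
import { TokenLimiter } from '@/lib/token-limiter'

// Per-instance limiter; swap for a Redis-backed implementation in production
const tokenLimiter = new TokenLimiter()

export async function POST(req: NextRequest) {
  const { userId, prompt } = await req.json()

  // Very rough estimate: ~4 characters per token for English text
  const estimatedTokens = Math.ceil(prompt.length / 4)

  const allowed = await tokenLimiter.checkLimit(userId, estimatedTokens)
  if (!allowed) {
    return NextResponse.json(
      { error: 'Daily token budget exceeded' },
      { status: 429 }
    )
  }

  // ...proceed with the LLM call
  return NextResponse.json({ ok: true })
}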
9. Error Handling Patterns
// app/lib/errors.ts
export class LLMError extends Error {
  constructor(
    message: string,
    public statusCode: number,
    public provider?: string,
    public details?: any
  ) {
    super(message)
    this.name = 'LLMError'
  }
}

export async function handleLLMRequest<T>(
  request: () => Promise<T>,
  provider: string
): Promise<T> {
  try {
    return await request()
  } catch (error: any) {
    // Provider-specific error handling
    if (error.response?.status === 429) {
      throw new LLMError(
        'Rate limit exceeded',
        429,
        provider,
        { retryAfter: error.response.headers['retry-after'] }
      )
    }
    if (error.response?.status === 401) {
      throw new LLMError('Invalid API key', 401, provider)
    }
    if (error.code === 'ECONNREFUSED') {
      throw new LLMError('Service unavailable', 503, provider)
    }

    // Log unexpected errors
    console.error(`LLM Error [${provider}]:`, error)
    throw new LLMError('Internal server error', 500, provider)
  }
}

// Usage in an API route
export async function POST(req: NextRequest) {
  try {
    const result = await handleLLMRequest(
      () => openai.chat.completions.create({ ... }),
      'openai'
    )
    return NextResponse.json(result)
  } catch (error) {
    if (error instanceof LLMError) {
      return NextResponse.json(
        { error: error.message, provider: error.provider, details: error.details },
        { status: error.statusCode }
      )
    }
    return NextResponse.json(
      { error: 'Unexpected error occurred' },
      { status: 500 }
    )
  }
}
10. Production Deployment
Vercel Deployment Configuration
// vercel.json
{
  "functions": {
    "app/api/chat/route.ts": {
      "maxDuration": 60
    },
    "app/api/edge-chat/route.ts": {
      "runtime": "edge",
      "regions": ["iad1", "sfo1", "sin1"]
    }
  },
  "env": {
    "OPENAI_API_KEY": "@openai-api-key",
    "ANTHROPIC_API_KEY": "@anthropic-api-key"
  }
}
Production Checklist
✓ Pre-deployment Checklist
- ☐ Environment variables configured in Vercel dashboard
- ☐ API routes protected with authentication
- ☐ Rate limiting implemented
- ☐ Error handling and logging configured
- ☐ CORS headers configured if needed (see the sketch after this checklist)
- ☐ Response caching strategy implemented
- ☐ Monitoring and alerts set up
- ☐ Cost alerts configured for LLM usage
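For the CORS item above, here is a minimal sketch of one way to handle it directly in a route handler. The ALLOWED_ORIGIN environment variable, the fallback origin, and the allowed methods are assumptions to adjust for your deployment:

// Hypothetical CORS handling for an API route
const corsHeaders = {
  'Access-Control-Allow-Origin': process.env.ALLOWED_ORIGIN ?? 'https://example.com',
  'Access-Control-Allow-Methods': 'POST, OPTIONS',
  'Access-Control-Allow-Headers': 'Content-Type, Authorization',
}

// Answer preflight requests
export async function OPTIONS() {
  return new Response(null, { status: 204, headers: corsHeaders })
}

export async function POST(req: Request) {
  // ...authenticate, rate limit, and call the LLM as shown earlier
  return new Response(JSON.stringify({ ok: true }), {
    headers: { 'Content-Type': 'application/json', ...corsHeaders },
  })
}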
Monitoring Setup
// app/lib/monitoring.ts
export function trackLLMUsage(
  provider: string,
  model: string,
  tokens: number,
  duration: number
) {
  // Send to an analytics service
  if (process.env.NODE_ENV === 'production') {
    fetch('https://analytics.example.com/track', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        event: 'llm_usage',
        properties: {
          provider,
          model,
          tokens,
          duration,
          timestamp: new Date().toISOString(),
        },
      }),
    }).catch(console.error)
  }
}
Example: Complete Chat Application
Here's a production-grade chat endpoint that combines all the concepts above:
// app/api/production-chat/route.ts
import { NextRequest } from 'next/server'
import { authenticateRequest } from '@/lib/auth'
import { rateLimiter } from '@/lib/rate-limit'
import { chatRequestSchema } from '@/lib/validators'
import { createProvider } from '@/lib/llm-providers'
import { handleLLMRequest } from '@/lib/errors'
import { trackLLMUsage } from '@/lib/monitoring'

export const runtime = 'edge'

export async function POST(req: NextRequest) {
  // 1. Authentication
  const { authenticated, user } = await authenticateRequest(req)
  if (!authenticated) {
    return new Response('Unauthorized', { status: 401 })
  }

  // 2. Rate limiting
  const { success } = await rateLimiter.limit(user.id)
  if (!success) {
    return new Response('Rate limit exceeded', { status: 429 })
  }

  // 3. Validation
  const body = await req.json()
  const validation = chatRequestSchema.safeParse(body)
  if (!validation.success) {
    return new Response(JSON.stringify(validation.error), { status: 400 })
  }

  const { messages, provider = 'openai' } = validation.data
  const startTime = Date.now()

  try {
    // 4. LLM request with error handling
    const llmProvider = createProvider(provider)
    const stream = await handleLLMRequest(
      () => llmProvider.generateStream(messages[messages.length - 1].content),
      provider
    )

    // 5. Stream the response with usage monitoring
    let tokenCount = 0
    const encoder = new TextEncoder()
    const customStream = new ReadableStream({
      async start(controller) {
        for await (const chunk of stream) {
          tokenCount += chunk.split(' ').length // Rough estimate
          controller.enqueue(
            encoder.encode(`data: ${JSON.stringify({ content: chunk })}\n\n`)
          )
        }

        // Track usage
        trackLLMUsage(provider, 'chat', tokenCount, Date.now() - startTime)

        controller.enqueue(encoder.encode('data: [DONE]\n\n'))
        controller.close()
      },
    })

    return new Response(customStream, {
      headers: {
        'Content-Type': 'text/event-stream',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
      },
    })
  } catch (error) {
    // Errors are normalized by handleLLMRequest; rethrow for the outer handler
    throw error
  }
}
Next Steps
Advanced Features
- Implement conversation memory with Redis (a sketch follows this list)
- Add file upload support for multimodal models
- Build real-time collaboration features
- Implement function calling capabilities
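As a starting point for conversation memory, here is a hedged sketch using the @upstash/redis client installed earlier. The key naming, the 20-message cap, and the 24-hour expiry are assumptions rather than a prescribed design:

// Hypothetical conversation memory helpers backed by Upstash Redis
import { Redis } from '@upstash/redis'

const redis = new Redis({
  url: process.env.UPSTASH_REDIS_URL!,
  token: process.env.UPSTASH_REDIS_TOKEN!,
})

interface StoredMessage {
  role: 'system' | 'user' | 'assistant'
  content: string
}

const MAX_HISTORY = 20 // keep only the most recent messages per conversation

export async function appendMessage(conversationId: string, message: StoredMessage) {
  const key = `chat:${conversationId}`
  // The Upstash client JSON-serializes non-string values automatically
  await redis.rpush(key, message)
  await redis.ltrim(key, -MAX_HISTORY, -1) // trim older messages
  await redis.expire(key, 60 * 60 * 24)    // let idle conversations expire after 24h
}

export async function loadHistory(conversationId: string): Promise<StoredMessage[]> {
  return redis.lrange<StoredMessage>(`chat:${conversationId}`, 0, -1)
}

A route handler could call loadHistory before the LLM request to rebuild the messages array, then appendMessage for both the user prompt and the completed assistant reply.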
Performance Optimization
- Implement response caching strategies (see the sketch below)
- Use CDN for static assets
- Optimize bundle size with dynamic imports
- Implement request debouncing
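For response caching, here is a hedged sketch that caches completions keyed by a hash of the provider and prompt, again using @upstash/redis. The key scheme and one-hour TTL are assumptions, and caching only makes sense for deterministic (low-temperature) requests:

// Hypothetical prompt-level response cache (Node runtime; uses node:crypto)
import { createHash } from 'crypto'
import { Redis } from '@upstash/redis'

const redis = new Redis({
  url: process.env.UPSTASH_REDIS_URL!,
  token: process.env.UPSTASH_REDIS_TOKEN!,
})

const CACHE_TTL_SECONDS = 60 * 60 // 1 hour

function cacheKey(provider: string, prompt: string): string {
  // Hash the prompt so keys stay short and contain no raw user text
  const digest = createHash('sha256').update(`${provider}:${prompt}`).digest('hex')
  return `llm-cache:${digest}`
}

export async function getCachedCompletion(provider: string, prompt: string) {
  return redis.get<string>(cacheKey(provider, prompt))
}

export async function cacheCompletion(provider: string, prompt: string, completion: string) {
  // Only cache deterministic requests (e.g. temperature 0); skip creative outputs
  await redis.set(cacheKey(provider, prompt), completion, { ex: CACHE_TTL_SECONDS })
}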