Quick Start
Get up and running in minutes:
npx create-next-app@latest my-llm-app --typescript --tailwind --app
cd my-llm-app
npm install ai openai @anthropic-ai/sdk
1. Project Setup & Configuration
Environment Setup
Create a .env.local file for your API keys:
# .env.local
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GOOGLE_API_KEY=...
Package Installation
# Core dependencies
npm install ai openai @anthropic-ai/sdk @google/generative-ai

# Additional utilities
npm install zod uuid rate-limiter-flexible
npm install @upstash/ratelimit @upstash/redis # For serverless rate limiting
TypeScript Configuration
// tsconfig.json
{
"compilerOptions": {
"target": "ES2022",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"forceConsistentCasingInFileNames": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": ["./src/*"]
}
}
}

2. App Router Architecture for AI Apps
Next.js 14's App Router provides the ideal architecture for LLM-powered applications:
Directory Structure
app/
├── api/
│ ├── chat/
│ │ └── route.ts # Streaming chat endpoint
│ ├── completion/
│ │ └── route.ts # Single completion endpoint
│ └── embeddings/
│ └── route.ts # Vector embeddings endpoint
├── chat/
│ ├── page.tsx # Chat interface (Client Component)
│ └── layout.tsx # Chat layout
├── lib/
│ ├── llm-providers.ts # LLM provider configuration
│ ├── rate-limit.ts # Rate limiting utilities
│ └── validators.ts # Request/response validation
└── components/
├── chat-interface.tsx # Chat UI component
└── message-list.tsx # Message display component

Server vs Client Components
Best Practice
Always make LLM API calls from Server Components or API Routes. Never expose API keys to client-side code.
// app/api/chat/route.ts - Server-side API route
import { NextResponse } from 'next/server'
import { OpenAI } from 'openai'
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
})
export async function POST(req: Request) {
try {
const { messages } = await req.json()
const completion = await openai.chat.completions.create({
model: 'gpt-4-turbo-preview',
messages,
temperature: 0.7,
})
return NextResponse.json(completion)
} catch (error) {
console.error('OpenAI API error:', error)
return NextResponse.json(
{ error: 'Failed to generate response' },
{ status: 500 }
)
}
}

3. Creating LLM API Routes
Multi-Provider Support
Create a unified interface for multiple LLM providers:
// app/lib/llm-providers.ts
import { OpenAI } from 'openai'
import Anthropic from '@anthropic-ai/sdk'
import { GoogleGenerativeAI } from '@google/generative-ai'
export interface LLMProvider {
name: string
generateCompletion: (prompt: string) => Promise<string>
generateStream: (prompt: string) => AsyncGenerator<string>
}
class OpenAIProvider implements LLMProvider {
name = 'openai'
private client: OpenAI
constructor(apiKey: string) {
this.client = new OpenAI({ apiKey })
}
async generateCompletion(prompt: string): Promise<string> {
const completion = await this.client.chat.completions.create({
model: 'gpt-4-turbo-preview',
messages: [{ role: 'user', content: prompt }],
})
return completion.choices[0].message.content || ''
}
async *generateStream(prompt: string): AsyncGenerator<string> {
const stream = await this.client.chat.completions.create({
model: 'gpt-4-turbo-preview',
messages: [{ role: 'user', content: prompt }],
stream: true,
})
for await (const chunk of stream) {
yield chunk.choices[0]?.delta?.content || ''
}
}
}
class AnthropicProvider implements LLMProvider {
name = 'anthropic'
private client: Anthropic
constructor(apiKey: string) {
this.client = new Anthropic({ apiKey })
}
async generateCompletion(prompt: string): Promise<string> {
const message = await this.client.messages.create({
model: 'claude-3-opus-20240229',
max_tokens: 1000,
messages: [{ role: 'user', content: prompt }],
})
return message.content[0].type === 'text' ? message.content[0].text : ''
}
async *generateStream(prompt: string): AsyncGenerator<string> {
const stream = await this.client.messages.create({
model: 'claude-3-opus-20240229',
max_tokens: 1000,
messages: [{ role: 'user', content: prompt }],
stream: true,
})
for await (const event of stream) {
if (event.type === 'content_block_delta') {
yield event.delta.text
}
}
}
}
// Provider factory
export function createProvider(provider: string): LLMProvider {
switch (provider) {
case 'openai':
return new OpenAIProvider(process.env.OPENAI_API_KEY!)
case 'anthropic':
return new AnthropicProvider(process.env.ANTHROPIC_API_KEY!)
default:
throw new Error(`Unsupported provider: ${provider}`)
}
}

Request Validation
// app/lib/validators.ts
import { z } from 'zod'
export const chatRequestSchema = z.object({
messages: z.array(
z.object({
role: z.enum(['system', 'user', 'assistant']),
content: z.string().min(1).max(10000),
})
),
provider: z.enum(['openai', 'anthropic', 'google']).optional(),
temperature: z.number().min(0).max(2).optional(),
maxTokens: z.number().positive().optional(),
})
export type ChatRequest = z.infer<typeof chatRequestSchema>

4. Implementing Streaming Responses
Streaming is essential for responsive AI interfaces. Here's how to implement it:
Basic Streaming API Route
// app/api/stream/route.ts
import { NextRequest } from 'next/server'
import { createProvider } from '@/lib/llm-providers'
export async function POST(req: NextRequest) {
const { prompt, provider = 'openai' } = await req.json()
const llmProvider = createProvider(provider)
const stream = llmProvider.generateStream(prompt)
// Create a TransformStream to handle the streaming
const encoder = new TextEncoder()
const customStream = new ReadableStream({
async start(controller) {
for await (const chunk of stream) {
controller.enqueue(encoder.encode(`data: ${JSON.stringify({ content: chunk })}\n\n`))
}
controller.enqueue(encoder.encode('data: [DONE]\n\n'))
controller.close()
},
})
return new Response(customStream, {
headers: {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
},
})
}

Client-Side Streaming Consumer
// app/components/chat-interface.tsx
'use client'
import { useState } from 'react'
export function ChatInterface() {
const [messages, setMessages] = useState<string[]>([])
const [input, setInput] = useState('')
const [isStreaming, setIsStreaming] = useState(false)
const handleSubmit = async (e: React.FormEvent) => {
e.preventDefault()
if (!input.trim() || isStreaming) return
setIsStreaming(true)
setMessages(prev => [...prev, `User: ${input}`, 'Assistant: '])
try {
const response = await fetch('/api/stream', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ prompt: input }),
})
const reader = response.body?.getReader()
const decoder = new TextDecoder()
let assistantMessage = ''
while (true) {
const { done, value } = await reader!.read()
if (done) break
const chunk = decoder.decode(value)
const lines = chunk.split('\n')
for (const line of lines) {
if (line.startsWith('data: ')) {
const data = line.slice(6)
if (data === '[DONE]') continue
try {
const parsed = JSON.parse(data)
assistantMessage += parsed.content
setMessages(prev => {
const newMessages = [...prev]
newMessages[newMessages.length - 1] = `Assistant: ${assistantMessage}`
return newMessages
})
} catch (e) {
console.error('Failed to parse SSE data:', e)
}
}
}
}
} catch (error) {
console.error('Streaming error:', error)
setMessages(prev => [...prev.slice(0, -1), 'Assistant: Error occurred'])
} finally {
setIsStreaming(false)
setInput('')
}
}
return (
<div className="max-w-2xl mx-auto p-4">
<div className="mb-4 h-96 overflow-y-auto border rounded p-4">
{messages.map((msg, idx) => (
<div key={idx} className="mb-2">{msg}</div>
))}
</div>
<form onSubmit={handleSubmit} className="flex gap-2">
<input
type="text"
value={input}
onChange={(e) => setInput(e.target.value)}
className="flex-1 p-2 border rounded"
placeholder="Type your message..."
disabled={isStreaming}
/>
<button
type="submit"
disabled={isStreaming}
className="px-4 py-2 bg-blue-500 text-white rounded disabled:opacity-50"
>
{isStreaming ? 'Generating...' : 'Send'}
</button>
</form>
</div>
)
}

5. Edge Runtime Optimization
Deploy your API routes to the edge for lower latency:
// app/api/edge-chat/route.ts
import { NextRequest } from 'next/server'

export const runtime = 'edge' // Enable the Edge runtime for this route
export async function POST(req: NextRequest) {
const { prompt } = await req.json()
// Edge runtime has limitations - use fetch-based APIs
const response = await fetch('https://api.openai.com/v1/chat/completions', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
},
body: JSON.stringify({
model: 'gpt-3.5-turbo',
messages: [{ role: 'user', content: prompt }],
stream: true,
}),
})
// Forward the streaming response
return new Response(response.body, {
headers: {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
},
})
}

Edge Runtime Benefits
- Faster cold starts (10-50ms vs 200-500ms)
- Global deployment closer to users (see the region sketch below)
- Automatic scaling
- Lower costs for high-traffic apps
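As a concrete illustration of the region point above, a route can opt into the Edge runtime and pin preferred regions via Next.js route segment config. This is a minimal sketch; the route path and region IDs are illustrative.
// app/api/edge-ping/route.ts (illustrative route, not part of the app above)
export const runtime = 'edge'
export const preferredRegion = ['iad1', 'sfo1'] // example Vercel region IDs

export async function GET() {
  // Responds from the nearest configured edge region
  return Response.json({ ok: true, servedAt: new Date().toISOString() })
}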
6. Using Vercel AI SDK
The Vercel AI SDK simplifies LLM integration with built-in streaming support:
Installation
npm install ai @ai-sdk/openai @ai-sdk/anthropic
Streaming Chat with AI SDK
// app/api/ai-sdk-chat/route.ts
import { openai } from '@ai-sdk/openai'
import { anthropic } from '@ai-sdk/anthropic'
import { streamText } from 'ai'
export async function POST(req: Request) {
const { messages, provider = 'openai' } = await req.json()
const model = provider === 'anthropic'
? anthropic('claude-3-opus-20240229')
: openai('gpt-4-turbo-preview')
const result = await streamText({
model,
messages,
temperature: 0.7,
maxTokens: 1000,
})
return result.toAIStreamResponse()
}

Client Component with useChat Hook
// app/chat/page.tsx
'use client'
import { useChat } from 'ai/react'
export default function ChatPage() {
const { messages, input, handleInputChange, handleSubmit, isLoading } = useChat({
api: '/api/ai-sdk-chat',
})
return (
<div className="max-w-2xl mx-auto p-4">
<div className="mb-4 h-96 overflow-y-auto">
{messages.map((m) => (
<div key={m.id} className="mb-2">
<strong>{m.role === 'user' ? 'You: ' : 'AI: '}</strong>
{m.content}
</div>
))}
</div>
<form onSubmit={handleSubmit} className="flex gap-2">
<input
value={input}
onChange={handleInputChange}
placeholder="Say something..."
className="flex-1 p-2 border rounded"
disabled={isLoading}
/>
<button
type="submit"
disabled={isLoading}
className="px-4 py-2 bg-blue-500 text-white rounded"
>
Send
</button>
</form>
</div>
)
}

7. Security & API Key Management
Environment Variables Best Practices
// app/lib/config.ts
const requiredEnvVars = [
'OPENAI_API_KEY',
'ANTHROPIC_API_KEY',
'NEXTAUTH_SECRET',
] as const
type EnvVarKey = typeof requiredEnvVars[number]
// Validate environment variables at startup
function validateEnv(): Record<EnvVarKey, string> {
const env: Partial<Record<EnvVarKey, string>> = {}
const missing: string[] = []
for (const key of requiredEnvVars) {
const value = process.env[key]
if (!value) {
missing.push(key)
} else {
env[key] = value
}
}
if (missing.length > 0) {
throw new Error(`Missing environment variables: ${missing.join(', ')}`)
}
return env as Record<EnvVarKey, string>
}
export const config = validateEnv()

API Route Authentication
// app/lib/auth.ts
import { NextRequest } from 'next/server'
import { getToken } from 'next-auth/jwt'
export async function authenticateRequest(req: NextRequest) {
// For NextAuth.js
const token = await getToken({ req })
if (!token) {
return { authenticated: false, user: null }
}
return { authenticated: true, user: token }
}
// Usage in API route
export async function POST(req: NextRequest) {
const { authenticated, user } = await authenticateRequest(req)
if (!authenticated) {
return new Response('Unauthorized', { status: 401 })
}
// Process authenticated request
}

Request Sanitization
// app/lib/sanitize.ts
import DOMPurify from 'isomorphic-dompurify'
export function sanitizeInput(input: string): string {
// Remove potential injection attempts
const cleaned = DOMPurify.sanitize(input, {
ALLOWED_TAGS: [],
ALLOWED_ATTR: []
})
// Additional LLM-specific sanitization
return cleaned
.replace(/\[INST\]/g, '') // Remove instruction markers
.replace(/\[\/INST\]/g, '')
.trim()
}

8. Rate Limiting Implementation
Upstash Rate Limiting (Serverless)
// app/lib/rate-limit.ts
import { Ratelimit } from '@upstash/ratelimit'
import { Redis } from '@upstash/redis'
const redis = new Redis({
url: process.env.UPSTASH_REDIS_URL!,
token: process.env.UPSTASH_REDIS_TOKEN!,
})
export const rateLimiter = new Ratelimit({
redis,
limiter: Ratelimit.slidingWindow(10, '1 m'), // 10 requests per minute
analytics: true,
})
// API route with rate limiting
export async function POST(req: NextRequest) {
const ip = req.ip ?? '127.0.0.1'
const { success, limit, reset, remaining } = await rateLimiter.limit(ip)
if (!success) {
return new Response('Rate limit exceeded', {
status: 429,
headers: {
'X-RateLimit-Limit': limit.toString(),
'X-RateLimit-Remaining': remaining.toString(),
'X-RateLimit-Reset': new Date(reset).toISOString(),
},
})
}
// Process request
}

Token-Based Rate Limiting
// app/lib/token-limiter.ts
interface TokenBucket {
tokens: number
lastRefill: number
userId: string
}
const TOKEN_LIMIT = 100000 // tokens per day
const REFILL_RATE = TOKEN_LIMIT / (24 * 60 * 60 * 1000) // tokens per ms
export class TokenLimiter {
private buckets = new Map<string, TokenBucket>()
async checkLimit(userId: string, tokensRequested: number): Promise<boolean> {
let bucket = this.buckets.get(userId)
const now = Date.now()
if (!bucket) {
bucket = {
tokens: TOKEN_LIMIT,
lastRefill: now,
userId,
}
} else {
// Refill tokens based on time passed
const timePassed = now - bucket.lastRefill
const tokensToAdd = timePassed * REFILL_RATE
bucket.tokens = Math.min(TOKEN_LIMIT, bucket.tokens + tokensToAdd)
bucket.lastRefill = now
}
if (bucket.tokens >= tokensRequested) {
bucket.tokens -= tokensRequested
this.buckets.set(userId, bucket)
return true
}
return false
}
}

9. Error Handling Patterns
// app/lib/errors.ts
export class LLMError extends Error {
constructor(
message: string,
public statusCode: number,
public provider?: string,
public details?: any
) {
super(message)
this.name = 'LLMError'
}
}
export async function handleLLMRequest<T>(
request: () => Promise<T>,
provider: string
): Promise<T> {
try {
return await request()
} catch (error: any) {
// Provider-specific error handling
if (error.response?.status === 429) {
throw new LLMError(
'Rate limit exceeded',
429,
provider,
{ retryAfter: error.response.headers['retry-after'] }
)
}
if (error.response?.status === 401) {
throw new LLMError('Invalid API key', 401, provider)
}
if (error.code === 'ECONNREFUSED') {
throw new LLMError('Service unavailable', 503, provider)
}
// Log unexpected errors
console.error(`LLM Error [${provider}]:`, error)
throw new LLMError('Internal server error', 500, provider)
}
}
// Usage in API route
export async function POST(req: NextRequest) {
try {
const result = await handleLLMRequest(
() => openai.chat.completions.create({ ... }),
'openai'
)
return NextResponse.json(result)
} catch (error) {
if (error instanceof LLMError) {
return NextResponse.json(
{
error: error.message,
provider: error.provider,
details: error.details
},
{ status: error.statusCode }
)
}
return NextResponse.json(
{ error: 'Unexpected error occurred' },
{ status: 500 }
)
}
}

10. Production Deployment
Vercel Deployment Configuration
// vercel.json
{
"functions": {
"app/api/chat/route.ts": {
"maxDuration": 60
},
"app/api/edge-chat/route.ts": {
"runtime": "edge",
"regions": ["iad1", "sfo1", "sin1"]
}
},
"env": {
"OPENAI_API_KEY": "@openai-api-key",
"ANTHROPIC_API_KEY": "@anthropic-api-key"
}
}

Production Checklist
✓ Pre-deployment Checklist
- ☐ Environment variables configured in Vercel dashboard
- ☐ API routes protected with authentication
- ☐ Rate limiting implemented
- ☐ Error handling and logging configured
- ☐ CORS headers configured if needed (see the middleware sketch below)
- ☐ Response caching strategy implemented
- ☐ Monitoring and alerts set up
- ☐ Cost alerts configured for LLM usage
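For the CORS item, one common approach is a small middleware that answers preflight requests and attaches the headers to API responses. This is a minimal sketch, assuming a single trusted origin; the origin value and allowed headers are placeholders to adapt to your deployment.
// middleware.ts - hypothetical CORS sketch for /api routes
import { NextRequest, NextResponse } from 'next/server'

const ALLOWED_ORIGIN = 'https://example.com' // assumption: your app's public origin

function corsHeaders(): Record<string, string> {
  return {
    'Access-Control-Allow-Origin': ALLOWED_ORIGIN,
    'Access-Control-Allow-Methods': 'POST, OPTIONS',
    'Access-Control-Allow-Headers': 'Content-Type, Authorization',
  }
}

export function middleware(req: NextRequest) {
  // Answer CORS preflight requests directly
  if (req.method === 'OPTIONS') {
    return new NextResponse(null, { status: 204, headers: corsHeaders() })
  }
  // Attach CORS headers to all other API responses
  const res = NextResponse.next()
  for (const [key, value] of Object.entries(corsHeaders())) {
    res.headers.set(key, value)
  }
  return res
}

export const config = {
  matcher: '/api/:path*', // only run for API routes
}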
Monitoring Setup
// app/lib/monitoring.ts
export function trackLLMUsage(
provider: string,
model: string,
tokens: number,
duration: number
) {
// Send to analytics service
if (process.env.NODE_ENV === 'production') {
fetch('https://analytics.example.com/track', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
event: 'llm_usage',
properties: {
provider,
model,
tokens,
duration,
timestamp: new Date().toISOString(),
},
}),
}).catch(console.error)
}
}

Example: Complete Chat Application
Here's a production-ready chat application combining all the concepts:
// app/api/production-chat/route.ts
import { NextRequest } from 'next/server'
import { authenticateRequest } from '@/lib/auth'
import { rateLimiter } from '@/lib/rate-limit'
import { chatRequestSchema } from '@/lib/validators'
import { createProvider } from '@/lib/llm-providers'
import { handleLLMRequest } from '@/lib/errors'
import { trackLLMUsage } from '@/lib/monitoring'
export const runtime = 'edge'
export async function POST(req: NextRequest) {
// 1. Authentication
const { authenticated, user } = await authenticateRequest(req)
if (!authenticated) {
return new Response('Unauthorized', { status: 401 })
}
// 2. Rate limiting
const { success } = await rateLimiter.limit(user.id)
if (!success) {
return new Response('Rate limit exceeded', { status: 429 })
}
// 3. Validation
const body = await req.json()
const validation = chatRequestSchema.safeParse(body)
if (!validation.success) {
return new Response(JSON.stringify(validation.error), { status: 400 })
}
const { messages, provider = 'openai' } = validation.data
const startTime = Date.now()
try {
// 4. LLM request with error handling
const llmProvider = createProvider(provider)
const stream = await handleLLMRequest(
async () => llmProvider.generateStream(messages[messages.length - 1].content),
provider
)
// 5. Stream response with monitoring
let tokenCount = 0
const encoder = new TextEncoder()
const customStream = new ReadableStream({
async start(controller) {
for await (const chunk of stream) {
tokenCount += chunk.split(' ').length // Rough estimate
controller.enqueue(encoder.encode(`data: ${JSON.stringify({ content: chunk })}\n\n`))
}
// Track usage
trackLLMUsage(provider, 'chat', tokenCount, Date.now() - startTime)
controller.enqueue(encoder.encode('data: [DONE]\n\n'))
controller.close()
},
})
return new Response(customStream, {
headers: {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
},
})
} catch (error) {
// Error response handled by handleLLMRequest
throw error
}
}

Next Steps
Advanced Features
- Implement conversation memory with Redis (see the sketch below)
- Add file upload support for multimodal models
- Build real-time collaboration features
- Implement function calling capabilities
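For the first item, a rough sketch of conversation memory, assuming the same Upstash Redis instance used earlier for rate limiting; appendMessage and loadHistory are hypothetical helper names you would call from your chat route before and after each model call.
// app/lib/memory.ts - hypothetical conversation-memory sketch using Upstash Redis
import { Redis } from '@upstash/redis'

const redis = new Redis({
  url: process.env.UPSTASH_REDIS_URL!,
  token: process.env.UPSTASH_REDIS_TOKEN!,
})

export interface StoredMessage {
  role: 'system' | 'user' | 'assistant'
  content: string
}

const HISTORY_TTL_SECONDS = 60 * 60 * 24 // keep conversations for one day

// Append a message to a conversation and refresh its expiry
export async function appendMessage(conversationId: string, message: StoredMessage) {
  const key = `chat:${conversationId}`
  await redis.rpush(key, JSON.stringify(message))
  await redis.expire(key, HISTORY_TTL_SECONDS)
}

// Load the most recent messages to send back to the model
export async function loadHistory(conversationId: string, limit = 20): Promise<StoredMessage[]> {
  const key = `chat:${conversationId}`
  // The Upstash client may auto-deserialize JSON strings, so handle both cases
  const raw = await redis.lrange<StoredMessage | string>(key, -limit, -1)
  return raw.map((item) => (typeof item === 'string' ? (JSON.parse(item) as StoredMessage) : item))
}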
Performance Optimization
- Implement response caching strategies (see the sketch below)
- Use a CDN for static assets
- Optimize bundle size with dynamic imports
- Implement request debouncing
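For the caching item, one pattern is to cache deterministic (low-temperature) completions keyed by a hash of the prompt. A rough sketch, again assuming Upstash Redis and a Node.js runtime route (the crypto import below is not available on the Edge runtime); all helper names are illustrative.
// app/lib/response-cache.ts - hypothetical response-cache sketch keyed by prompt hash
import { createHash } from 'crypto' // Node.js runtime only
import { Redis } from '@upstash/redis'

const redis = new Redis({
  url: process.env.UPSTASH_REDIS_URL!,
  token: process.env.UPSTASH_REDIS_TOKEN!,
})

const CACHE_TTL_SECONDS = 60 * 60 // cache identical prompts for one hour

function cacheKey(provider: string, prompt: string): string {
  // Hash the prompt so keys stay short and user input never appears in key names
  const digest = createHash('sha256').update(`${provider}:${prompt}`).digest('hex')
  return `llm-cache:${digest}`
}

export async function getCachedCompletion(provider: string, prompt: string): Promise<string | null> {
  return redis.get<string>(cacheKey(provider, prompt))
}

export async function setCachedCompletion(provider: string, prompt: string, completion: string) {
  await redis.set(cacheKey(provider, prompt), completion, { ex: CACHE_TTL_SECONDS })
}
In your route, call getCachedCompletion before hitting the provider and setCachedCompletion after a successful response; skip the cache for high-temperature or user-specific prompts.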