
Next.js 14 + LLM APIs Complete Integration Guide

Master the integration of LLM APIs with Next.js 14, leveraging the App Router, Server Components, streaming responses, and edge deployment for high-performance AI applications.

Quick Start

Get up and running in minutes:

npx create-next-app@latest my-llm-app --typescript --tailwind --app
cd my-llm-app
npm install ai openai @anthropic-ai/sdk

1. Project Setup & Configuration

Environment Setup

Create a .env.local file for your API keys:

# .env.local
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GOOGLE_API_KEY=...

Package Installation

# Core dependencies
npm install ai openai @anthropic-ai/sdk @google/generative-ai

# Additional utilities
npm install zod uuid rate-limiter-flexible
npm install @upstash/ratelimit @upstash/redis # For serverless rate limiting

TypeScript Configuration

// tsconfig.json
{
  "compilerOptions": {
    "target": "ES2022",
    "lib": ["dom", "dom.iterable", "esnext"],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "forceConsistentCasingInFileNames": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "esnext",
    "moduleResolution": "bundler",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "preserve",
    "incremental": true,
    "plugins": [
      {
        "name": "next"
      }
    ],
    "paths": {
      "@/*": ["./src/*"]
    }
  }
}

2. App Router Architecture for AI Apps

Next.js 14's App Router provides the ideal architecture for LLM-powered applications:

Directory Structure

app/
├── api/
│   ├── chat/
│   │   └── route.ts        # Streaming chat endpoint
│   ├── completion/
│   │   └── route.ts        # Single completion endpoint
│   └── embeddings/
│       └── route.ts        # Vector embeddings endpoint
├── chat/
│   ├── page.tsx           # Chat interface (Client Component)
│   └── layout.tsx         # Chat layout
├── lib/
│   ├── llm-providers.ts  # LLM provider configuration
│   ├── rate-limit.ts     # Rate limiting utilities
│   └── validators.ts     # Request/response validation
└── components/
    ├── chat-interface.tsx # Chat UI component
    └── message-list.tsx   # Message display component

Server vs Client Components

Best Practice

Always make LLM API calls from Server Components or API Routes. Never expose API keys to client-side code.

// app/api/chat/route.ts - Server-side API route
import { NextResponse } from 'next/server'
import { OpenAI } from 'openai'

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
})

export async function POST(req: Request) {
  try {
    const { messages } = await req.json()
    
    const completion = await openai.chat.completions.create({
      model: 'gpt-4-turbo-preview',
      messages,
      temperature: 0.7,
    })
    
    return NextResponse.json(completion)
  } catch (error) {
    console.error('OpenAI API error:', error)
    return NextResponse.json(
      { error: 'Failed to generate response' },
      { status: 500 }
    )
  }
}
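
A Server Component can call the provider directly as well, since it renders entirely on the server. Below is a minimal sketch; the app/summary/page.tsx route and the prompt are illustrative and not part of the setup above.

// app/summary/page.tsx - Server Component (no 'use client' directive)
import { OpenAI } from 'openai'

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
})

export default async function SummaryPage() {
  // Runs on the server at render time; the API key never reaches the browser
  const completion = await openai.chat.completions.create({
    model: 'gpt-4-turbo-preview',
    messages: [
      { role: 'user', content: 'Summarize the benefits of server-side rendering in two sentences.' },
    ],
  })

  return <p>{completion.choices[0].message.content}</p>
}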

3. Creating LLM API Routes

Multi-Provider Support

Create a unified interface for multiple LLM providers:

// app/lib/llm-providers.ts
import { OpenAI } from 'openai'
import Anthropic from '@anthropic-ai/sdk'
import { GoogleGenerativeAI } from '@google/generative-ai'

export interface LLMProvider {
  name: string
  generateCompletion: (prompt: string) => Promise<string>
  generateStream: (prompt: string) => AsyncGenerator<string>
}

class OpenAIProvider implements LLMProvider {
  name = 'openai'
  private client: OpenAI
  
  constructor(apiKey: string) {
    this.client = new OpenAI({ apiKey })
  }
  
  async generateCompletion(prompt: string): Promise<string> {
    const completion = await this.client.chat.completions.create({
      model: 'gpt-4-turbo-preview',
      messages: [{ role: 'user', content: prompt }],
    })
    return completion.choices[0].message.content || ''
  }
  
  async *generateStream(prompt: string): AsyncGenerator<string> {
    const stream = await this.client.chat.completions.create({
      model: 'gpt-4-turbo-preview',
      messages: [{ role: 'user', content: prompt }],
      stream: true,
    })
    
    for await (const chunk of stream) {
      yield chunk.choices[0]?.delta?.content || ''
    }
  }
}

class AnthropicProvider implements LLMProvider {
  name = 'anthropic'
  private client: Anthropic
  
  constructor(apiKey: string) {
    this.client = new Anthropic({ apiKey })
  }
  
  async generateCompletion(prompt: string): Promise<string> {
    const message = await this.client.messages.create({
      model: 'claude-3-opus-20240229',
      max_tokens: 1000,
      messages: [{ role: 'user', content: prompt }],
    })
    return message.content[0].type === 'text' ? message.content[0].text : ''
  }
  
  async *generateStream(prompt: string): AsyncGenerator<string> {
    const stream = await this.client.messages.create({
      model: 'claude-3-opus-20240229',
      max_tokens: 1000,
      messages: [{ role: 'user', content: prompt }],
      stream: true,
    })
    
    for await (const event of stream) {
      if (event.type === 'content_block_delta') {
        yield event.delta.text
      }
    }
  }
}

// Provider factory
export function createProvider(provider: string): LLMProvider {
  switch (provider) {
    case 'openai':
      return new OpenAIProvider(process.env.OPENAI_API_KEY!)
    case 'anthropic':
      return new AnthropicProvider(process.env.ANTHROPIC_API_KEY!)
    default:
      throw new Error(`Unsupported provider: ${provider}`)
  }
}
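
For reference, the factory can be used like this from any server-side code; the prompt here is arbitrary.

// Example usage (server-side only, e.g. inside a route handler or server action)
import { createProvider } from '@/lib/llm-providers'

export async function summarize(topic: string): Promise<string> {
  const provider = createProvider('anthropic')
  return provider.generateCompletion(`Explain ${topic} in one sentence.`)
}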

Request Validation

// app/lib/validators.ts
import { z } from 'zod'

export const chatRequestSchema = z.object({
  messages: z.array(
    z.object({
      role: z.enum(['system', 'user', 'assistant']),
      content: z.string().min(1).max(10000),
    })
  ),
  provider: z.enum(['openai', 'anthropic', 'google']).optional(),
  temperature: z.number().min(0).max(2).optional(),
  maxTokens: z.number().positive().optional(),
})

export type ChatRequest = z.infer<typeof chatRequestSchema>
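
A route handler might apply the schema before touching any provider. The sketch below assumes a hypothetical /api/validated-chat route; the production chat example near the end of this guide shows the same pattern in full.

// app/api/validated-chat/route.ts (illustrative)
import { NextResponse } from 'next/server'
import { chatRequestSchema } from '@/lib/validators'

export async function POST(req: Request) {
  const body = await req.json()
  const parsed = chatRequestSchema.safeParse(body)

  if (!parsed.success) {
    // Return the Zod issues so the client can see which field failed
    return NextResponse.json({ error: parsed.error.flatten() }, { status: 400 })
  }

  const { messages, provider = 'openai' } = parsed.data
  // ...hand the validated payload to the chosen provider
  return NextResponse.json({ ok: true, provider, messageCount: messages.length })
}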

4. Implementing Streaming Responses

Streaming is essential for responsive AI interfaces. Here's how to implement it:

Basic Streaming API Route

// app/api/stream/route.ts
import { NextRequest } from 'next/server'
import { createProvider } from '@/lib/llm-providers'

export async function POST(req: NextRequest) {
  const { prompt, provider = 'openai' } = await req.json()
  
  const llmProvider = createProvider(provider)
  const stream = llmProvider.generateStream(prompt)
  
  // Create a TransformStream to handle the streaming
  const encoder = new TextEncoder()
  const customStream = new ReadableStream({
    async start(controller) {
      for await (const chunk of stream) {
        controller.enqueue(encoder.encode(`data: ${JSON.stringify({ content: chunk })}\n\n`))
      }
      controller.enqueue(encoder.encode('data: [DONE]\n\n'))
      controller.close()
    },
  })
  
  return new Response(customStream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
    },
  })
}

Client-Side Streaming Consumer

// app/components/chat-interface.tsx
'use client'

import { useState } from 'react'

export function ChatInterface() {
  const [messages, setMessages] = useState<string[]>([])
  const [input, setInput] = useState('')
  const [isStreaming, setIsStreaming] = useState(false)
  
  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault()
    if (!input.trim() || isStreaming) return
    
    setIsStreaming(true)
    setMessages(prev => [...prev, `User: ${input}`, 'Assistant: '])
    
    try {
      const response = await fetch('/api/stream', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ prompt: input }),
      })
      
      const reader = response.body?.getReader()
      const decoder = new TextDecoder()
      let assistantMessage = ''
      
      while (true) {
        const { done, value } = await reader!.read()
        if (done) break
        
        const chunk = decoder.decode(value)
        const lines = chunk.split('\n')
        
        for (const line of lines) {
          if (line.startsWith('data: ')) {
            const data = line.slice(6)
            if (data === '[DONE]') continue
            
            try {
              const parsed = JSON.parse(data)
              assistantMessage += parsed.content
              setMessages(prev => {
                const newMessages = [...prev]
                newMessages[newMessages.length - 1] = `Assistant: ${assistantMessage}`
                return newMessages
              })
            } catch (e) {
              console.error('Failed to parse SSE data:', e)
            }
          }
        }
      }
    } catch (error) {
      console.error('Streaming error:', error)
      setMessages(prev => [...prev.slice(0, -1), 'Assistant: Error occurred'])
    } finally {
      setIsStreaming(false)
      setInput('')
    }
  }
  
  return (
    <div className="max-w-2xl mx-auto p-4">
      <div className="mb-4 h-96 overflow-y-auto border rounded p-4">
        {messages.map((msg, idx) => (
          <div key={idx} className="mb-2">{msg}</div>
        ))}
      </div>
      <form onSubmit={handleSubmit} className="flex gap-2">
        <input
          type="text"
          value={input}
          onChange={(e) => setInput(e.target.value)}
          className="flex-1 p-2 border rounded"
          placeholder="Type your message..."
          disabled={isStreaming}
        />
        <button
          type="submit"
          disabled={isStreaming}
          className="px-4 py-2 bg-blue-500 text-white rounded disabled:opacity-50"
        >
          {isStreaming ? 'Generating...' : 'Send'}
        </button>
      </form>
    </div>
  )
}

5. Edge Runtime Optimization

Deploy your API routes to the edge for lower latency:

// app/api/edge-chat/route.ts
export const runtime = 'edge' // Enable edge runtime

import { NextRequest } from 'next/server'

export async function POST(req: NextRequest) {
  const { prompt } = await req.json()
  
  // Edge runtime has limitations - use fetch-based APIs
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
    },
    body: JSON.stringify({
      model: 'gpt-3.5-turbo',
      messages: [{ role: 'user', content: prompt }],
      stream: true,
    }),
  })
  
  // Forward the streaming response
  return new Response(response.body, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
    },
  })
}

Edge Runtime Benefits

  • Faster cold starts (10-50ms vs 200-500ms)
  • Global deployment closer to users
  • Automatic scaling
  • Lower costs for high-traffic apps

6. Using Vercel AI SDK

The Vercel AI SDK simplifies LLM integration with built-in streaming support:

Installation

npm install ai @ai-sdk/openai @ai-sdk/anthropic

Streaming Chat with AI SDK

// app/api/ai-sdk-chat/route.ts
import { openai } from '@ai-sdk/openai'
import { anthropic } from '@ai-sdk/anthropic'
import { streamText } from 'ai'

export async function POST(req: Request) {
  const { messages, provider = 'openai' } = await req.json()
  
  const model = provider === 'anthropic' 
    ? anthropic('claude-3-opus-20240229')
    : openai('gpt-4-turbo-preview')
  
  const result = await streamText({
    model,
    messages,
    temperature: 0.7,
    maxTokens: 1000,
  })
  
  return result.toAIStreamResponse()
}

Client Component with useChat Hook

// app/chat/page.tsx
'use client'

import { useChat } from 'ai/react'

export default function ChatPage() {
  const { messages, input, handleInputChange, handleSubmit, isLoading } = useChat({
    api: '/api/ai-sdk-chat',
  })
  
  return (
    <div className="max-w-2xl mx-auto p-4">
      <div className="mb-4 h-96 overflow-y-auto">
        {messages.map((m) => (
          <div key={m.id} className="mb-2">
            <strong>{m.role === 'user' ? 'You: ' : 'AI: '}</strong>
            {m.content}
          </div>
        ))}
      </div>
      
      <form onSubmit={handleSubmit} className="flex gap-2">
        <input
          value={input}
          onChange={handleInputChange}
          placeholder="Say something..."
          className="flex-1 p-2 border rounded"
          disabled={isLoading}
        />
        <button
          type="submit"
          disabled={isLoading}
          className="px-4 py-2 bg-blue-500 text-white rounded"
        >
          Send
        </button>
      </form>
    </div>
  )
}

7. Security & API Key Management

Environment Variables Best Practices

// app/lib/config.ts
const requiredEnvVars = [
  'OPENAI_API_KEY',
  'ANTHROPIC_API_KEY',
  'NEXTAUTH_SECRET',
] as const

type EnvVarKey = typeof requiredEnvVars[number]

// Validate environment variables at startup
function validateEnv(): Record<EnvVarKey, string> {
  const env: Partial<Record<EnvVarKey, string>> = {}
  const missing: string[] = []
  
  for (const key of requiredEnvVars) {
    const value = process.env[key]
    if (!value) {
      missing.push(key)
    } else {
      env[key] = value
    }
  }
  
  if (missing.length > 0) {
    throw new Error(`Missing environment variables: ${missing.join(', ')}`)
  }
  
  return env as Record<EnvVarKey, string>
}

export const config = validateEnv()
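
The validated values can then replace direct process.env reads when constructing clients, so a missing key fails at startup rather than on the first request. A small sketch:

// Example: construct the OpenAI client from the validated config
import { OpenAI } from 'openai'
import { config } from '@/lib/config'

export const openai = new OpenAI({ apiKey: config.OPENAI_API_KEY })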

API Route Authentication

// app/lib/auth.ts
import { NextRequest } from 'next/server'
import { getToken } from 'next-auth/jwt'

export async function authenticateRequest(req: NextRequest) {
  // For NextAuth.js
  const token = await getToken({ req })
  if (!token) {
    return { authenticated: false, user: null }
  }
  
  return { authenticated: true, user: token }
}

// Usage in API route
export async function POST(req: NextRequest) {
  const { authenticated, user } = await authenticateRequest(req)
  
  if (!authenticated) {
    return new Response('Unauthorized', { status: 401 })
  }
  
  // Process authenticated request
}

Request Sanitization

// app/lib/sanitize.ts
import DOMPurify from 'isomorphic-dompurify'

export function sanitizeInput(input: string): string {
  // Remove potential injection attempts
  const cleaned = DOMPurify.sanitize(input, { 
    ALLOWED_TAGS: [],
    ALLOWED_ATTR: [] 
  })
  
  // Additional LLM-specific sanitization
  return cleaned
    .replace(/\[INST\]/g, '') // Remove instruction markers
    .replace(/\[\/INST\]/g, '') // Remove closing instruction markers
    .trim()
}
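
One way to wire this into a chat route is to sanitize user-authored messages before they are turned into a prompt. This is a minimal sketch that stops at the sanitization step; the route path is illustrative.

// app/api/safe-chat/route.ts (illustrative)
import { NextResponse } from 'next/server'
import { sanitizeInput } from '@/lib/sanitize'

interface IncomingMessage {
  role: 'system' | 'user' | 'assistant'
  content: string
}

export async function POST(req: Request) {
  const { messages } = (await req.json()) as { messages: IncomingMessage[] }

  // Only user-authored content needs cleaning; system prompts are trusted
  const safeMessages = messages.map((m) =>
    m.role === 'user' ? { ...m, content: sanitizeInput(m.content) } : m
  )

  return NextResponse.json({ messages: safeMessages })
}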

8. Rate Limiting Implementation

Upstash Rate Limiting (Serverless)

// app/lib/rate-limit.ts
import { Ratelimit } from '@upstash/ratelimit'
import { Redis } from '@upstash/redis'

const redis = new Redis({
  url: process.env.UPSTASH_REDIS_URL!,
  token: process.env.UPSTASH_REDIS_TOKEN!,
})

export const rateLimiter = new Ratelimit({
  redis,
  limiter: Ratelimit.slidingWindow(10, '1 m'), // 10 requests per minute
  analytics: true,
})

// API route with rate limiting
export async function POST(req: NextRequest) {
  const ip = req.ip ?? '127.0.0.1'
  const { success, limit, reset, remaining } = await rateLimiter.limit(ip)
  
  if (!success) {
    return new Response('Rate limit exceeded', {
      status: 429,
      headers: {
        'X-RateLimit-Limit': limit.toString(),
        'X-RateLimit-Remaining': remaining.toString(),
        'X-RateLimit-Reset': new Date(reset).toISOString(),
      },
    })
  }
  
  // Process request
}

Token-Based Rate Limiting

// app/lib/token-limiter.ts
interface TokenBucket {
  tokens: number
  lastRefill: number
  userId: string
}

const TOKEN_LIMIT = 100000 // tokens per day
const REFILL_RATE = TOKEN_LIMIT / (24 * 60 * 60 * 1000) // tokens per ms

export class TokenLimiter {
  private buckets = new Map<string, TokenBucket>()
  
  async checkLimit(userId: string, tokensRequested: number): Promise<boolean> {
    let bucket = this.buckets.get(userId)
    const now = Date.now()
    
    if (!bucket) {
      bucket = {
        tokens: TOKEN_LIMIT,
        lastRefill: now,
        userId,
      }
    } else {
      // Refill tokens based on time passed
      const timePassed = now - bucket.lastRefill
      const tokensToAdd = timePassed * REFILL_RATE
      bucket.tokens = Math.min(TOKEN_LIMIT, bucket.tokens + tokensToAdd)
      bucket.lastRefill = now
    }
    
    if (bucket.tokens >= tokensRequested) {
      bucket.tokens -= tokensRequested
      this.buckets.set(userId, bucket)
      return true
    }
    
    return false
  }
}
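
A rough usage sketch follows. Note that this limiter is in-memory, so on serverless or edge deployments each instance keeps its own buckets; a shared store such as Redis is needed for a truly global budget. The characters-divided-by-four token estimate is a deliberate approximation.

// Illustrative usage inside a route handler
import { TokenLimiter } from '@/lib/token-limiter'

const limiter = new TokenLimiter()

export async function POST(req: Request) {
  const { userId, prompt } = await req.json()

  // Very rough estimate: ~4 characters per token
  const estimatedTokens = Math.ceil(prompt.length / 4)

  if (!(await limiter.checkLimit(userId, estimatedTokens))) {
    return new Response('Daily token budget exceeded', { status: 429 })
  }

  // ...proceed with the LLM call
  return new Response('OK')
}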

9. Error Handling Patterns

// app/lib/errors.ts
export class LLMError extends Error {
  constructor(
    message: string,
    public statusCode: number,
    public provider?: string,
    public details?: any
  ) {
    super(message)
    this.name = 'LLMError'
  }
}

export async function handleLLMRequest<T>(
  request: () => Promise<T>,
  provider: string
): Promise<T> {
  try {
    return await request()
  } catch (error: any) {
    // Provider-specific error handling
    if (error.response?.status === 429) {
      throw new LLMError(
        'Rate limit exceeded',
        429,
        provider,
        { retryAfter: error.response.headers['retry-after'] }
      )
    }
    
    if (error.response?.status === 401) {
      throw new LLMError('Invalid API key', 401, provider)
    }
    
    if (error.code === 'ECONNREFUSED') {
      throw new LLMError('Service unavailable', 503, provider)
    }
    
    // Log unexpected errors
    console.error(`LLM Error [${provider}]:`, error)
    throw new LLMError('Internal server error', 500, provider)
  }
}

// Usage in API route
export async function POST(req: NextRequest) {
  try {
    const result = await handleLLMRequest(
      () => openai.chat.completions.create({ ... }),
      'openai'
    )
    return NextResponse.json(result)
  } catch (error) {
    if (error instanceof LLMError) {
      return NextResponse.json(
        { 
          error: error.message,
          provider: error.provider,
          details: error.details 
        },
        { status: error.statusCode }
      )
    }
    return NextResponse.json(
      { error: 'Unexpected error occurred' },
      { status: 500 }
    )
  }
}

10. Production Deployment

Vercel Deployment Configuration

// vercel.json
{
  "functions": {
    "app/api/chat/route.ts": {
      "maxDuration": 60
    },
    "app/api/edge-chat/route.ts": {
      "runtime": "edge",
      "regions": ["iad1", "sfo1", "sin1"]
    }
  },
  "env": {
    "OPENAI_API_KEY": "@openai-api-key",
    "ANTHROPIC_API_KEY": "@anthropic-api-key"
  }
}

Production Checklist

✓ Pre-deployment Checklist

  ☐ Environment variables configured in Vercel dashboard
  ☐ API routes protected with authentication
  ☐ Rate limiting implemented
  ☐ Error handling and logging configured
  ☐ CORS headers configured if needed
  ☐ Response caching strategy implemented
  ☐ Monitoring and alerts set up
  ☐ Cost alerts configured for LLM usage

Monitoring Setup

// app/lib/monitoring.ts
export function trackLLMUsage(
  provider: string,
  model: string,
  tokens: number,
  duration: number
) {
  // Send to analytics service
  if (process.env.NODE_ENV === 'production') {
    fetch('https://analytics.example.com/track', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        event: 'llm_usage',
        properties: {
          provider,
          model,
          tokens,
          duration,
          timestamp: new Date().toISOString(),
        },
      }),
    }).catch(console.error)
  }
}

Example: Complete Chat Application

Here's a production-ready chat application combining all the concepts:

// app/api/production-chat/route.ts
import { NextRequest } from 'next/server'
import { authenticateRequest } from '@/lib/auth'
import { rateLimiter } from '@/lib/rate-limit'
import { chatRequestSchema } from '@/lib/validators'
import { createProvider } from '@/lib/llm-providers'
import { handleLLMRequest, LLMError } from '@/lib/errors'
import { trackLLMUsage } from '@/lib/monitoring'

export const runtime = 'edge'

export async function POST(req: NextRequest) {
  // 1. Authentication
  const { authenticated, user } = await authenticateRequest(req)
  if (!authenticated) {
    return new Response('Unauthorized', { status: 401 })
  }
  
  // 2. Rate limiting
  const { success } = await rateLimiter.limit(user.id)
  if (!success) {
    return new Response('Rate limit exceeded', { status: 429 })
  }
  
  // 3. Validation
  const body = await req.json()
  const validation = chatRequestSchema.safeParse(body)
  if (!validation.success) {
    return new Response(JSON.stringify(validation.error), { status: 400 })
  }
  
  const { messages, provider = 'openai' } = validation.data
  const startTime = Date.now()
  
  try {
    // 4. LLM request with error handling
    const llmProvider = createProvider(provider)
    const stream = await handleLLMRequest(
      // Wrap in an async function so the call matches handleLLMRequest's Promise-based signature
      async () => llmProvider.generateStream(messages[messages.length - 1].content),
      provider
    )
    
    // 5. Stream response with monitoring
    let tokenCount = 0
    const encoder = new TextEncoder()
    const customStream = new ReadableStream({
      async start(controller) {
        for await (const chunk of stream) {
          tokenCount += chunk.split(' ').length // Rough estimate
          controller.enqueue(encoder.encode(`data: ${JSON.stringify({ content: chunk })}\n\n`))
        }
        
        // Track usage
        trackLLMUsage(provider, 'chat', tokenCount, Date.now() - startTime)
        
        controller.enqueue(encoder.encode('data: [DONE]\n\n'))
        controller.close()
      },
    })
    
    return new Response(customStream, {
      headers: {
        'Content-Type': 'text/event-stream',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
      },
    })
  } catch (error) {
    // handleLLMRequest normalizes provider failures into LLMError with an HTTP status code
    const status = error instanceof LLMError ? error.statusCode : 500
    const message = error instanceof LLMError ? error.message : 'Internal server error'
    return new Response(message, { status })
  }
}

Next Steps

Advanced Features

  • Implement conversation memory with Redis (see the sketch after this list)
  • Add file upload support for multimodal models
  • Build real-time collaboration features
  • Implement function calling capabilities
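
As a starting point for the first item, here is a hedged sketch of conversation memory backed by Upstash Redis. The key naming, message shape, and retention settings are assumptions, not an established convention from this guide.

// app/lib/conversation-memory.ts (illustrative)
import { Redis } from '@upstash/redis'

const redis = new Redis({
  url: process.env.UPSTASH_REDIS_URL!,
  token: process.env.UPSTASH_REDIS_TOKEN!,
})

export interface StoredMessage {
  role: 'system' | 'user' | 'assistant'
  content: string
}

// Append a message and keep only the most recent 50 entries per conversation
export async function appendMessage(conversationId: string, message: StoredMessage) {
  const key = `conversation:${conversationId}`
  await redis.rpush(key, JSON.stringify(message))
  await redis.ltrim(key, -50, -1)
  await redis.expire(key, 60 * 60 * 24) // drop idle conversations after 24 hours
}

export async function getHistory(conversationId: string): Promise<StoredMessage[]> {
  const raw = await redis.lrange(`conversation:${conversationId}`, 0, -1)
  // @upstash/redis may auto-deserialize JSON values, so handle both cases
  return raw.map((item) => (typeof item === 'string' ? JSON.parse(item) : item) as StoredMessage)
}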

Performance Optimization

  • Implement response caching strategies (see the caching sketch after this list)
  • Use CDN for static assets
  • Optimize bundle size with dynamic imports
  • Implement request debouncing
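
For the caching item, one possible approach is to cache identical, deterministic completions in Redis keyed by a hash of the prompt. This sketch reuses the Upstash client from earlier sections; it only makes sense for low-temperature requests where repeated prompts should return the same text, and it relies on the Node.js runtime (createHash is not available in the edge runtime).

// app/lib/completion-cache.ts (illustrative)
import { createHash } from 'crypto'
import { Redis } from '@upstash/redis'

const redis = new Redis({
  url: process.env.UPSTASH_REDIS_URL!,
  token: process.env.UPSTASH_REDIS_TOKEN!,
})

export async function cachedCompletion(
  prompt: string,
  generate: (prompt: string) => Promise<string>
): Promise<string> {
  // Key on a hash of the prompt so identical requests hit the cache
  const key = `completion:${createHash('sha256').update(prompt).digest('hex')}`

  const cached = await redis.get<string>(key)
  if (cached) return cached

  const result = await generate(prompt)
  await redis.set(key, result, { ex: 60 * 60 }) // keep cached completions for one hour
  return result
}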

Ready to Build?

Start integrating LLM APIs into your Next.js application today with our unified API gateway.
