Advanced
January 17, 2024 · 25 min read

LLM Response Validation

Build robust validation layers that keep LLM outputs accurate, safe, and reliable in production, using schema validation, content filtering, and quality checks.

Safety: 99.9% · Accuracy: 95%+ · Format Valid: 100% · Checks: 12+ Types

Validation Types

Schema Validation
Ensure responses match expected data structures and formats
  • JSON schema validation
  • Required field checks
  • Data type validation
  • Format constraints (email, URL, etc.)
  • Length and boundary limits

Schema Validation Implementation

Use JSON Schema or TypeScript interfaces to ensure LLM responses match expected formats.

JSON Schema Validation

import Ajv from 'ajv'
import addFormats from 'ajv-formats'

const ajv = new Ajv({ allErrors: true, verbose: true }) // verbose populates error.data used below
addFormats(ajv)

// Define expected response schema
const responseSchema = {
  type: 'object',
  properties: {
    answer: {
      type: 'string',
      minLength: 10,
      maxLength: 500
    },
    confidence: {
      type: 'number',
      minimum: 0,
      maximum: 1
    },
    sources: {
      type: 'array',
      items: {
        type: 'object',
        properties: {
          url: { type: 'string', format: 'uri' },
          title: { type: 'string' },
          relevance: { type: 'number', minimum: 0, maximum: 1 }
        },
        required: ['url', 'title', 'relevance']
      },
      maxItems: 5
    },
    metadata: {
      type: 'object',
      properties: {
        model: { type: 'string' },
        timestamp: { type: 'string', format: 'date-time' },
        tokens_used: { type: 'integer', minimum: 0 }
      },
      required: ['model', 'timestamp']
    }
  },
  required: ['answer', 'confidence'],
  additionalProperties: false
}

const validate = ajv.compile(responseSchema)

// Result shape returned by every validation step (inferred from usage below)
interface ValidationResult {
  isValid: boolean
  errors: Array<{ path: string; message?: string; value?: unknown }>
}

class ResponseValidator {
  validateSchema(response: any): ValidationResult {
    const valid = validate(response)
    
    if (!valid) {
      return {
        isValid: false,
        errors: validate.errors?.map(error => ({
          path: error.instancePath,
          message: error.message,
          value: error.data
        })) || []
      }
    }
    
    return { isValid: true, errors: [] }
  }

  async validateLLMResponse(response: string): Promise<ValidationResult> {
    try {
      // Parse JSON response
      const parsed = JSON.parse(response)
      
      // Validate schema
      const schemaResult = this.validateSchema(parsed)
      if (!schemaResult.isValid) {
        return schemaResult
      }
      
      // Additional semantic validation
      const semanticResult = await this.validateSemantics(parsed)
      
      return {
        isValid: schemaResult.isValid && semanticResult.isValid,
        errors: [...schemaResult.errors, ...semanticResult.errors]
      }
    } catch (error) {
      return {
        isValid: false,
        errors: [{ path: 'root', message: 'Invalid JSON format', value: response }]
      }
    }
  }

  // Domain-specific semantic checks (e.g. confidence consistent with cited sources);
  // placeholder implementation so the class compiles as shown
  private async validateSemantics(parsed: any): Promise<ValidationResult> {
    return { isValid: true, errors: [] }
  }
}
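
If you prefer to express the contract as TypeScript-first code, a schema library such as Zod can stand in for the Ajv setup above. A minimal sketch under that assumption (the Zod schema simply mirrors the Ajv schema; adapt it to your actual response contract):

import { z } from 'zod'

// Mirror of the Ajv responseSchema above, expressed with Zod
const responseZodSchema = z.object({
  answer: z.string().min(10).max(500),
  confidence: z.number().min(0).max(1),
  sources: z.array(
    z.object({
      url: z.string().url(),
      title: z.string(),
      relevance: z.number().min(0).max(1)
    })
  ).max(5).optional(),
  metadata: z.object({
    model: z.string(),
    timestamp: z.string().datetime(),
    tokens_used: z.number().int().min(0).optional()
  }).optional()
}).strict() // reject unexpected properties, like additionalProperties: false

function validateWithZod(raw: string): ValidationResult {
  // JSON.parse still throws on malformed JSON; wrap it as validateLLMResponse does above
  const result = responseZodSchema.safeParse(JSON.parse(raw))
  if (result.success) {
    return { isValid: true, errors: [] }
  }
  return {
    isValid: false,
    errors: result.error.issues.map(issue => ({
      path: issue.path.join('.'),
      message: issue.message
    }))
  }
}

safeParse never throws on schema violations, so the error branch maps Zod issues into the same ValidationResult shape the rest of the pipeline consumes.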

Content Safety Validation

Implement multiple layers of safety checks to filter harmful content.

Safety Checker Implementation

import OpenAI from 'openai'

// Result shapes inferred from how the checks below are used
interface SafetyViolation {
  type: string
  [key: string]: unknown
}

interface SafetyCheck {
  safe: boolean
  confidence: number
  violation: SafetyViolation | null
}

interface SafetyResult {
  safe: boolean
  violations: SafetyViolation[]
  confidence: number
}

class SafetyValidator {
  private moderationAPI: OpenAI
  private toxicityClassifier: any
  private piiDetector: any

  constructor() {
    this.moderationAPI = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
    // Initialize other classifiers
  }

  async validateSafety(content: string): Promise<SafetyResult> {
    const checks = await Promise.all([
      this.checkModeration(content),
      this.checkToxicity(content),
      this.checkPII(content),
      this.checkHarmfulInstructions(content)
    ])

    const violations = checks.filter(check => !check.safe)
    
    return {
      safe: violations.length === 0,
      violations: violations.map(v => v.violation!), // non-null: only failed checks carry a violation
      confidence: Math.min(...checks.map(c => c.confidence))
    }
  }

  async checkModeration(content: string): Promise<SafetyCheck> {
    try {
      const response = await this.moderationAPI.moderations.create({
        input: content
      })

      const result = response.results[0]
      const flagged = result.flagged
      
      return {
        safe: !flagged,
        confidence: 0.95,
        violation: flagged ? {
          type: 'moderation',
          categories: Object.entries(result.categories)
            .filter(([_, flagged]) => flagged)
            .map(([category, _]) => category),
          scores: result.category_scores
        } : null
      }
    } catch (error) {
      // Fail open with low confidence if the moderation call itself errors
      return { safe: true, confidence: 0.5, violation: null }
    }
  }

  async checkToxicity(content: string): Promise<SafetyCheck> {
    // Use Perspective API or similar toxicity classifier
    const toxicityScore = await this.toxicityClassifier.predict(content)
    
    return {
      safe: toxicityScore < 0.7,
      confidence: 0.85,
      violation: toxicityScore >= 0.7 ? {
        type: 'toxicity',
        score: toxicityScore,
        threshold: 0.7
      } : null
    }
  }

  async checkPII(content: string): Promise<SafetyCheck> {
    const piiEntities = await this.piiDetector.detectPII(content)
    const hasPII = piiEntities.length > 0
    
    return {
      safe: !hasPII,
      confidence: 0.90,
      violation: hasPII ? {
        type: 'pii',
        entities: piiEntities.map(e => ({
          type: e.type,
          text: e.text,
          confidence: e.confidence
        }))
      } : null
    }
  }

  async checkHarmfulInstructions(content: string): Promise<SafetyCheck> {
    // Check for instructions that could cause harm
    const harmfulPatterns = [
      /how to (make|create|build).*(bomb|explosive|weapon)/i,
      /instructions.*(harm|hurt|kill|suicide)/i,
      /(illegal|unlawful).*(download|obtain|acquire)/i
    ]

    const hasHarmfulContent = harmfulPatterns.some(pattern => 
      pattern.test(content)
    )

    return {
      safe: !hasHarmfulContent,
      confidence: 0.80,
      violation: hasHarmfulContent ? {
        type: 'harmful_instructions',
        detected_patterns: harmfulPatterns
          .filter(p => p.test(content))
          .map(p => p.source)
      } : null
    }
  }
}
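
The toxicityClassifier and piiDetector above are injected abstractions: in practice the toxicity check might call Google's Perspective API or a local classifier, and PII detection is usually delegated to a dedicated service such as Microsoft Presidio or a cloud PII API. Purely as an illustration of the detectPII interface the class expects, a regex-based detector might look like this (the patterns are rough examples, not production-grade coverage):

interface PIIEntity {
  type: string
  text: string
  confidence: number
}

// Illustrative patterns only; a real deployment should use a dedicated PII detection service
const PII_PATTERNS: Array<{ type: string; regex: RegExp; confidence: number }> = [
  { type: 'email', regex: /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/g, confidence: 0.95 },
  { type: 'phone', regex: /\+?\d{1,3}[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g, confidence: 0.7 },
  { type: 'ssn', regex: /\b\d{3}-\d{2}-\d{4}\b/g, confidence: 0.85 },
  { type: 'credit_card', regex: /\b(?:\d[ -]?){13,16}\b/g, confidence: 0.6 }
]

class RegexPIIDetector {
  // Matches the piiDetector.detectPII(content) call in checkPII above
  async detectPII(content: string): Promise<PIIEntity[]> {
    const entities: PIIEntity[] = []
    for (const { type, regex, confidence } of PII_PATTERNS) {
      for (const match of content.matchAll(regex)) {
        entities.push({ type, text: match[0], confidence })
      }
    }
    return entities
  }
}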

Quality Validation

Assess response quality using multiple metrics and models.

Quality Assessment

// Result shapes inferred from how the quality checks below are used
interface QualityCheck {
  metric: string
  score: number
  issue: string | null
}

interface QualityResult {
  scores: Record<string, number>
  overallScore: number
  passed: boolean
  issues: Array<string | null>
}

class QualityValidator {
  async validateQuality(
    prompt: string, 
    response: string, 
    context?: any
  ): Promise<QualityResult> {
    const checks = await Promise.all([
      this.checkRelevance(prompt, response),
      this.checkCoherence(response),
      this.checkFactuality(response, context),
      this.checkCompleteness(prompt, response),
      this.checkHallucination(response, context)
    ])

    const scores = checks.reduce((acc, check) => {
      acc[check.metric] = check.score
      return acc
    }, {} as Record<string, number>)

    const overallScore = Object.values(scores).reduce((a, b) => a + b, 0) / Object.keys(scores).length

    return {
      scores,
      overallScore,
      passed: overallScore >= 0.7,
      issues: checks.filter(c => c.score < 0.6).map(c => c.issue)
    }
  }

  async checkRelevance(prompt: string, response: string): Promise<QualityCheck> {
    // Use embedding similarity or trained relevance model
    const promptEmbedding = await this.getEmbedding(prompt)
    const responseEmbedding = await this.getEmbedding(response)
    
    const similarity = this.cosineSimilarity(promptEmbedding, responseEmbedding)
    
    return {
      metric: 'relevance',
      score: similarity,
      issue: similarity < 0.6 ? 'Response not relevant to prompt' : null
    }
  }

  async checkCoherence(response: string): Promise<QualityCheck> {
    // Check for logical flow and readability
    const sentences = response.split(/[.!?]+/).filter(s => s.trim())
    
    if (sentences.length < 2) {
      return {
        metric: 'coherence',
        score: 0.5,
        issue: 'Response too short for coherence assessment'
      }
    }

    // Calculate sentence-to-sentence similarity for flow
    const similarities = []
    for (let i = 0; i < sentences.length - 1; i++) {
      const sim = await this.getSentenceSimilarity(sentences[i], sentences[i + 1])
      similarities.push(sim)
    }

    const coherenceScore = similarities.reduce((a, b) => a + b, 0) / similarities.length
    
    return {
      metric: 'coherence',
      score: coherenceScore,
      issue: coherenceScore < 0.4 ? 'Response lacks logical flow' : null
    }
  }

  async checkFactuality(response: string, context?: any): Promise<QualityCheck> {
    // Extract factual claims and verify against knowledge base
    const claims = await this.extractFactualClaims(response)
    
    if (claims.length === 0) {
      return {
        metric: 'factuality',
        score: 0.8, // Neutral for non-factual content
        issue: null
      }
    }

    const verificationResults = await Promise.all(
      claims.map(claim => this.verifyFactualClaim(claim, context))
    )

    const factualityScore = verificationResults
      .reduce((sum, result) => sum + result.confidence, 0) / verificationResults.length

    const falseFactsCount = verificationResults.filter(r => !r.verified).length

    return {
      metric: 'factuality',
      score: factualityScore,
      issue: falseFactsCount > 0 ? 
        `Contains ${falseFactsCount} potentially false facts` : null
    }
  }

  async checkHallucination(response: string, context?: any): Promise<QualityCheck> {
    // Check if response contains information not supported by context
    if (!context || !context.sources) {
      return {
        metric: 'hallucination',
        score: 0.7, // Neutral when no context available
        issue: null
      }
    }

    const responseEntities = await this.extractEntities(response)
    // extractEntities is async, so resolve every context extraction before flattening
    const contextEntities = (
      await Promise.all(context.sources.map((s: any) => this.extractEntities(s.content)))
    ).flat()
    
    const supportedEntities = responseEntities.filter(entity =>
      contextEntities.some(contextEntity => 
        this.entitiesMatch(entity, contextEntity)
      )
    )

    const supportRatio = responseEntities.length === 0
      ? 1 // nothing to check against the context, treat as fully supported
      : supportedEntities.length / responseEntities.length
    
    return {
      metric: 'hallucination',
      score: supportRatio,
      issue: supportRatio < 0.8 ? 
        'Response contains unsupported information' : null
    }
  }
}
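
checkRelevance and checkCoherence rely on getEmbedding, cosineSimilarity, and getSentenceSimilarity helpers that aren't shown, while extractFactualClaims, verifyFactualClaim, extractEntities, and entitiesMatch remain application-specific. A minimal sketch of the embedding helpers, assuming the OpenAI embeddings endpoint and a placeholder model name:

import OpenAI from 'openai'

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })

// Fetch an embedding vector for a piece of text
async function getEmbedding(text: string): Promise<number[]> {
  const response = await openai.embeddings.create({
    model: 'text-embedding-3-small', // placeholder; choose a model that fits your stack
    input: text
  })
  return response.data[0].embedding
}

// Standard cosine similarity between two equal-length vectors
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0
  let normA = 0
  let normB = 0
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i]
    normA += a[i] * a[i]
    normB += b[i] * b[i]
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB))
}

// Sentence-to-sentence similarity used by checkCoherence
async function getSentenceSimilarity(a: string, b: string): Promise<number> {
  const [ea, eb] = await Promise.all([getEmbedding(a), getEmbedding(b)])
  return cosineSimilarity(ea, eb)
}

In the class above these would be private methods; they are shown as standalone functions here only to keep the sketch self-contained. Caching embeddings for repeated sentences is worth adding, since checkCoherence embeds every adjacent sentence pair.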

Validation Pipeline

Combine all validation layers into a comprehensive pipeline.

Complete Validation System
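
The pipeline class below references ValidationOptions, ValidationStep, and ValidationReport types that aren't defined in the article. A minimal sketch of what they might look like, with field names inferred from how the class uses them (treat these as assumptions rather than a canonical contract):

interface ValidationOptions {
  validateSchema?: boolean
  validateSafety?: boolean
  validateQuality?: boolean
  failFast?: boolean
  context?: any
}

interface ValidationStep {
  type: 'schema' | 'safety' | 'quality'
  passed: boolean
  duration: number
  errors?: Array<{ path: string; message?: string; value?: unknown }>
  violations?: any[]
  confidence?: number
  scores?: Record<string, number>
  issues?: Array<string | null>
}

interface ValidationReport {
  passed: boolean
  validations: ValidationStep[]
  summary?: {
    total: number
    passed: number
    failed: number
    totalDuration: number
  }
  error?: string
  timestamp: string
}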

class LLMValidationPipeline {
  private schemaValidator: ResponseValidator
  private safetyValidator: SafetyValidator
  private qualityValidator: QualityValidator
  
  constructor() {
    this.schemaValidator = new ResponseValidator()
    this.safetyValidator = new SafetyValidator()
    this.qualityValidator = new QualityValidator()
  }

  async validateResponse(
    prompt: string,
    response: string,
    options: ValidationOptions = {}
  ): Promise<ValidationReport> {
    const validations: ValidationStep[] = []
    
    try {
      // Step 1: Schema validation
      if (options.validateSchema !== false) {
        const start = Date.now()
        const schemaResult = await this.schemaValidator.validateLLMResponse(response)
        validations.push({
          type: 'schema',
          passed: schemaResult.isValid,
          errors: schemaResult.errors,
          duration: Date.now() - start
        })
        
        if (!schemaResult.isValid && options.failFast) {
          return this.createReport(validations, false)
        }
      }

      // Step 2: Safety validation
      if (options.validateSafety !== false) {
        const start = Date.now()
        const safetyResult = await this.safetyValidator.validateSafety(response)
        validations.push({
          type: 'safety',
          passed: safetyResult.safe,
          violations: safetyResult.violations,
          confidence: safetyResult.confidence,
          duration: Date.now() - start
        })
        
        if (!safetyResult.safe && options.failFast) {
          return this.createReport(validations, false)
        }
      }

      // Step 3: Quality validation
      if (options.validateQuality !== false) {
        const start = Date.now()
        const qualityResult = await this.qualityValidator.validateQuality(
          prompt, 
          response, 
          options.context
        )
        validations.push({
          type: 'quality',
          passed: qualityResult.passed,
          scores: qualityResult.scores,
          issues: qualityResult.issues,
          duration: Date.now() - start
        })
      }

      const overallPassed = validations.every(v => v.passed)
      return this.createReport(validations, overallPassed)
      
    } catch (error) {
      return {
        passed: false,
        error: error.message,
        validations,
        timestamp: new Date().toISOString()
      }
    }
  }

  private createReport(
    validations: ValidationStep[], 
    passed: boolean
  ): ValidationReport {
    return {
      passed,
      validations,
      summary: {
        total: validations.length,
        passed: validations.filter(v => v.passed).length,
        failed: validations.filter(v => !v.passed).length,
        totalDuration: validations.reduce((sum, v) => sum + v.duration, 0)
      },
      timestamp: new Date().toISOString()
    }
  }

  async validateWithRetry(
    prompt: string,
    llmCall: () => Promise<string>,
    maxRetries: number = 3
  ): Promise<{ response: string, report: ValidationReport }> {
    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      const response = await llmCall()
      const report = await this.validateResponse(prompt, response)
      
      if (report.passed) {
        return { response, report }
      }
      
      if (attempt === maxRetries) {
        throw new Error(`Validation failed after ${maxRetries} attempts: ${JSON.stringify(report)}`)
      }
      
      // Log validation failure and retry
      console.warn(`Validation failed on attempt ${attempt}, retrying...`, report)
    }
    
    throw new Error('Max retries exceeded')
  }
}
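
To tie the pieces together, here is a hypothetical usage example wrapping a chat completion call with validateWithRetry. The client setup, model name, and prompt wiring are illustrative assumptions, not part of the article's code:

import OpenAI from 'openai'

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
const pipeline = new LLMValidationPipeline()

async function answerWithValidation(prompt: string) {
  const { response, report } = await pipeline.validateWithRetry(
    prompt,
    async () => {
      const completion = await openai.chat.completions.create({
        model: 'gpt-4o-mini', // placeholder model
        response_format: { type: 'json_object' }, // nudge the model toward schema-conformant JSON
        messages: [
          { role: 'system', content: 'Respond as JSON with answer, confidence, sources, and metadata fields.' },
          { role: 'user', content: prompt }
        ]
      })
      return completion.choices[0].message.content ?? ''
    },
    3 // give up after three failed validation attempts
  )

  console.log('Validation summary:', report.summary)
  return JSON.parse(response)
}

If every attempt fails validation, validateWithRetry throws, so callers should be prepared to surface a fallback answer or an error to the user.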
