Uptime Optimization
Achieve 99.9% uptime with multi-provider redundancy and intelligent failover
High Availability Architecture
ParrotRouter's distributed architecture ensures your AI applications stay online even when individual providers experience outages. Our system automatically detects failures and routes around them in milliseconds.
99.9% Uptime SLA
Guaranteed availability with credits for downtime
Automatic Failover
Instant switching between providers
Circuit Breakers
Prevent cascade failures automatically
Automatic Failover
Configure automatic failover to maintain service continuity:
import json

from openai import OpenAI

client = OpenAI(
    base_url="https://api.parrotrouter.com/v1",
    api_key="your-api-key"
)

# Simple failover configuration: ParrotRouter retries the request against the
# listed providers, in order, when the primary fails.
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Critical request"}],
    extra_headers={
        "X-Failover-Enabled": "true",
        "X-Failover-Providers": "openai,anthropic,google",
        "X-Max-Retries": "3",
        "X-Retry-Delay-Ms": "100"
    }
)

# Advanced failover with conditions: each fallback fires only for the error
# types listed in its "trigger", so the latency/quality trade-off can differ
# per failure mode.
failover_config = {
    "primary": {
        "provider": "openai",
        "model": "gpt-4-turbo-preview"
    },
    "fallbacks": [
        {
            "trigger": ["timeout", "rate_limit"],
            "provider": "anthropic",
            "model": "claude-3-opus",
            "max_latency_increase": 500  # Accept 500ms more latency
        },
        {
            "trigger": ["server_error", "model_overloaded"],
            "provider": "google",
            "model": "gemini-pro",
            "reduce_quality": True  # Accept lower quality for availability
        },
        {
            "trigger": "any_error",
            "provider": "openai",
            "model": "gpt-3.5-turbo",
            "reduce_max_tokens": 0.5  # Reduce by 50%
        }
    ],
    "health_check_interval": 30,  # seconds
    "failure_threshold": 3,
    "success_threshold": 2
}

response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Process this request"}],
    extra_headers={
        "X-Failover-Config": json.dumps(failover_config)
    }
)
Circuit Breaker Pattern
Prevent cascade failures with intelligent circuit breakers:
Circuit Breaker States
Configure Circuit Breakers
import json

# Circuit breaker configuration: after `error_threshold` failures within the
# window, the circuit opens and requests short-circuit to the fallback until
# the timeout elapses and the half-open probe requests succeed.
circuit_config = {
    "error_threshold": 5,          # Failures to trigger open state
    "error_threshold_window": 60,  # Window in seconds
    "timeout": 30,                 # Seconds before half-open
    "half_open_requests": 3,       # Test requests in half-open
    "success_threshold": 2,        # Successes to close circuit
    "excluded_errors": ["rate_limit", "quota_exceeded"],
    "monitor_latency": True,
    "latency_threshold_ms": 5000   # Open if latency too high
}

# Use `with_raw_response` so the HTTP response headers are accessible;
# the plain SDK response object does not expose them.
raw = client.chat.completions.with_raw_response.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Request"}],
    extra_headers={
        "X-Circuit-Breaker": json.dumps(circuit_config),
        "X-Circuit-Breaker-Fallback": "cache"  # or "degraded", "error"
    }
)
response = raw.parse()  # the usual ChatCompletion object

# Check circuit status reported via HTTP response headers
if raw.headers.get("X-Circuit-Status") == "open":
    print("Circuit open - using fallback")
    print(f"Retry after: {raw.headers.get('X-Retry-After')}s")
Health Monitoring
Real-time health monitoring across all providers and models:
import json

import requests
import websocket  # websocket-client package

# Get current health status
health = requests.get(
    "https://api.parrotrouter.com/v1/health",
    headers={"Authorization": "Bearer your-api-key"}
).json()

print(f"System Status: {health['status']}")  # healthy, degraded, down
print(f"Overall Uptime: {health['uptime_percentage']}%")

# Provider-specific health
for provider in health['providers']:
    print(f"\n{provider['name']}:")
    print(f"  Status: {provider['status']}")
    print(f"  Uptime (24h): {provider['uptime_24h']}%")
    print(f"  Response time: {provider['avg_response_time_ms']}ms")
    print(f"  Error rate: {provider['error_rate']}%")
    # Model health
    for model in provider['models']:
        status_icon = "✅" if model['healthy'] else "❌"
        print(f"  {status_icon} {model['name']}: {model['status']}")


# Real-time health updates over the streaming WebSocket endpoint
def on_health_update(ws, message):
    """Print alerts for provider outage/recovery events from the stream."""
    update = json.loads(message)
    if update['type'] == 'provider_down':
        print(f"ALERT: {update['provider']} is down!")
        print(f"Affected models: {update['models']}")
        print(f"Fallback active: {update['fallback_provider']}")
    elif update['type'] == 'provider_recovered':
        print(f"RECOVERY: {update['provider']} is back online")


ws = websocket.WebSocketApp(
    "wss://api.parrotrouter.com/v1/health/stream",
    header=["Authorization: Bearer your-api-key"],
    on_message=on_health_update
)
ws.run_forever()
Retry Strategies
Exponential Backoff
Intelligent retry timing to avoid overwhelming providers.
import json

# Configure retry strategy: exponential backoff with jitter avoids hammering
# a struggling provider while still recovering quickly.
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Important request"}],
    extra_headers={
        "X-Retry-Strategy": "exponential",
        "X-Retry-Config": json.dumps({
            "initial_delay_ms": 100,
            "max_delay_ms": 10000,
            "multiplier": 2,
            "max_retries": 5,
            "jitter": True,  # Add randomness to prevent thundering herd
            "retry_on": ["timeout", "server_error", "rate_limit"]
        })
    }
)


# Custom retry logic: per-error-type rules instead of one global policy
def custom_retry():
    """Send a request with per-error-type retry rules.

    Returns the chat completion once one of the retry rules succeeds.
    """
    retry_config = {
        "strategy": "custom",
        "rules": [
            {
                # Rate limits: honor the provider's Retry-After header
                "error_type": "rate_limit",
                "wait_ms": "parse_retry_after_header",
                "max_wait_ms": 60000
            },
            {
                # Timeouts: fixed delay schedule, then switch provider
                "error_type": "timeout",
                "attempts": [100, 200, 500, 1000, 2000],  # ms delays
                "switch_provider_after": 2
            },
            {
                # Server errors: fail over immediately
                "error_type": "server_error",
                "immediate_failover": True
            }
        ]
    }
    return client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[{"role": "user", "content": "Request"}],
        extra_headers={
            "X-Retry-Config": json.dumps(retry_config)
        }
    )
Request Hedging
Send parallel requests to multiple providers for critical operations.
import json

# Hedge requests for lowest latency: a second provider is queried in parallel
# after `delay_ms` and the slower request is cancelled once a winner responds.
# `with_raw_response` is needed to read the hedging result headers.
raw = client.chat.completions.with_raw_response.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Time-critical request"}],
    extra_headers={
        "X-Request-Hedging": "true",
        "X-Hedge-Config": json.dumps({
            "strategy": "latency",  # or "first-success"
            "providers": ["openai", "anthropic"],
            "delay_ms": 200,  # Start second request after 200ms
            "cancel_slower": True,
            "max_additional_cost": 0.05  # Cost limit for hedging
        })
    }
)
response = raw.parse()

print(f"Winner: {raw.headers.get('X-Hedge-Winner')}")
print(f"Latency saved: {raw.headers.get('X-Hedge-Latency-Saved')}ms")
Degraded Mode Operations
Maintain partial functionality during outages:
import json

# Configure degraded mode behavior: keep serving (with reduced quality or
# features) instead of failing outright during partial outages.
degraded_config = {
    "enabled": True,
    "triggers": ["high_latency", "partial_outage", "rate_limiting"],
    "strategies": [
        {
            "condition": "high_latency",
            "action": "reduce_quality",
            "config": {
                "use_model": "gpt-3.5-turbo",  # Faster model
                "reduce_max_tokens": 0.7,
                "disable_streaming": False,
                "cache_aggressively": True
            }
        },
        {
            "condition": "partial_outage",
            "action": "feature_flags",
            "config": {
                "disable_features": ["image_generation", "code_execution"],
                "simplify_prompts": True,
                "use_cached_responses": True,
                "cache_ttl_multiplier": 10  # 10x longer cache
            }
        },
        {
            "condition": "rate_limiting",
            "action": "queue_and_batch",
            "config": {
                "batch_size": 10,
                "max_queue_time_ms": 5000,
                "priority_queue": True,
                "notify_users": True
            }
        }
    ],
    "user_notification": {
        "enabled": True,
        "message": "We're experiencing high demand. Responses may be simplified.",
        "show_status": True
    }
}

# `with_raw_response` exposes the HTTP headers that report degradation status.
raw = client.chat.completions.with_raw_response.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Process request"}],
    extra_headers={
        "X-Degraded-Mode-Config": json.dumps(degraded_config),
        "X-User-Priority": "high"  # Priority users skip degradation
    }
)
response = raw.parse()

# Check if degraded mode was activated
if raw.headers.get("X-Degraded-Mode") == "active":
    print(f"Degraded mode: {raw.headers.get('X-Degradation-Reason')}")
    print(f"Quality impact: {raw.headers.get('X-Quality-Score')}/10")
Uptime Analytics
Historical Uptime
Incident Management
import requests

# Get incident history for the last 30 days
incidents = requests.get(
    "https://api.parrotrouter.com/v1/incidents",
    headers={"Authorization": "Bearer your-api-key"},
    params={"days": 30}
).json()

for incident in incidents['incidents']:
    print(f"\nIncident: {incident['id']}")
    print(f"Date: {incident['timestamp']}")
    print(f"Duration: {incident['duration_minutes']} minutes")
    print(f"Impact: {incident['impact']}")
    print(f"Affected: {incident['affected_services']}")
    print(f"Resolution: {incident['resolution']}")

# Subscribe to incident notifications
notification_config = {
    "channels": ["email", "webhook", "sms"],
    "severity_threshold": "medium",  # low, medium, high, critical
    "webhook_url": "https://your-app.com/incidents",
    "include_types": ["outage", "degraded_performance", "partial_outage"],
    "regions": ["us-east-1", "eu-west-1"]
}

response = requests.post(
    "https://api.parrotrouter.com/v1/incidents/subscribe",
    headers={"Authorization": "Bearer your-api-key"},
    json=notification_config
)
Best Practices
- 1. Configure Multiple Fallbacks
Always have at least 2-3 fallback providers configured
- 2. Test Failover Regularly
Use chaos engineering to verify failover works correctly
- 3. Monitor Circuit Breaker Metrics
Track circuit breaker trips to identify problematic providers
- 4. Plan for Degraded Mode
Design your application to handle reduced functionality gracefully