Feature

Uptime Optimization

Achieve 99.9% uptime with multi-provider redundancy and intelligent failover

High Availability Architecture

ParrotRouter's distributed architecture ensures your AI applications stay online even when individual providers experience outages. Our system automatically detects failures and routes around them in milliseconds.

99.9% Uptime SLA

Guaranteed availability with credits for downtime

Automatic Failover

Instant switching between providers

Circuit Breakers

Prevent cascade failures automatically

Automatic Failover

Configure automatic failover to maintain service continuity:

Basic Failover Configuration (Python)
import json  # needed for json.dumps below; missing from the original example

from openai import OpenAI

# Client pointed at the ParrotRouter gateway; all examples below use it.
client = OpenAI(
    base_url="https://api.parrotrouter.com/v1",
    api_key="your-api-key"
)

# Simple failover configuration: per-request headers enable automatic
# provider failover with bounded retries.
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Critical request"}],
    extra_headers={
        "X-Failover-Enabled": "true",
        "X-Failover-Providers": "openai,anthropic,google",
        "X-Max-Retries": "3",
        "X-Retry-Delay-Ms": "100"
    }
)

# Advanced failover with conditions: each fallback fires only on the
# listed trigger errors, in order.
failover_config = {
    "primary": {
        "provider": "openai",
        "model": "gpt-4-turbo-preview"
    },
    "fallbacks": [
        {
            "trigger": ["timeout", "rate_limit"],
            "provider": "anthropic",
            "model": "claude-3-opus",
            "max_latency_increase": 500  # Accept 500ms more latency
        },
        {
            "trigger": ["server_error", "model_overloaded"],
            "provider": "google",
            "model": "gemini-pro",
            "reduce_quality": True  # Accept lower quality for availability
        },
        {
            "trigger": "any_error",
            "provider": "openai",
            "model": "gpt-3.5-turbo",
            "reduce_max_tokens": 0.5  # Reduce by 50%
        }
    ],
    "health_check_interval": 30,  # seconds
    "failure_threshold": 3,
    "success_threshold": 2
}

response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Process this request"}],
    extra_headers={
        "X-Failover-Config": json.dumps(failover_config)
    }
)

Circuit Breaker Pattern

Prevent cascade failures with intelligent circuit breakers:

Circuit Breaker States

Closed
Normal operation - requests flow through
Open
Failure detected - requests blocked, fallback active
Half-Open
Testing recovery - limited requests allowed

Configure Circuit Breakers

import json  # needed for json.dumps below; missing from the original example

# Circuit breaker configuration
circuit_config = {
    "error_threshold": 5,           # Failures to trigger open state
    "error_threshold_window": 60,   # Window in seconds
    "timeout": 30,                  # Seconds before half-open
    "half_open_requests": 3,        # Test requests in half-open
    "success_threshold": 2,         # Successes to close circuit
    "excluded_errors": ["rate_limit", "quota_exceeded"],
    "monitor_latency": True,
    "latency_threshold_ms": 5000    # Open if latency too high
}

# `client` is the ParrotRouter-configured OpenAI client from the earlier example.
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Request"}],
    extra_headers={
        "X-Circuit-Breaker": json.dumps(circuit_config),
        "X-Circuit-Breaker-Fallback": "cache"  # or "degraded", "error"
    }
)

# Check circuit status
# NOTE(review): the stock OpenAI SDK response object does not expose `.headers`
# directly — presumably a wrapper surfaces gateway headers here; confirm.
if response.headers.get("X-Circuit-Status") == "open":
    print("Circuit open - using fallback")
    print(f"Retry after: {response.headers.get('X-Retry-After')}s")

Health Monitoring

Real-time health monitoring across all providers and models:

Health Check API (Python)
import json  # needed by the stream handler below (json.loads); missing in the original
import requests
import websocket

# Get current health status (one-shot snapshot across all providers)
health = requests.get(
    "https://api.parrotrouter.com/v1/health",
    headers={"Authorization": "Bearer your-api-key"}
).json()

print(f"System Status: {health['status']}")  # healthy, degraded, down
print(f"Overall Uptime: {health['uptime_percentage']}%")

# Provider-specific health
for provider in health['providers']:
    # Fixed: the original had a literal newline inside a single-quoted
    # f-string, which is a syntax error.
    print(f"\n{provider['name']}:")
    print(f"  Status: {provider['status']}")
    print(f"  Uptime (24h): {provider['uptime_24h']}%")
    print(f"  Response time: {provider['avg_response_time_ms']}ms")
    print(f"  Error rate: {provider['error_rate']}%")

    # Model health
    for model in provider['models']:
        status_icon = "✅" if model['healthy'] else "❌"
        print(f"    {status_icon} {model['name']}: {model['status']}")

# Real-time health updates
def on_health_update(ws, message):
    """Handle one health-stream event.

    Prints an alert when a provider goes down, a recovery notice when it
    comes back, and silently ignores every other event type.
    """
    update = json.loads(message)
    event_type = update['type']
    if event_type == 'provider_down':
        # Outage: report the provider, the models it affects, and the fallback.
        print(f"ALERT: {update['provider']} is down!")
        print(f"Affected models: {update['models']}")
        print(f"Fallback active: {update['fallback_provider']}")
    elif event_type == 'provider_recovered':
        print(f"RECOVERY: {update['provider']} is back online")

# Subscribe to the live health stream; on_health_update runs once per message.
ws = websocket.WebSocketApp(
    "wss://api.parrotrouter.com/v1/health/stream",
    header=["Authorization: Bearer your-api-key"],
    on_message=on_health_update
)
# Blocks this thread indefinitely while dispatching incoming events.
ws.run_forever()

Retry Strategies

Exponential Backoff

Intelligent retry timing to avoid overwhelming providers.

import json  # needed for json.dumps below; missing from the original example

# Configure retry strategy (exponential backoff with jitter).
# `client` is the ParrotRouter-configured OpenAI client from the earlier example.
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Important request"}],
    extra_headers={
        "X-Retry-Strategy": "exponential",
        "X-Retry-Config": json.dumps({
            "initial_delay_ms": 100,
            "max_delay_ms": 10000,
            "multiplier": 2,
            "max_retries": 5,
            "jitter": True,  # Add randomness to prevent thundering herd
            "retry_on": ["timeout", "server_error", "rate_limit"]
        })
    }
)

# Custom retry logic
def custom_retry():
    """Issue a request with per-error-type retry rules.

    Each rule targets one error class: rate limits honor the Retry-After
    header, timeouts use a fixed delay ladder then switch provider, and
    server errors fail over immediately. Returns the completion response.
    """
    rate_limit_rule = {
        "error_type": "rate_limit",
        "wait_ms": "parse_retry_after_header",
        "max_wait_ms": 60000
    }
    timeout_rule = {
        "error_type": "timeout",
        "attempts": [100, 200, 500, 1000, 2000],  # ms delays
        "switch_provider_after": 2
    }
    server_error_rule = {
        "error_type": "server_error",
        "immediate_failover": True
    }
    retry_config = {
        "strategy": "custom",
        "rules": [rate_limit_rule, timeout_rule, server_error_rule]
    }

    return client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[{"role": "user", "content": "Request"}],
        extra_headers={"X-Retry-Config": json.dumps(retry_config)}
    )

Request Hedging

Send parallel requests to multiple providers for critical operations.

import json  # needed for json.dumps below; missing from the original example

# Hedge requests for lowest latency: a second provider is raced after delay_ms.
# `client` is the ParrotRouter-configured OpenAI client from the earlier example.
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Time-critical request"}],
    extra_headers={
        "X-Request-Hedging": "true",
        "X-Hedge-Config": json.dumps({
            "strategy": "latency",  # or "first-success"
            "providers": ["openai", "anthropic"],
            "delay_ms": 200,  # Start second request after 200ms
            "cancel_slower": True,
            "max_additional_cost": 0.05  # Cost limit for hedging
        })
    }
)

# NOTE(review): the stock OpenAI SDK response object does not expose `.headers`
# directly — presumably a wrapper surfaces gateway headers here; confirm.
print(f"Winner: {response.headers.get('X-Hedge-Winner')}")
print(f"Latency saved: {response.headers.get('X-Hedge-Latency-Saved')}ms")

Degraded Mode Operations

Maintain partial functionality during outages:

Graceful Degradation (Python)
import json  # needed for json.dumps below; missing from the original example

# Configure degraded mode behavior: each strategy maps one trigger condition
# to an action that trades quality or features for availability.
degraded_config = {
    "enabled": True,
    "triggers": ["high_latency", "partial_outage", "rate_limiting"],
    "strategies": [
        {
            "condition": "high_latency",
            "action": "reduce_quality",
            "config": {
                "use_model": "gpt-3.5-turbo",  # Faster model
                "reduce_max_tokens": 0.7,
                "disable_streaming": False,
                "cache_aggressively": True
            }
        },
        {
            "condition": "partial_outage",
            "action": "feature_flags",
            "config": {
                "disable_features": ["image_generation", "code_execution"],
                "simplify_prompts": True,
                "use_cached_responses": True,
                "cache_ttl_multiplier": 10  # 10x longer cache
            }
        },
        {
            "condition": "rate_limiting",
            "action": "queue_and_batch",
            "config": {
                "batch_size": 10,
                "max_queue_time_ms": 5000,
                "priority_queue": True,
                "notify_users": True
            }
        }
    ],
    "user_notification": {
        "enabled": True,
        "message": "We're experiencing high demand. Responses may be simplified.",
        "show_status": True
    }
}

# `client` is the ParrotRouter-configured OpenAI client from the earlier example.
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Process request"}],
    extra_headers={
        "X-Degraded-Mode-Config": json.dumps(degraded_config),
        "X-User-Priority": "high"  # Priority users skip degradation
    }
)

# Check if degraded mode was activated
# NOTE(review): the stock OpenAI SDK response object does not expose `.headers`
# directly — presumably a wrapper surfaces gateway headers here; confirm.
if response.headers.get("X-Degraded-Mode") == "active":
    print(f"Degraded mode: {response.headers.get('X-Degradation-Reason')}")
    print(f"Quality impact: {response.headers.get('X-Quality-Score')}/10")

Uptime Analytics

Historical Uptime

Last 24 hours
99.9%
Last 7 days
99.95%
Last 30 days
99.92%

Incident Management

# Get incident history for the last 30 days.
# `requests` is imported in the health-check example earlier on this page.
incidents = requests.get(
    "https://api.parrotrouter.com/v1/incidents",
    headers={"Authorization": "Bearer your-api-key"},
    params={"days": 30}
).json()

for incident in incidents['incidents']:
    # Fixed: the original had a literal newline inside a single-quoted
    # f-string, which is a syntax error.
    print(f"\nIncident: {incident['id']}")
    print(f"Date: {incident['timestamp']}")
    print(f"Duration: {incident['duration_minutes']} minutes")
    print(f"Impact: {incident['impact']}")
    print(f"Affected: {incident['affected_services']}")
    print(f"Resolution: {incident['resolution']}")

# Subscribe to incident notifications
notification_config = {
    "channels": ["email", "webhook", "sms"],
    "severity_threshold": "medium",  # low, medium, high, critical
    "webhook_url": "https://your-app.com/incidents",
    "include_types": ["outage", "degraded_performance", "partial_outage"],
    "regions": ["us-east-1", "eu-west-1"]
}

response = requests.post(
    "https://api.parrotrouter.com/v1/incidents/subscribe",
    headers={"Authorization": "Bearer your-api-key"},
    json=notification_config
)

Best Practices

  • 1.
    Configure Multiple Fallbacks

    Always have at least 2-3 fallback providers configured

  • 2.
    Test Failover Regularly

    Use chaos engineering to verify failover works correctly

  • 3.
    Monitor Circuit Breaker Metrics

    Track circuit breaker trips to identify problematic providers

  • 4.
    Plan for Degraded Mode

    Design your application to handle reduced functionality gracefully

Related Features