Uptime Optimization
Achieve 99.9% uptime with multi-provider redundancy and intelligent failover
High Availability Architecture
ParrotRouter's distributed architecture ensures your AI applications stay online even when individual providers experience outages. Our system automatically detects failures and routes around them in milliseconds.
99.9% Uptime SLA
Guaranteed availability with credits for downtime
Automatic Failover
Instant switching between providers
Circuit Breakers
Prevent cascade failures automatically
Automatic Failover
Configure automatic failover to maintain service continuity:
import json

from openai import OpenAI

client = OpenAI(
    base_url="https://api.parrotrouter.com/v1",
    api_key="your-api-key"
)

# Simple failover configuration: ParrotRouter retries the request against the
# listed providers, in order, when the primary fails.
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Critical request"}],
    extra_headers={
        "X-Failover-Enabled": "true",
        "X-Failover-Providers": "openai,anthropic,google",
        "X-Max-Retries": "3",
        "X-Retry-Delay-Ms": "100"
    }
)

# Advanced failover with conditions: each fallback fires only for the error
# types listed in its "trigger", so the latency/quality trade-off can differ
# per failure mode.
failover_config = {
    "primary": {
        "provider": "openai",
        "model": "gpt-4-turbo-preview"
    },
    "fallbacks": [
        {
            "trigger": ["timeout", "rate_limit"],
            "provider": "anthropic",
            "model": "claude-3-opus",
            "max_latency_increase": 500  # Accept 500ms more latency
        },
        {
            "trigger": ["server_error", "model_overloaded"],
            "provider": "google",
            "model": "gemini-pro",
            "reduce_quality": True  # Accept lower quality for availability
        },
        {
            "trigger": "any_error",
            "provider": "openai",
            "model": "gpt-3.5-turbo",
            "reduce_max_tokens": 0.5  # Reduce by 50%
        }
    ],
    "health_check_interval": 30,  # seconds
    "failure_threshold": 3,
    "success_threshold": 2
}

response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Process this request"}],
    extra_headers={
        "X-Failover-Config": json.dumps(failover_config)
    }
)
Circuit Breaker Pattern
Prevent cascade failures with intelligent circuit breakers:
Circuit Breaker States
Configure Circuit Breakers
import json

# Circuit breaker configuration: after `error_threshold` failures within the
# window, the circuit opens and requests short-circuit to the fallback until
# the timeout elapses and the half-open probe requests succeed.
circuit_config = {
    "error_threshold": 5,          # Failures to trigger open state
    "error_threshold_window": 60,  # Window in seconds
    "timeout": 30,                 # Seconds before half-open
    "half_open_requests": 3,       # Test requests in half-open
    "success_threshold": 2,        # Successes to close circuit
    "excluded_errors": ["rate_limit", "quota_exceeded"],
    "monitor_latency": True,
    "latency_threshold_ms": 5000   # Open if latency too high
}

# Use `with_raw_response` so the HTTP response headers are accessible;
# the plain SDK response object does not expose them.
raw = client.chat.completions.with_raw_response.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Request"}],
    extra_headers={
        "X-Circuit-Breaker": json.dumps(circuit_config),
        "X-Circuit-Breaker-Fallback": "cache"  # or "degraded", "error"
    }
)
response = raw.parse()  # the usual ChatCompletion object

# Check circuit status reported via HTTP response headers
if raw.headers.get("X-Circuit-Status") == "open":
    print("Circuit open - using fallback")
    print(f"Retry after: {raw.headers.get('X-Retry-After')}s")
Health Monitoring
Real-time health monitoring across all providers and models:
import json

import requests
import websocket  # websocket-client package

# Get current health status
health = requests.get(
    "https://api.parrotrouter.com/v1/health",
    headers={"Authorization": "Bearer your-api-key"}
).json()

print(f"System Status: {health['status']}")  # healthy, degraded, down
print(f"Overall Uptime: {health['uptime_percentage']}%")

# Provider-specific health
for provider in health['providers']:
    print(f"\n{provider['name']}:")
    print(f"  Status: {provider['status']}")
    print(f"  Uptime (24h): {provider['uptime_24h']}%")
    print(f"  Response time: {provider['avg_response_time_ms']}ms")
    print(f"  Error rate: {provider['error_rate']}%")
    # Model health
    for model in provider['models']:
        status_icon = "✅" if model['healthy'] else "❌"
        print(f"  {status_icon} {model['name']}: {model['status']}")


# Real-time health updates over the streaming WebSocket endpoint
def on_health_update(ws, message):
    """Print alerts for provider outage/recovery events from the stream."""
    update = json.loads(message)
    if update['type'] == 'provider_down':
        print(f"ALERT: {update['provider']} is down!")
        print(f"Affected models: {update['models']}")
        print(f"Fallback active: {update['fallback_provider']}")
    elif update['type'] == 'provider_recovered':
        print(f"RECOVERY: {update['provider']} is back online")


ws = websocket.WebSocketApp(
    "wss://api.parrotrouter.com/v1/health/stream",
    header=["Authorization: Bearer your-api-key"],
    on_message=on_health_update
)
ws.run_forever()
Retry Strategies
Exponential Backoff
Intelligent retry timing to avoid overwhelming providers.
import json

# Configure retry strategy: exponential backoff with jitter avoids hammering
# a struggling provider while still recovering quickly.
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Important request"}],
    extra_headers={
        "X-Retry-Strategy": "exponential",
        "X-Retry-Config": json.dumps({
            "initial_delay_ms": 100,
            "max_delay_ms": 10000,
            "multiplier": 2,
            "max_retries": 5,
            "jitter": True,  # Add randomness to prevent thundering herd
            "retry_on": ["timeout", "server_error", "rate_limit"]
        })
    }
)


# Custom retry logic: per-error-type rules instead of one global policy
def custom_retry():
    """Send a request with per-error-type retry rules.

    Returns the chat completion once one of the retry rules succeeds.
    """
    retry_config = {
        "strategy": "custom",
        "rules": [
            {
                # Rate limits: honor the provider's Retry-After header
                "error_type": "rate_limit",
                "wait_ms": "parse_retry_after_header",
                "max_wait_ms": 60000
            },
            {
                # Timeouts: fixed delay schedule, then switch provider
                "error_type": "timeout",
                "attempts": [100, 200, 500, 1000, 2000],  # ms delays
                "switch_provider_after": 2
            },
            {
                # Server errors: fail over immediately
                "error_type": "server_error",
                "immediate_failover": True
            }
        ]
    }
    return client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[{"role": "user", "content": "Request"}],
        extra_headers={
            "X-Retry-Config": json.dumps(retry_config)
        }
    )
Request Hedging
Send parallel requests to multiple providers for critical operations.
import json

# Hedge requests for lowest latency: a second provider is queried in parallel
# after `delay_ms` and the slower request is cancelled once a winner responds.
# `with_raw_response` is needed to read the hedging result headers.
raw = client.chat.completions.with_raw_response.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Time-critical request"}],
    extra_headers={
        "X-Request-Hedging": "true",
        "X-Hedge-Config": json.dumps({
            "strategy": "latency",  # or "first-success"
            "providers": ["openai", "anthropic"],
            "delay_ms": 200,  # Start second request after 200ms
            "cancel_slower": True,
            "max_additional_cost": 0.05  # Cost limit for hedging
        })
    }
)
response = raw.parse()

print(f"Winner: {raw.headers.get('X-Hedge-Winner')}")
print(f"Latency saved: {raw.headers.get('X-Hedge-Latency-Saved')}ms")
Degraded Mode Operations
Maintain partial functionality during outages:
import json

# Configure degraded mode behavior: keep serving (with reduced quality or
# features) instead of failing outright during partial outages.
degraded_config = {
    "enabled": True,
    "triggers": ["high_latency", "partial_outage", "rate_limiting"],
    "strategies": [
        {
            "condition": "high_latency",
            "action": "reduce_quality",
            "config": {
                "use_model": "gpt-3.5-turbo",  # Faster model
                "reduce_max_tokens": 0.7,
                "disable_streaming": False,
                "cache_aggressively": True
            }
        },
        {
            "condition": "partial_outage",
            "action": "feature_flags",
            "config": {
                "disable_features": ["image_generation", "code_execution"],
                "simplify_prompts": True,
                "use_cached_responses": True,
                "cache_ttl_multiplier": 10  # 10x longer cache
            }
        },
        {
            "condition": "rate_limiting",
            "action": "queue_and_batch",
            "config": {
                "batch_size": 10,
                "max_queue_time_ms": 5000,
                "priority_queue": True,
                "notify_users": True
            }
        }
    ],
    "user_notification": {
        "enabled": True,
        "message": "We're experiencing high demand. Responses may be simplified.",
        "show_status": True
    }
}

# `with_raw_response` exposes the HTTP headers that report degradation status.
raw = client.chat.completions.with_raw_response.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Process request"}],
    extra_headers={
        "X-Degraded-Mode-Config": json.dumps(degraded_config),
        "X-User-Priority": "high"  # Priority users skip degradation
    }
)
response = raw.parse()

# Check if degraded mode was activated
if raw.headers.get("X-Degraded-Mode") == "active":
    print(f"Degraded mode: {raw.headers.get('X-Degradation-Reason')}")
    print(f"Quality impact: {raw.headers.get('X-Quality-Score')}/10")
Uptime Analytics
Historical Uptime
Incident Management
import requests

# Get incident history for the last 30 days
incidents = requests.get(
    "https://api.parrotrouter.com/v1/incidents",
    headers={"Authorization": "Bearer your-api-key"},
    params={"days": 30}
).json()

for incident in incidents['incidents']:
    print(f"\nIncident: {incident['id']}")
    print(f"Date: {incident['timestamp']}")
    print(f"Duration: {incident['duration_minutes']} minutes")
    print(f"Impact: {incident['impact']}")
    print(f"Affected: {incident['affected_services']}")
    print(f"Resolution: {incident['resolution']}")

# Subscribe to incident notifications
notification_config = {
    "channels": ["email", "webhook", "sms"],
    "severity_threshold": "medium",  # low, medium, high, critical
    "webhook_url": "https://your-app.com/incidents",
    "include_types": ["outage", "degraded_performance", "partial_outage"],
    "regions": ["us-east-1", "eu-west-1"]
}

response = requests.post(
    "https://api.parrotrouter.com/v1/incidents/subscribe",
    headers={"Authorization": "Bearer your-api-key"},
    json=notification_config
)
Best Practices
- 1. Configure Multiple Fallbacks
Always have at least 2-3 fallback providers configured
- 2. Test Failover Regularly
Use chaos engineering to verify failover works correctly
- 3. Monitor Circuit Breaker Metrics
Track circuit breaker trips to identify problematic providers
- 4. Plan for Degraded Mode
Design your application to handle reduced functionality gracefully