Provider Routing
Seamlessly route requests across OpenAI, Anthropic, Google, and more
Multi-Provider Architecture
Provider routing distributes your AI requests across multiple providers to maximize uptime, reduce costs, and access the best models for each task. One API, multiple providers.
Load Balancing
Distribute load across providers to avoid rate limits
Global Coverage
Route to the nearest provider for lower latency
Automatic Failover
Switch providers instantly if one goes down
Supported Providers
OpenAI
GPT-4, GPT-3.5, DALL-E
Anthropic
Claude 3 Opus, Sonnet, Haiku
Google
Gemini Pro, Gemini Ultra
Mistral
Mistral Large, Medium, Small
Cohere
Command R+, Command R
Meta
Llama 3 70B, 8B
Basic Provider Routing
Specify preferred providers or let ParrotRouter choose the best one:
from openai import OpenAI

# Point an ordinary OpenAI SDK client at the ParrotRouter gateway.
client = OpenAI(
    api_key="your-api-key",
    base_url="https://api.parrotrouter.com/v1",
)

# Automatic routing: with no routing headers, ParrotRouter selects the
# provider for you.
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(f"Provider used: {response.provider}")  # e.g., "openai"

# Preference list: try Anthropic first, then OpenAI, then Google.
preference_headers = {"X-Provider-Preference": "anthropic,openai,google"}
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Hello!"}],
    extra_headers=preference_headers,
)

# Pinning: force a single provider with no routing at all.
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Hello!"}],
    extra_headers={"X-Provider": "openai"},  # Only use OpenAI
)
Advanced Routing Strategies
Geographic Routing
Route to providers based on geographic location for optimal latency.
# Latency-aware routing: send the request to the provider nearest the
# caller, bounded by a maximum acceptable latency.
geo_headers = {
    "X-Routing-Strategy": "geographic",
    "X-User-Region": "us-west-2",  # Optional: specify region
    "X-Max-Latency-Ms": "500",
}
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Process this quickly"}],
    extra_headers=geo_headers,
)

# Residency-aware routing: restrict the request to EU regions so the
# data never leaves the EU.
residency_headers = {
    "X-Provider-Regions": "eu-west-1,eu-central-1",
    "X-Data-Residency": "EU",  # Keep data in EU
}
response = client.chat.completions.create(
    model="claude-3-opus",
    messages=[{"role": "user", "content": "GDPR compliant request"}],
    extra_headers=residency_headers,
)
Cost-Based Routing
Route to the most cost-effective provider for your needs.
# Cost-optimized routing: cap per-request spend and let ParrotRouter
# pick the cheapest provider from the allowed set.
cost_headers = {
    "X-Routing-Strategy": "cost-optimized",
    "X-Max-Cost-Per-Request": "0.10",
    "X-Include-Providers": "openai,anthropic,google,mistral",
}
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Analyze this data"}],
    extra_headers=cost_headers,
)

# Ask each provider what the same token counts would cost.
cost_analysis = client.analyze_costs(
    model="gpt-4-turbo-preview",
    prompt_tokens=1000,
    completion_tokens=500,
)
# Example result:
# {
#     "openai": {"cost": 0.045, "available": true},
#     "anthropic": {"cost": 0.042, "available": true},
#     "google": {"cost": 0.040, "available": false}
# }
Performance-Based Routing
Route based on real-time performance metrics.
import requests

# Performance routing: pick whichever provider has had the best
# time-to-first-token over the trailing five-minute window.
perf_headers = {
    "X-Routing-Strategy": "performance",
    "X-Performance-Metric": "ttft",  # time-to-first-token
    "X-Performance-Window": "5m",    # last 5 minutes
}
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Time-sensitive request"}],
    extra_headers=perf_headers,
)

# Fetch the current per-provider performance statistics.
stats = requests.get(
    "https://api.parrotrouter.com/v1/providers/performance",
    headers={"Authorization": "Bearer your-api-key"},
).json()
for provider in stats['providers']:
    print(f"{provider['name']}:")
    print(f" Avg latency: {provider['avg_latency_ms']}ms")
    print(f" Success rate: {provider['success_rate']}%")
    print(f" Current load: {provider['load_percentage']}%")
Provider-Specific Features
Access provider-specific capabilities while maintaining a unified API:
# OpenAI supports function calling natively, so pin the provider.
weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current weather",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string"}
            }
        }
    }
}
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "What's the weather?"}],
    tools=[weather_tool],
    extra_headers={"X-Provider": "openai"},  # OpenAI has native function calling
)
# Anthropic handles system prompts differently, so route there explicitly.
conversation = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Help me code"},
]
response = client.chat.completions.create(
    model="claude-3-opus",
    messages=conversation,
    extra_headers={"X-Provider": "anthropic"},
)
import json  # fixed: json was used below without being imported

# Use Google's safety settings, passed through as a JSON-encoded header.
response = client.chat.completions.create(
    model="gemini-pro",
    messages=[{"role": "user", "content": "Generate content"}],
    extra_headers={
        "X-Provider": "google",
        "X-Google-Safety-Settings": json.dumps({
            "harm_block_threshold": "BLOCK_MEDIUM_AND_ABOVE",
            "categories": ["HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_VIOLENCE"]
        })
    }
)
Failover Configuration
Configure automatic failover between providers for high availability:
# Ordered fallback chain: try OpenAI, then Anthropic, then Google,
# retrying up to three times on provider errors.
failover_headers = {
    "X-Provider-Fallbacks": "openai,anthropic,google",
    "X-Retry-On-Provider-Error": "true",
    "X-Max-Provider-Retries": "3",
}
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Important request"}],
    extra_headers=failover_headers,
)
import json  # fixed: json was used below without being imported

# Advanced failover: per-provider triggers, model mappings, and a
# circuit breaker that pauses a failing provider.
failover_config = {
    "primary_provider": "openai",
    "fallback_rules": [
        {
            # Fall back to Anthropic on rate limits or timeouts.
            "provider": "anthropic",
            "trigger": ["rate_limit", "timeout"],
            "model_mapping": {
                "gpt-4-turbo-preview": "claude-3-opus",
                "gpt-3.5-turbo": "claude-3-haiku"
            }
        },
        {
            # Fall back to Google on server errors or overload.
            "provider": "google",
            "trigger": ["server_error", "model_overloaded"],
            "model_mapping": {
                "gpt-4-turbo-preview": "gemini-pro",
                "gpt-3.5-turbo": "gemini-pro"
            },
            "max_additional_latency_ms": 1000
        }
    ],
    "circuit_breaker": {
        "enabled": True,  # fixed: bare `true` is not valid Python (json.dumps still emits `true`)
        "failure_threshold": 5,
        "recovery_timeout": 300  # seconds
    }
}
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Process with failover"}],
    extra_headers={
        "X-Failover-Config": json.dumps(failover_config)
    }
)
Load Balancing
Round-Robin
# Round-robin: rotate requests evenly across the listed providers.
balance_headers = {
    "X-Load-Balance-Strategy": "round-robin",
    "X-Load-Balance-Providers": "openai,anthropic,google",
}
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Request"}],
    extra_headers=balance_headers,
)
Weighted Distribution
import json  # fixed: json was used below without being imported

# Weighted split: each provider receives the stated share of traffic.
provider_weights = {
    "openai": 50,     # 50% of traffic
    "anthropic": 30,  # 30% of traffic
    "google": 20      # 20% of traffic
}
response = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=[{"role": "user", "content": "Request"}],
    extra_headers={
        "X-Load-Balance-Strategy": "weighted",
        "X-Provider-Weights": json.dumps(provider_weights)
    }
)
Provider Health Monitoring
Real-time monitoring of provider health and status:
import requests

# Poll the health endpoint for per-provider and per-model status.
response = requests.get(
    "https://api.parrotrouter.com/v1/providers/health",
    headers={"Authorization": "Bearer your-api-key"}
)
health_data = response.json()
for provider in health_data['providers']:
    # Fixed: the leading newline must be "\n" inside the f-string; a raw
    # line break inside a single-quoted string is a SyntaxError.
    print(f"\n{provider['name']}:")
    print(f" Status: {provider['status']}")  # healthy, degraded, down
    print(f" Uptime: {provider['uptime_percentage']}%")
    print(f" Response time: {provider['avg_response_time_ms']}ms")
    print(f" Error rate: {provider['error_rate']}%")
    print(f" Rate limit usage: {provider['rate_limit_usage']}%")
    for model in provider['models']:
        print(f" {model['name']}: {model['status']}")
# Subscribe to provider status updates over a websocket.
import json  # fixed: json was used below without being imported
import websocket  # third-party "websocket-client" package

ws = websocket.WebSocket()
ws.connect(
    "wss://api.parrotrouter.com/v1/providers/health/stream",
    header=["Authorization: Bearer your-api-key"],
)
while True:
    update = json.loads(ws.recv())
    if update['type'] == 'provider_status_change':
        print(f"Provider {update['provider']} is now {update['status']}")
Best Practices
- 1. Use Automatic Routing
Let ParrotRouter handle provider selection unless you have specific needs
- 2. Configure Fallbacks
Always have at least one fallback provider for critical applications
- 3. Monitor Provider Health
Subscribe to health updates to stay informed about outages
- 4. Test Model Compatibility
Ensure your prompts work across different providers