Streaming Responses
Get AI responses in real-time using Server-Sent Events (SSE)
Overview
Streaming allows you to receive AI responses token by token as they're generated, rather than waiting for the complete response. This provides a better user experience, especially for longer outputs.
Benefits
- Instant feedback: see tokens as they arrive
- Better perceived performance
- Early cancellation of unwanted responses
- Lower time to first token (TTFT); see the measurement sketch below
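A quick way to see the TTFT benefit is to time the first token yourself. This is a minimal sketch, reusing the client setup from Basic Usage below (endpoint and model as used throughout this page; substitute your own key):

import time
from openai import OpenAI

client = OpenAI(
    base_url="https://api.parrotrouter.com/v1",
    api_key="your-api-key",
)

start = time.monotonic()
first_token_at = None

stream = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Write a story about a robot"}],
    stream=True,
)

for chunk in stream:
    content = chunk.choices[0].delta.content
    if content and first_token_at is None:
        # With streaming, output can be rendered this early;
        # without it, you would wait for the whole completion
        first_token_at = time.monotonic() - start

if first_token_at is not None:
    print(f"TTFT: {first_token_at:.2f}s")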
How It Works
- Uses Server-Sent Events (SSE)
- Each chunk is sent as a data: line (see the parser sketch below)
- The stream ends with data: [DONE]
- Maintains a persistent connection
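To make this framing concrete, here is a minimal sketch of a parser that turns raw SSE lines into payloads following the rules above. It is the same logic the SDK and raw examples below implement; the helper name is just for illustration.

def sse_payloads(lines):
    """Yield the payload of each data: line until the [DONE] sentinel."""
    for line in lines:
        if line.startswith("data: "):
            payload = line[len("data: "):]
            if payload == "[DONE]":
                return  # Explicit end-of-stream marker
            yield payload

# SSE separates events with blank lines; blank and non-data lines are skipped
raw_lines = ['data: {"content": "Hi"}', '', 'data: [DONE]']
print(list(sse_payloads(raw_lines)))  # -> ['{"content": "Hi"}']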
Basic Usage
Python

from openai import OpenAI

client = OpenAI(
    base_url="https://api.parrotrouter.com/v1",
    api_key="your-api-key"
)

# Create a streaming request
stream = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "user", "content": "Write a story about a robot"}
    ],
    stream=True  # Enable streaming
)

# Process the stream
for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")
TypeScript

import OpenAI from 'openai';

const client = new OpenAI({
  baseURL: 'https://api.parrotrouter.com/v1',
  apiKey: 'your-api-key',
});

// Create a streaming request
const stream = await client.chat.completions.create({
  model: 'gpt-4',
  messages: [
    { role: 'user', content: 'Write a story about a robot' }
  ],
  stream: true,
});

// Process the stream
for await (const chunk of stream) {
  const content = chunk.choices[0]?.delta?.content || '';
  process.stdout.write(content);
}
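If you also need the complete text after the stream finishes (for logging, storage, or a follow-up request), accumulate the deltas as they arrive. A minimal sketch, reusing the client from the Python example above:

full_text = []

stream = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Write a story about a robot"}],
    stream=True,
)

for chunk in stream:
    content = chunk.choices[0].delta.content
    if content:
        print(content, end="")     # Render immediately
        full_text.append(content)  # Keep for later

response_text = "".join(full_text)  # The complete assistant message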
Raw SSE Implementation
If you're not using an SDK, here's how to handle the raw SSE stream:
JavaScript (fetch)

const response = await fetch('https://api.parrotrouter.com/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Authorization': 'Bearer your-api-key',
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'gpt-4',
    messages: [{ role: 'user', content: 'Hello!' }],
    stream: true,
  }),
});

const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';

while (true) {
  const { done, value } = await reader.read();
  if (done) break;

  // An SSE line can be split across network reads, so buffer partial lines
  buffer += decoder.decode(value, { stream: true });
  const lines = buffer.split('\n');
  buffer = lines.pop(); // Keep the trailing partial line for the next read

  for (const line of lines) {
    if (line.startsWith('data: ')) {
      const data = line.slice(6);
      if (data === '[DONE]') {
        console.log('Stream finished');
        break;
      }
      try {
        const json = JSON.parse(data);
        const content = json.choices[0]?.delta?.content || '';
        process.stdout.write(content);
      } catch (e) {
        // Skip lines that aren't valid JSON
      }
    }
  }
}
Python (requests)

import requests
import json

response = requests.post(
    'https://api.parrotrouter.com/v1/chat/completions',
    headers={
        'Authorization': 'Bearer your-api-key',
        'Content-Type': 'application/json',
    },
    json={
        'model': 'gpt-4',
        'messages': [{'role': 'user', 'content': 'Hello!'}],
        'stream': True,
    },
    stream=True,  # Tell requests not to buffer the whole response
)

for line in response.iter_lines():
    if line:
        line = line.decode('utf-8')
        if line.startswith('data: '):
            data = line[6:]
            if data == '[DONE]':
                break
            try:
                json_data = json.loads(data)
                content = json_data['choices'][0]['delta'].get('content', '')
                print(content, end='')
            except json.JSONDecodeError:
                pass
Stream Event Format
Each chunk in the stream follows this format:
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1699000000,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1699000000,"model":"gpt-4","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1699000000,"model":"gpt-4","choices":[{"index":0,"delta":{"content":" there"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1699000000,"model":"gpt-4","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1699000000,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
data: [DONE]
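The delta fields concatenate into the final message; here the content pieces join to "Hello there!". A minimal sketch that reassembles the events above (other top-level fields trimmed for brevity):

import json

events = [
    '{"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}',
    '{"choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}',
    '{"choices":[{"index":0,"delta":{"content":" there"},"finish_reason":null}]}',
    '{"choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}',
    '{"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}',
]

message = ""
for event in events:
    delta = json.loads(event)["choices"][0]["delta"]
    message += delta.get("content", "")  # content is absent in the final chunk

print(message)  # -> Hello there!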
Handling Function Calls
When streaming with function calling enabled, function information comes in chunks:
import json

function_name = ""
function_args = ""

for chunk in stream:
    delta = chunk.choices[0].delta

    # Check for function call fragments
    if hasattr(delta, 'tool_calls') and delta.tool_calls:
        tool_call = delta.tool_calls[0]
        if tool_call.function.name:
            function_name = tool_call.function.name
        if tool_call.function.arguments:
            function_args += tool_call.function.arguments
    # Regular content
    elif delta.content:
        print(delta.content, end="")

# Parse the complete function arguments once the stream ends
if function_name:
    args = json.loads(function_args)
    print(f"\nFunction: {function_name}, Args: {args}")
Error Handling
Errors can occur during streaming. Here's how to handle them:
try:
    stream = client.chat.completions.create(
        model="gpt-4",
        messages=messages,
        stream=True
    )

    for chunk in stream:
        # Check for error in chunk
        if hasattr(chunk, 'error'):
            print(f"Stream error: {chunk.error}")
            break

        # Process normal chunk
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")
except Exception as e:
    print(f"Connection error: {e}")
    # Implement retry logic here
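For the retry logic, a common pattern is exponential backoff with a capped number of attempts. A minimal sketch (the attempt count and delays are illustrative, not a recommendation of this API):

import time

MAX_RETRIES = 3

for attempt in range(MAX_RETRIES):
    try:
        stream = client.chat.completions.create(
            model="gpt-4",
            messages=messages,
            stream=True,
        )
        for chunk in stream:
            if chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")
        break  # Stream completed; stop retrying
    except Exception as e:
        if attempt == MAX_RETRIES - 1:
            raise  # Out of attempts; surface the error
        wait = 2 ** attempt  # 1s, 2s, 4s, ...
        print(f"\nStream failed ({e}); retrying in {wait}s")
        time.sleep(wait)

Note that a retry restarts generation from scratch; if partial output was already shown, clear it before re-rendering.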
Performance Tips
Best Practices
1. Buffer Management: process chunks immediately to avoid memory buildup
2. Connection Timeout: set appropriate timeouts for long-running streams
3. Early Termination: allow users to cancel streams to save costs (see the sketch after this list)
4. Chunk Aggregation: batch UI updates for better performance
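Here is a sketch combining early termination (tip 3) and chunk aggregation (tip 4): stop consuming when a cancel flag is set, and flush buffered text at most every 50 ms instead of once per token. The cancel_requested flag and render function are placeholders for your own app, and closing the stream object assumes the Python SDK's stream type; other clients can close the underlying connection instead.

import time

cancel_requested = False  # Set this from your UI's cancel action (placeholder)

def render(text):
    print(text, end="", flush=True)  # Stand-in for a real UI update

buffer = []
last_flush = time.monotonic()

for chunk in stream:
    if cancel_requested:
        stream.close()  # Stop generation early to save tokens
        break

    content = chunk.choices[0].delta.content
    if content:
        buffer.append(content)

    # Tip 4: flush at most every 50 ms so the UI isn't updated per token
    if buffer and time.monotonic() - last_flush > 0.05:
        render("".join(buffer))
        buffer.clear()
        last_flush = time.monotonic()

if buffer:
    render("".join(buffer))  # Flush whatever remains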
UI Integration Example
Here's how to integrate streaming into a React component:
import { useState } from 'react';
import OpenAI from 'openai';

function ChatComponent() {
  const [response, setResponse] = useState('');
  const [isStreaming, setIsStreaming] = useState(false);

  const handleStream = async (message: string) => {
    setIsStreaming(true);
    setResponse('');

    const client = new OpenAI({
      baseURL: 'https://api.parrotrouter.com/v1',
      apiKey: 'your-api-key',
      dangerouslyAllowBrowser: true, // Only for demos
    });

    try {
      const stream = await client.chat.completions.create({
        model: 'gpt-4',
        messages: [{ role: 'user', content: message }],
        stream: true,
      });

      for await (const chunk of stream) {
        const content = chunk.choices[0]?.delta?.content || '';
        setResponse(prev => prev + content);
      }
    } catch (error) {
      console.error('Streaming error:', error);
    } finally {
      setIsStreaming(false);
    }
  };

  return (
    <div>
      <button onClick={() => handleStream('Hello!')} disabled={isStreaming}>
        {isStreaming ? 'Streaming...' : 'Start Chat'}
      </button>
      <div>{response}</div>
    </div>
  );
}