API Reference

Streaming Responses

Get AI responses in real time using Server-Sent Events (SSE)

Overview

Streaming allows you to receive AI responses token by token as they're generated, rather than waiting for the complete response. This provides a better user experience, especially for longer outputs.

Benefits

  • Instant feedback - see tokens as they arrive
  • Better perceived performance
  • Early cancellation of unwanted responses
  • Lower time to first token (TTFT)

How It Works

  • Uses Server-Sent Events (SSE)
  • Each chunk is sent as a data: line (see Stream Event Format below for the exact payloads)
  • The stream ends with data: [DONE]
  • A single persistent connection stays open for the whole response

Basic Usage

Python - Using OpenAI SDK
from openai import OpenAI

client = OpenAI(
    base_url="https://api.parrotrouter.com/v1",
    api_key="your-api-key"
)

# Create a streaming request
stream = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "user", "content": "Write a story about a robot"}
    ],
    stream=True  # Enable streaming
)

# Process the stream
for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")
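
If you also need token usage (for billing or logging), OpenAI-compatible APIs commonly accept a stream_options parameter that appends a final usage chunk to the stream. Support for it here is an assumption, so treat this as a sketch to verify against your endpoint:

Python - Requesting Usage Stats (sketch)
# Assumes the endpoint honors OpenAI's stream_options parameter
stream = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Write a story about a robot"}],
    stream=True,
    stream_options={"include_usage": True},  # ask for a trailing usage chunk
)

for chunk in stream:
    # The usage chunk has an empty choices list, so guard before indexing
    if chunk.choices and chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")
    if chunk.usage:  # present only on the final chunk
        print(f"\nTotal tokens: {chunk.usage.total_tokens}")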

TypeScript - Using OpenAI SDK
import OpenAI from 'openai';

const client = new OpenAI({
  baseURL: 'https://api.parrotrouter.com/v1',
  apiKey: 'your-api-key',
});

// Create a streaming request
const stream = await client.chat.completions.create({
  model: 'gpt-4',
  messages: [
    { role: 'user', content: 'Write a story about a robot' }
  ],
  stream: true,
});

// Process the stream
for await (const chunk of stream) {
  const content = chunk.choices[0]?.delta?.content || '';
  process.stdout.write(content);
}

Raw SSE Implementation

If you're not using an SDK, here's how to handle the raw SSE stream:

JavaScript - Fetch API
const response = await fetch('https://api.parrotrouter.com/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Authorization': 'Bearer your-api-key',
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'gpt-4',
    messages: [{ role: 'user', content: 'Hello!' }],
    stream: true,
  }),
});

const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
let finished = false;

while (!finished) {
  const { done, value } = await reader.read();
  if (done) break;

  // Network chunks can split an SSE line in two, so buffer partial lines
  buffer += decoder.decode(value, { stream: true });
  const lines = buffer.split('\n');
  buffer = lines.pop(); // keep the trailing, possibly incomplete line

  for (const line of lines) {
    if (line.startsWith('data: ')) {
      const data = line.slice(6);

      if (data === '[DONE]') {
        console.log('Stream finished');
        finished = true; // exit the outer read loop as well
        break;
      }

      try {
        const json = JSON.parse(data);
        const content = json.choices[0]?.delta?.content || '';
        process.stdout.write(content); // Node.js; update the DOM in a browser
      } catch (e) {
        // Ignore malformed or keep-alive lines
      }
    }
  }
}

Python - Requests Library
import requests
import json

response = requests.post(
    'https://api.parrotrouter.com/v1/chat/completions',
    headers={
        'Authorization': 'Bearer your-api-key',
        'Content-Type': 'application/json',
    },
    json={
        'model': 'gpt-4',
        'messages': [{'role': 'user', 'content': 'Hello!'}],
        'stream': True,
    },
    stream=True
)
response.raise_for_status()  # fail fast on auth or request errors

for line in response.iter_lines():
    if line:
        line = line.decode('utf-8')
        if line.startswith('data: '):
            data = line[6:]
            
            if data == '[DONE]':
                break
            
            try:
                json_data = json.loads(data)
                content = json_data['choices'][0]['delta'].get('content', '')
                print(content, end='')
            except json.JSONDecodeError:
                pass

Stream Event Format

Each chunk in the stream follows this format:

First Chunk
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1699000000,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}

Content Chunks
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1699000000,"model":"gpt-4","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1699000000,"model":"gpt-4","choices":[{"index":0,"delta":{"content":" there"},"finish_reason":null}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1699000000,"model":"gpt-4","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}

Final Chunk
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1699000000,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}

data: [DONE]
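
To reconstruct the complete message on the client, append each delta.content fragment until a chunk arrives with finish_reason set. A minimal sketch using the Python SDK stream from above:

Python - Reassembling the Message (sketch)
full_text = ""
finish_reason = None

for chunk in stream:
    if not chunk.choices:
        continue  # guard against housekeeping chunks with no choices
    choice = chunk.choices[0]
    if choice.delta.content:
        full_text += choice.delta.content
    if choice.finish_reason:
        finish_reason = choice.finish_reason  # e.g. "stop" or "length"

print(full_text)
print(f"Finished because: {finish_reason}")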

Handling Function Calls

When streaming with function calling enabled, function information comes in chunks:

Streaming with Functions
import json

function_name = ""
function_args = ""

for chunk in stream:
    delta = chunk.choices[0].delta
    
    # Check for function call
    if hasattr(delta, 'tool_calls') and delta.tool_calls:
        tool_call = delta.tool_calls[0]
        
        if tool_call.function.name:
            function_name = tool_call.function.name
            
        if tool_call.function.arguments:
            function_args += tool_call.function.arguments
    
    # Regular content
    elif delta.content:
        print(delta.content, end="")

# Parse complete function arguments
if function_name:
    args = json.loads(function_args)
    print(f"\nFunction: {function_name}, Args: {args}")
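
The example above assumes a single tool call. Models that support parallel tool calls interleave chunks for several calls, each tagged with an index field; one way to accumulate them (a sketch, based on the OpenAI chunk format rather than anything ParrotRouter-specific):

Python - Parallel Tool Calls (sketch)
import json

calls = {}  # index -> accumulated name and arguments

for chunk in stream:
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    for tool_call in delta.tool_calls or []:
        entry = calls.setdefault(tool_call.index, {"name": "", "arguments": ""})
        if tool_call.function.name:
            entry["name"] = tool_call.function.name
        if tool_call.function.arguments:
            entry["arguments"] += tool_call.function.arguments

for index, call in calls.items():
    args = json.loads(call["arguments"])
    print(f"Call {index}: {call['name']}({args})")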

Error Handling

Errors can occur during streaming. Here's how to handle them:

Robust Error Handling
try:
    stream = client.chat.completions.create(
        model="gpt-4",
        messages=messages,
        stream=True
    )
    
    for chunk in stream:
        # Check for error in chunk
        if hasattr(chunk, 'error'):
            print(f"Stream error: {chunk.error}")
            break
            
        # Process normal chunk (guard against chunks with an empty choices list)
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")
            
except Exception as e:
    print(f"Connection error: {e}")
    # Implement retry logic here
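
A simple retry strategy wraps the whole request in a loop with exponential backoff. Keep in mind that a stream which fails midway has to be restarted from the beginning, so avoid showing duplicated partial output to the user. A sketch:

Python - Retry with Backoff (sketch)
import time

MAX_RETRIES = 3

for attempt in range(MAX_RETRIES):
    try:
        stream = client.chat.completions.create(
            model="gpt-4",
            messages=messages,
            stream=True,
        )
        for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")
        break  # stream completed without error
    except Exception as e:
        if attempt == MAX_RETRIES - 1:
            raise  # out of retries; surface the error
        wait = 2 ** attempt  # 1s, 2s, 4s
        print(f"\nStream failed ({e}); retrying in {wait}s...")
        time.sleep(wait)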

Performance Tips

Best Practices

  1. Buffer Management - process chunks immediately to avoid memory buildup.
  2. Connection Timeout - set appropriate timeouts for long-running streams.
  3. Early Termination - allow users to cancel streams to save costs.
  4. Chunk Aggregation - batch UI updates for better performance (see the sketch after this list).
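
For tip 4, pushing every token straight to the UI can cause excessive re-renders; buffering chunks and flushing on a size or time threshold keeps updates smooth. A sketch of the pattern (the thresholds are arbitrary, and flush_to_ui is a placeholder for your real render call):

Python - Chunk Aggregation (sketch)
import time

def flush_to_ui(text: str) -> None:
    print(text, end="", flush=True)  # stand-in for a real UI update

buffer = ""
last_flush = time.monotonic()

for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        buffer += chunk.choices[0].delta.content
    # Flush every 20 characters or every 50 ms, whichever comes first
    if len(buffer) >= 20 or time.monotonic() - last_flush > 0.05:
        flush_to_ui(buffer)
        buffer = ""
        last_flush = time.monotonic()

if buffer:
    flush_to_ui(buffer)  # flush anything left when the stream ends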

UI Integration Example

Here's how to integrate streaming into a React component:

React Component
import { useState } from 'react';
import OpenAI from 'openai';

function ChatComponent() {
  const [response, setResponse] = useState('');
  const [isStreaming, setIsStreaming] = useState(false);
  
  const handleStream = async (message: string) => {
    setIsStreaming(true);
    setResponse('');
    
    const client = new OpenAI({
      baseURL: 'https://api.parrotrouter.com/v1',
      apiKey: 'your-api-key',
      dangerouslyAllowBrowser: true, // Demo only; in production, proxy requests through your backend
    });
    
    try {
      const stream = await client.chat.completions.create({
        model: 'gpt-4',
        messages: [{ role: 'user', content: message }],
        stream: true,
      });
      
      for await (const chunk of stream) {
        const content = chunk.choices[0]?.delta?.content || '';
        setResponse(prev => prev + content);
      }
    } catch (error) {
      console.error('Streaming error:', error);
    } finally {
      setIsStreaming(false);
    }
  };
  
  return (
    <div>
      <button onClick={() => handleStream('Hello!')} disabled={isStreaming}>
        {isStreaming ? 'Streaming...' : 'Start Chat'}
      </button>
      <div>{response}</div>
    </div>
  );
}
