# Chat Completions

## Create Chat Completion

> Creates a chat completion for the provided messages. Supports streaming, function calling, vision, multimodal inputs, and reasoning tokens for supported models (OpenAI o-series, Grok, Gemini thinking, Anthropic). Compatible with OpenAI SDK.

```json
{"openapi":"3.1.0","info":{"title":"FastRouter API Reference","version":"1.0.0"},"tags":[{"name":"Chat Completions","description":"Create AI-powered chat responses with support for text, images, audio, video, streaming, and tool calling."}],"servers":[{"url":"https://api.fastrouter.ai","description":"Production API"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"API Key","description":"FastRouter API Key. Get yours at https://fastrouter.ai\n\nFormat: `Authorization: Bearer YOUR_API_KEY`"}},"responses":{"UnauthorizedError":{"description":"Invalid Credentials - Your API key is invalid, disabled, or your OAuth session has expired. Check your credentials.","content":{"application/json":{"schema":{"type":"object","properties":{"error":{"type":"object","properties":{"message":{"type":"string"},"type":{"type":"string"},"code":{"type":"string"},"status":{"type":"integer"}}}}}}}},"RateLimitError":{"description":"Rate Limited - You have exceeded your request limits (TPM/RPM). Slow down or increase your limits.","content":{"application/json":{"schema":{"type":"object","properties":{"error":{"type":"object","properties":{"message":{"type":"string"},"type":{"type":"string"},"code":{"type":"string"},"status":{"type":"integer"}}}}}}}}}},"paths":{"/api/v1/chat/completions":{"post":{"operationId":"createChatCompletion","tags":["Chat Completions"],"summary":"Create Chat Completion","description":"Creates a chat completion for the provided messages. Supports streaming, function calling, vision, multimodal inputs, and reasoning tokens for supported models (OpenAI o-series, Grok, Gemini thinking, Anthropic). Compatible with OpenAI SDK.","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"object","required":["model","messages"],"properties":{"model":{"type":"string","description":"Model ID in format 'provider/model'. Examples: openai/gpt-5.1, google/gemini-3-pro-preview, anthropic/claude-4.5-sonnet"},"messages":{"type":"array","description":"Array of message objects forming the conversation history. Each message has a role (system/user/assistant/tool) and content.","minItems":1,"items":{"type":"object","required":["role","content"],"properties":{"role":{"type":"string","enum":["system","user","assistant","tool"],"description":"Role of the message author:\n- system: Instructions for the AI\n- user: User messages\n- assistant: AI responses\n- tool: Tool/function outputs"},"content":{"oneOf":[{"type":"string","description":"Text content"},{"type":"array","description":"Multimodal content (text, images, etc.)"}],"description":"Message content - can be a string for text-only or an array for multimodal inputs (text, images, audio, video)"},"name":{"type":"string","description":"Optional name of the message author"}}}},"temperature":{"type":"number","minimum":0,"maximum":2,"default":1,"description":"Controls randomness in responses. Lower values (0-0.7) make output more focused and deterministic. Higher values (0.8-2) make output more creative and random."},"max_tokens":{"type":"integer","minimum":1,"description":"Maximum number of tokens to generate in the completion. Limits the length of the response."},"top_p":{"type":"number","minimum":0,"maximum":1,"default":1,"description":"Nucleus sampling parameter. Alternative to temperature. Lower values make output more focused."},"frequency_penalty":{"type":"number","minimum":-2,"maximum":2,"default":0,"description":"Penalizes repeated tokens based on frequency. Positive values reduce repetition."},"presence_penalty":{"type":"number","minimum":-2,"maximum":2,"default":0,"description":"Penalizes tokens that have appeared. Positive values encourage new topics."},"stream":{"type":"boolean","default":false,"description":"Enable streaming responses for real-time output. When true, responses are sent as Server-Sent Events (SSE)."},"stop":{"oneOf":[{"type":"string"},{"type":"array","items":{"type":"string"}}],"description":"Stop sequences where the model will stop generating. Can be a string or array of strings."},"provider":{"type":"object","description":"Optional: Control provider routing behavior. If not specified, FastRouter intelligently selects the best provider based on availability, performance, and cost. Use either 'only' OR 'order', not both.","properties":{"only":{"type":"array","description":"Force routing to specific providers only. Request will only use providers in this list. Use when you need guaranteed provider selection.","items":{"type":"string"},"minItems":1},"order":{"type":"array","description":"Ordered list of providers to try in sequence. FastRouter attempts each provider in order. Use with allow_fallbacks for high-availability routing.","items":{"type":"string"},"minItems":1},"allow_fallbacks":{"type":"boolean","description":"When used with 'order', enables automatic fallback to the next provider in the list if the current provider is unavailable. Set to true for high-availability routing.","default":true}}},"tools":{"type":"array","description":"Array of tool/function definitions"},"aspectRatio":{"type":"string","description":"Image aspect ratio for Nano Banana (google/gemini-2.5-flash-image). Supported ratios: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9","enum":["1:1","2:3","3:2","3:4","4:3","4:5","5:4","9:16","16:9","21:9"]},"prompt":{"type":"string","description":"Natural-language description for audio generation (ace-step/prompt-to-audio). Describe the audio, music, or ambient sound to generate."},"duration":{"type":"integer","minimum":1,"description":"Duration of the audio clip in seconds for text-to-audio generation (ace-step/prompt-to-audio). Optional parameter."},"reasoning":{"type":"object","description":"Control reasoning token behavior for supported models (OpenAI o-series, Grok, Gemini thinking, Anthropic). Reasoning tokens represent the model's internal reasoning process and improve output quality for complex tasks. Enabled by default. Use either 'effort' OR 'max_tokens', not both.","properties":{"effort":{"type":"string","enum":["low","medium","high"],"description":"Reasoning effort level (OpenAI o-series, Grok). Controls token allocation: low (~20% of max_tokens), medium (~50%), high (~80%). Do not use with max_tokens."},"max_tokens":{"type":"integer","minimum":1024,"maximum":32000,"description":"Maximum reasoning tokens (Gemini thinking, Anthropic). For Anthropic: minimum 1024, maximum 32000. max_tokens must be strictly greater than this value. Do not use with effort."},"exclude":{"type":"boolean","default":false,"description":"If true, model reasons internally but reasoning tokens are not returned in the response. Works across all models. Reduces costs while maintaining reasoning benefits."},"enabled":{"type":"boolean","default":true,"description":"Enable or disable reasoning tokens. Default is true for supported models."}}}}}}}},"responses":{"200":{"description":"Successful chat completion response","content":{"application/json":{"schema":{"type":"object","properties":{"id":{"type":"string","description":"Unique identifier for the completion"},"object":{"type":"string","description":"Object type, always 'chat.completion' or 'chat.completion.chunk' for streaming"},"created":{"type":"integer","description":"Unix timestamp of when the completion was created"},"model":{"type":"string","description":"Model used for completion"},"choices":{"type":"array","description":"Array of completion choices","items":{"type":"object","properties":{"index":{"type":"integer","description":"Choice index"},"message":{"type":"object","description":"Generated message","properties":{"role":{"type":"string"},"content":{"type":"string","description":"Generated text content"},"reasoning":{"type":"object","description":"Reasoning tokens (if enabled and model supports it). Contains the model's internal reasoning process.","properties":{"text":{"type":"string","description":"The reasoning text showing the model's thought process"}}}}},"finish_reason":{"type":"string","enum":["stop","length","tool_calls","content_filter"],"description":"Reason why the model stopped generating"}}}},"usage":{"type":"object","description":"Token usage statistics","properties":{"prompt_tokens":{"type":"integer","description":"Number of tokens in the prompt"},"completion_tokens":{"type":"integer","description":"Number of tokens in the completion (includes reasoning tokens if present)"},"total_tokens":{"type":"integer","description":"Total tokens used"},"cost":{"type":"number","description":"Cost in USD for this request"}}}}}}}},"400":{"description":"Bad Request - Invalid parameters"},"401":{"$ref":"#/components/responses/UnauthorizedError"},"429":{"$ref":"#/components/responses/RateLimitError"},"500":{"description":"Internal Server Error"}}}}}}
```


---

# Agent Instructions: Querying This Documentation

If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter:

```
GET https://docs.fastrouter.ai/api-reference/chat-completions.md?ask=<question>
```

The question should be specific, self-contained, and written in natural language.
The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
