|
|
@@ -1297,6 +1297,132 @@
|
|
|
"supports_tool_choice": true,
|
|
|
"supports_vision": true
|
|
|
},
|
|
|
+ "azure/eu/gpt-5.1": {
|
|
|
+ "cache_read_input_token_cost": 1.4e-07,
|
|
|
+ "input_cost_per_token": 1.38e-06,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "chat",
|
|
|
+ "output_cost_per_token": 1.1e-05,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/chat/completions",
|
|
|
+ "/v1/batch",
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": true,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
+ "azure/eu/gpt-5.1-chat": {
|
|
|
+ "cache_read_input_token_cost": 1.4e-07,
|
|
|
+ "input_cost_per_token": 1.38e-06,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "chat",
|
|
|
+ "output_cost_per_token": 1.1e-05,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/chat/completions",
|
|
|
+ "/v1/batch",
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": true,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
+ "azure/eu/gpt-5.1-codex": {
|
|
|
+ "cache_read_input_token_cost": 1.4e-07,
|
|
|
+ "input_cost_per_token": 1.38e-06,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "responses",
|
|
|
+ "output_cost_per_token": 1.1e-05,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": false,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
+ "azure/eu/gpt-5.1-codex-mini": {
|
|
|
+ "cache_read_input_token_cost": 2.8e-08,
|
|
|
+ "input_cost_per_token": 2.75e-07,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "responses",
|
|
|
+ "output_cost_per_token": 2.2e-06,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": false,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
"azure/eu/gpt-5-nano-2025-08-07": {
|
|
|
"cache_read_input_token_cost": 5.5e-09,
|
|
|
"input_cost_per_token": 5.5e-08,
|
|
|
@@ -1471,6 +1597,132 @@
|
|
|
"supports_tool_choice": true,
|
|
|
"supports_vision": true
|
|
|
},
|
|
|
+ "azure/global/gpt-5.1": {
|
|
|
+ "cache_read_input_token_cost": 1.25e-07,
|
|
|
+ "input_cost_per_token": 1.25e-06,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "chat",
|
|
|
+ "output_cost_per_token": 1e-05,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/chat/completions",
|
|
|
+ "/v1/batch",
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": true,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
+ "azure/global/gpt-5.1-chat": {
|
|
|
+ "cache_read_input_token_cost": 1.25e-07,
|
|
|
+ "input_cost_per_token": 1.25e-06,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "chat",
|
|
|
+ "output_cost_per_token": 1e-05,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/chat/completions",
|
|
|
+ "/v1/batch",
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": true,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
+ "azure/global/gpt-5.1-codex": {
|
|
|
+ "cache_read_input_token_cost": 1.25e-07,
|
|
|
+ "input_cost_per_token": 1.25e-06,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "responses",
|
|
|
+ "output_cost_per_token": 1e-05,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": false,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
+ "azure/global/gpt-5.1-codex-mini": {
|
|
|
+ "cache_read_input_token_cost": 2.5e-08,
|
|
|
+ "input_cost_per_token": 2.5e-07,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "responses",
|
|
|
+ "output_cost_per_token": 2e-06,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": false,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
"azure/gpt-3.5-turbo": {
|
|
|
"input_cost_per_token": 5e-07,
|
|
|
"litellm_provider": "azure",
|
|
|
@@ -2534,8 +2786,134 @@
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
"mode": "chat",
|
|
|
- "output_cost_per_token": 1e-05,
|
|
|
- "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/",
|
|
|
+ "output_cost_per_token": 1e-05,
|
|
|
+ "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/",
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/chat/completions",
|
|
|
+ "/v1/batch",
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": true,
|
|
|
+ "supports_tool_choice": false,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
+ "azure/gpt-5-chat-latest": {
|
|
|
+ "cache_read_input_token_cost": 1.25e-07,
|
|
|
+ "input_cost_per_token": 1.25e-06,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 128000,
|
|
|
+ "max_output_tokens": 16384,
|
|
|
+ "max_tokens": 16384,
|
|
|
+ "mode": "chat",
|
|
|
+ "output_cost_per_token": 1e-05,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/chat/completions",
|
|
|
+ "/v1/batch",
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": true,
|
|
|
+ "supports_tool_choice": false,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
+ "azure/gpt-5-codex": {
|
|
|
+ "cache_read_input_token_cost": 1.25e-07,
|
|
|
+ "input_cost_per_token": 1.25e-06,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "responses",
|
|
|
+ "output_cost_per_token": 1e-05,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": true,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
+ "azure/gpt-5-mini": {
|
|
|
+ "cache_read_input_token_cost": 2.5e-08,
|
|
|
+ "input_cost_per_token": 2.5e-07,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "chat",
|
|
|
+ "output_cost_per_token": 2e-06,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/chat/completions",
|
|
|
+ "/v1/batch",
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": true,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
+ "azure/gpt-5-mini-2025-08-07": {
|
|
|
+ "cache_read_input_token_cost": 2.5e-08,
|
|
|
+ "input_cost_per_token": 2.5e-07,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "chat",
|
|
|
+ "output_cost_per_token": 2e-06,
|
|
|
"supported_endpoints": [
|
|
|
"/v1/chat/completions",
|
|
|
"/v1/batch",
|
|
|
@@ -2556,18 +2934,18 @@
|
|
|
"supports_reasoning": true,
|
|
|
"supports_response_schema": true,
|
|
|
"supports_system_messages": true,
|
|
|
- "supports_tool_choice": false,
|
|
|
+ "supports_tool_choice": true,
|
|
|
"supports_vision": true
|
|
|
},
|
|
|
- "azure/gpt-5-chat-latest": {
|
|
|
- "cache_read_input_token_cost": 1.25e-07,
|
|
|
- "input_cost_per_token": 1.25e-06,
|
|
|
+ "azure/gpt-5-nano": {
|
|
|
+ "cache_read_input_token_cost": 5e-09,
|
|
|
+ "input_cost_per_token": 5e-08,
|
|
|
"litellm_provider": "azure",
|
|
|
- "max_input_tokens": 128000,
|
|
|
- "max_output_tokens": 16384,
|
|
|
- "max_tokens": 16384,
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
"mode": "chat",
|
|
|
- "output_cost_per_token": 1e-05,
|
|
|
+ "output_cost_per_token": 4e-07,
|
|
|
"supported_endpoints": [
|
|
|
"/v1/chat/completions",
|
|
|
"/v1/batch",
|
|
|
@@ -2588,19 +2966,21 @@
|
|
|
"supports_reasoning": true,
|
|
|
"supports_response_schema": true,
|
|
|
"supports_system_messages": true,
|
|
|
- "supports_tool_choice": false,
|
|
|
+ "supports_tool_choice": true,
|
|
|
"supports_vision": true
|
|
|
},
|
|
|
- "azure/gpt-5-codex": {
|
|
|
- "cache_read_input_token_cost": 1.25e-07,
|
|
|
- "input_cost_per_token": 1.25e-06,
|
|
|
+ "azure/gpt-5-nano-2025-08-07": {
|
|
|
+ "cache_read_input_token_cost": 5e-09,
|
|
|
+ "input_cost_per_token": 5e-08,
|
|
|
"litellm_provider": "azure",
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
- "mode": "responses",
|
|
|
- "output_cost_per_token": 1e-05,
|
|
|
+ "mode": "chat",
|
|
|
+ "output_cost_per_token": 4e-07,
|
|
|
"supported_endpoints": [
|
|
|
+ "/v1/chat/completions",
|
|
|
+ "/v1/batch",
|
|
|
"/v1/responses"
|
|
|
],
|
|
|
"supported_modalities": [
|
|
|
@@ -2621,18 +3001,16 @@
|
|
|
"supports_tool_choice": true,
|
|
|
"supports_vision": true
|
|
|
},
|
|
|
- "azure/gpt-5-mini": {
|
|
|
- "cache_read_input_token_cost": 2.5e-08,
|
|
|
- "input_cost_per_token": 2.5e-07,
|
|
|
+ "azure/gpt-5-pro": {
|
|
|
+ "input_cost_per_token": 1.5e-05,
|
|
|
"litellm_provider": "azure",
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
- "max_tokens": 128000,
|
|
|
- "mode": "chat",
|
|
|
- "output_cost_per_token": 2e-06,
|
|
|
+      "max_tokens": 128000,
|
|
|
+ "mode": "responses",
|
|
|
+ "output_cost_per_token": 0.00012,
|
|
|
+ "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure?pivots=azure-openai&tabs=global-standard-aoai%2Cstandard-chat-completions%2Cglobal-standard#gpt-5",
|
|
|
"supported_endpoints": [
|
|
|
- "/v1/chat/completions",
|
|
|
- "/v1/batch",
|
|
|
"/v1/responses"
|
|
|
],
|
|
|
"supported_modalities": [
|
|
|
@@ -2643,7 +3021,6 @@
|
|
|
"text"
|
|
|
],
|
|
|
"supports_function_calling": true,
|
|
|
- "supports_native_streaming": true,
|
|
|
"supports_parallel_function_calling": true,
|
|
|
"supports_pdf_input": true,
|
|
|
"supports_prompt_caching": true,
|
|
|
@@ -2653,15 +3030,15 @@
|
|
|
"supports_tool_choice": true,
|
|
|
"supports_vision": true
|
|
|
},
|
|
|
- "azure/gpt-5-mini-2025-08-07": {
|
|
|
- "cache_read_input_token_cost": 2.5e-08,
|
|
|
- "input_cost_per_token": 2.5e-07,
|
|
|
+ "azure/gpt-5.1": {
|
|
|
+ "cache_read_input_token_cost": 1.25e-07,
|
|
|
+ "input_cost_per_token": 1.25e-06,
|
|
|
"litellm_provider": "azure",
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
"mode": "chat",
|
|
|
- "output_cost_per_token": 2e-06,
|
|
|
+ "output_cost_per_token": 1e-05,
|
|
|
"supported_endpoints": [
|
|
|
"/v1/chat/completions",
|
|
|
"/v1/batch",
|
|
|
@@ -2672,7 +3049,8 @@
|
|
|
"image"
|
|
|
],
|
|
|
"supported_output_modalities": [
|
|
|
- "text"
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
],
|
|
|
"supports_function_calling": true,
|
|
|
"supports_native_streaming": true,
|
|
|
@@ -2685,15 +3063,15 @@
|
|
|
"supports_tool_choice": true,
|
|
|
"supports_vision": true
|
|
|
},
|
|
|
- "azure/gpt-5-nano": {
|
|
|
- "cache_read_input_token_cost": 5e-09,
|
|
|
- "input_cost_per_token": 5e-08,
|
|
|
+ "azure/gpt-5.1-chat": {
|
|
|
+ "cache_read_input_token_cost": 1.25e-07,
|
|
|
+ "input_cost_per_token": 1.25e-06,
|
|
|
"litellm_provider": "azure",
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
"mode": "chat",
|
|
|
- "output_cost_per_token": 4e-07,
|
|
|
+ "output_cost_per_token": 1e-05,
|
|
|
"supported_endpoints": [
|
|
|
"/v1/chat/completions",
|
|
|
"/v1/batch",
|
|
|
@@ -2704,7 +3082,8 @@
|
|
|
"image"
|
|
|
],
|
|
|
"supported_output_modalities": [
|
|
|
- "text"
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
],
|
|
|
"supports_function_calling": true,
|
|
|
"supports_native_streaming": true,
|
|
|
@@ -2717,18 +3096,16 @@
|
|
|
"supports_tool_choice": true,
|
|
|
"supports_vision": true
|
|
|
},
|
|
|
- "azure/gpt-5-nano-2025-08-07": {
|
|
|
- "cache_read_input_token_cost": 5e-09,
|
|
|
- "input_cost_per_token": 5e-08,
|
|
|
+ "azure/gpt-5.1-codex": {
|
|
|
+ "cache_read_input_token_cost": 1.25e-07,
|
|
|
+ "input_cost_per_token": 1.25e-06,
|
|
|
"litellm_provider": "azure",
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
- "mode": "chat",
|
|
|
- "output_cost_per_token": 4e-07,
|
|
|
+ "mode": "responses",
|
|
|
+ "output_cost_per_token": 1e-05,
|
|
|
"supported_endpoints": [
|
|
|
- "/v1/chat/completions",
|
|
|
- "/v1/batch",
|
|
|
"/v1/responses"
|
|
|
],
|
|
|
"supported_modalities": [
|
|
|
@@ -2745,19 +3122,19 @@
|
|
|
"supports_prompt_caching": true,
|
|
|
"supports_reasoning": true,
|
|
|
"supports_response_schema": true,
|
|
|
- "supports_system_messages": true,
|
|
|
+ "supports_system_messages": false,
|
|
|
"supports_tool_choice": true,
|
|
|
"supports_vision": true
|
|
|
},
|
|
|
- "azure/gpt-5-pro": {
|
|
|
- "input_cost_per_token": 1.5e-05,
|
|
|
+ "azure/gpt-5.1-codex-mini": {
|
|
|
+ "cache_read_input_token_cost": 2.5e-08,
|
|
|
+ "input_cost_per_token": 2.5e-07,
|
|
|
"litellm_provider": "azure",
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
- "max_tokens": 400000,
|
|
|
+ "max_tokens": 128000,
|
|
|
"mode": "responses",
|
|
|
- "output_cost_per_token": 0.00012,
|
|
|
- "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure?pivots=azure-openai&tabs=global-standard-aoai%2Cstandard-chat-completions%2Cglobal-standard#gpt-5",
|
|
|
+ "output_cost_per_token": 2e-06,
|
|
|
"supported_endpoints": [
|
|
|
"/v1/responses"
|
|
|
],
|
|
|
@@ -2769,12 +3146,13 @@
|
|
|
"text"
|
|
|
],
|
|
|
"supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
"supports_parallel_function_calling": true,
|
|
|
"supports_pdf_input": true,
|
|
|
"supports_prompt_caching": true,
|
|
|
"supports_reasoning": true,
|
|
|
"supports_response_schema": true,
|
|
|
- "supports_system_messages": true,
|
|
|
+ "supports_system_messages": false,
|
|
|
"supports_tool_choice": true,
|
|
|
"supports_vision": true
|
|
|
},
|
|
|
@@ -3695,6 +4073,132 @@
|
|
|
"supports_tool_choice": true,
|
|
|
"supports_vision": true
|
|
|
},
|
|
|
+ "azure/us/gpt-5.1": {
|
|
|
+ "cache_read_input_token_cost": 1.4e-07,
|
|
|
+ "input_cost_per_token": 1.38e-06,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "chat",
|
|
|
+ "output_cost_per_token": 1.1e-05,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/chat/completions",
|
|
|
+ "/v1/batch",
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": true,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
+ "azure/us/gpt-5.1-chat": {
|
|
|
+ "cache_read_input_token_cost": 1.4e-07,
|
|
|
+ "input_cost_per_token": 1.38e-06,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "chat",
|
|
|
+ "output_cost_per_token": 1.1e-05,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/chat/completions",
|
|
|
+ "/v1/batch",
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": true,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
+ "azure/us/gpt-5.1-codex": {
|
|
|
+ "cache_read_input_token_cost": 1.4e-07,
|
|
|
+ "input_cost_per_token": 1.38e-06,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "responses",
|
|
|
+ "output_cost_per_token": 1.1e-05,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": false,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
+ "azure/us/gpt-5.1-codex-mini": {
|
|
|
+ "cache_read_input_token_cost": 2.8e-08,
|
|
|
+ "input_cost_per_token": 2.75e-07,
|
|
|
+ "litellm_provider": "azure",
|
|
|
+ "max_input_tokens": 272000,
|
|
|
+ "max_output_tokens": 128000,
|
|
|
+ "max_tokens": 128000,
|
|
|
+ "mode": "responses",
|
|
|
+ "output_cost_per_token": 2.2e-06,
|
|
|
+ "supported_endpoints": [
|
|
|
+ "/v1/responses"
|
|
|
+ ],
|
|
|
+ "supported_modalities": [
|
|
|
+ "text",
|
|
|
+ "image"
|
|
|
+ ],
|
|
|
+ "supported_output_modalities": [
|
|
|
+ "text"
|
|
|
+ ],
|
|
|
+ "supports_function_calling": true,
|
|
|
+ "supports_native_streaming": true,
|
|
|
+ "supports_parallel_function_calling": true,
|
|
|
+ "supports_pdf_input": true,
|
|
|
+ "supports_prompt_caching": true,
|
|
|
+ "supports_reasoning": true,
|
|
|
+ "supports_response_schema": true,
|
|
|
+ "supports_system_messages": false,
|
|
|
+ "supports_tool_choice": true,
|
|
|
+ "supports_vision": true
|
|
|
+ },
|
|
|
"azure/us/o1-2024-12-17": {
|
|
|
"cache_read_input_token_cost": 8.25e-06,
|
|
|
"input_cost_per_token": 1.65e-05,
|
|
|
@@ -14623,7 +15127,7 @@
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 1e-05,
|
|
|
"output_cost_per_token_flex": 5e-06,
|
|
|
"output_cost_per_token_priority": 2e-05,
|
|
|
@@ -14660,7 +15164,7 @@
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 1e-05,
|
|
|
"output_cost_per_token_priority": 2e-05,
|
|
|
"supported_endpoints": [
|
|
|
@@ -14696,7 +15200,7 @@
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 1e-05,
|
|
|
"output_cost_per_token_priority": 2e-05,
|
|
|
"supported_endpoints": [
|
|
|
@@ -14732,7 +15236,7 @@
|
|
|
"max_input_tokens": 128000,
|
|
|
"max_output_tokens": 16384,
|
|
|
"max_tokens": 16384,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 1e-05,
|
|
|
"output_cost_per_token_priority": 2e-05,
|
|
|
"supported_endpoints": [
|
|
|
@@ -14835,7 +15339,7 @@
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 1e-05,
|
|
|
"output_cost_per_token_flex": 5e-06,
|
|
|
"output_cost_per_token_priority": 2e-05,
|
|
|
@@ -14870,7 +15374,7 @@
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 1e-05,
|
|
|
"supported_endpoints": [
|
|
|
"/v1/chat/completions",
|
|
|
@@ -14902,7 +15406,7 @@
|
|
|
"max_input_tokens": 128000,
|
|
|
"max_output_tokens": 16384,
|
|
|
"max_tokens": 16384,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 1e-05,
|
|
|
"supported_endpoints": [
|
|
|
"/v1/chat/completions",
|
|
|
@@ -15034,7 +15538,7 @@
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 2e-06,
|
|
|
"output_cost_per_token_flex": 1e-06,
|
|
|
"output_cost_per_token_priority": 3.6e-06,
|
|
|
@@ -15073,7 +15577,7 @@
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 2e-06,
|
|
|
"output_cost_per_token_flex": 1e-06,
|
|
|
"output_cost_per_token_priority": 3.6e-06,
|
|
|
@@ -15111,7 +15615,7 @@
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 4e-07,
|
|
|
"output_cost_per_token_flex": 2e-07,
|
|
|
"supported_endpoints": [
|
|
|
@@ -15146,7 +15650,7 @@
|
|
|
"max_input_tokens": 272000,
|
|
|
"max_output_tokens": 128000,
|
|
|
"max_tokens": 128000,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 4e-07,
|
|
|
"output_cost_per_token_flex": 2e-07,
|
|
|
"supported_endpoints": [
|
|
|
@@ -17999,7 +18503,7 @@
|
|
|
"max_input_tokens": 200000,
|
|
|
"max_output_tokens": 100000,
|
|
|
"max_tokens": 100000,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 6e-05,
|
|
|
"supports_function_calling": true,
|
|
|
"supports_parallel_function_calling": true,
|
|
|
@@ -18018,7 +18522,7 @@
|
|
|
"max_input_tokens": 128000,
|
|
|
"max_output_tokens": 65536,
|
|
|
"max_tokens": 65536,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 4.4e-06,
|
|
|
"supports_pdf_input": true,
|
|
|
"supports_prompt_caching": true,
|
|
|
@@ -18032,7 +18536,7 @@
|
|
|
"max_input_tokens": 128000,
|
|
|
"max_output_tokens": 65536,
|
|
|
"max_tokens": 65536,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 1.2e-05,
|
|
|
"supports_pdf_input": true,
|
|
|
"supports_prompt_caching": true,
|
|
|
@@ -18046,7 +18550,7 @@
|
|
|
"max_input_tokens": 128000,
|
|
|
"max_output_tokens": 32768,
|
|
|
"max_tokens": 32768,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 6e-05,
|
|
|
"supports_pdf_input": true,
|
|
|
"supports_prompt_caching": true,
|
|
|
@@ -18060,7 +18564,7 @@
|
|
|
"max_input_tokens": 128000,
|
|
|
"max_output_tokens": 32768,
|
|
|
"max_tokens": 32768,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 6e-05,
|
|
|
"supports_pdf_input": true,
|
|
|
"supports_prompt_caching": true,
|
|
|
@@ -18176,7 +18680,7 @@
|
|
|
"max_input_tokens": 200000,
|
|
|
"max_output_tokens": 100000,
|
|
|
"max_tokens": 100000,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 8e-06,
|
|
|
"supported_endpoints": [
|
|
|
"/v1/responses",
|
|
|
@@ -18274,7 +18778,7 @@
|
|
|
"max_input_tokens": 200000,
|
|
|
"max_output_tokens": 100000,
|
|
|
"max_tokens": 100000,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 4.4e-06,
|
|
|
"supports_function_calling": true,
|
|
|
"supports_parallel_function_calling": false,
|
|
|
@@ -18291,7 +18795,7 @@
|
|
|
"max_input_tokens": 200000,
|
|
|
"max_output_tokens": 100000,
|
|
|
"max_tokens": 100000,
|
|
|
- "mode": "responses",
|
|
|
+ "mode": "chat",
|
|
|
"output_cost_per_token": 4.4e-06,
|
|
|
"supports_function_calling": true,
|
|
|
"supports_parallel_function_calling": false,
|
|
|
@@ -23930,6 +24434,12 @@
|
|
|
"supports_reasoning": true,
|
|
|
"supports_tool_choice": true
|
|
|
},
|
|
|
+ "vertex_ai/gemini-2.5-flash-image": {
|
|
|
+ "litellm_provider": "vertex_ai-language-models",
|
|
|
+ "mode": "image_generation",
|
|
|
+ "output_cost_per_image": 0.039,
|
|
|
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-generation#edit-an-image"
|
|
|
+ },
|
|
|
"vertex_ai/imagegeneration@006": {
|
|
|
"litellm_provider": "vertex_ai-image-models",
|
|
|
"mode": "image_generation",
|
|
|
@@ -23954,6 +24464,12 @@
|
|
|
"output_cost_per_image": 0.04,
|
|
|
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
|
|
},
|
|
|
+ "vertex_ai/imagen-3.0-capability-001": {
|
|
|
+ "litellm_provider": "vertex_ai-image-models",
|
|
|
+ "mode": "image_generation",
|
|
|
+ "output_cost_per_image": 0.04,
|
|
|
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/image/edit-insert-objects"
|
|
|
+ },
|
|
|
"vertex_ai/imagen-4.0-fast-generate-001": {
|
|
|
"litellm_provider": "vertex_ai-image-models",
|
|
|
"mode": "image_generation",
|