---
# AIProxy Configuration File
#
# Priority: Environment Variables > Config File > Database
#
# This file allows you to configure channels, model configs, and options
# without using the database.
#
# All keys and tokens below are placeholders. Replace them before use and
# avoid committing real credentials to version control.

# Channels Configuration
#
# Note: Channels from YAML are assigned negative IDs automatically and are
# not persisted to the database. They are merged with database channels in
# memory.
channels:
  - name: "openai-channel-1"
    type_name: "openai"  # Can use type_name instead of numeric type
    key: "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
    base_url: "https://api.openai.com"
    models:
      - "gpt-4"
      - "gpt-3.5-turbo"
    # Maps a requested model name (left) to another model name (right).
    model_mapping:
      "gpt-4": "gpt-4-0613"
    status: 1  # 1=Enabled, 2=Disabled
    priority: 0
    balance: 100.0
    balance_threshold: 10.0
    enabled_auto_balance_check: true
    sets:
      - "default"
    # NOTE(review): nesting reconstructed as config -> spec -> organization;
    # confirm against the loader's channel schema.
    config:
      spec:
        organization: "org-xxxxx"

  - name: "azure-channel-1"
    type_name: "azure"  # Type name is case-insensitive
    key: "your-azure-api-key"
    base_url: "https://your-resource.openai.azure.com"
    models:
      - "gpt-4"
    status: 1
    priority: 1
    sets:
      - "default"

  - name: "claude-channel-1"
    type_name: "claude"  # "claude" is an alias for "anthropic"
    key: "sk-ant-xxxxx"
    base_url: "https://api.anthropic.com"
    models:
      - "claude-3-opus-20240229"
      - "claude-3-sonnet-20240229"
    status: 1
    priority: 0
    sets:
      - "default"
      - "premium"

  # You can also use numeric type if preferred
  - name: "gemini-channel-1"
    type: 24  # Google Gemini
    key: "your-gemini-api-key"
    models:
      - "gemini-pro"
    status: 1

# Model Configurations
modelconfigs:
  - model: "gpt-4"
    owner: "openai"
    type_name: "chat"  # Can use type_name instead of numeric type
    # OR use numeric type:
    # type: 1  # ChatCompletions
    rpm: 3500
    tpm: 80000
    retry_times: 3
    # NOTE(review): timeout units are not stated in this file (presumably
    # seconds) - confirm.
    timeout_config:
      request_timeout: 300
      stream_request_timeout: 600
    # NOTE(review): placed at model level (siblings of timeout_config);
    # confirm against the loader's model config schema.
    warn_error_rate: 0.5
    max_error_rate: 0.8
    price:
      input: 0.03
      output: 0.06
    config:
      max_context_tokens: 8192
      max_output_tokens: 4096
      vision: false
      tool_choice: true

  - model: "gpt-3.5-turbo"
    owner: "openai"
    type_name: "chat"
    rpm: 3500
    tpm: 90000
    price:
      input: 0.0005
      output: 0.0015
    config:
      max_context_tokens: 16384
      max_output_tokens: 4096

  - model: "claude-3-opus-20240229"
    owner: "anthropic"
    type_name: "chat"
    rpm: 4000
    tpm: 400000
    price:
      input: 0.015
      output: 0.075
    config:
      max_context_tokens: 200000
      max_output_tokens: 4096
      vision: true

  - model: "claude-3-sonnet-20240229"
    owner: "anthropic"
    type_name: "chat"
    rpm: 4000
    tpm: 400000
    price:
      input: 0.003
      output: 0.015
    config:
      max_context_tokens: 200000
      max_output_tokens: 4096
      vision: true

  - model: "text-embedding-3-small"
    owner: "openai"
    type_name: "embedding"  # Embedding model
    rpm: 3000
    tpm: 1000000
    price:
      input: 0.00002
      output: 0
    config:
      max_context_tokens: 8191

  - model: "dall-e-3"
    owner: "openai"
    type_name: "image"  # Image generation
    rpm: 50
    # Per-image price, keyed by image size and then by quality.
    image_quality_prices:
      "1024x1024":
        "standard": 0.040
        "hd": 0.080
      "1024x1792":
        "standard": 0.080
        "hd": 0.120
      "1792x1024":
        "standard": 0.080
        "hd": 0.120

# System Options Configuration
#
# Every option value is written as a quoted string, including numbers,
# booleans, and the JSON object for GroupConsumeLevelRatio. Keep the quotes
# so the YAML parser does not convert the values to other types.
options:
  # Log retention settings (in hours)
  LogStorageHours: "168"  # 7 days
  RetryLogStorageHours: "72"  # 3 days
  LogDetailStorageHours: "24"  # 1 day

  # Clean log batch size
  CleanLogBatchSize: "1000"

  # IP rate limiting
  # NOTE(review): the option names suggest a per-IP group count rather than
  # a request rate - confirm the inline comments against the server code.
  IPGroupsThreshold: "100"  # Requests per minute
  IPGroupsBanThreshold: "200"  # Ban threshold

  # Log detail settings
  SaveAllLogDetail: "false"
  LogDetailRequestBodyMaxSize: "10000"
  LogDetailResponseBodyMaxSize: "10000"

  # Retry settings
  RetryTimes: "3"

  # Group settings
  GroupMaxTokenNum: "0"  # 0 means unlimited
  GroupConsumeLevelRatio: '{"1":1,"2":0.9,"3":0.8}'

  # Error rate alerts
  DefaultWarnNotifyErrorRate: "0.5"

  # Usage alerts
  UsageAlertThreshold: "100"
  UsageAlertMinAvgThreshold: "10"

  # Fuzzy token threshold
  FuzzyTokenThreshold: "240000"

  # Disable serve (for maintenance)
  DisableServe: "false"