Explorar el Código

feat: image usage support (#153)

* feat: image usage support

* chore: swag
zijiren hace 8 meses
padre
commit
7b6a0d9ece

+ 23 - 6
core/common/consume/consume.go

@@ -129,6 +129,9 @@ func CalculateAmount(
 	inputTokens := usage.InputTokens
 	outputTokens := usage.OutputTokens
 
+	if modelPrice.ImageInputPrice > 0 {
+		inputTokens -= usage.ImageInputTokens
+	}
 	if modelPrice.CachedPrice > 0 {
 		inputTokens -= usage.CachedTokens
 	}
@@ -136,27 +139,41 @@ func CalculateAmount(
 		inputTokens -= usage.CacheCreationTokens
 	}
 
-	promptAmount := decimal.NewFromInt(inputTokens).
+	inputAmount := decimal.NewFromInt(inputTokens).
 		Mul(decimal.NewFromFloat(modelPrice.InputPrice)).
 		Div(decimal.NewFromInt(modelPrice.GetInputPriceUnit()))
-	completionAmount := decimal.NewFromInt(outputTokens).
-		Mul(decimal.NewFromFloat(modelPrice.OutputPrice)).
-		Div(decimal.NewFromInt(modelPrice.GetOutputPriceUnit()))
+
+	inputImageAmount := decimal.NewFromInt(usage.ImageInputTokens).
+		Mul(decimal.NewFromFloat(modelPrice.ImageInputPrice)).
+		Div(decimal.NewFromInt(modelPrice.GetImageInputPriceUnit()))
+
 	cachedAmount := decimal.NewFromInt(usage.CachedTokens).
 		Mul(decimal.NewFromFloat(modelPrice.CachedPrice)).
 		Div(decimal.NewFromInt(modelPrice.GetCachedPriceUnit()))
+
 	cacheCreationAmount := decimal.NewFromInt(usage.CacheCreationTokens).
 		Mul(decimal.NewFromFloat(modelPrice.CacheCreationPrice)).
 		Div(decimal.NewFromInt(modelPrice.GetCacheCreationPriceUnit()))
+
 	webSearchAmount := decimal.NewFromInt(usage.WebSearchCount).
 		Mul(decimal.NewFromFloat(modelPrice.WebSearchPrice)).
 		Div(decimal.NewFromInt(modelPrice.GetWebSearchPriceUnit()))
 
-	return promptAmount.
-		Add(completionAmount).
+	outputAmount := decimal.NewFromInt(outputTokens).
+		Mul(decimal.NewFromFloat(modelPrice.OutputPrice)).
+		Div(decimal.NewFromInt(modelPrice.GetOutputPriceUnit()))
+
+	imageOutputAmount := decimal.NewFromInt(usage.ImageOutputNumbers).
+		Mul(decimal.NewFromFloat(modelPrice.ImageOutputPrice)).
+		Div(decimal.NewFromInt(modelPrice.GetImageOutputPriceUnit()))
+
+	return inputAmount.
+		Add(inputImageAmount).
 		Add(cachedAmount).
 		Add(cacheCreationAmount).
 		Add(webSearchAmount).
+		Add(outputAmount).
+		Add(imageOutputAmount).
 		InexactFloat64()
 }
 

+ 4 - 4
core/controller/group.go

@@ -319,9 +319,9 @@ func UpdateGroupsStatus(c *gin.Context) {
 }
 
 type CreateGroupRequest struct {
-	RPMRatio     float64  `json:"rpm_ratio"`
-	TPMRatio     float64  `json:"tpm_ratio"`
-	AvailableSet []string `json:"available_set"`
+	RPMRatio      float64  `json:"rpm_ratio"`
+	TPMRatio      float64  `json:"tpm_ratio"`
+	AvailableSets []string `json:"available_sets"`
 
 	BalanceAlertEnabled   bool    `json:"balance_alert_enabled"`
 	BalanceAlertThreshold float64 `json:"balance_alert_threshold"`
@@ -331,7 +331,7 @@ func (r *CreateGroupRequest) ToGroup() *model.Group {
 	return &model.Group{
 		RPMRatio:      r.RPMRatio,
 		TPMRatio:      r.TPMRatio,
-		AvailableSets: r.AvailableSet,
+		AvailableSets: r.AvailableSets,
 
 		BalanceAlertEnabled:   r.BalanceAlertEnabled,
 		BalanceAlertThreshold: r.BalanceAlertThreshold,

+ 4 - 4
core/controller/relay-controller.go

@@ -346,7 +346,7 @@ func relay(c *gin.Context, mode mode.Mode, relayController RelayController) {
 			return
 		}
 
-		meta.InputTokens = requestUsage.InputTokens
+		meta.RequestUsage = requestUsage
 	}
 
 	// First attempt
@@ -430,7 +430,7 @@ type retryState struct {
 
 	meta             *meta.Meta
 	price            model.Price
-	inputTokens      int64
+	requestUsage     model.Usage
 	result           *controller.HandleResult
 	migratedChannels []*model.Channel
 }
@@ -500,7 +500,7 @@ func initRetryState(retryTimes int, channel *initialChannel, meta *meta.Meta, re
 		meta:             meta,
 		result:           result,
 		price:            price,
-		inputTokens:      meta.InputTokens,
+		requestUsage:     meta.RequestUsage,
 		migratedChannels: channel.migratedChannels,
 	}
 
@@ -556,7 +556,7 @@ func retryLoop(c *gin.Context, mode mode.Mode, state *retryState, relayControlle
 			c,
 			newChannel,
 			mode,
-			meta.WithInputTokens(state.inputTokens),
+			meta.WithRequestUsage(state.requestUsage),
 			meta.WithRetryAt(time.Now()),
 		)
 		var retry bool

+ 115 - 1
core/docs/docs.go

@@ -7185,11 +7185,22 @@ const docTemplate = `{
                     "type": "boolean"
                 },
                 "image_prices": {
+                    "description": "map[size]price_per_image",
                     "type": "object",
                     "additionalProperties": {
                         "type": "number"
                     }
                 },
+                "image_quality_prices": {
+                    "description": "map[size]map[quality]price_per_image",
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "number"
+                        }
+                    }
+                },
                 "model": {
                     "type": "string"
                 },
@@ -7219,7 +7230,7 @@ const docTemplate = `{
         "controller.CreateGroupRequest": {
             "type": "object",
             "properties": {
-                "available_set": {
+                "available_sets": {
                     "type": "array",
                     "items": {
                         "type": "string"
@@ -7412,11 +7423,22 @@ const docTemplate = `{
                     "type": "boolean"
                 },
                 "image_prices": {
+                    "description": "map[size]price_per_image",
                     "type": "object",
                     "additionalProperties": {
                         "type": "number"
                     }
                 },
+                "image_quality_prices": {
+                    "description": "map[size]map[quality]price_per_image",
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "number"
+                        }
+                    }
+                },
                 "model": {
                     "type": "string"
                 },
@@ -7586,6 +7608,12 @@ const docTemplate = `{
                 "cached_tokens": {
                     "type": "integer"
                 },
+                "image_input_tokens": {
+                    "type": "integer"
+                },
+                "image_output_numbers": {
+                    "type": "integer"
+                },
                 "input_tokens": {
                     "type": "integer"
                 },
@@ -8391,28 +8419,58 @@ const docTemplate = `{
                 }
             }
         },
+        "model.ImageInputTokensDetails": {
+            "type": "object",
+            "properties": {
+                "image_tokens": {
+                    "description": "The number of image tokens in the input prompt.",
+                    "type": "integer"
+                },
+                "text_tokens": {
+                    "description": "The number of text tokens in the input prompt.",
+                    "type": "integer"
+                }
+            }
+        },
         "model.ImageRequest": {
             "type": "object",
             "properties": {
+                "background": {
+                    "type": "string"
+                },
                 "model": {
                     "type": "string"
                 },
+                "moderation": {
+                    "type": "string"
+                },
                 "n": {
                     "type": "integer"
                 },
+                "output_compression": {
+                    "type": "integer"
+                },
+                "output_format": {
+                    "description": "png, jpeg, webp",
+                    "type": "string"
+                },
                 "prompt": {
                     "type": "string"
                 },
                 "quality": {
+                    "description": "auto, high, medium, low, hd, standard",
                     "type": "string"
                 },
                 "response_format": {
+                    "description": "url, b64_json",
                     "type": "string"
                 },
                 "size": {
+                    "description": "1024x1024, 1536x1024, 1024x1536, auto, 256x256, 512x512, 1792x1024, 1024x1792",
                     "type": "string"
                 },
                 "style": {
+                    "description": "vivid, natural",
                     "type": "string"
                 },
                 "user": {
@@ -8431,6 +8489,39 @@ const docTemplate = `{
                     "items": {
                         "$ref": "#/definitions/model.ImageData"
                     }
+                },
+                "usage": {
+                    "description": "For gpt-image-1 only, the token usage information for the image generation.",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/model.ImageUsage"
+                        }
+                    ]
+                }
+            }
+        },
+        "model.ImageUsage": {
+            "type": "object",
+            "properties": {
+                "input_tokens": {
+                    "description": "The number of tokens (images and text) in the input prompt.",
+                    "type": "integer"
+                },
+                "input_tokens_details": {
+                    "description": "The input tokens detailed information for the image generation.",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/model.ImageInputTokensDetails"
+                        }
+                    ]
+                },
+                "outputTokens": {
+                    "description": "The number of image tokens in the output image.",
+                    "type": "integer"
+                },
+                "total_tokens": {
+                    "description": "The total number of tokens (images and text) used for the image generation.",
+                    "type": "integer"
                 }
             }
         },
@@ -8595,11 +8686,22 @@ const docTemplate = `{
                     "type": "boolean"
                 },
                 "image_prices": {
+                    "description": "map[size]price_per_image",
                     "type": "object",
                     "additionalProperties": {
                         "type": "number"
                     }
                 },
+                "image_quality_prices": {
+                    "description": "map[size]map[quality]price_per_image",
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "number"
+                        }
+                    }
+                },
                 "model": {
                     "type": "string"
                 },
@@ -8777,6 +8879,18 @@ const docTemplate = `{
                 "cached_price_unit": {
                     "type": "integer"
                 },
+                "image_input_price": {
+                    "type": "number"
+                },
+                "image_input_price_unit": {
+                    "type": "integer"
+                },
+                "image_output_price": {
+                    "type": "number"
+                },
+                "image_output_price_unit": {
+                    "type": "integer"
+                },
                 "input_price": {
                     "type": "number"
                 },

+ 115 - 1
core/docs/swagger.json

@@ -7176,11 +7176,22 @@
                     "type": "boolean"
                 },
                 "image_prices": {
+                    "description": "map[size]price_per_image",
                     "type": "object",
                     "additionalProperties": {
                         "type": "number"
                     }
                 },
+                "image_quality_prices": {
+                    "description": "map[size]map[quality]price_per_image",
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "number"
+                        }
+                    }
+                },
                 "model": {
                     "type": "string"
                 },
@@ -7210,7 +7221,7 @@
         "controller.CreateGroupRequest": {
             "type": "object",
             "properties": {
-                "available_set": {
+                "available_sets": {
                     "type": "array",
                     "items": {
                         "type": "string"
@@ -7403,11 +7414,22 @@
                     "type": "boolean"
                 },
                 "image_prices": {
+                    "description": "map[size]price_per_image",
                     "type": "object",
                     "additionalProperties": {
                         "type": "number"
                     }
                 },
+                "image_quality_prices": {
+                    "description": "map[size]map[quality]price_per_image",
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "number"
+                        }
+                    }
+                },
                 "model": {
                     "type": "string"
                 },
@@ -7577,6 +7599,12 @@
                 "cached_tokens": {
                     "type": "integer"
                 },
+                "image_input_tokens": {
+                    "type": "integer"
+                },
+                "image_output_numbers": {
+                    "type": "integer"
+                },
                 "input_tokens": {
                     "type": "integer"
                 },
@@ -8382,28 +8410,58 @@
                 }
             }
         },
+        "model.ImageInputTokensDetails": {
+            "type": "object",
+            "properties": {
+                "image_tokens": {
+                    "description": "The number of image tokens in the input prompt.",
+                    "type": "integer"
+                },
+                "text_tokens": {
+                    "description": "The number of text tokens in the input prompt.",
+                    "type": "integer"
+                }
+            }
+        },
         "model.ImageRequest": {
             "type": "object",
             "properties": {
+                "background": {
+                    "type": "string"
+                },
                 "model": {
                     "type": "string"
                 },
+                "moderation": {
+                    "type": "string"
+                },
                 "n": {
                     "type": "integer"
                 },
+                "output_compression": {
+                    "type": "integer"
+                },
+                "output_format": {
+                    "description": "png, jpeg, webp",
+                    "type": "string"
+                },
                 "prompt": {
                     "type": "string"
                 },
                 "quality": {
+                    "description": "auto, high, medium, low, hd, standard",
                     "type": "string"
                 },
                 "response_format": {
+                    "description": "url, b64_json",
                     "type": "string"
                 },
                 "size": {
+                    "description": "1024x1024, 1536x1024, 1024x1536, auto, 256x256, 512x512, 1792x1024, 1024x1792",
                     "type": "string"
                 },
                 "style": {
+                    "description": "vivid, natural",
                     "type": "string"
                 },
                 "user": {
@@ -8422,6 +8480,39 @@
                     "items": {
                         "$ref": "#/definitions/model.ImageData"
                     }
+                },
+                "usage": {
+                    "description": "For gpt-image-1 only, the token usage information for the image generation.",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/model.ImageUsage"
+                        }
+                    ]
+                }
+            }
+        },
+        "model.ImageUsage": {
+            "type": "object",
+            "properties": {
+                "input_tokens": {
+                    "description": "The number of tokens (images and text) in the input prompt.",
+                    "type": "integer"
+                },
+                "input_tokens_details": {
+                    "description": "The input tokens detailed information for the image generation.",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/model.ImageInputTokensDetails"
+                        }
+                    ]
+                },
+                "outputTokens": {
+                    "description": "The number of image tokens in the output image.",
+                    "type": "integer"
+                },
+                "total_tokens": {
+                    "description": "The total number of tokens (images and text) used for the image generation.",
+                    "type": "integer"
                 }
             }
         },
@@ -8586,11 +8677,22 @@
                     "type": "boolean"
                 },
                 "image_prices": {
+                    "description": "map[size]price_per_image",
                     "type": "object",
                     "additionalProperties": {
                         "type": "number"
                     }
                 },
+                "image_quality_prices": {
+                    "description": "map[size]map[quality]price_per_image",
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "number"
+                        }
+                    }
+                },
                 "model": {
                     "type": "string"
                 },
@@ -8768,6 +8870,18 @@
                 "cached_price_unit": {
                     "type": "integer"
                 },
+                "image_input_price": {
+                    "type": "number"
+                },
+                "image_input_price_unit": {
+                    "type": "integer"
+                },
+                "image_output_price": {
+                    "type": "number"
+                },
+                "image_output_price_unit": {
+                    "type": "integer"
+                },
                 "input_price": {
                     "type": "number"
                 },

+ 82 - 1
core/docs/swagger.yaml

@@ -66,6 +66,14 @@ definitions:
       image_prices:
         additionalProperties:
           type: number
+        description: map[size]price_per_image
+        type: object
+      image_quality_prices:
+        additionalProperties:
+          additionalProperties:
+            type: number
+          type: object
+        description: map[size]map[quality]price_per_image
         type: object
       model:
         type: string
@@ -86,7 +94,7 @@ definitions:
     type: object
   controller.CreateGroupRequest:
     properties:
-      available_set:
+      available_sets:
         items:
           type: string
         type: array
@@ -215,6 +223,14 @@ definitions:
       image_prices:
         additionalProperties:
           type: number
+        description: map[size]price_per_image
+        type: object
+      image_quality_prices:
+        additionalProperties:
+          additionalProperties:
+            type: number
+          type: object
+        description: map[size]map[quality]price_per_image
         type: object
       model:
         type: string
@@ -325,6 +341,10 @@ definitions:
         type: integer
       cached_tokens:
         type: integer
+      image_input_tokens:
+        type: integer
+      image_output_numbers:
+        type: integer
       input_tokens:
         type: integer
       output_tokens:
@@ -869,21 +889,44 @@ definitions:
       url:
         type: string
     type: object
+  model.ImageInputTokensDetails:
+    properties:
+      image_tokens:
+        description: The number of image tokens in the input prompt.
+        type: integer
+      text_tokens:
+        description: The number of text tokens in the input prompt.
+        type: integer
+    type: object
   model.ImageRequest:
     properties:
+      background:
+        type: string
       model:
         type: string
+      moderation:
+        type: string
       "n":
         type: integer
+      output_compression:
+        type: integer
+      output_format:
+        description: png, jpeg, webp
+        type: string
       prompt:
         type: string
       quality:
+        description: auto, high, medium, low, hd, standard
         type: string
       response_format:
+        description: url, b64_json
         type: string
       size:
+        description: 1024x1024, 1536x1024, 1024x1536, auto, 256x256, 512x512, 1792x1024,
+          1024x1792
         type: string
       style:
+        description: vivid, natural
         type: string
       user:
         type: string
@@ -896,6 +939,28 @@ definitions:
         items:
           $ref: '#/definitions/model.ImageData'
         type: array
+      usage:
+        allOf:
+        - $ref: '#/definitions/model.ImageUsage'
+        description: For gpt-image-1 only, the token usage information for the image
+          generation.
+    type: object
+  model.ImageUsage:
+    properties:
+      input_tokens:
+        description: The number of tokens (images and text) in the input prompt.
+        type: integer
+      input_tokens_details:
+        allOf:
+        - $ref: '#/definitions/model.ImageInputTokensDetails'
+        description: The input tokens detailed information for the image generation.
+      outputTokens:
+        description: The number of image tokens in the output image.
+        type: integer
+      total_tokens:
+        description: The total number of tokens (images and text) used for the image
+          generation.
+        type: integer
     type: object
   model.JSONSchema:
     properties:
@@ -1006,6 +1071,14 @@ definitions:
       image_prices:
         additionalProperties:
           type: number
+        description: map[size]price_per_image
+        type: object
+      image_quality_prices:
+        additionalProperties:
+          additionalProperties:
+            type: number
+          type: object
+        description: map[size]map[quality]price_per_image
         type: object
       model:
         type: string
@@ -1145,6 +1218,14 @@ definitions:
         type: number
       cached_price_unit:
         type: integer
+      image_input_price:
+        type: number
+      image_input_price_unit:
+        type: integer
+      image_output_price:
+        type: number
+      image_output_price_unit:
+        type: integer
       input_price:
         type: number
       input_price_unit:

+ 23 - 1
core/model/configkey.go

@@ -1,6 +1,9 @@
 package model
 
-import "reflect"
+import (
+	"reflect"
+	"strings"
+)
 
 //nolint:revive
 type ModelConfigKey string
@@ -13,11 +16,30 @@ const (
 	ModelConfigToolChoiceKey       ModelConfigKey = "tool_choice"
 	ModelConfigSupportFormatsKey   ModelConfigKey = "support_formats"
 	ModelConfigSupportVoicesKey    ModelConfigKey = "support_voices"
+	ModelConfigImageSizes          ModelConfigKey = "image_sizes"
+	ModelConfigImageQualitys       ModelConfigKey = "image_qualitys"
 )
 
 //nolint:revive
 type ModelConfigOption func(config map[ModelConfigKey]any)
 
+func WithModelConfigImageSizes(sizes ...string) ModelConfigOption {
+	for _, size := range sizes {
+		if !strings.Contains(size, "x") {
+			panic("image size format error")
+		}
+	}
+	return func(config map[ModelConfigKey]any) {
+		config[ModelConfigImageSizes] = sizes
+	}
+}
+
+func WithModelConfigImageQualitys(qualitys ...string) ModelConfigOption {
+	return func(config map[ModelConfigKey]any) {
+		config[ModelConfigImageQualitys] = qualitys
+	}
+}
+
 func WithModelConfigMaxContextTokens(maxContextTokens int) ModelConfigOption {
 	return func(config map[ModelConfigKey]any) {
 		config[ModelConfigMaxContextTokensKey] = maxContextTokens

+ 24 - 0
core/model/log.go

@@ -44,9 +44,15 @@ type Price struct {
 	InputPrice     float64 `json:"input_price,omitempty"`
 	InputPriceUnit int64   `json:"input_price_unit,omitempty"`
 
+	ImageInputPrice     float64 `json:"image_input_price,omitempty"`
+	ImageInputPriceUnit int64   `json:"image_input_price_unit,omitempty"`
+
 	OutputPrice     float64 `json:"output_price,omitempty"`
 	OutputPriceUnit int64   `json:"output_price_unit,omitempty"`
 
+	ImageOutputPrice     float64 `json:"image_output_price,omitempty"`
+	ImageOutputPriceUnit int64   `json:"image_output_price_unit,omitempty"`
+
 	CachedPrice     float64 `json:"cached_price,omitempty"`
 	CachedPriceUnit int64   `json:"cached_price_unit,omitempty"`
 
@@ -64,6 +70,20 @@ func (p *Price) GetInputPriceUnit() int64 {
 	return PriceUnit
 }
 
+func (p *Price) GetImageInputPriceUnit() int64 {
+	if p.ImageInputPriceUnit > 0 {
+		return p.ImageInputPriceUnit
+	}
+	return PriceUnit
+}
+
+func (p *Price) GetImageOutputPriceUnit() int64 {
+	if p.ImageOutputPriceUnit > 0 {
+		return p.ImageOutputPriceUnit
+	}
+	return PriceUnit
+}
+
 func (p *Price) GetOutputPriceUnit() int64 {
 	if p.OutputPriceUnit > 0 {
 		return p.OutputPriceUnit
@@ -94,7 +114,9 @@ func (p *Price) GetWebSearchPriceUnit() int64 {
 
 type Usage struct {
 	InputTokens         int64 `json:"input_tokens,omitempty"`
+	ImageInputTokens    int64 `json:"image_input_tokens,omitempty"`
 	OutputTokens        int64 `json:"output_tokens,omitempty"`
+	ImageOutputNumbers  int64 `json:"image_output_numbers,omitempty"`
 	CachedTokens        int64 `json:"cached_tokens,omitempty"`
 	CacheCreationTokens int64 `json:"cache_creation_tokens,omitempty"`
 	TotalTokens         int64 `json:"total_tokens,omitempty"`
@@ -106,7 +128,9 @@ func (u *Usage) Add(other *Usage) {
 		return
 	}
 	u.InputTokens += other.InputTokens
+	u.ImageInputTokens += other.ImageInputTokens
 	u.OutputTokens += other.OutputTokens
+	u.ImageOutputNumbers += other.ImageOutputNumbers
 	u.CachedTokens += other.CachedTokens
 	u.CacheCreationTokens += other.CacheCreationTokens
 	u.TotalTokens += other.TotalTokens

+ 6 - 3
core/model/modelconfig.go

@@ -27,9 +27,12 @@ type ModelConfig struct {
 	ExcludeFromTests bool                   `json:"exclude_from_tests,omitempty"`
 	RPM              int64                  `json:"rpm,omitempty"`
 	TPM              int64                  `json:"tpm,omitempty"`
-	ImagePrices      map[string]float64     `gorm:"serializer:fastjson;type:text" json:"image_prices,omitempty"`
-	Price            Price                  `gorm:"embedded"                      json:"price,omitempty"`
-	RetryTimes       int64                  `json:"retry_times"`
+	// map[size]map[quality]price_per_image
+	ImageQualityPrices map[string]map[string]float64 `gorm:"serializer:fastjson;type:text" json:"image_quality_prices,omitempty"`
+	// map[size]price_per_image
+	ImagePrices map[string]float64 `gorm:"serializer:fastjson;type:text" json:"image_prices,omitempty"`
+	Price       Price              `gorm:"embedded"                      json:"price,omitempty"`
+	RetryTimes  int64              `json:"retry_times"`
 }
 
 func NewDefaultModelConfig(model string) *ModelConfig {

+ 3 - 0
core/model/summary.go

@@ -44,6 +44,9 @@ func (d *SummaryData) buildUpdateData(tableName string) map[string]any {
 	if d.Usage.InputTokens > 0 {
 		data["input_tokens"] = gorm.Expr(tableName+".input_tokens + ?", d.Usage.InputTokens)
 	}
+	if d.Usage.ImageInputTokens > 0 {
+		data["input_image_tokens"] = gorm.Expr(tableName+".input_image_tokens + ?", d.Usage.ImageInputTokens)
+	}
 	if d.Usage.OutputTokens > 0 {
 		data["output_tokens"] = gorm.Expr(tableName+".output_tokens + ?", d.Usage.OutputTokens)
 	}

+ 2 - 2
core/relay/adaptor/ali/rerank.go

@@ -91,8 +91,8 @@ func RerankHandler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*model
 	var usage *model.Usage
 	if rerankResponse.Usage == nil {
 		usage = &model.Usage{
-			InputTokens: meta.InputTokens,
-			TotalTokens: meta.InputTokens,
+			InputTokens: meta.RequestUsage.InputTokens,
+			TotalTokens: meta.RequestUsage.InputTokens,
 		}
 	} else {
 		usage = &model.Usage{

+ 4 - 4
core/relay/adaptor/anthropic/main.go

@@ -193,8 +193,8 @@ func StreamHandler(m *meta.Meta, c *gin.Context, resp *http.Response) (*model.Us
 				}
 				usage.Add(response.Usage)
 				if usage.PromptTokens == 0 {
-					usage.PromptTokens = m.InputTokens
-					usage.TotalTokens += m.InputTokens
+					usage.PromptTokens = m.RequestUsage.InputTokens
+					usage.TotalTokens += m.RequestUsage.InputTokens
 				}
 				response.Usage = usage
 				responseText.Reset()
@@ -217,9 +217,9 @@ func StreamHandler(m *meta.Meta, c *gin.Context, resp *http.Response) (*model.Us
 
 	if usage == nil {
 		usage = &relaymodel.Usage{
-			PromptTokens:     m.InputTokens,
+			PromptTokens:     m.RequestUsage.InputTokens,
 			CompletionTokens: openai.CountTokenText(responseText.String(), m.OriginModel),
-			TotalTokens:      m.InputTokens + openai.CountTokenText(responseText.String(), m.OriginModel),
+			TotalTokens:      m.RequestUsage.InputTokens + openai.CountTokenText(responseText.String(), m.OriginModel),
 		}
 	}
 

+ 5 - 5
core/relay/adaptor/anthropic/openai.go

@@ -415,7 +415,7 @@ func Response2OpenAI(meta *meta.Meta, claudeResponse *Response) *relaymodel.Text
 		},
 	}
 	if fullTextResponse.Usage.PromptTokens == 0 {
-		fullTextResponse.Usage.PromptTokens = meta.InputTokens
+		fullTextResponse.Usage.PromptTokens = meta.RequestUsage.InputTokens
 	}
 	fullTextResponse.Usage.TotalTokens = fullTextResponse.Usage.PromptTokens + fullTextResponse.Usage.CompletionTokens
 	return &fullTextResponse
@@ -470,8 +470,8 @@ func OpenAIStreamHandler(m *meta.Meta, c *gin.Context, resp *http.Response) (*mo
 			}
 			usage.Add(response.Usage)
 			if usage.PromptTokens == 0 {
-				usage.PromptTokens = m.InputTokens
-				usage.TotalTokens += m.InputTokens
+				usage.PromptTokens = m.RequestUsage.InputTokens
+				usage.TotalTokens += m.RequestUsage.InputTokens
 			}
 			response.Usage = usage
 			responseText.Reset()
@@ -493,9 +493,9 @@ func OpenAIStreamHandler(m *meta.Meta, c *gin.Context, resp *http.Response) (*mo
 
 	if usage == nil {
 		usage = &relaymodel.Usage{
-			PromptTokens:     m.InputTokens,
+			PromptTokens:     m.RequestUsage.InputTokens,
 			CompletionTokens: openai.CountTokenText(responseText.String(), m.OriginModel),
-			TotalTokens:      m.InputTokens + openai.CountTokenText(responseText.String(), m.OriginModel),
+			TotalTokens:      m.RequestUsage.InputTokens + openai.CountTokenText(responseText.String(), m.OriginModel),
 		}
 		_ = render.ObjectData(c, &relaymodel.ChatCompletionsStreamResponse{
 			ID:      openai.ChatCompletionID(),

+ 4 - 4
core/relay/adaptor/aws/claude/main.go

@@ -215,8 +215,8 @@ func StreamHandler(m *meta.Meta, c *gin.Context) (*model.Usage, *relaymodel.Erro
 					}
 					usage.Add(response.Usage)
 					if usage.PromptTokens == 0 {
-						usage.PromptTokens = m.InputTokens
-						usage.TotalTokens += m.InputTokens
+						usage.PromptTokens = m.RequestUsage.InputTokens
+						usage.TotalTokens += m.RequestUsage.InputTokens
 					}
 					response.Usage = usage
 					responseText.Reset()
@@ -242,9 +242,9 @@ func StreamHandler(m *meta.Meta, c *gin.Context) (*model.Usage, *relaymodel.Erro
 
 	if usage == nil {
 		usage = &relaymodel.Usage{
-			PromptTokens:     m.InputTokens,
+			PromptTokens:     m.RequestUsage.InputTokens,
 			CompletionTokens: openai.CountTokenText(responseText.String(), m.OriginModel),
-			TotalTokens:      m.InputTokens + openai.CountTokenText(responseText.String(), m.OriginModel),
+			TotalTokens:      m.RequestUsage.InputTokens + openai.CountTokenText(responseText.String(), m.OriginModel),
 		}
 		_ = render.ObjectData(c, &relaymodel.ChatCompletionsStreamResponse{
 			ID:      openai.ChatCompletionID(),

+ 0 - 0
core/relay/adaptor/azure/constants.go → core/relay/adaptor/azure/main.go


+ 2 - 2
core/relay/adaptor/coze/main.go

@@ -143,7 +143,7 @@ func StreamHandler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*model
 
 	render.Done(c)
 
-	return openai.ResponseText2Usage(responseText.String(), meta.ActualModel, meta.InputTokens).ToModelUsage(), nil
+	return openai.ResponseText2Usage(responseText.String(), meta.ActualModel, meta.RequestUsage.InputTokens).ToModelUsage(), nil
 }
 
 func Handler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*model.Usage, *relaymodel.ErrorWithStatusCode) {
@@ -178,5 +178,5 @@ func Handler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*model.Usage
 	if len(fullTextResponse.Choices) > 0 {
 		responseText = fullTextResponse.Choices[0].Message.StringContent()
 	}
-	return openai.ResponseText2Usage(responseText, meta.ActualModel, meta.InputTokens).ToModelUsage(), nil
+	return openai.ResponseText2Usage(responseText, meta.ActualModel, meta.RequestUsage.InputTokens).ToModelUsage(), nil
 }

+ 2 - 2
core/relay/adaptor/doubaoaudio/tts.go

@@ -182,8 +182,8 @@ func TTSDoResponse(meta *meta.Meta, c *gin.Context, _ *http.Response) (*model.Us
 	defer conn.Close()
 
 	usage := &model.Usage{
-		InputTokens: meta.InputTokens,
-		TotalTokens: meta.InputTokens,
+		InputTokens: meta.RequestUsage.InputTokens,
+		TotalTokens: meta.RequestUsage.InputTokens,
 	}
 
 	for {

+ 1 - 1
core/relay/adaptor/gemini/main.go

@@ -604,7 +604,7 @@ func StreamHandler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*model
 	}
 
 	usage := relaymodel.Usage{
-		PromptTokens: meta.InputTokens,
+		PromptTokens: meta.RequestUsage.InputTokens,
 	}
 
 	for scanner.Scan() {

+ 2 - 2
core/relay/adaptor/jina/rerank.go

@@ -44,8 +44,8 @@ func RerankHandler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*model
 	if usage.PromptTokens == 0 && usage.TotalTokens != 0 {
 		usage.PromptTokens = usage.TotalTokens
 	} else if usage.PromptTokens == 0 {
-		usage.PromptTokens = meta.InputTokens
-		usage.TotalTokens = meta.InputTokens
+		usage.PromptTokens = meta.RequestUsage.InputTokens
+		usage.TotalTokens = meta.RequestUsage.InputTokens
 	}
 	modelUsage := usage.ToModelUsage()
 	_, err = node.SetAny("meta", map[string]any{

+ 2 - 2
core/relay/adaptor/minimax/tts.go

@@ -144,7 +144,7 @@ func TTSHandler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*model.Us
 		log.Warnf("write response body failed: %v", err)
 	}
 
-	usageCharacters := meta.InputTokens
+	usageCharacters := meta.RequestUsage.InputTokens
 	if result.ExtraInfo.UsageCharacters > 0 {
 		usageCharacters = result.ExtraInfo.UsageCharacters
 	}
@@ -167,7 +167,7 @@ func ttsStreamHandler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*mo
 	defer openai.PutScannerBuffer(buf)
 	scanner.Buffer(*buf, cap(*buf))
 
-	usageCharacters := meta.InputTokens
+	usageCharacters := meta.RequestUsage.InputTokens
 
 	for scanner.Scan() {
 		data := scanner.Text()

+ 1 - 1
core/relay/adaptor/openai/adaptor.go

@@ -44,7 +44,7 @@ func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) {
 	case mode.ImagesGenerations:
 		path = "/images/generations"
 	case mode.Edits:
-		path = "/edits"
+		path = "/images/edits"
 	case mode.AudioSpeech:
 		path = "/audio/speech"
 	case mode.AudioTranscription:

+ 9 - 5
core/relay/adaptor/openai/image.go

@@ -50,8 +50,6 @@ func ImageHandler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*model.
 
 	log := middleware.GetLogger(c)
 
-	responseFormat := meta.GetString(MetaResponseFormat)
-
 	responseBody, err := io.ReadAll(resp.Body)
 	if err != nil {
 		return nil, ErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError)
@@ -63,11 +61,17 @@ func ImageHandler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*model.
 	}
 
 	usage := &model.Usage{
-		InputTokens: int64(len(imageResponse.Data)),
-		TotalTokens: int64(len(imageResponse.Data)),
+		InputTokens:        meta.RequestUsage.InputTokens,
+		TotalTokens:        meta.RequestUsage.InputTokens,
+		ImageOutputNumbers: meta.RequestUsage.ImageOutputNumbers,
+	}
+
+	if imageResponse.Usage != nil {
+		usage = imageResponse.Usage.ToModelUsage()
+		usage.ImageOutputNumbers = meta.RequestUsage.ImageOutputNumbers
 	}
 
-	if responseFormat == "b64_json" {
+	if meta.GetString(MetaResponseFormat) == "b64_json" {
 		for _, data := range imageResponse.Data {
 			if len(data.B64Json) > 0 {
 				continue

+ 7 - 7
core/relay/adaptor/openai/main.go

@@ -186,7 +186,7 @@ func StreamHandler(meta *meta.Meta, c *gin.Context, resp *http.Response, preHand
 	}
 
 	if usage == nil || (usage.TotalTokens == 0 && responseText.Len() > 0) {
-		usage = ResponseText2Usage(responseText.String(), meta.ActualModel, meta.InputTokens)
+		usage = ResponseText2Usage(responseText.String(), meta.ActualModel, meta.RequestUsage.InputTokens)
 		_ = render.ObjectData(c, &relaymodel.ChatCompletionsStreamResponse{
 			ID:      ChatCompletionID(),
 			Model:   meta.OriginModel,
@@ -196,8 +196,8 @@ func StreamHandler(meta *meta.Meta, c *gin.Context, resp *http.Response, preHand
 			Usage:   usage,
 		})
 	} else if usage.TotalTokens != 0 && usage.PromptTokens == 0 { // some channels don't return prompt tokens & completion tokens
-		usage.PromptTokens = meta.InputTokens
-		usage.CompletionTokens = usage.TotalTokens - meta.InputTokens
+		usage.PromptTokens = meta.RequestUsage.InputTokens
+		usage.CompletionTokens = usage.TotalTokens - meta.RequestUsage.InputTokens
 	}
 
 	render.Done(c)
@@ -389,17 +389,17 @@ func Handler(meta *meta.Meta, c *gin.Context, resp *http.Response, preHandler Pr
 			completionTokens += CountTokenText(choice.Message.StringContent(), meta.ActualModel)
 		}
 		usage = &relaymodel.Usage{
-			PromptTokens:     meta.InputTokens,
+			PromptTokens:     meta.RequestUsage.InputTokens,
 			CompletionTokens: completionTokens,
-			TotalTokens:      meta.InputTokens + completionTokens,
+			TotalTokens:      meta.RequestUsage.InputTokens + completionTokens,
 		}
 		_, err = node.Set("usage", ast.NewAny(usage))
 		if err != nil {
 			return usage.ToModelUsage(), ErrorWrapper(err, "set_usage_failed", http.StatusInternalServerError)
 		}
 	} else if usage.TotalTokens != 0 && usage.PromptTokens == 0 { // some channels don't return prompt tokens & completion tokens
-		usage.PromptTokens = meta.InputTokens
-		usage.CompletionTokens = usage.TotalTokens - meta.InputTokens
+		usage.PromptTokens = meta.RequestUsage.InputTokens
+		usage.CompletionTokens = usage.TotalTokens - meta.RequestUsage.InputTokens
 		_, err = node.Set("usage", ast.NewAny(usage))
 		if err != nil {
 			return usage.ToModelUsage(), ErrorWrapper(err, "set_usage_failed", http.StatusInternalServerError)

+ 2 - 2
core/relay/adaptor/openai/moderations.go

@@ -42,8 +42,8 @@ func ModerationsHandler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*
 	}
 
 	usage := &model.Usage{
-		InputTokens: meta.InputTokens,
-		TotalTokens: meta.InputTokens,
+		InputTokens: meta.RequestUsage.InputTokens,
+		TotalTokens: meta.RequestUsage.InputTokens,
 	}
 
 	_, err = c.Writer.Write(newData)

+ 3 - 3
core/relay/adaptor/openai/rerank.go

@@ -61,12 +61,12 @@ func RerankHandler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*model
 
 	if rerankResponse.Meta.Tokens == nil {
 		return &model.Usage{
-			InputTokens: meta.InputTokens,
-			TotalTokens: meta.InputTokens,
+			InputTokens: meta.RequestUsage.InputTokens,
+			TotalTokens: meta.RequestUsage.InputTokens,
 		}, nil
 	}
 	if rerankResponse.Meta.Tokens.InputTokens <= 0 {
-		rerankResponse.Meta.Tokens.InputTokens = meta.InputTokens
+		rerankResponse.Meta.Tokens.InputTokens = meta.RequestUsage.InputTokens
 	}
 	return &model.Usage{
 		InputTokens:  rerankResponse.Meta.Tokens.InputTokens,

+ 2 - 2
core/relay/adaptor/openai/stt.go

@@ -112,8 +112,8 @@ func STTHandler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*model.Us
 		return nil, ErrorWrapper(err, "get_text_from_body_err", http.StatusInternalServerError)
 	}
 	var promptTokens int64
-	if meta.InputTokens > 0 {
-		promptTokens = meta.InputTokens
+	if meta.RequestUsage.InputTokens > 0 {
+		promptTokens = meta.RequestUsage.InputTokens
 	} else {
 		promptTokens = CountTokenText(text, meta.ActualModel)
 	}

+ 2 - 2
core/relay/adaptor/openai/tts.go

@@ -73,7 +73,7 @@ func TTSHandler(meta *meta.Meta, c *gin.Context, resp *http.Response) (*model.Us
 		log.Warnf("write response body failed: %v", err)
 	}
 	return &model.Usage{
-		InputTokens: meta.InputTokens,
-		TotalTokens: meta.InputTokens,
+		InputTokens: meta.RequestUsage.InputTokens,
+		TotalTokens: meta.RequestUsage.InputTokens,
 	}, nil
 }

+ 1 - 4
core/relay/controller/dohelper.go

@@ -249,10 +249,7 @@ func handleResponse(a adaptor.Adaptor, c *gin.Context, meta *meta.Meta, resp *ht
 		return model.Usage{}, relayErr
 	}
 
-	return model.Usage{
-		InputTokens: meta.InputTokens,
-		TotalTokens: meta.InputTokens,
-	}, nil
+	return meta.RequestUsage, nil
 }
 
 func updateUsageMetrics(usage model.Usage, log *log.Entry) {

+ 25 - 9
core/relay/controller/image.go

@@ -6,6 +6,7 @@ import (
 
 	"github.com/gin-gonic/gin"
 	"github.com/labring/aiproxy/core/model"
+	"github.com/labring/aiproxy/core/relay/adaptor/openai"
 	relaymodel "github.com/labring/aiproxy/core/relay/model"
 	"github.com/labring/aiproxy/core/relay/utils"
 )
@@ -27,12 +28,19 @@ func getImageRequest(c *gin.Context) (*relaymodel.ImageRequest, error) {
 	return imageRequest, nil
 }
 
-func GetImageSizePrice(modelConfig *model.ModelConfig, size string) (float64, bool) {
-	if len(modelConfig.ImagePrices) == 0 {
-		return modelConfig.Price.InputPrice, true
+func GetImageOutputPrice(modelConfig *model.ModelConfig, size string, quality string) (float64, bool) {
+	switch {
+	case len(modelConfig.ImagePrices) == 0 && len(modelConfig.ImageQualityPrices) == 0:
+		return modelConfig.Price.OutputPrice, true
+	case len(modelConfig.ImageQualityPrices) != 0:
+		price, ok := modelConfig.ImageQualityPrices[size][quality]
+		return price, ok
+	case len(modelConfig.ImagePrices) != 0:
+		price, ok := modelConfig.ImagePrices[size]
+		return price, ok
+	default:
+		return 0, false
 	}
-	price, ok := modelConfig.ImagePrices[size]
-	return price, ok
 }
 
 func GetImageRequestPrice(c *gin.Context, mc *model.ModelConfig) (model.Price, error) {
@@ -41,14 +49,21 @@ func GetImageRequestPrice(c *gin.Context, mc *model.ModelConfig) (model.Price, e
 		return model.Price{}, err
 	}
 
-	imageCostPrice, ok := GetImageSizePrice(mc, imageRequest.Size)
+	imageCostPrice, ok := GetImageOutputPrice(mc, imageRequest.Size, imageRequest.Quality)
 	if !ok {
 		return model.Price{}, fmt.Errorf("invalid image size: %s", imageRequest.Size)
 	}
 
 	return model.Price{
-		PerRequestPrice: mc.Price.PerRequestPrice,
-		InputPrice:      imageCostPrice,
+		PerRequestPrice:      mc.Price.PerRequestPrice,
+		InputPrice:           mc.Price.InputPrice,
+		InputPriceUnit:       mc.Price.InputPriceUnit,
+		ImageInputPrice:      mc.Price.ImageInputPrice,
+		ImageInputPriceUnit:  mc.Price.ImageInputPriceUnit,
+		OutputPrice:          mc.Price.OutputPrice,
+		OutputPriceUnit:      mc.Price.OutputPriceUnit,
+		ImageOutputPrice:     imageCostPrice,
+		ImageOutputPriceUnit: mc.Price.ImageOutputPriceUnit,
 	}, nil
 }
 
@@ -59,6 +74,7 @@ func GetImageRequestUsage(c *gin.Context, _ *model.ModelConfig) (model.Usage, er
 	}
 
 	return model.Usage{
-		InputTokens: int64(imageRequest.N),
+		InputTokens:        openai.CountTokenInput(imageRequest.Prompt, imageRequest.Model),
+		ImageOutputNumbers: int64(imageRequest.N),
 	}, nil
 }

+ 6 - 2
core/relay/controller/rerank.go

@@ -2,10 +2,10 @@ package controller
 
 import (
 	"errors"
-	"strings"
 
 	"github.com/gin-gonic/gin"
 	"github.com/labring/aiproxy/core/model"
+	"github.com/labring/aiproxy/core/relay/adaptor/openai"
 	relaymodel "github.com/labring/aiproxy/core/relay/model"
 	"github.com/labring/aiproxy/core/relay/utils"
 )
@@ -29,7 +29,11 @@ func getRerankRequest(c *gin.Context) (*relaymodel.RerankRequest, error) {
 }
 
 func rerankPromptTokens(rerankRequest *relaymodel.RerankRequest) int64 {
-	return int64(len(rerankRequest.Query)) + int64(len(strings.Join(rerankRequest.Documents, "")))
+	tokens := openai.CountTokenInput(rerankRequest.Query, rerankRequest.Model)
+	for _, d := range rerankRequest.Documents {
+		tokens += openai.CountTokenInput(d, rerankRequest.Model)
+	}
+	return tokens
 }
 
 func GetRerankRequestPrice(_ *gin.Context, mc *model.ModelConfig) (model.Price, error) {

+ 4 - 4
core/relay/meta/meta.go

@@ -32,8 +32,8 @@ type Meta struct {
 	OriginModel string
 	ActualModel string
 	Mode        mode.Mode
-	// TODO: remove this field
-	InputTokens int64
+
+	RequestUsage model.Usage
 }
 
 type Option func(meta *Meta)
@@ -74,9 +74,9 @@ func WithToken(token *model.TokenCache) Option {
 	}
 }
 
-func WithInputTokens(inputTokens int64) Option {
+func WithRequestUsage(requestUsage model.Usage) Option {
 	return func(meta *Meta) {
-		meta.InputTokens = inputTokens
+		meta.RequestUsage = requestUsage
 	}
 }
 

+ 0 - 0
core/relay/model/misc.go → core/relay/model/chat.go


+ 46 - 9
core/relay/model/image.go

@@ -1,14 +1,22 @@
 package model
 
+import "github.com/labring/aiproxy/core/model"
+
+// https://platform.openai.com/docs/api-reference/images/create
+
 type ImageRequest struct {
-	Model          string `json:"model"`
-	Prompt         string `json:"prompt"`
-	Size           string `json:"size,omitempty"`
-	Quality        string `json:"quality,omitempty"`
-	ResponseFormat string `json:"response_format,omitempty"`
-	Style          string `json:"style,omitempty"`
-	User           string `json:"user,omitempty"`
-	N              int    `json:"n,omitempty"`
+	Model             string `json:"model"`
+	Prompt            string `json:"prompt"`
+	Background        string `json:"background,omitempty"`
+	Moderation        string `json:"moderation,omitempty"`
+	OutputCompression int    `json:"output_compression,omitempty"`
+	OutputFormat      string `json:"output_format,omitempty"`   // png, jpeg, webp
+	Size              string `json:"size,omitempty"`            // 1024x1024, 1536x1024, 1024x1536, auto, 256x256, 512x512, 1792x1024, 1024x1792
+	Quality           string `json:"quality,omitempty"`         // auto, high, medium, low, hd, standard
+	ResponseFormat    string `json:"response_format,omitempty"` // url, b64_json
+	Style             string `json:"style,omitempty"`           // vivid, natural
+	User              string `json:"user,omitempty"`
+	N                 int    `json:"n,omitempty"`
 }
 
 type ImageData struct {
@@ -18,6 +26,35 @@ type ImageData struct {
 }
 
 type ImageResponse struct {
-	Data    []*ImageData `json:"data"`
 	Created int64        `json:"created"`
+	Data    []*ImageData `json:"data"`
+	// For gpt-image-1 only, the token usage information for the image generation.
+	Usage *ImageUsage `json:"usage,omitempty"`
+}
+
+type ImageUsage struct {
+	// The number of tokens (images and text) in the input prompt.
+	InputTokens int64 `json:"input_tokens"`
+	// The number of image tokens in the output image.
+	OutputTokens int64 `json:"output_tokens"`
+	// The total number of tokens (images and text) used for the image generation.
+	TotalTokens int64 `json:"total_tokens"`
+	// The input tokens detailed information for the image generation.
+	InputTokensDetails ImageInputTokensDetails `json:"input_tokens_details"`
+}
+
+func (i *ImageUsage) ToModelUsage() *model.Usage {
+	return &model.Usage{
+		InputTokens:      i.InputTokens,
+		ImageInputTokens: i.InputTokensDetails.ImageTokens,
+		OutputTokens:     i.OutputTokens,
+		TotalTokens:      i.TotalTokens,
+	}
+}
+
+type ImageInputTokensDetails struct {
+	// The number of text tokens in the input prompt.
+	TextTokens int64 `json:"text_tokens"`
+	// The number of image tokens in the input prompt.
+	ImageTokens int64 `json:"image_tokens"`
 }