// File: gemini.go

  1. package dto
  2. import (
  3. "encoding/json"
  4. "strings"
  5. "github.com/QuantumNous/new-api/common"
  6. "github.com/QuantumNous/new-api/logger"
  7. "github.com/QuantumNous/new-api/types"
  8. "github.com/gin-gonic/gin"
  9. )
  10. type GeminiChatRequest struct {
  11. Requests []GeminiChatRequest `json:"requests,omitempty"` // For batch requests
  12. Contents []GeminiChatContent `json:"contents"`
  13. SafetySettings []GeminiChatSafetySettings `json:"safetySettings,omitempty"`
  14. GenerationConfig GeminiChatGenerationConfig `json:"generationConfig,omitempty"`
  15. Tools json.RawMessage `json:"tools,omitempty"`
  16. ToolConfig *ToolConfig `json:"toolConfig,omitempty"`
  17. SystemInstructions *GeminiChatContent `json:"systemInstruction,omitempty"`
  18. CachedContent string `json:"cachedContent,omitempty"`
  19. }
  20. type ToolConfig struct {
  21. FunctionCallingConfig *FunctionCallingConfig `json:"functionCallingConfig,omitempty"`
  22. RetrievalConfig *RetrievalConfig `json:"retrievalConfig,omitempty"`
  23. }
  24. type FunctionCallingConfig struct {
  25. Mode FunctionCallingConfigMode `json:"mode,omitempty"`
  26. AllowedFunctionNames []string `json:"allowedFunctionNames,omitempty"`
  27. }
  28. type FunctionCallingConfigMode string
  29. type RetrievalConfig struct {
  30. LatLng *LatLng `json:"latLng,omitempty"`
  31. LanguageCode string `json:"languageCode,omitempty"`
  32. }
  33. type LatLng struct {
  34. Latitude *float64 `json:"latitude,omitempty"`
  35. Longitude *float64 `json:"longitude,omitempty"`
  36. }
  37. func (r *GeminiChatRequest) GetTokenCountMeta() *types.TokenCountMeta {
  38. var files []*types.FileMeta = make([]*types.FileMeta, 0)
  39. var maxTokens int
  40. if r.GenerationConfig.MaxOutputTokens > 0 {
  41. maxTokens = int(r.GenerationConfig.MaxOutputTokens)
  42. }
  43. var inputTexts []string
  44. for _, content := range r.Contents {
  45. for _, part := range content.Parts {
  46. if part.Text != "" {
  47. inputTexts = append(inputTexts, part.Text)
  48. }
  49. if part.InlineData != nil && part.InlineData.Data != "" {
  50. if strings.HasPrefix(part.InlineData.MimeType, "image/") {
  51. files = append(files, &types.FileMeta{
  52. FileType: types.FileTypeImage,
  53. OriginData: part.InlineData.Data,
  54. })
  55. } else if strings.HasPrefix(part.InlineData.MimeType, "audio/") {
  56. files = append(files, &types.FileMeta{
  57. FileType: types.FileTypeAudio,
  58. OriginData: part.InlineData.Data,
  59. })
  60. } else if strings.HasPrefix(part.InlineData.MimeType, "video/") {
  61. files = append(files, &types.FileMeta{
  62. FileType: types.FileTypeVideo,
  63. OriginData: part.InlineData.Data,
  64. })
  65. } else {
  66. files = append(files, &types.FileMeta{
  67. FileType: types.FileTypeFile,
  68. OriginData: part.InlineData.Data,
  69. })
  70. }
  71. }
  72. }
  73. }
  74. inputText := strings.Join(inputTexts, "\n")
  75. return &types.TokenCountMeta{
  76. CombineText: inputText,
  77. Files: files,
  78. MaxTokens: maxTokens,
  79. }
  80. }
  81. func (r *GeminiChatRequest) IsStream(c *gin.Context) bool {
  82. if c.Query("alt") == "sse" {
  83. return true
  84. }
  85. return false
  86. }
  87. func (r *GeminiChatRequest) SetModelName(modelName string) {
  88. // GeminiChatRequest does not have a model field, so this method does nothing.
  89. }
  90. func (r *GeminiChatRequest) GetTools() []GeminiChatTool {
  91. var tools []GeminiChatTool
  92. if strings.HasSuffix(string(r.Tools), "[") {
  93. // is array
  94. if err := common.Unmarshal(r.Tools, &tools); err != nil {
  95. logger.LogError(nil, "error_unmarshalling_tools: "+err.Error())
  96. return nil
  97. }
  98. } else if strings.HasPrefix(string(r.Tools), "{") {
  99. // is object
  100. singleTool := GeminiChatTool{}
  101. if err := common.Unmarshal(r.Tools, &singleTool); err != nil {
  102. logger.LogError(nil, "error_unmarshalling_single_tool: "+err.Error())
  103. return nil
  104. }
  105. tools = []GeminiChatTool{singleTool}
  106. }
  107. return tools
  108. }
  109. func (r *GeminiChatRequest) SetTools(tools []GeminiChatTool) {
  110. if len(tools) == 0 {
  111. r.Tools = json.RawMessage("[]")
  112. return
  113. }
  114. // Marshal the tools to JSON
  115. data, err := common.Marshal(tools)
  116. if err != nil {
  117. logger.LogError(nil, "error_marshalling_tools: "+err.Error())
  118. return
  119. }
  120. r.Tools = data
  121. }
  122. type GeminiThinkingConfig struct {
  123. IncludeThoughts bool `json:"includeThoughts,omitempty"`
  124. ThinkingBudget *int `json:"thinkingBudget,omitempty"`
  125. }
  126. func (c *GeminiThinkingConfig) SetThinkingBudget(budget int) {
  127. c.ThinkingBudget = &budget
  128. }
  129. type GeminiInlineData struct {
  130. MimeType string `json:"mimeType"`
  131. Data string `json:"data"`
  132. }
  133. // UnmarshalJSON custom unmarshaler for GeminiInlineData to support snake_case and camelCase for MimeType
  134. func (g *GeminiInlineData) UnmarshalJSON(data []byte) error {
  135. type Alias GeminiInlineData // Use type alias to avoid recursion
  136. var aux struct {
  137. Alias
  138. MimeTypeSnake string `json:"mime_type"`
  139. }
  140. if err := common.Unmarshal(data, &aux); err != nil {
  141. return err
  142. }
  143. *g = GeminiInlineData(aux.Alias) // Copy other fields if any in future
  144. // Prioritize snake_case if present
  145. if aux.MimeTypeSnake != "" {
  146. g.MimeType = aux.MimeTypeSnake
  147. } else if aux.MimeType != "" { // Fallback to camelCase from Alias
  148. g.MimeType = aux.MimeType
  149. }
  150. // g.Data would be populated by aux.Alias.Data
  151. return nil
  152. }
  153. type FunctionCall struct {
  154. FunctionName string `json:"name"`
  155. Arguments any `json:"args"`
  156. }
  157. type GeminiFunctionResponse struct {
  158. Name string `json:"name"`
  159. Response map[string]interface{} `json:"response"`
  160. }
  161. type GeminiPartExecutableCode struct {
  162. Language string `json:"language,omitempty"`
  163. Code string `json:"code,omitempty"`
  164. }
  165. type GeminiPartCodeExecutionResult struct {
  166. Outcome string `json:"outcome,omitempty"`
  167. Output string `json:"output,omitempty"`
  168. }
  169. type GeminiFileData struct {
  170. MimeType string `json:"mimeType,omitempty"`
  171. FileUri string `json:"fileUri,omitempty"`
  172. }
  173. type GeminiPart struct {
  174. Text string `json:"text,omitempty"`
  175. Thought bool `json:"thought,omitempty"`
  176. InlineData *GeminiInlineData `json:"inlineData,omitempty"`
  177. FunctionCall *FunctionCall `json:"functionCall,omitempty"`
  178. FunctionResponse *GeminiFunctionResponse `json:"functionResponse,omitempty"`
  179. FileData *GeminiFileData `json:"fileData,omitempty"`
  180. ExecutableCode *GeminiPartExecutableCode `json:"executableCode,omitempty"`
  181. CodeExecutionResult *GeminiPartCodeExecutionResult `json:"codeExecutionResult,omitempty"`
  182. }
  183. // UnmarshalJSON custom unmarshaler for GeminiPart to support snake_case and camelCase for InlineData
  184. func (p *GeminiPart) UnmarshalJSON(data []byte) error {
  185. // Alias to avoid recursion during unmarshalling
  186. type Alias GeminiPart
  187. var aux struct {
  188. Alias
  189. InlineDataSnake *GeminiInlineData `json:"inline_data,omitempty"` // snake_case variant
  190. }
  191. if err := common.Unmarshal(data, &aux); err != nil {
  192. return err
  193. }
  194. // Assign fields from alias
  195. *p = GeminiPart(aux.Alias)
  196. // Prioritize snake_case for InlineData if present
  197. if aux.InlineDataSnake != nil {
  198. p.InlineData = aux.InlineDataSnake
  199. } else if aux.InlineData != nil { // Fallback to camelCase from Alias
  200. p.InlineData = aux.InlineData
  201. }
  202. // Other fields like Text, FunctionCall etc. are already populated via aux.Alias
  203. return nil
  204. }
  205. type GeminiChatContent struct {
  206. Role string `json:"role,omitempty"`
  207. Parts []GeminiPart `json:"parts"`
  208. }
  209. type GeminiChatSafetySettings struct {
  210. Category string `json:"category"`
  211. Threshold string `json:"threshold"`
  212. }
  213. type GeminiChatTool struct {
  214. GoogleSearch any `json:"googleSearch,omitempty"`
  215. GoogleSearchRetrieval any `json:"googleSearchRetrieval,omitempty"`
  216. CodeExecution any `json:"codeExecution,omitempty"`
  217. FunctionDeclarations any `json:"functionDeclarations,omitempty"`
  218. URLContext any `json:"urlContext,omitempty"`
  219. }
  220. type GeminiChatGenerationConfig struct {
  221. Temperature *float64 `json:"temperature,omitempty"`
  222. TopP float64 `json:"topP,omitempty"`
  223. TopK float64 `json:"topK,omitempty"`
  224. MaxOutputTokens uint `json:"maxOutputTokens,omitempty"`
  225. CandidateCount int `json:"candidateCount,omitempty"`
  226. StopSequences []string `json:"stopSequences,omitempty"`
  227. ResponseMimeType string `json:"responseMimeType,omitempty"`
  228. ResponseSchema any `json:"responseSchema,omitempty"`
  229. ResponseJsonSchema json.RawMessage `json:"responseJsonSchema,omitempty"`
  230. PresencePenalty *float32 `json:"presencePenalty,omitempty"`
  231. FrequencyPenalty *float32 `json:"frequencyPenalty,omitempty"`
  232. ResponseLogprobs bool `json:"responseLogprobs,omitempty"`
  233. Logprobs *int32 `json:"logprobs,omitempty"`
  234. MediaResolution MediaResolution `json:"mediaResolution,omitempty"`
  235. Seed int64 `json:"seed,omitempty"`
  236. ResponseModalities []string `json:"responseModalities,omitempty"`
  237. ThinkingConfig *GeminiThinkingConfig `json:"thinkingConfig,omitempty"`
  238. SpeechConfig json.RawMessage `json:"speechConfig,omitempty"` // RawMessage to allow flexible speech config
  239. ImageConfig json.RawMessage `json:"imageConfig,omitempty"` // RawMessage to allow flexible image config
  240. }
  241. type MediaResolution string
  242. type GeminiChatCandidate struct {
  243. Content GeminiChatContent `json:"content"`
  244. FinishReason *string `json:"finishReason"`
  245. Index int64 `json:"index"`
  246. SafetyRatings []GeminiChatSafetyRating `json:"safetyRatings"`
  247. }
  248. type GeminiChatSafetyRating struct {
  249. Category string `json:"category"`
  250. Probability string `json:"probability"`
  251. }
  252. type GeminiChatPromptFeedback struct {
  253. SafetyRatings []GeminiChatSafetyRating `json:"safetyRatings"`
  254. BlockReason *string `json:"blockReason,omitempty"`
  255. }
  256. type GeminiChatResponse struct {
  257. Candidates []GeminiChatCandidate `json:"candidates"`
  258. PromptFeedback *GeminiChatPromptFeedback `json:"promptFeedback,omitempty"`
  259. UsageMetadata GeminiUsageMetadata `json:"usageMetadata"`
  260. }
  261. type GeminiUsageMetadata struct {
  262. PromptTokenCount int `json:"promptTokenCount"`
  263. CandidatesTokenCount int `json:"candidatesTokenCount"`
  264. TotalTokenCount int `json:"totalTokenCount"`
  265. ThoughtsTokenCount int `json:"thoughtsTokenCount"`
  266. PromptTokensDetails []GeminiPromptTokensDetails `json:"promptTokensDetails"`
  267. }
  268. type GeminiPromptTokensDetails struct {
  269. Modality string `json:"modality"`
  270. TokenCount int `json:"tokenCount"`
  271. }
  272. // Imagen related structs
  273. type GeminiImageRequest struct {
  274. Instances []GeminiImageInstance `json:"instances"`
  275. Parameters GeminiImageParameters `json:"parameters"`
  276. }
  277. type GeminiImageInstance struct {
  278. Prompt string `json:"prompt"`
  279. }
  280. type GeminiImageParameters struct {
  281. SampleCount int `json:"sampleCount,omitempty"`
  282. AspectRatio string `json:"aspectRatio,omitempty"`
  283. PersonGeneration string `json:"personGeneration,omitempty"`
  284. ImageSize string `json:"imageSize,omitempty"`
  285. }
  286. type GeminiImageResponse struct {
  287. Predictions []GeminiImagePrediction `json:"predictions"`
  288. }
  289. type GeminiImagePrediction struct {
  290. MimeType string `json:"mimeType"`
  291. BytesBase64Encoded string `json:"bytesBase64Encoded"`
  292. RaiFilteredReason string `json:"raiFilteredReason,omitempty"`
  293. SafetyAttributes any `json:"safetyAttributes,omitempty"`
  294. }
  295. // Embedding related structs
  296. type GeminiEmbeddingRequest struct {
  297. Model string `json:"model,omitempty"`
  298. Content GeminiChatContent `json:"content"`
  299. TaskType string `json:"taskType,omitempty"`
  300. Title string `json:"title,omitempty"`
  301. OutputDimensionality int `json:"outputDimensionality,omitempty"`
  302. }
  303. func (r *GeminiEmbeddingRequest) IsStream(c *gin.Context) bool {
  304. // Gemini embedding requests are not streamed
  305. return false
  306. }
  307. func (r *GeminiEmbeddingRequest) GetTokenCountMeta() *types.TokenCountMeta {
  308. var inputTexts []string
  309. for _, part := range r.Content.Parts {
  310. if part.Text != "" {
  311. inputTexts = append(inputTexts, part.Text)
  312. }
  313. }
  314. inputText := strings.Join(inputTexts, "\n")
  315. return &types.TokenCountMeta{
  316. CombineText: inputText,
  317. }
  318. }
  319. func (r *GeminiEmbeddingRequest) SetModelName(modelName string) {
  320. if modelName != "" {
  321. r.Model = modelName
  322. }
  323. }
  324. type GeminiBatchEmbeddingRequest struct {
  325. Requests []*GeminiEmbeddingRequest `json:"requests"`
  326. }
  327. func (r *GeminiBatchEmbeddingRequest) IsStream(c *gin.Context) bool {
  328. // Gemini batch embedding requests are not streamed
  329. return false
  330. }
  331. func (r *GeminiBatchEmbeddingRequest) GetTokenCountMeta() *types.TokenCountMeta {
  332. var inputTexts []string
  333. for _, request := range r.Requests {
  334. meta := request.GetTokenCountMeta()
  335. if meta != nil && meta.CombineText != "" {
  336. inputTexts = append(inputTexts, meta.CombineText)
  337. }
  338. }
  339. inputText := strings.Join(inputTexts, "\n")
  340. return &types.TokenCountMeta{
  341. CombineText: inputText,
  342. }
  343. }
  344. func (r *GeminiBatchEmbeddingRequest) SetModelName(modelName string) {
  345. if modelName != "" {
  346. for _, req := range r.Requests {
  347. req.SetModelName(modelName)
  348. }
  349. }
  350. }
  351. type GeminiEmbeddingResponse struct {
  352. Embedding ContentEmbedding `json:"embedding"`
  353. }
  354. type GeminiBatchEmbeddingResponse struct {
  355. Embeddings []*ContentEmbedding `json:"embeddings"`
  356. }
  357. type ContentEmbedding struct {
  358. Values []float64 `json:"values"`
  359. }