openai_request.go 29 KB


  1. package dto
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "strings"
  6. "github.com/QuantumNous/new-api/common"
  7. "github.com/QuantumNous/new-api/types"
  8. "github.com/gin-gonic/gin"
  9. )
  // ResponseFormat mirrors the `response_format` request parameter.
  type ResponseFormat struct {
  	// Type is the requested output format; dropped from JSON when empty.
  	Type string `json:"type,omitempty"`
  	// JsonSchema keeps the `json_schema` payload as undecoded JSON.
  	JsonSchema json.RawMessage `json:"json_schema,omitempty"`
  }
  // FormatJsonSchema is the decoded shape of a `json_schema` response format.
  type FormatJsonSchema struct {
  	// Description is optional free text; omitted when empty.
  	Description string `json:"description,omitempty"`
  	// Name is always serialized, even when empty.
  	Name string `json:"name"`
  	// Schema holds the schema document in whatever shape it was decoded.
  	Schema any `json:"schema,omitempty"`
  	// Strict is kept as raw JSON so its original encoding is preserved.
  	Strict json.RawMessage `json:"strict,omitempty"`
  }
  20. // GeneralOpenAIRequest represents a general request structure for OpenAI-compatible APIs.
  21. // 参数增加规范:无引用的参数必须使用json.RawMessage类型,并添加omitempty标签
  22. type GeneralOpenAIRequest struct {
  23. Model string `json:"model,omitempty"`
  24. Messages []Message `json:"messages,omitempty"`
  25. Prompt any `json:"prompt,omitempty"`
  26. Prefix any `json:"prefix,omitempty"`
  27. Suffix any `json:"suffix,omitempty"`
  28. Stream bool `json:"stream,omitempty"`
  29. StreamOptions *StreamOptions `json:"stream_options,omitempty"`
  30. MaxTokens uint `json:"max_tokens,omitempty"`
  31. MaxCompletionTokens uint `json:"max_completion_tokens,omitempty"`
  32. ReasoningEffort string `json:"reasoning_effort,omitempty"`
  33. Verbosity json.RawMessage `json:"verbosity,omitempty"` // gpt-5
  34. Temperature *float64 `json:"temperature,omitempty"`
  35. TopP float64 `json:"top_p,omitempty"`
  36. TopK int `json:"top_k,omitempty"`
  37. Stop any `json:"stop,omitempty"`
  38. N int `json:"n,omitempty"`
  39. Input any `json:"input,omitempty"`
  40. Instruction string `json:"instruction,omitempty"`
  41. Size string `json:"size,omitempty"`
  42. Functions json.RawMessage `json:"functions,omitempty"`
  43. FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
  44. PresencePenalty float64 `json:"presence_penalty,omitempty"`
  45. ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
  46. EncodingFormat json.RawMessage `json:"encoding_format,omitempty"`
  47. Seed float64 `json:"seed,omitempty"`
  48. ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
  49. Tools []ToolCallRequest `json:"tools,omitempty"`
  50. ToolChoice any `json:"tool_choice,omitempty"`
  51. User string `json:"user,omitempty"`
  52. LogProbs bool `json:"logprobs,omitempty"`
  53. TopLogProbs int `json:"top_logprobs,omitempty"`
  54. Dimensions int `json:"dimensions,omitempty"`
  55. Modalities json.RawMessage `json:"modalities,omitempty"`
  56. Audio json.RawMessage `json:"audio,omitempty"`
  57. // 安全标识符,用于帮助 OpenAI 检测可能违反使用政策的应用程序用户
  58. // 注意:此字段会向 OpenAI 发送用户标识信息,默认过滤以保护用户隐私
  59. SafetyIdentifier string `json:"safety_identifier,omitempty"`
  60. // Whether or not to store the output of this chat completion request for use in our model distillation or evals products.
  61. // 是否存储此次请求数据供 OpenAI 用于评估和优化产品
  62. // 注意:默认过滤此字段以保护用户隐私,但过滤后可能导致 Codex 无法正常使用
  63. Store json.RawMessage `json:"store,omitempty"`
  64. // Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field
  65. PromptCacheKey string `json:"prompt_cache_key,omitempty"`
  66. PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  67. LogitBias json.RawMessage `json:"logit_bias,omitempty"`
  68. Metadata json.RawMessage `json:"metadata,omitempty"`
  69. Prediction json.RawMessage `json:"prediction,omitempty"`
  70. // gemini
  71. ExtraBody json.RawMessage `json:"extra_body,omitempty"`
  72. //xai
  73. SearchParameters json.RawMessage `json:"search_parameters,omitempty"`
  74. // claude
  75. WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
  76. // OpenRouter Params
  77. Usage json.RawMessage `json:"usage,omitempty"`
  78. Reasoning json.RawMessage `json:"reasoning,omitempty"`
  79. // Ali Qwen Params
  80. VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
  81. EnableThinking json.RawMessage `json:"enable_thinking,omitempty"`
  82. ChatTemplateKwargs json.RawMessage `json:"chat_template_kwargs,omitempty"`
  83. EnableSearch json.RawMessage `json:"enable_search,omitempty"`
  84. // ollama Params
  85. Think json.RawMessage `json:"think,omitempty"`
  86. // baidu v2
  87. WebSearch json.RawMessage `json:"web_search,omitempty"`
  88. // doubao,zhipu_v4
  89. THINKING json.RawMessage `json:"thinking,omitempty"`
  90. // pplx Params
  91. SearchDomainFilter json.RawMessage `json:"search_domain_filter,omitempty"`
  92. SearchRecencyFilter string `json:"search_recency_filter,omitempty"`
  93. ReturnImages bool `json:"return_images,omitempty"`
  94. ReturnRelatedQuestions bool `json:"return_related_questions,omitempty"`
  95. SearchMode string `json:"search_mode,omitempty"`
  96. }
  97. // createFileSource 根据数据内容创建正确类型的 FileSource
  98. func createFileSource(data string) *types.FileSource {
  99. if strings.HasPrefix(data, "http://") || strings.HasPrefix(data, "https://") {
  100. return types.NewURLFileSource(data)
  101. }
  102. return types.NewBase64FileSource(data, "")
  103. }
  104. func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
  105. var tokenCountMeta types.TokenCountMeta
  106. var texts = make([]string, 0)
  107. var fileMeta = make([]*types.FileMeta, 0)
  108. if r.Prompt != nil {
  109. switch v := r.Prompt.(type) {
  110. case string:
  111. texts = append(texts, v)
  112. case []any:
  113. for _, item := range v {
  114. if str, ok := item.(string); ok {
  115. texts = append(texts, str)
  116. }
  117. }
  118. default:
  119. texts = append(texts, fmt.Sprintf("%v", r.Prompt))
  120. }
  121. }
  122. if r.Input != nil {
  123. inputs := r.ParseInput()
  124. texts = append(texts, inputs...)
  125. }
  126. if r.MaxCompletionTokens > r.MaxTokens {
  127. tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
  128. } else {
  129. tokenCountMeta.MaxTokens = int(r.MaxTokens)
  130. }
  131. for _, message := range r.Messages {
  132. tokenCountMeta.MessagesCount++
  133. texts = append(texts, message.Role)
  134. if message.Content != nil {
  135. if message.Name != nil {
  136. tokenCountMeta.NameCount++
  137. texts = append(texts, *message.Name)
  138. }
  139. arrayContent := message.ParseContent()
  140. for _, m := range arrayContent {
  141. if m.Type == ContentTypeImageURL {
  142. imageUrl := m.GetImageMedia()
  143. if imageUrl != nil && imageUrl.Url != "" {
  144. source := createFileSource(imageUrl.Url)
  145. fileMeta = append(fileMeta, &types.FileMeta{
  146. FileType: types.FileTypeImage,
  147. Source: source,
  148. Detail: imageUrl.Detail,
  149. })
  150. }
  151. } else if m.Type == ContentTypeInputAudio {
  152. inputAudio := m.GetInputAudio()
  153. if inputAudio != nil && inputAudio.Data != "" {
  154. source := createFileSource(inputAudio.Data)
  155. fileMeta = append(fileMeta, &types.FileMeta{
  156. FileType: types.FileTypeAudio,
  157. Source: source,
  158. })
  159. }
  160. } else if m.Type == ContentTypeFile {
  161. file := m.GetFile()
  162. if file != nil && file.FileData != "" {
  163. source := createFileSource(file.FileData)
  164. fileMeta = append(fileMeta, &types.FileMeta{
  165. FileType: types.FileTypeFile,
  166. Source: source,
  167. })
  168. }
  169. } else if m.Type == ContentTypeVideoUrl {
  170. videoUrl := m.GetVideoUrl()
  171. if videoUrl != nil && videoUrl.Url != "" {
  172. source := createFileSource(videoUrl.Url)
  173. fileMeta = append(fileMeta, &types.FileMeta{
  174. FileType: types.FileTypeVideo,
  175. Source: source,
  176. })
  177. }
  178. } else {
  179. texts = append(texts, m.Text)
  180. }
  181. }
  182. }
  183. }
  184. if r.Tools != nil {
  185. openaiTools := r.Tools
  186. for _, tool := range openaiTools {
  187. tokenCountMeta.ToolsCount++
  188. texts = append(texts, tool.Function.Name)
  189. if tool.Function.Description != "" {
  190. texts = append(texts, tool.Function.Description)
  191. }
  192. if tool.Function.Parameters != nil {
  193. texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
  194. }
  195. }
  196. //toolTokens := CountTokenInput(countStr, request.Model)
  197. //tkm += 8
  198. //tkm += toolTokens
  199. }
  200. tokenCountMeta.CombineText = strings.Join(texts, "\n")
  201. tokenCountMeta.Files = fileMeta
  202. return &tokenCountMeta
  203. }
  204. func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
  205. return r.Stream
  206. }
  207. func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
  208. if modelName != "" {
  209. r.Model = modelName
  210. }
  211. }
  212. func (r *GeneralOpenAIRequest) ToMap() map[string]any {
  213. result := make(map[string]any)
  214. data, _ := common.Marshal(r)
  215. _ = common.Unmarshal(data, &result)
  216. return result
  217. }
  218. func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
  219. if strings.HasPrefix(r.Model, "o") {
  220. if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
  221. return "developer"
  222. }
  223. } else if strings.HasPrefix(r.Model, "gpt-5") {
  224. return "developer"
  225. }
  226. return "system"
  227. }
  228. const CustomType = "custom"
  229. type ToolCallRequest struct {
  230. ID string `json:"id,omitempty"`
  231. Type string `json:"type"`
  232. Function FunctionRequest `json:"function,omitempty"`
  233. Custom json.RawMessage `json:"custom,omitempty"`
  234. }
  // FunctionRequest describes a callable function: its name and description,
  // its parameter schema (any JSON shape) and, for calls, the argument string.
  type FunctionRequest struct {
  	Description string `json:"description,omitempty"`
  	// Name is always serialized, even when empty.
  	Name       string `json:"name"`
  	Parameters any    `json:"parameters,omitempty"`
  	Arguments  string `json:"arguments,omitempty"`
  }
  // StreamOptions mirrors the `stream_options` request parameter.
  type StreamOptions struct {
  	// IncludeUsage is omitted from JSON when false.
  	IncludeUsage bool `json:"include_usage,omitempty"`
  }
  244. func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
  245. if r.MaxCompletionTokens != 0 {
  246. return r.MaxCompletionTokens
  247. }
  248. return r.MaxTokens
  249. }
  250. func (r *GeneralOpenAIRequest) ParseInput() []string {
  251. if r.Input == nil {
  252. return nil
  253. }
  254. var input []string
  255. switch r.Input.(type) {
  256. case string:
  257. input = []string{r.Input.(string)}
  258. case []any:
  259. input = make([]string, 0, len(r.Input.([]any)))
  260. for _, item := range r.Input.([]any) {
  261. if str, ok := item.(string); ok {
  262. input = append(input, str)
  263. }
  264. }
  265. }
  266. return input
  267. }
  268. type Message struct {
  269. Role string `json:"role"`
  270. Content any `json:"content"`
  271. Name *string `json:"name,omitempty"`
  272. Prefix *bool `json:"prefix,omitempty"`
  273. ReasoningContent string `json:"reasoning_content,omitempty"`
  274. Reasoning string `json:"reasoning,omitempty"`
  275. ToolCalls json.RawMessage `json:"tool_calls,omitempty"`
  276. ToolCallId string `json:"tool_call_id,omitempty"`
  277. parsedContent []MediaContent
  278. //parsedStringContent *string
  279. }
  // MediaContent is one part of a multi-part message. Type selects which of
  // the payload fields is meaningful; the payload fields are untyped because
  // they may hold either a typed pointer or a generic decoded map (see the
  // Get* accessors).
  type MediaContent struct {
  	Type       string `json:"type"`
  	Text       string `json:"text,omitempty"`
  	ImageUrl   any    `json:"image_url,omitempty"`
  	InputAudio any    `json:"input_audio,omitempty"`
  	File       any    `json:"file,omitempty"`
  	VideoUrl   any    `json:"video_url,omitempty"`

  	// OpenRouter params
  	CacheControl json.RawMessage `json:"cache_control,omitempty"`
  }
  290. func (m *MediaContent) GetImageMedia() *MessageImageUrl {
  291. if m.ImageUrl != nil {
  292. if _, ok := m.ImageUrl.(*MessageImageUrl); ok {
  293. return m.ImageUrl.(*MessageImageUrl)
  294. }
  295. if itemMap, ok := m.ImageUrl.(map[string]any); ok {
  296. out := &MessageImageUrl{
  297. Url: common.Interface2String(itemMap["url"]),
  298. Detail: common.Interface2String(itemMap["detail"]),
  299. MimeType: common.Interface2String(itemMap["mime_type"]),
  300. }
  301. return out
  302. }
  303. }
  304. return nil
  305. }
  306. func (m *MediaContent) GetInputAudio() *MessageInputAudio {
  307. if m.InputAudio != nil {
  308. if _, ok := m.InputAudio.(*MessageInputAudio); ok {
  309. return m.InputAudio.(*MessageInputAudio)
  310. }
  311. if itemMap, ok := m.InputAudio.(map[string]any); ok {
  312. out := &MessageInputAudio{
  313. Data: common.Interface2String(itemMap["data"]),
  314. Format: common.Interface2String(itemMap["format"]),
  315. }
  316. return out
  317. }
  318. }
  319. return nil
  320. }
  321. func (m *MediaContent) GetFile() *MessageFile {
  322. if m.File != nil {
  323. if _, ok := m.File.(*MessageFile); ok {
  324. return m.File.(*MessageFile)
  325. }
  326. if itemMap, ok := m.File.(map[string]any); ok {
  327. out := &MessageFile{
  328. FileName: common.Interface2String(itemMap["file_name"]),
  329. FileData: common.Interface2String(itemMap["file_data"]),
  330. FileId: common.Interface2String(itemMap["file_id"]),
  331. }
  332. return out
  333. }
  334. }
  335. return nil
  336. }
  337. func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
  338. if m.VideoUrl != nil {
  339. if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
  340. return m.VideoUrl.(*MessageVideoUrl)
  341. }
  342. if itemMap, ok := m.VideoUrl.(map[string]any); ok {
  343. out := &MessageVideoUrl{
  344. Url: common.Interface2String(itemMap["url"]),
  345. }
  346. return out
  347. }
  348. }
  349. return nil
  350. }
  // MessageImageUrl is the typed payload of an image_url content part.
  type MessageImageUrl struct {
  	Url    string `json:"url"`
  	Detail string `json:"detail"`
  	// MimeType has no JSON tag, so encoding/json uses the key "MimeType".
  	// NOTE(review): GetImageMedia reads "mime_type" from generic maps — confirm
  	// whether the missing tag is intentional before changing it.
  	MimeType string
  }
  356. func (m *MessageImageUrl) IsRemoteImage() bool {
  357. return strings.HasPrefix(m.Url, "http")
  358. }
  // MessageInputAudio is the typed payload of an input_audio content part.
  type MessageInputAudio struct {
  	Data   string `json:"data"` // base64
  	Format string `json:"format"`
  }
  // MessageFile is the typed payload of a file content part. Every field is
  // omitted from JSON when empty.
  type MessageFile struct {
  	FileName string `json:"filename,omitempty"`
  	FileData string `json:"file_data,omitempty"`
  	FileId   string `json:"file_id,omitempty"`
  }
  // MessageVideoUrl is the typed payload of a video_url content part.
  type MessageVideoUrl struct {
  	Url string `json:"url"`
  }
  // Values of the "type" discriminator on a message content part.
  const (
  	ContentTypeText       = "text"
  	ContentTypeImageURL   = "image_url"
  	ContentTypeInputAudio = "input_audio"
  	ContentTypeFile       = "file"
  	ContentTypeVideoUrl   = "video_url" // Alibaba Bailian video recognition
  	//ContentTypeAudioUrl = "audio_url"
  )
  379. func (m *Message) GetPrefix() bool {
  380. if m.Prefix == nil {
  381. return false
  382. }
  383. return *m.Prefix
  384. }
  385. func (m *Message) SetPrefix(prefix bool) {
  386. m.Prefix = &prefix
  387. }
  388. func (m *Message) ParseToolCalls() []ToolCallRequest {
  389. if m.ToolCalls == nil {
  390. return nil
  391. }
  392. var toolCalls []ToolCallRequest
  393. if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
  394. return toolCalls
  395. }
  396. return toolCalls
  397. }
  398. func (m *Message) SetToolCalls(toolCalls any) {
  399. toolCallsJson, _ := json.Marshal(toolCalls)
  400. m.ToolCalls = toolCallsJson
  401. }
  402. func (m *Message) StringContent() string {
  403. switch m.Content.(type) {
  404. case string:
  405. return m.Content.(string)
  406. case []any:
  407. var contentStr string
  408. for _, contentItem := range m.Content.([]any) {
  409. contentMap, ok := contentItem.(map[string]any)
  410. if !ok {
  411. continue
  412. }
  413. if contentMap["type"] == ContentTypeText {
  414. if subStr, ok := contentMap["text"].(string); ok {
  415. contentStr += subStr
  416. }
  417. }
  418. }
  419. return contentStr
  420. }
  421. return ""
  422. }
  423. func (m *Message) SetNullContent() {
  424. m.Content = nil
  425. m.parsedContent = nil
  426. }
  427. func (m *Message) SetStringContent(content string) {
  428. m.Content = content
  429. m.parsedContent = nil
  430. }
  431. func (m *Message) SetMediaContent(content []MediaContent) {
  432. m.Content = content
  433. m.parsedContent = content
  434. }
  435. func (m *Message) IsStringContent() bool {
  436. _, ok := m.Content.(string)
  437. if ok {
  438. return true
  439. }
  440. return false
  441. }
  442. func (m *Message) ParseContent() []MediaContent {
  443. if m.Content == nil {
  444. return nil
  445. }
  446. if len(m.parsedContent) > 0 {
  447. return m.parsedContent
  448. }
  449. var contentList []MediaContent
  450. // 先尝试解析为字符串
  451. content, ok := m.Content.(string)
  452. if ok {
  453. contentList = []MediaContent{{
  454. Type: ContentTypeText,
  455. Text: content,
  456. }}
  457. m.parsedContent = contentList
  458. return contentList
  459. }
  460. // 尝试解析为数组
  461. //var arrayContent []map[string]interface{}
  462. arrayContent, ok := m.Content.([]any)
  463. if !ok {
  464. return contentList
  465. }
  466. for _, contentItemAny := range arrayContent {
  467. mediaItem, ok := contentItemAny.(MediaContent)
  468. if ok {
  469. contentList = append(contentList, mediaItem)
  470. continue
  471. }
  472. contentItem, ok := contentItemAny.(map[string]any)
  473. if !ok {
  474. continue
  475. }
  476. contentType, ok := contentItem["type"].(string)
  477. if !ok {
  478. continue
  479. }
  480. switch contentType {
  481. case ContentTypeText:
  482. if text, ok := contentItem["text"].(string); ok {
  483. contentList = append(contentList, MediaContent{
  484. Type: ContentTypeText,
  485. Text: text,
  486. })
  487. }
  488. case ContentTypeImageURL:
  489. imageUrl := contentItem["image_url"]
  490. temp := &MessageImageUrl{
  491. Detail: "high",
  492. }
  493. switch v := imageUrl.(type) {
  494. case string:
  495. temp.Url = v
  496. case map[string]interface{}:
  497. url, ok1 := v["url"].(string)
  498. detail, ok2 := v["detail"].(string)
  499. if ok2 {
  500. temp.Detail = detail
  501. }
  502. if ok1 {
  503. temp.Url = url
  504. }
  505. }
  506. contentList = append(contentList, MediaContent{
  507. Type: ContentTypeImageURL,
  508. ImageUrl: temp,
  509. })
  510. case ContentTypeInputAudio:
  511. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  512. data, ok1 := audioData["data"].(string)
  513. format, ok2 := audioData["format"].(string)
  514. if ok1 && ok2 {
  515. temp := &MessageInputAudio{
  516. Data: data,
  517. Format: format,
  518. }
  519. contentList = append(contentList, MediaContent{
  520. Type: ContentTypeInputAudio,
  521. InputAudio: temp,
  522. })
  523. }
  524. }
  525. case ContentTypeFile:
  526. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  527. fileId, ok3 := fileData["file_id"].(string)
  528. if ok3 {
  529. contentList = append(contentList, MediaContent{
  530. Type: ContentTypeFile,
  531. File: &MessageFile{
  532. FileId: fileId,
  533. },
  534. })
  535. } else {
  536. fileName, ok1 := fileData["filename"].(string)
  537. fileDataStr, ok2 := fileData["file_data"].(string)
  538. if ok1 && ok2 {
  539. contentList = append(contentList, MediaContent{
  540. Type: ContentTypeFile,
  541. File: &MessageFile{
  542. FileName: fileName,
  543. FileData: fileDataStr,
  544. },
  545. })
  546. }
  547. }
  548. }
  549. case ContentTypeVideoUrl:
  550. if videoUrl, ok := contentItem["video_url"].(string); ok {
  551. contentList = append(contentList, MediaContent{
  552. Type: ContentTypeVideoUrl,
  553. VideoUrl: &MessageVideoUrl{
  554. Url: videoUrl,
  555. },
  556. })
  557. }
  558. }
  559. }
  560. if len(contentList) > 0 {
  561. m.parsedContent = contentList
  562. }
  563. return contentList
  564. }
  565. // old code
  566. /*func (m *Message) StringContent() string {
  567. if m.parsedStringContent != nil {
  568. return *m.parsedStringContent
  569. }
  570. var stringContent string
  571. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  572. m.parsedStringContent = &stringContent
  573. return stringContent
  574. }
  575. contentStr := new(strings.Builder)
  576. arrayContent := m.ParseContent()
  577. for _, content := range arrayContent {
  578. if content.Type == ContentTypeText {
  579. contentStr.WriteString(content.Text)
  580. }
  581. }
  582. stringContent = contentStr.String()
  583. m.parsedStringContent = &stringContent
  584. return stringContent
  585. }
  586. func (m *Message) SetNullContent() {
  587. m.Content = nil
  588. m.parsedStringContent = nil
  589. m.parsedContent = nil
  590. }
  591. func (m *Message) SetStringContent(content string) {
  592. jsonContent, _ := json.Marshal(content)
  593. m.Content = jsonContent
  594. m.parsedStringContent = &content
  595. m.parsedContent = nil
  596. }
  597. func (m *Message) SetMediaContent(content []MediaContent) {
  598. jsonContent, _ := json.Marshal(content)
  599. m.Content = jsonContent
  600. m.parsedContent = nil
  601. m.parsedStringContent = nil
  602. }
  603. func (m *Message) IsStringContent() bool {
  604. if m.parsedStringContent != nil {
  605. return true
  606. }
  607. var stringContent string
  608. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  609. m.parsedStringContent = &stringContent
  610. return true
  611. }
  612. return false
  613. }
  614. func (m *Message) ParseContent() []MediaContent {
  615. if m.parsedContent != nil {
  616. return m.parsedContent
  617. }
  618. var contentList []MediaContent
  619. // 先尝试解析为字符串
  620. var stringContent string
  621. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  622. contentList = []MediaContent{{
  623. Type: ContentTypeText,
  624. Text: stringContent,
  625. }}
  626. m.parsedContent = contentList
  627. return contentList
  628. }
  629. // 尝试解析为数组
  630. var arrayContent []map[string]interface{}
  631. if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
  632. for _, contentItem := range arrayContent {
  633. contentType, ok := contentItem["type"].(string)
  634. if !ok {
  635. continue
  636. }
  637. switch contentType {
  638. case ContentTypeText:
  639. if text, ok := contentItem["text"].(string); ok {
  640. contentList = append(contentList, MediaContent{
  641. Type: ContentTypeText,
  642. Text: text,
  643. })
  644. }
  645. case ContentTypeImageURL:
  646. imageUrl := contentItem["image_url"]
  647. temp := &MessageImageUrl{
  648. Detail: "high",
  649. }
  650. switch v := imageUrl.(type) {
  651. case string:
  652. temp.Url = v
  653. case map[string]interface{}:
  654. url, ok1 := v["url"].(string)
  655. detail, ok2 := v["detail"].(string)
  656. if ok2 {
  657. temp.Detail = detail
  658. }
  659. if ok1 {
  660. temp.Url = url
  661. }
  662. }
  663. contentList = append(contentList, MediaContent{
  664. Type: ContentTypeImageURL,
  665. ImageUrl: temp,
  666. })
  667. case ContentTypeInputAudio:
  668. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  669. data, ok1 := audioData["data"].(string)
  670. format, ok2 := audioData["format"].(string)
  671. if ok1 && ok2 {
  672. temp := &MessageInputAudio{
  673. Data: data,
  674. Format: format,
  675. }
  676. contentList = append(contentList, MediaContent{
  677. Type: ContentTypeInputAudio,
  678. InputAudio: temp,
  679. })
  680. }
  681. }
  682. case ContentTypeFile:
  683. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  684. fileId, ok3 := fileData["file_id"].(string)
  685. if ok3 {
  686. contentList = append(contentList, MediaContent{
  687. Type: ContentTypeFile,
  688. File: &MessageFile{
  689. FileId: fileId,
  690. },
  691. })
  692. } else {
  693. fileName, ok1 := fileData["filename"].(string)
  694. fileDataStr, ok2 := fileData["file_data"].(string)
  695. if ok1 && ok2 {
  696. contentList = append(contentList, MediaContent{
  697. Type: ContentTypeFile,
  698. File: &MessageFile{
  699. FileName: fileName,
  700. FileData: fileDataStr,
  701. },
  702. })
  703. }
  704. }
  705. }
  706. case ContentTypeVideoUrl:
  707. if videoUrl, ok := contentItem["video_url"].(string); ok {
  708. contentList = append(contentList, MediaContent{
  709. Type: ContentTypeVideoUrl,
  710. VideoUrl: &MessageVideoUrl{
  711. Url: videoUrl,
  712. },
  713. })
  714. }
  715. }
  716. }
  717. }
  718. if len(contentList) > 0 {
  719. m.parsedContent = contentList
  720. }
  721. return contentList
  722. }*/
  // WebSearchOptions mirrors the `web_search_options` request parameter.
  type WebSearchOptions struct {
  	SearchContextSize string `json:"search_context_size,omitempty"`
  	// UserLocation is forwarded as raw JSON without being interpreted.
  	UserLocation json.RawMessage `json:"user_location,omitempty"`
  }
  727. // https://platform.openai.com/docs/api-reference/responses/create
  728. type OpenAIResponsesRequest struct {
  729. Model string `json:"model"`
  730. Input json.RawMessage `json:"input,omitempty"`
  731. Include json.RawMessage `json:"include,omitempty"`
  732. Instructions json.RawMessage `json:"instructions,omitempty"`
  733. MaxOutputTokens uint `json:"max_output_tokens,omitempty"`
  734. Metadata json.RawMessage `json:"metadata,omitempty"`
  735. ParallelToolCalls json.RawMessage `json:"parallel_tool_calls,omitempty"`
  736. PreviousResponseID string `json:"previous_response_id,omitempty"`
  737. Reasoning *Reasoning `json:"reasoning,omitempty"`
  738. // 服务层级字段,用于指定 API 服务等级。允许透传可能导致实际计费高于预期,默认应过滤
  739. ServiceTier string `json:"service_tier,omitempty"`
  740. Store json.RawMessage `json:"store,omitempty"`
  741. PromptCacheKey json.RawMessage `json:"prompt_cache_key,omitempty"`
  742. PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  743. Stream bool `json:"stream,omitempty"`
  744. Temperature *float64 `json:"temperature,omitempty"`
  745. Text json.RawMessage `json:"text,omitempty"`
  746. ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
  747. Tools json.RawMessage `json:"tools,omitempty"` // 需要处理的参数很少,MCP 参数太多不确定,所以用 map
  748. TopP *float64 `json:"top_p,omitempty"`
  749. Truncation string `json:"truncation,omitempty"`
  750. User string `json:"user,omitempty"`
  751. MaxToolCalls uint `json:"max_tool_calls,omitempty"`
  752. Prompt json.RawMessage `json:"prompt,omitempty"`
  753. // qwen
  754. EnableThinking json.RawMessage `json:"enable_thinking,omitempty"`
  755. // perplexity
  756. Preset json.RawMessage `json:"preset,omitempty"`
  757. }
  758. func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
  759. var fileMeta = make([]*types.FileMeta, 0)
  760. var texts = make([]string, 0)
  761. if r.Input != nil {
  762. inputs := r.ParseInput()
  763. for _, input := range inputs {
  764. if input.Type == "input_image" {
  765. if input.ImageUrl != "" {
  766. fileMeta = append(fileMeta, &types.FileMeta{
  767. FileType: types.FileTypeImage,
  768. Source: createFileSource(input.ImageUrl),
  769. Detail: input.Detail,
  770. })
  771. }
  772. } else if input.Type == "input_file" {
  773. if input.FileUrl != "" {
  774. fileMeta = append(fileMeta, &types.FileMeta{
  775. FileType: types.FileTypeFile,
  776. Source: createFileSource(input.FileUrl),
  777. })
  778. }
  779. } else {
  780. texts = append(texts, input.Text)
  781. }
  782. }
  783. }
  784. if len(r.Instructions) > 0 {
  785. texts = append(texts, string(r.Instructions))
  786. }
  787. if len(r.Metadata) > 0 {
  788. texts = append(texts, string(r.Metadata))
  789. }
  790. if len(r.Text) > 0 {
  791. texts = append(texts, string(r.Text))
  792. }
  793. if len(r.ToolChoice) > 0 {
  794. texts = append(texts, string(r.ToolChoice))
  795. }
  796. if len(r.Prompt) > 0 {
  797. texts = append(texts, string(r.Prompt))
  798. }
  799. if len(r.Tools) > 0 {
  800. texts = append(texts, string(r.Tools))
  801. }
  802. return &types.TokenCountMeta{
  803. CombineText: strings.Join(texts, "\n"),
  804. Files: fileMeta,
  805. MaxTokens: int(r.MaxOutputTokens),
  806. }
  807. }
  808. func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
  809. return r.Stream
  810. }
  811. func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
  812. if modelName != "" {
  813. r.Model = modelName
  814. }
  815. }
  816. func (r *OpenAIResponsesRequest) GetToolsMap() []map[string]any {
  817. var toolsMap []map[string]any
  818. if len(r.Tools) > 0 {
  819. _ = common.Unmarshal(r.Tools, &toolsMap)
  820. }
  821. return toolsMap
  822. }
  // Reasoning mirrors the `reasoning` parameter of a Responses API request.
  // Both fields are omitted from JSON when empty.
  type Reasoning struct {
  	Effort  string `json:"effort,omitempty"`
  	Summary string `json:"summary,omitempty"`
  }
  // Input is one element of the Responses API `input` array. Content stays
  // raw because it may be a string or a list of parts (see ParseInput).
  type Input struct {
  	Type    string          `json:"type,omitempty"`
  	Role    string          `json:"role,omitempty"`
  	Content json.RawMessage `json:"content,omitempty"`
  }
  // MediaInput is the normalized form of one Responses API input part, as
  // produced by OpenAIResponsesRequest.ParseInput.
  type MediaInput struct {
  	Type     string `json:"type"`
  	Text     string `json:"text,omitempty"`
  	FileUrl  string `json:"file_url,omitempty"`
  	ImageUrl string `json:"image_url,omitempty"`
  	Detail   string `json:"detail,omitempty"` // only meaningful for input_image
  }
  839. // ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
  840. // Reference implementation mirrors Message.ParseContent:
  841. // - input can be a string, treated as an input_text item
  842. // - input can be an array of objects with a `type` field
  843. // supported types: input_text, input_image, input_file
  844. func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
  845. if r.Input == nil {
  846. return nil
  847. }
  848. var mediaInputs []MediaInput
  849. // Try string first
  850. // if str, ok := common.GetJsonType(r.Input); ok {
  851. // inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  852. // return inputs
  853. // }
  854. if common.GetJsonType(r.Input) == "string" {
  855. var str string
  856. _ = common.Unmarshal(r.Input, &str)
  857. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  858. return mediaInputs
  859. }
  860. // Try array of parts
  861. if common.GetJsonType(r.Input) == "array" {
  862. var inputs []Input
  863. _ = common.Unmarshal(r.Input, &inputs)
  864. for _, input := range inputs {
  865. if common.GetJsonType(input.Content) == "string" {
  866. var str string
  867. _ = common.Unmarshal(input.Content, &str)
  868. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  869. }
  870. if common.GetJsonType(input.Content) == "array" {
  871. var array []any
  872. _ = common.Unmarshal(input.Content, &array)
  873. for _, itemAny := range array {
  874. // Already parsed MediaContent
  875. if media, ok := itemAny.(MediaInput); ok {
  876. mediaInputs = append(mediaInputs, media)
  877. continue
  878. }
  879. // Generic map
  880. item, ok := itemAny.(map[string]any)
  881. if !ok {
  882. continue
  883. }
  884. typeVal, ok := item["type"].(string)
  885. if !ok {
  886. continue
  887. }
  888. switch typeVal {
  889. case "input_text":
  890. text, _ := item["text"].(string)
  891. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: text})
  892. case "input_image":
  893. // image_url may be string or object with url field
  894. var imageUrl string
  895. switch v := item["image_url"].(type) {
  896. case string:
  897. imageUrl = v
  898. case map[string]any:
  899. if url, ok := v["url"].(string); ok {
  900. imageUrl = url
  901. }
  902. }
  903. mediaInputs = append(mediaInputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
  904. case "input_file":
  905. // file_url may be string or object with url field
  906. var fileUrl string
  907. switch v := item["file_url"].(type) {
  908. case string:
  909. fileUrl = v
  910. case map[string]any:
  911. if url, ok := v["url"].(string); ok {
  912. fileUrl = url
  913. }
  914. }
  915. mediaInputs = append(mediaInputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
  916. }
  917. }
  918. }
  919. }
  920. }
  921. return mediaInputs
  922. }