openai_request.go 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959
  1. package dto
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "one-api/common"
  6. "one-api/types"
  7. "strings"
  8. "github.com/gin-gonic/gin"
  9. )
  10. type ResponseFormat struct {
  11. Type string `json:"type,omitempty"`
  12. JsonSchema json.RawMessage `json:"json_schema,omitempty"`
  13. }
  14. type FormatJsonSchema struct {
  15. Description string `json:"description,omitempty"`
  16. Name string `json:"name"`
  17. Schema any `json:"schema,omitempty"`
  18. Strict json.RawMessage `json:"strict,omitempty"`
  19. }
  20. type GeneralOpenAIRequest struct {
  21. Model string `json:"model,omitempty"`
  22. Messages []Message `json:"messages,omitempty"`
  23. Prompt any `json:"prompt,omitempty"`
  24. Prefix any `json:"prefix,omitempty"`
  25. Suffix any `json:"suffix,omitempty"`
  26. Stream bool `json:"stream,omitempty"`
  27. StreamOptions *StreamOptions `json:"stream_options,omitempty"`
  28. MaxTokens uint `json:"max_tokens,omitempty"`
  29. MaxCompletionTokens uint `json:"max_completion_tokens,omitempty"`
  30. ReasoningEffort string `json:"reasoning_effort,omitempty"`
  31. Verbosity json.RawMessage `json:"verbosity,omitempty"` // gpt-5
  32. Temperature *float64 `json:"temperature,omitempty"`
  33. TopP float64 `json:"top_p,omitempty"`
  34. TopK int `json:"top_k,omitempty"`
  35. Stop any `json:"stop,omitempty"`
  36. N int `json:"n,omitempty"`
  37. Input any `json:"input,omitempty"`
  38. Instruction string `json:"instruction,omitempty"`
  39. Size string `json:"size,omitempty"`
  40. Functions json.RawMessage `json:"functions,omitempty"`
  41. FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
  42. PresencePenalty float64 `json:"presence_penalty,omitempty"`
  43. ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
  44. EncodingFormat json.RawMessage `json:"encoding_format,omitempty"`
  45. Seed float64 `json:"seed,omitempty"`
  46. ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
  47. Tools []ToolCallRequest `json:"tools,omitempty"`
  48. ToolChoice any `json:"tool_choice,omitempty"`
  49. User string `json:"user,omitempty"`
  50. LogProbs bool `json:"logprobs,omitempty"`
  51. TopLogProbs int `json:"top_logprobs,omitempty"`
  52. Dimensions int `json:"dimensions,omitempty"`
  53. Modalities json.RawMessage `json:"modalities,omitempty"`
  54. Audio json.RawMessage `json:"audio,omitempty"`
  55. // gemini
  56. ExtraBody json.RawMessage `json:"extra_body,omitempty"`
  57. //xai
  58. SearchParameters json.RawMessage `json:"search_parameters,omitempty"`
  59. // claude
  60. WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
  61. // OpenRouter Params
  62. Usage json.RawMessage `json:"usage,omitempty"`
  63. Reasoning json.RawMessage `json:"reasoning,omitempty"`
  64. // Ali Qwen Params
  65. VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
  66. EnableThinking any `json:"enable_thinking,omitempty"`
  67. // ollama Params
  68. Think json.RawMessage `json:"think,omitempty"`
  69. // baidu v2
  70. WebSearch json.RawMessage `json:"web_search,omitempty"`
  71. // doubao,zhipu_v4
  72. THINKING json.RawMessage `json:"thinking,omitempty"`
  73. }
  74. func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
  75. var tokenCountMeta types.TokenCountMeta
  76. var texts = make([]string, 0)
  77. var fileMeta = make([]*types.FileMeta, 0)
  78. if r.Prompt != nil {
  79. switch v := r.Prompt.(type) {
  80. case string:
  81. texts = append(texts, v)
  82. case []any:
  83. for _, item := range v {
  84. if str, ok := item.(string); ok {
  85. texts = append(texts, str)
  86. }
  87. }
  88. default:
  89. texts = append(texts, fmt.Sprintf("%v", r.Prompt))
  90. }
  91. }
  92. if r.Input != nil {
  93. inputs := r.ParseInput()
  94. texts = append(texts, inputs...)
  95. }
  96. if r.MaxCompletionTokens > r.MaxTokens {
  97. tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
  98. } else {
  99. tokenCountMeta.MaxTokens = int(r.MaxTokens)
  100. }
  101. for _, message := range r.Messages {
  102. tokenCountMeta.MessagesCount++
  103. texts = append(texts, message.Role)
  104. if message.Content != nil {
  105. if message.Name != nil {
  106. tokenCountMeta.NameCount++
  107. texts = append(texts, *message.Name)
  108. }
  109. arrayContent := message.ParseContent()
  110. for _, m := range arrayContent {
  111. if m.Type == ContentTypeImageURL {
  112. imageUrl := m.GetImageMedia()
  113. if imageUrl != nil {
  114. if imageUrl.Url != "" {
  115. meta := &types.FileMeta{
  116. FileType: types.FileTypeImage,
  117. }
  118. meta.OriginData = imageUrl.Url
  119. meta.Detail = imageUrl.Detail
  120. fileMeta = append(fileMeta, meta)
  121. }
  122. }
  123. } else if m.Type == ContentTypeInputAudio {
  124. inputAudio := m.GetInputAudio()
  125. if inputAudio != nil {
  126. meta := &types.FileMeta{
  127. FileType: types.FileTypeAudio,
  128. }
  129. meta.OriginData = inputAudio.Data
  130. fileMeta = append(fileMeta, meta)
  131. }
  132. } else if m.Type == ContentTypeFile {
  133. file := m.GetFile()
  134. if file != nil {
  135. meta := &types.FileMeta{
  136. FileType: types.FileTypeFile,
  137. }
  138. meta.OriginData = file.FileData
  139. fileMeta = append(fileMeta, meta)
  140. }
  141. } else if m.Type == ContentTypeVideoUrl {
  142. videoUrl := m.GetVideoUrl()
  143. if videoUrl != nil && videoUrl.Url != "" {
  144. meta := &types.FileMeta{
  145. FileType: types.FileTypeVideo,
  146. }
  147. meta.OriginData = videoUrl.Url
  148. fileMeta = append(fileMeta, meta)
  149. }
  150. } else {
  151. texts = append(texts, m.Text)
  152. }
  153. }
  154. }
  155. }
  156. if r.Tools != nil {
  157. openaiTools := r.Tools
  158. for _, tool := range openaiTools {
  159. tokenCountMeta.ToolsCount++
  160. texts = append(texts, tool.Function.Name)
  161. if tool.Function.Description != "" {
  162. texts = append(texts, tool.Function.Description)
  163. }
  164. if tool.Function.Parameters != nil {
  165. texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
  166. }
  167. }
  168. //toolTokens := CountTokenInput(countStr, request.Model)
  169. //tkm += 8
  170. //tkm += toolTokens
  171. }
  172. tokenCountMeta.CombineText = strings.Join(texts, "\n")
  173. tokenCountMeta.Files = fileMeta
  174. return &tokenCountMeta
  175. }
  176. func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
  177. return r.Stream
  178. }
  179. func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
  180. if modelName != "" {
  181. r.Model = modelName
  182. }
  183. }
  184. func (r *GeneralOpenAIRequest) ToMap() map[string]any {
  185. result := make(map[string]any)
  186. data, _ := common.Marshal(r)
  187. _ = common.Unmarshal(data, &result)
  188. return result
  189. }
  190. func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
  191. if strings.HasPrefix(r.Model, "o") {
  192. if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
  193. return "developer"
  194. }
  195. } else if strings.HasPrefix(r.Model, "gpt-5") {
  196. return "developer"
  197. }
  198. return "system"
  199. }
  200. type ToolCallRequest struct {
  201. ID string `json:"id,omitempty"`
  202. Type string `json:"type"`
  203. Function FunctionRequest `json:"function"`
  204. }
  205. type FunctionRequest struct {
  206. Description string `json:"description,omitempty"`
  207. Name string `json:"name"`
  208. Parameters any `json:"parameters,omitempty"`
  209. Arguments string `json:"arguments,omitempty"`
  210. }
  211. type StreamOptions struct {
  212. IncludeUsage bool `json:"include_usage,omitempty"`
  213. }
  214. func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
  215. if r.MaxCompletionTokens != 0 {
  216. return r.MaxCompletionTokens
  217. }
  218. return r.MaxTokens
  219. }
  220. func (r *GeneralOpenAIRequest) ParseInput() []string {
  221. if r.Input == nil {
  222. return nil
  223. }
  224. var input []string
  225. switch r.Input.(type) {
  226. case string:
  227. input = []string{r.Input.(string)}
  228. case []any:
  229. input = make([]string, 0, len(r.Input.([]any)))
  230. for _, item := range r.Input.([]any) {
  231. if str, ok := item.(string); ok {
  232. input = append(input, str)
  233. }
  234. }
  235. }
  236. return input
  237. }
  238. type Message struct {
  239. Role string `json:"role"`
  240. Content any `json:"content"`
  241. Name *string `json:"name,omitempty"`
  242. Prefix *bool `json:"prefix,omitempty"`
  243. ReasoningContent string `json:"reasoning_content,omitempty"`
  244. Reasoning string `json:"reasoning,omitempty"`
  245. ToolCalls json.RawMessage `json:"tool_calls,omitempty"`
  246. ToolCallId string `json:"tool_call_id,omitempty"`
  247. parsedContent []MediaContent
  248. //parsedStringContent *string
  249. }
  250. type MediaContent struct {
  251. Type string `json:"type"`
  252. Text string `json:"text,omitempty"`
  253. ImageUrl any `json:"image_url,omitempty"`
  254. InputAudio any `json:"input_audio,omitempty"`
  255. File any `json:"file,omitempty"`
  256. VideoUrl any `json:"video_url,omitempty"`
  257. // OpenRouter Params
  258. CacheControl json.RawMessage `json:"cache_control,omitempty"`
  259. }
  260. func (m *MediaContent) GetImageMedia() *MessageImageUrl {
  261. if m.ImageUrl != nil {
  262. if _, ok := m.ImageUrl.(*MessageImageUrl); ok {
  263. return m.ImageUrl.(*MessageImageUrl)
  264. }
  265. if itemMap, ok := m.ImageUrl.(map[string]any); ok {
  266. out := &MessageImageUrl{
  267. Url: common.Interface2String(itemMap["url"]),
  268. Detail: common.Interface2String(itemMap["detail"]),
  269. MimeType: common.Interface2String(itemMap["mime_type"]),
  270. }
  271. return out
  272. }
  273. }
  274. return nil
  275. }
  276. func (m *MediaContent) GetInputAudio() *MessageInputAudio {
  277. if m.InputAudio != nil {
  278. if _, ok := m.InputAudio.(*MessageInputAudio); ok {
  279. return m.InputAudio.(*MessageInputAudio)
  280. }
  281. if itemMap, ok := m.InputAudio.(map[string]any); ok {
  282. out := &MessageInputAudio{
  283. Data: common.Interface2String(itemMap["data"]),
  284. Format: common.Interface2String(itemMap["format"]),
  285. }
  286. return out
  287. }
  288. }
  289. return nil
  290. }
  291. func (m *MediaContent) GetFile() *MessageFile {
  292. if m.File != nil {
  293. if _, ok := m.File.(*MessageFile); ok {
  294. return m.File.(*MessageFile)
  295. }
  296. if itemMap, ok := m.File.(map[string]any); ok {
  297. out := &MessageFile{
  298. FileName: common.Interface2String(itemMap["file_name"]),
  299. FileData: common.Interface2String(itemMap["file_data"]),
  300. FileId: common.Interface2String(itemMap["file_id"]),
  301. }
  302. return out
  303. }
  304. }
  305. return nil
  306. }
  307. func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
  308. if m.VideoUrl != nil {
  309. if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
  310. return m.VideoUrl.(*MessageVideoUrl)
  311. }
  312. if itemMap, ok := m.VideoUrl.(map[string]any); ok {
  313. out := &MessageVideoUrl{
  314. Url: common.Interface2String(itemMap["url"]),
  315. }
  316. return out
  317. }
  318. }
  319. return nil
  320. }
  321. type MessageImageUrl struct {
  322. Url string `json:"url"`
  323. Detail string `json:"detail"`
  324. MimeType string
  325. }
  326. func (m *MessageImageUrl) IsRemoteImage() bool {
  327. return strings.HasPrefix(m.Url, "http")
  328. }
  329. type MessageInputAudio struct {
  330. Data string `json:"data"` //base64
  331. Format string `json:"format"`
  332. }
  333. type MessageFile struct {
  334. FileName string `json:"filename,omitempty"`
  335. FileData string `json:"file_data,omitempty"`
  336. FileId string `json:"file_id,omitempty"`
  337. }
  338. type MessageVideoUrl struct {
  339. Url string `json:"url"`
  340. }
  341. const (
  342. ContentTypeText = "text"
  343. ContentTypeImageURL = "image_url"
  344. ContentTypeInputAudio = "input_audio"
  345. ContentTypeFile = "file"
  346. ContentTypeVideoUrl = "video_url" // 阿里百炼视频识别
  347. //ContentTypeAudioUrl = "audio_url"
  348. )
  349. func (m *Message) GetPrefix() bool {
  350. if m.Prefix == nil {
  351. return false
  352. }
  353. return *m.Prefix
  354. }
  355. func (m *Message) SetPrefix(prefix bool) {
  356. m.Prefix = &prefix
  357. }
  358. func (m *Message) ParseToolCalls() []ToolCallRequest {
  359. if m.ToolCalls == nil {
  360. return nil
  361. }
  362. var toolCalls []ToolCallRequest
  363. if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
  364. return toolCalls
  365. }
  366. return toolCalls
  367. }
  368. func (m *Message) SetToolCalls(toolCalls any) {
  369. toolCallsJson, _ := json.Marshal(toolCalls)
  370. m.ToolCalls = toolCallsJson
  371. }
  372. func (m *Message) StringContent() string {
  373. switch m.Content.(type) {
  374. case string:
  375. return m.Content.(string)
  376. case []any:
  377. var contentStr string
  378. for _, contentItem := range m.Content.([]any) {
  379. contentMap, ok := contentItem.(map[string]any)
  380. if !ok {
  381. continue
  382. }
  383. if contentMap["type"] == ContentTypeText {
  384. if subStr, ok := contentMap["text"].(string); ok {
  385. contentStr += subStr
  386. }
  387. }
  388. }
  389. return contentStr
  390. }
  391. return ""
  392. }
  393. func (m *Message) SetNullContent() {
  394. m.Content = nil
  395. m.parsedContent = nil
  396. }
  397. func (m *Message) SetStringContent(content string) {
  398. m.Content = content
  399. m.parsedContent = nil
  400. }
  401. func (m *Message) SetMediaContent(content []MediaContent) {
  402. m.Content = content
  403. m.parsedContent = content
  404. }
  405. func (m *Message) IsStringContent() bool {
  406. _, ok := m.Content.(string)
  407. if ok {
  408. return true
  409. }
  410. return false
  411. }
  412. func (m *Message) ParseContent() []MediaContent {
  413. if m.Content == nil {
  414. return nil
  415. }
  416. if len(m.parsedContent) > 0 {
  417. return m.parsedContent
  418. }
  419. var contentList []MediaContent
  420. // 先尝试解析为字符串
  421. content, ok := m.Content.(string)
  422. if ok {
  423. contentList = []MediaContent{{
  424. Type: ContentTypeText,
  425. Text: content,
  426. }}
  427. m.parsedContent = contentList
  428. return contentList
  429. }
  430. // 尝试解析为数组
  431. //var arrayContent []map[string]interface{}
  432. arrayContent, ok := m.Content.([]any)
  433. if !ok {
  434. return contentList
  435. }
  436. for _, contentItemAny := range arrayContent {
  437. mediaItem, ok := contentItemAny.(MediaContent)
  438. if ok {
  439. contentList = append(contentList, mediaItem)
  440. continue
  441. }
  442. contentItem, ok := contentItemAny.(map[string]any)
  443. if !ok {
  444. continue
  445. }
  446. contentType, ok := contentItem["type"].(string)
  447. if !ok {
  448. continue
  449. }
  450. switch contentType {
  451. case ContentTypeText:
  452. if text, ok := contentItem["text"].(string); ok {
  453. contentList = append(contentList, MediaContent{
  454. Type: ContentTypeText,
  455. Text: text,
  456. })
  457. }
  458. case ContentTypeImageURL:
  459. imageUrl := contentItem["image_url"]
  460. temp := &MessageImageUrl{
  461. Detail: "high",
  462. }
  463. switch v := imageUrl.(type) {
  464. case string:
  465. temp.Url = v
  466. case map[string]interface{}:
  467. url, ok1 := v["url"].(string)
  468. detail, ok2 := v["detail"].(string)
  469. if ok2 {
  470. temp.Detail = detail
  471. }
  472. if ok1 {
  473. temp.Url = url
  474. }
  475. }
  476. contentList = append(contentList, MediaContent{
  477. Type: ContentTypeImageURL,
  478. ImageUrl: temp,
  479. })
  480. case ContentTypeInputAudio:
  481. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  482. data, ok1 := audioData["data"].(string)
  483. format, ok2 := audioData["format"].(string)
  484. if ok1 && ok2 {
  485. temp := &MessageInputAudio{
  486. Data: data,
  487. Format: format,
  488. }
  489. contentList = append(contentList, MediaContent{
  490. Type: ContentTypeInputAudio,
  491. InputAudio: temp,
  492. })
  493. }
  494. }
  495. case ContentTypeFile:
  496. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  497. fileId, ok3 := fileData["file_id"].(string)
  498. if ok3 {
  499. contentList = append(contentList, MediaContent{
  500. Type: ContentTypeFile,
  501. File: &MessageFile{
  502. FileId: fileId,
  503. },
  504. })
  505. } else {
  506. fileName, ok1 := fileData["filename"].(string)
  507. fileDataStr, ok2 := fileData["file_data"].(string)
  508. if ok1 && ok2 {
  509. contentList = append(contentList, MediaContent{
  510. Type: ContentTypeFile,
  511. File: &MessageFile{
  512. FileName: fileName,
  513. FileData: fileDataStr,
  514. },
  515. })
  516. }
  517. }
  518. }
  519. case ContentTypeVideoUrl:
  520. if videoUrl, ok := contentItem["video_url"].(string); ok {
  521. contentList = append(contentList, MediaContent{
  522. Type: ContentTypeVideoUrl,
  523. VideoUrl: &MessageVideoUrl{
  524. Url: videoUrl,
  525. },
  526. })
  527. }
  528. }
  529. }
  530. if len(contentList) > 0 {
  531. m.parsedContent = contentList
  532. }
  533. return contentList
  534. }
  535. // old code
  536. /*func (m *Message) StringContent() string {
  537. if m.parsedStringContent != nil {
  538. return *m.parsedStringContent
  539. }
  540. var stringContent string
  541. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  542. m.parsedStringContent = &stringContent
  543. return stringContent
  544. }
  545. contentStr := new(strings.Builder)
  546. arrayContent := m.ParseContent()
  547. for _, content := range arrayContent {
  548. if content.Type == ContentTypeText {
  549. contentStr.WriteString(content.Text)
  550. }
  551. }
  552. stringContent = contentStr.String()
  553. m.parsedStringContent = &stringContent
  554. return stringContent
  555. }
  556. func (m *Message) SetNullContent() {
  557. m.Content = nil
  558. m.parsedStringContent = nil
  559. m.parsedContent = nil
  560. }
  561. func (m *Message) SetStringContent(content string) {
  562. jsonContent, _ := json.Marshal(content)
  563. m.Content = jsonContent
  564. m.parsedStringContent = &content
  565. m.parsedContent = nil
  566. }
  567. func (m *Message) SetMediaContent(content []MediaContent) {
  568. jsonContent, _ := json.Marshal(content)
  569. m.Content = jsonContent
  570. m.parsedContent = nil
  571. m.parsedStringContent = nil
  572. }
  573. func (m *Message) IsStringContent() bool {
  574. if m.parsedStringContent != nil {
  575. return true
  576. }
  577. var stringContent string
  578. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  579. m.parsedStringContent = &stringContent
  580. return true
  581. }
  582. return false
  583. }
  584. func (m *Message) ParseContent() []MediaContent {
  585. if m.parsedContent != nil {
  586. return m.parsedContent
  587. }
  588. var contentList []MediaContent
  589. // 先尝试解析为字符串
  590. var stringContent string
  591. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  592. contentList = []MediaContent{{
  593. Type: ContentTypeText,
  594. Text: stringContent,
  595. }}
  596. m.parsedContent = contentList
  597. return contentList
  598. }
  599. // 尝试解析为数组
  600. var arrayContent []map[string]interface{}
  601. if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
  602. for _, contentItem := range arrayContent {
  603. contentType, ok := contentItem["type"].(string)
  604. if !ok {
  605. continue
  606. }
  607. switch contentType {
  608. case ContentTypeText:
  609. if text, ok := contentItem["text"].(string); ok {
  610. contentList = append(contentList, MediaContent{
  611. Type: ContentTypeText,
  612. Text: text,
  613. })
  614. }
  615. case ContentTypeImageURL:
  616. imageUrl := contentItem["image_url"]
  617. temp := &MessageImageUrl{
  618. Detail: "high",
  619. }
  620. switch v := imageUrl.(type) {
  621. case string:
  622. temp.Url = v
  623. case map[string]interface{}:
  624. url, ok1 := v["url"].(string)
  625. detail, ok2 := v["detail"].(string)
  626. if ok2 {
  627. temp.Detail = detail
  628. }
  629. if ok1 {
  630. temp.Url = url
  631. }
  632. }
  633. contentList = append(contentList, MediaContent{
  634. Type: ContentTypeImageURL,
  635. ImageUrl: temp,
  636. })
  637. case ContentTypeInputAudio:
  638. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  639. data, ok1 := audioData["data"].(string)
  640. format, ok2 := audioData["format"].(string)
  641. if ok1 && ok2 {
  642. temp := &MessageInputAudio{
  643. Data: data,
  644. Format: format,
  645. }
  646. contentList = append(contentList, MediaContent{
  647. Type: ContentTypeInputAudio,
  648. InputAudio: temp,
  649. })
  650. }
  651. }
  652. case ContentTypeFile:
  653. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  654. fileId, ok3 := fileData["file_id"].(string)
  655. if ok3 {
  656. contentList = append(contentList, MediaContent{
  657. Type: ContentTypeFile,
  658. File: &MessageFile{
  659. FileId: fileId,
  660. },
  661. })
  662. } else {
  663. fileName, ok1 := fileData["filename"].(string)
  664. fileDataStr, ok2 := fileData["file_data"].(string)
  665. if ok1 && ok2 {
  666. contentList = append(contentList, MediaContent{
  667. Type: ContentTypeFile,
  668. File: &MessageFile{
  669. FileName: fileName,
  670. FileData: fileDataStr,
  671. },
  672. })
  673. }
  674. }
  675. }
  676. case ContentTypeVideoUrl:
  677. if videoUrl, ok := contentItem["video_url"].(string); ok {
  678. contentList = append(contentList, MediaContent{
  679. Type: ContentTypeVideoUrl,
  680. VideoUrl: &MessageVideoUrl{
  681. Url: videoUrl,
  682. },
  683. })
  684. }
  685. }
  686. }
  687. }
  688. if len(contentList) > 0 {
  689. m.parsedContent = contentList
  690. }
  691. return contentList
  692. }*/
  693. type WebSearchOptions struct {
  694. SearchContextSize string `json:"search_context_size,omitempty"`
  695. UserLocation json.RawMessage `json:"user_location,omitempty"`
  696. }
  697. // https://platform.openai.com/docs/api-reference/responses/create
  698. type OpenAIResponsesRequest struct {
  699. Model string `json:"model"`
  700. Input json.RawMessage `json:"input,omitempty"`
  701. Include json.RawMessage `json:"include,omitempty"`
  702. Instructions json.RawMessage `json:"instructions,omitempty"`
  703. MaxOutputTokens uint `json:"max_output_tokens,omitempty"`
  704. Metadata json.RawMessage `json:"metadata,omitempty"`
  705. ParallelToolCalls json.RawMessage `json:"parallel_tool_calls,omitempty"`
  706. PreviousResponseID string `json:"previous_response_id,omitempty"`
  707. Reasoning *Reasoning `json:"reasoning,omitempty"`
  708. ServiceTier string `json:"service_tier,omitempty"`
  709. Store json.RawMessage `json:"store,omitempty"`
  710. PromptCacheKey json.RawMessage `json:"prompt_cache_key,omitempty"`
  711. Stream bool `json:"stream,omitempty"`
  712. Temperature float64 `json:"temperature,omitempty"`
  713. Text json.RawMessage `json:"text,omitempty"`
  714. ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
  715. Tools json.RawMessage `json:"tools,omitempty"` // 需要处理的参数很少,MCP 参数太多不确定,所以用 map
  716. TopP float64 `json:"top_p,omitempty"`
  717. Truncation string `json:"truncation,omitempty"`
  718. User string `json:"user,omitempty"`
  719. MaxToolCalls uint `json:"max_tool_calls,omitempty"`
  720. Prompt json.RawMessage `json:"prompt,omitempty"`
  721. }
  722. func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
  723. var fileMeta = make([]*types.FileMeta, 0)
  724. var texts = make([]string, 0)
  725. if r.Input != nil {
  726. inputs := r.ParseInput()
  727. for _, input := range inputs {
  728. if input.Type == "input_image" {
  729. if input.ImageUrl != "" {
  730. fileMeta = append(fileMeta, &types.FileMeta{
  731. FileType: types.FileTypeImage,
  732. OriginData: input.ImageUrl,
  733. Detail: input.Detail,
  734. })
  735. }
  736. } else if input.Type == "input_file" {
  737. if input.FileUrl != "" {
  738. fileMeta = append(fileMeta, &types.FileMeta{
  739. FileType: types.FileTypeFile,
  740. OriginData: input.FileUrl,
  741. })
  742. }
  743. } else {
  744. texts = append(texts, input.Text)
  745. }
  746. }
  747. }
  748. if len(r.Instructions) > 0 {
  749. texts = append(texts, string(r.Instructions))
  750. }
  751. if len(r.Metadata) > 0 {
  752. texts = append(texts, string(r.Metadata))
  753. }
  754. if len(r.Text) > 0 {
  755. texts = append(texts, string(r.Text))
  756. }
  757. if len(r.ToolChoice) > 0 {
  758. texts = append(texts, string(r.ToolChoice))
  759. }
  760. if len(r.Prompt) > 0 {
  761. texts = append(texts, string(r.Prompt))
  762. }
  763. if len(r.Tools) > 0 {
  764. texts = append(texts, string(r.Tools))
  765. }
  766. return &types.TokenCountMeta{
  767. CombineText: strings.Join(texts, "\n"),
  768. Files: fileMeta,
  769. MaxTokens: int(r.MaxOutputTokens),
  770. }
  771. }
  772. func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
  773. return r.Stream
  774. }
  775. func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
  776. if modelName != "" {
  777. r.Model = modelName
  778. }
  779. }
  780. func (r *OpenAIResponsesRequest) GetToolsMap() []map[string]any {
  781. var toolsMap []map[string]any
  782. if len(r.Tools) > 0 {
  783. _ = common.Unmarshal(r.Tools, &toolsMap)
  784. }
  785. return toolsMap
  786. }
  787. type Reasoning struct {
  788. Effort string `json:"effort,omitempty"`
  789. Summary string `json:"summary,omitempty"`
  790. }
  791. type MediaInput struct {
  792. Type string `json:"type"`
  793. Text string `json:"text,omitempty"`
  794. FileUrl string `json:"file_url,omitempty"`
  795. ImageUrl string `json:"image_url,omitempty"`
  796. Detail string `json:"detail,omitempty"` // 仅 input_image 有效
  797. }
  798. // ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
  799. // Reference implementation mirrors Message.ParseContent:
  800. // - input can be a string, treated as an input_text item
  801. // - input can be an array of objects with a `type` field
  802. // supported types: input_text, input_image, input_file
  803. func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
  804. if r.Input == nil {
  805. return nil
  806. }
  807. var inputs []MediaInput
  808. // Try string first
  809. // if str, ok := common.GetJsonType(r.Input); ok {
  810. // inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  811. // return inputs
  812. // }
  813. if common.GetJsonType(r.Input) == "string" {
  814. var str string
  815. _ = common.Unmarshal(r.Input, &str)
  816. inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  817. return inputs
  818. }
  819. // Try array of parts
  820. if common.GetJsonType(r.Input) == "array" {
  821. var array []any
  822. _ = common.Unmarshal(r.Input, &array)
  823. for _, itemAny := range array {
  824. // Already parsed MediaInput
  825. if media, ok := itemAny.(MediaInput); ok {
  826. inputs = append(inputs, media)
  827. continue
  828. }
  829. // Generic map
  830. item, ok := itemAny.(map[string]any)
  831. if !ok {
  832. continue
  833. }
  834. typeVal, ok := item["type"].(string)
  835. if !ok {
  836. continue
  837. }
  838. switch typeVal {
  839. case "input_text":
  840. text, _ := item["text"].(string)
  841. inputs = append(inputs, MediaInput{Type: "input_text", Text: text})
  842. case "input_image":
  843. // image_url may be string or object with url field
  844. var imageUrl string
  845. switch v := item["image_url"].(type) {
  846. case string:
  847. imageUrl = v
  848. case map[string]any:
  849. if url, ok := v["url"].(string); ok {
  850. imageUrl = url
  851. }
  852. }
  853. inputs = append(inputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
  854. case "input_file":
  855. // file_url may be string or object with url field
  856. var fileUrl string
  857. switch v := item["file_url"].(type) {
  858. case string:
  859. fileUrl = v
  860. case map[string]any:
  861. if url, ok := v["url"].(string); ok {
  862. fileUrl = url
  863. }
  864. }
  865. inputs = append(inputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
  866. }
  867. }
  868. }
  869. return inputs
  870. }