package dto

import (
	"encoding/json"
	"fmt"
	"strings"

	"github.com/gin-gonic/gin"

	"one-api/common"
	"one-api/types"
)
type ResponseFormat struct {
	Type       string          `json:"type,omitempty"`
	JsonSchema json.RawMessage `json:"json_schema,omitempty"`
}

type FormatJsonSchema struct {
	Description string          `json:"description,omitempty"`
	Name        string          `json:"name"`
	Schema      any             `json:"schema,omitempty"`
	Strict      json.RawMessage `json:"strict,omitempty"`
}
type GeneralOpenAIRequest struct {
	Model               string          `json:"model,omitempty"`
	Messages            []Message       `json:"messages,omitempty"`
	Prompt              any             `json:"prompt,omitempty"`
	Prefix              any             `json:"prefix,omitempty"`
	Suffix              any             `json:"suffix,omitempty"`
	Stream              bool            `json:"stream,omitempty"`
	StreamOptions       *StreamOptions  `json:"stream_options,omitempty"`
	MaxTokens           uint            `json:"max_tokens,omitempty"`
	MaxCompletionTokens uint            `json:"max_completion_tokens,omitempty"`
	ReasoningEffort     string          `json:"reasoning_effort,omitempty"`
	Verbosity           json.RawMessage `json:"verbosity,omitempty"` // gpt-5
	Temperature         *float64        `json:"temperature,omitempty"`
	TopP                float64         `json:"top_p,omitempty"`
	TopK                int             `json:"top_k,omitempty"`
	Stop                any             `json:"stop,omitempty"`
	N                   int             `json:"n,omitempty"`
	Input               any             `json:"input,omitempty"`
	Instruction         string          `json:"instruction,omitempty"`
	Size                string          `json:"size,omitempty"`
	Functions           json.RawMessage `json:"functions,omitempty"`
	FrequencyPenalty    float64         `json:"frequency_penalty,omitempty"`
	PresencePenalty     float64         `json:"presence_penalty,omitempty"`
	ResponseFormat      *ResponseFormat `json:"response_format,omitempty"`
	EncodingFormat      json.RawMessage `json:"encoding_format,omitempty"`
	Seed                float64         `json:"seed,omitempty"`
	ParallelTooCalls    *bool           `json:"parallel_tool_calls,omitempty"`
	Tools               []ToolCallRequest `json:"tools,omitempty"`
	ToolChoice          any             `json:"tool_choice,omitempty"`
	User                string          `json:"user,omitempty"`
	LogProbs            bool            `json:"logprobs,omitempty"`
	TopLogProbs         int             `json:"top_logprobs,omitempty"`
	Dimensions          int             `json:"dimensions,omitempty"`
	Modalities          json.RawMessage `json:"modalities,omitempty"`
	Audio               json.RawMessage `json:"audio,omitempty"`
	// Safety identifier that helps OpenAI detect application users who may violate usage policies.
	// Note: this field sends user-identifying information to OpenAI; it is filtered by default to protect user privacy.
	SafetyIdentifier string `json:"safety_identifier,omitempty"`
	// Whether or not to store the output of this chat completion request for use in our model distillation or evals products.
	// Note: filtered by default to protect user privacy, but filtering it may prevent Codex from working properly.
	Store json.RawMessage `json:"store,omitempty"`
	// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field.
	PromptCacheKey string          `json:"prompt_cache_key,omitempty"`
	LogitBias      json.RawMessage `json:"logit_bias,omitempty"`
	Metadata       json.RawMessage `json:"metadata,omitempty"`
	Prediction     json.RawMessage `json:"prediction,omitempty"`
	// gemini
	ExtraBody json.RawMessage `json:"extra_body,omitempty"`
	// xai
	SearchParameters json.RawMessage `json:"search_parameters,omitempty"`
	// claude
	WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
	// OpenRouter params
	Usage     json.RawMessage `json:"usage,omitempty"`
	Reasoning json.RawMessage `json:"reasoning,omitempty"`
	// Ali Qwen params
	VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
	EnableThinking         any             `json:"enable_thinking,omitempty"`
	// ollama params
	Think json.RawMessage `json:"think,omitempty"`
	// baidu v2
	WebSearch json.RawMessage `json:"web_search,omitempty"`
	// doubao, zhipu_v4
	THINKING json.RawMessage `json:"thinking,omitempty"`
}
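
// Illustrative sketch (not part of the original file): how a minimal, assumed
// chat-completions JSON body decodes into GeneralOpenAIRequest. The model name and
// message values below are examples only.
func exampleUnmarshalGeneralRequest() {
	body := []byte(`{"model":"gpt-4o","messages":[{"role":"user","content":"hello"}],"stream":true}`)
	var req GeneralOpenAIRequest
	if err := json.Unmarshal(body, &req); err != nil {
		fmt.Println("unmarshal error:", err)
		return
	}
	fmt.Println(req.Model, len(req.Messages), req.Stream) // gpt-4o 1 true
}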
// GetTokenCountMeta flattens the prompt, input, messages and tool definitions into
// plain text and media file metadata used for token counting.
func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
	var tokenCountMeta types.TokenCountMeta
	var texts = make([]string, 0)
	var fileMeta = make([]*types.FileMeta, 0)
	if r.Prompt != nil {
		switch v := r.Prompt.(type) {
		case string:
			texts = append(texts, v)
		case []any:
			for _, item := range v {
				if str, ok := item.(string); ok {
					texts = append(texts, str)
				}
			}
		default:
			texts = append(texts, fmt.Sprintf("%v", r.Prompt))
		}
	}
	if r.Input != nil {
		inputs := r.ParseInput()
		texts = append(texts, inputs...)
	}
	if r.MaxCompletionTokens > r.MaxTokens {
		tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
	} else {
		tokenCountMeta.MaxTokens = int(r.MaxTokens)
	}
	for _, message := range r.Messages {
		tokenCountMeta.MessagesCount++
		texts = append(texts, message.Role)
		if message.Content != nil {
			if message.Name != nil {
				tokenCountMeta.NameCount++
				texts = append(texts, *message.Name)
			}
			arrayContent := message.ParseContent()
			for _, m := range arrayContent {
				if m.Type == ContentTypeImageURL {
					imageUrl := m.GetImageMedia()
					if imageUrl != nil && imageUrl.Url != "" {
						meta := &types.FileMeta{
							FileType: types.FileTypeImage,
						}
						meta.OriginData = imageUrl.Url
						meta.Detail = imageUrl.Detail
						fileMeta = append(fileMeta, meta)
					}
				} else if m.Type == ContentTypeInputAudio {
					inputAudio := m.GetInputAudio()
					if inputAudio != nil {
						meta := &types.FileMeta{
							FileType: types.FileTypeAudio,
						}
						meta.OriginData = inputAudio.Data
						fileMeta = append(fileMeta, meta)
					}
				} else if m.Type == ContentTypeFile {
					file := m.GetFile()
					if file != nil {
						meta := &types.FileMeta{
							FileType: types.FileTypeFile,
						}
						meta.OriginData = file.FileData
						fileMeta = append(fileMeta, meta)
					}
				} else if m.Type == ContentTypeVideoUrl {
					videoUrl := m.GetVideoUrl()
					if videoUrl != nil && videoUrl.Url != "" {
						meta := &types.FileMeta{
							FileType: types.FileTypeVideo,
						}
						meta.OriginData = videoUrl.Url
						fileMeta = append(fileMeta, meta)
					}
				} else {
					texts = append(texts, m.Text)
				}
			}
		}
	}
	if r.Tools != nil {
		for _, tool := range r.Tools {
			tokenCountMeta.ToolsCount++
			texts = append(texts, tool.Function.Name)
			if tool.Function.Description != "" {
				texts = append(texts, tool.Function.Description)
			}
			if tool.Function.Parameters != nil {
				texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
			}
		}
	}
	tokenCountMeta.CombineText = strings.Join(texts, "\n")
	tokenCountMeta.Files = fileMeta
	return &tokenCountMeta
}
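
// Illustrative sketch (not part of the original file): GetTokenCountMeta joins message
// roles and text content with newlines into CombineText and counts messages. The model
// and message values below are assumptions for demonstration.
func exampleTokenCountMeta() {
	req := &GeneralOpenAIRequest{
		Model:    "gpt-4o",
		Messages: []Message{{Role: "user", Content: "hello"}},
	}
	meta := req.GetTokenCountMeta()
	fmt.Println(meta.MessagesCount)      // 1
	fmt.Printf("%q\n", meta.CombineText) // "user\nhello"
}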
// IsStream reports whether the client requested a streaming response.
func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
	return r.Stream
}

// SetModelName overrides the request model name when a non-empty name is given.
func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
	if modelName != "" {
		r.Model = modelName
	}
}

// ToMap converts the request into a generic map by round-tripping it through JSON.
func (r *GeneralOpenAIRequest) ToMap() map[string]any {
	result := make(map[string]any)
	data, _ := common.Marshal(r)
	_ = common.Unmarshal(data, &result)
	return result
}
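
// Illustrative sketch (not part of the original file): because ToMap round-trips the
// request through JSON, omitempty zero-value fields are dropped from the resulting map.
// The field values below are assumptions for demonstration.
func exampleToMap() {
	r := &GeneralOpenAIRequest{Model: "gpt-4o", Stream: true}
	fmt.Println(r.ToMap()) // map[model:gpt-4o stream:true]
}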
// GetSystemRoleName returns the role name to use for system-style messages:
// "developer" for gpt-5 and o-series models (except o1-mini and o1-preview), otherwise "system".
func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
	if strings.HasPrefix(r.Model, "o") {
		if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
			return "developer"
		}
	} else if strings.HasPrefix(r.Model, "gpt-5") {
		return "developer"
	}
	return "system"
}
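
// Illustrative sketch (not part of the original file): how GetSystemRoleName maps model
// names to the system-role keyword. The model names below are assumptions for demonstration.
func exampleSystemRoleNames() {
	for _, model := range []string{"o1-mini", "o3", "gpt-5", "gpt-4o"} {
		r := &GeneralOpenAIRequest{Model: model}
		fmt.Printf("%s -> %s\n", model, r.GetSystemRoleName())
	}
	// o1-mini -> system, o3 -> developer, gpt-5 -> developer, gpt-4o -> system
}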
type ToolCallRequest struct {
	ID       string          `json:"id,omitempty"`
	Type     string          `json:"type"`
	Function FunctionRequest `json:"function"`
}

type FunctionRequest struct {
	Description string `json:"description,omitempty"`
	Name        string `json:"name"`
	Parameters  any    `json:"parameters,omitempty"`
	Arguments   string `json:"arguments,omitempty"`
}

type StreamOptions struct {
	IncludeUsage bool `json:"include_usage,omitempty"`
}
// GetMaxTokens returns max_completion_tokens when set, falling back to max_tokens.
func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
	if r.MaxCompletionTokens != 0 {
		return r.MaxCompletionTokens
	}
	return r.MaxTokens
}
// ParseInput normalizes the `input` field into a slice of strings.
// It accepts either a single string or an array; non-string array items are skipped.
func (r *GeneralOpenAIRequest) ParseInput() []string {
	if r.Input == nil {
		return nil
	}
	var input []string
	switch v := r.Input.(type) {
	case string:
		input = []string{v}
	case []any:
		input = make([]string, 0, len(v))
		for _, item := range v {
			if str, ok := item.(string); ok {
				input = append(input, str)
			}
		}
	}
	return input
}
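
// Illustrative sketch (not part of the original file): ParseInput accepts a plain string
// or an array and skips non-string array items. The input values below are assumptions.
func exampleParseInput() {
	r := &GeneralOpenAIRequest{Input: []any{"hello", 42, "world"}}
	fmt.Println(r.ParseInput()) // [hello world]
}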
type Message struct {
	Role             string          `json:"role"`
	Content          any             `json:"content"`
	Name             *string         `json:"name,omitempty"`
	Prefix           *bool           `json:"prefix,omitempty"`
	ReasoningContent string          `json:"reasoning_content,omitempty"`
	Reasoning        string          `json:"reasoning,omitempty"`
	ToolCalls        json.RawMessage `json:"tool_calls,omitempty"`
	ToolCallId       string          `json:"tool_call_id,omitempty"`
	parsedContent    []MediaContent
}

type MediaContent struct {
	Type       string `json:"type"`
	Text       string `json:"text,omitempty"`
	ImageUrl   any    `json:"image_url,omitempty"`
	InputAudio any    `json:"input_audio,omitempty"`
	File       any    `json:"file,omitempty"`
	VideoUrl   any    `json:"video_url,omitempty"`
	// OpenRouter params
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
// GetImageMedia returns the image part as *MessageImageUrl, converting from a raw map when needed.
func (m *MediaContent) GetImageMedia() *MessageImageUrl {
	if m.ImageUrl != nil {
		if v, ok := m.ImageUrl.(*MessageImageUrl); ok {
			return v
		}
		if itemMap, ok := m.ImageUrl.(map[string]any); ok {
			return &MessageImageUrl{
				Url:      common.Interface2String(itemMap["url"]),
				Detail:   common.Interface2String(itemMap["detail"]),
				MimeType: common.Interface2String(itemMap["mime_type"]),
			}
		}
	}
	return nil
}

// GetInputAudio returns the audio part as *MessageInputAudio, converting from a raw map when needed.
func (m *MediaContent) GetInputAudio() *MessageInputAudio {
	if m.InputAudio != nil {
		if v, ok := m.InputAudio.(*MessageInputAudio); ok {
			return v
		}
		if itemMap, ok := m.InputAudio.(map[string]any); ok {
			return &MessageInputAudio{
				Data:   common.Interface2String(itemMap["data"]),
				Format: common.Interface2String(itemMap["format"]),
			}
		}
	}
	return nil
}

// GetFile returns the file part as *MessageFile, converting from a raw map when needed.
func (m *MediaContent) GetFile() *MessageFile {
	if m.File != nil {
		if v, ok := m.File.(*MessageFile); ok {
			return v
		}
		if itemMap, ok := m.File.(map[string]any); ok {
			return &MessageFile{
				FileName: common.Interface2String(itemMap["file_name"]),
				FileData: common.Interface2String(itemMap["file_data"]),
				FileId:   common.Interface2String(itemMap["file_id"]),
			}
		}
	}
	return nil
}

// GetVideoUrl returns the video part as *MessageVideoUrl, converting from a raw map when needed.
func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
	if m.VideoUrl != nil {
		if v, ok := m.VideoUrl.(*MessageVideoUrl); ok {
			return v
		}
		if itemMap, ok := m.VideoUrl.(map[string]any); ok {
			return &MessageVideoUrl{
				Url: common.Interface2String(itemMap["url"]),
			}
		}
	}
	return nil
}
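
// Illustrative sketch (not part of the original file): the Get* helpers accept either the
// typed pointer produced by ParseContent or a raw map from generic JSON decoding. The URL
// and detail values below are assumptions for demonstration.
func exampleGetImageMedia() {
	m := &MediaContent{
		Type:     ContentTypeImageURL,
		ImageUrl: map[string]any{"url": "https://example.com/a.png", "detail": "low"},
	}
	if img := m.GetImageMedia(); img != nil {
		fmt.Println(img.Url, img.Detail) // https://example.com/a.png low
	}
}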
type MessageImageUrl struct {
	Url      string `json:"url"`
	Detail   string `json:"detail"`
	MimeType string
}

// IsRemoteImage reports whether the image URL points to a remote resource rather than inline data.
func (m *MessageImageUrl) IsRemoteImage() bool {
	return strings.HasPrefix(m.Url, "http")
}

type MessageInputAudio struct {
	Data   string `json:"data"` // base64
	Format string `json:"format"`
}

type MessageFile struct {
	FileName string `json:"filename,omitempty"`
	FileData string `json:"file_data,omitempty"`
	FileId   string `json:"file_id,omitempty"`
}

type MessageVideoUrl struct {
	Url string `json:"url"`
}

const (
	ContentTypeText       = "text"
	ContentTypeImageURL   = "image_url"
	ContentTypeInputAudio = "input_audio"
	ContentTypeFile       = "file"
	ContentTypeVideoUrl   = "video_url" // Alibaba Bailian video recognition
	//ContentTypeAudioUrl = "audio_url"
)
// GetPrefix returns the prefix flag, defaulting to false when unset.
func (m *Message) GetPrefix() bool {
	if m.Prefix == nil {
		return false
	}
	return *m.Prefix
}

func (m *Message) SetPrefix(prefix bool) {
	m.Prefix = &prefix
}

// ParseToolCalls decodes the raw tool_calls payload; it returns nil when the payload is absent.
func (m *Message) ParseToolCalls() []ToolCallRequest {
	if m.ToolCalls == nil {
		return nil
	}
	var toolCalls []ToolCallRequest
	_ = json.Unmarshal(m.ToolCalls, &toolCalls)
	return toolCalls
}

func (m *Message) SetToolCalls(toolCalls any) {
	toolCallsJson, _ := json.Marshal(toolCalls)
	m.ToolCalls = toolCallsJson
}
// StringContent returns the plain-text content of the message, concatenating
// text parts when the content is an array.
func (m *Message) StringContent() string {
	switch content := m.Content.(type) {
	case string:
		return content
	case []any:
		var contentStr string
		for _, contentItem := range content {
			contentMap, ok := contentItem.(map[string]any)
			if !ok {
				continue
			}
			if contentMap["type"] == ContentTypeText {
				if subStr, ok := contentMap["text"].(string); ok {
					contentStr += subStr
				}
			}
		}
		return contentStr
	}
	return ""
}

func (m *Message) SetNullContent() {
	m.Content = nil
	m.parsedContent = nil
}

func (m *Message) SetStringContent(content string) {
	m.Content = content
	m.parsedContent = nil
}

func (m *Message) SetMediaContent(content []MediaContent) {
	m.Content = content
	m.parsedContent = content
}

// IsStringContent reports whether the content is a plain string.
func (m *Message) IsStringContent() bool {
	_, ok := m.Content.(string)
	return ok
}
// ParseContent normalizes the message content (a string or an array of content parts)
// into a slice of MediaContent. The result is cached on the message.
func (m *Message) ParseContent() []MediaContent {
	if m.Content == nil {
		return nil
	}
	if len(m.parsedContent) > 0 {
		return m.parsedContent
	}
	var contentList []MediaContent
	// Try to parse the content as a plain string first.
	content, ok := m.Content.(string)
	if ok {
		contentList = []MediaContent{{
			Type: ContentTypeText,
			Text: content,
		}}
		m.parsedContent = contentList
		return contentList
	}
	// Otherwise try to parse it as an array of content parts.
	arrayContent, ok := m.Content.([]any)
	if !ok {
		return contentList
	}
	for _, contentItemAny := range arrayContent {
		mediaItem, ok := contentItemAny.(MediaContent)
		if ok {
			contentList = append(contentList, mediaItem)
			continue
		}
		contentItem, ok := contentItemAny.(map[string]any)
		if !ok {
			continue
		}
		contentType, ok := contentItem["type"].(string)
		if !ok {
			continue
		}
		switch contentType {
		case ContentTypeText:
			if text, ok := contentItem["text"].(string); ok {
				contentList = append(contentList, MediaContent{
					Type: ContentTypeText,
					Text: text,
				})
			}
		case ContentTypeImageURL:
			imageUrl := contentItem["image_url"]
			temp := &MessageImageUrl{
				Detail: "high",
			}
			switch v := imageUrl.(type) {
			case string:
				temp.Url = v
			case map[string]any:
				if detail, ok := v["detail"].(string); ok {
					temp.Detail = detail
				}
				if url, ok := v["url"].(string); ok {
					temp.Url = url
				}
			}
			contentList = append(contentList, MediaContent{
				Type:     ContentTypeImageURL,
				ImageUrl: temp,
			})
		case ContentTypeInputAudio:
			if audioData, ok := contentItem["input_audio"].(map[string]any); ok {
				data, ok1 := audioData["data"].(string)
				format, ok2 := audioData["format"].(string)
				if ok1 && ok2 {
					contentList = append(contentList, MediaContent{
						Type:       ContentTypeInputAudio,
						InputAudio: &MessageInputAudio{Data: data, Format: format},
					})
				}
			}
		case ContentTypeFile:
			if fileData, ok := contentItem["file"].(map[string]any); ok {
				if fileId, ok := fileData["file_id"].(string); ok {
					contentList = append(contentList, MediaContent{
						Type: ContentTypeFile,
						File: &MessageFile{FileId: fileId},
					})
				} else {
					fileName, ok1 := fileData["filename"].(string)
					fileDataStr, ok2 := fileData["file_data"].(string)
					if ok1 && ok2 {
						contentList = append(contentList, MediaContent{
							Type: ContentTypeFile,
							File: &MessageFile{FileName: fileName, FileData: fileDataStr},
						})
					}
				}
			}
		case ContentTypeVideoUrl:
			if videoUrl, ok := contentItem["video_url"].(string); ok {
				contentList = append(contentList, MediaContent{
					Type:     ContentTypeVideoUrl,
					VideoUrl: &MessageVideoUrl{Url: videoUrl},
				})
			}
		}
	}
	if len(contentList) > 0 {
		m.parsedContent = contentList
	}
	return contentList
}
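
// Illustrative sketch (not part of the original file): ParseContent normalizes a mixed
// content array (maps decoded from JSON) into []MediaContent. The text and URL values
// below are assumptions for demonstration.
func exampleParseContent() {
	m := &Message{
		Role: "user",
		Content: []any{
			map[string]any{"type": "text", "text": "describe this image"},
			map[string]any{"type": "image_url", "image_url": map[string]any{"url": "https://example.com/cat.png"}},
		},
	}
	for _, part := range m.ParseContent() {
		fmt.Println(part.Type) // text, then image_url
	}
}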
type WebSearchOptions struct {
	SearchContextSize string          `json:"search_context_size,omitempty"`
	UserLocation      json.RawMessage `json:"user_location,omitempty"`
}

// https://platform.openai.com/docs/api-reference/responses/create
type OpenAIResponsesRequest struct {
	Model              string          `json:"model"`
	Input              json.RawMessage `json:"input,omitempty"`
	Include            json.RawMessage `json:"include,omitempty"`
	Instructions       json.RawMessage `json:"instructions,omitempty"`
	MaxOutputTokens    uint            `json:"max_output_tokens,omitempty"`
	Metadata           json.RawMessage `json:"metadata,omitempty"`
	ParallelToolCalls  json.RawMessage `json:"parallel_tool_calls,omitempty"`
	PreviousResponseID string          `json:"previous_response_id,omitempty"`
	Reasoning          *Reasoning      `json:"reasoning,omitempty"`
	// Service tier, selects the API service level. Passing it through may make actual billing higher than expected, so it should be filtered by default.
	ServiceTier    string          `json:"service_tier,omitempty"`
	Store          json.RawMessage `json:"store,omitempty"`
	PromptCacheKey json.RawMessage `json:"prompt_cache_key,omitempty"`
	Stream         bool            `json:"stream,omitempty"`
	Temperature    float64         `json:"temperature,omitempty"`
	Text           json.RawMessage `json:"text,omitempty"`
	ToolChoice     json.RawMessage `json:"tool_choice,omitempty"`
	Tools          json.RawMessage `json:"tools,omitempty"` // only a few tool parameters need handling and MCP tools carry too many uncertain fields, so the raw JSON is kept and decoded into maps on demand (see GetToolsMap)
	TopP           float64         `json:"top_p,omitempty"`
	Truncation     string          `json:"truncation,omitempty"`
	User           string          `json:"user,omitempty"`
	MaxToolCalls   uint            `json:"max_tool_calls,omitempty"`
	Prompt         json.RawMessage `json:"prompt,omitempty"`
}
// GetTokenCountMeta flattens the Responses API request into text and file metadata
// used for token counting.
func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
	var fileMeta = make([]*types.FileMeta, 0)
	var texts = make([]string, 0)
	if r.Input != nil {
		inputs := r.ParseInput()
		for _, input := range inputs {
			if input.Type == "input_image" {
				if input.ImageUrl != "" {
					fileMeta = append(fileMeta, &types.FileMeta{
						FileType:   types.FileTypeImage,
						OriginData: input.ImageUrl,
						Detail:     input.Detail,
					})
				}
			} else if input.Type == "input_file" {
				if input.FileUrl != "" {
					fileMeta = append(fileMeta, &types.FileMeta{
						FileType:   types.FileTypeFile,
						OriginData: input.FileUrl,
					})
				}
			} else {
				texts = append(texts, input.Text)
			}
		}
	}
	if len(r.Instructions) > 0 {
		texts = append(texts, string(r.Instructions))
	}
	if len(r.Metadata) > 0 {
		texts = append(texts, string(r.Metadata))
	}
	if len(r.Text) > 0 {
		texts = append(texts, string(r.Text))
	}
	if len(r.ToolChoice) > 0 {
		texts = append(texts, string(r.ToolChoice))
	}
	if len(r.Prompt) > 0 {
		texts = append(texts, string(r.Prompt))
	}
	if len(r.Tools) > 0 {
		texts = append(texts, string(r.Tools))
	}
	return &types.TokenCountMeta{
		CombineText: strings.Join(texts, "\n"),
		Files:       fileMeta,
		MaxTokens:   int(r.MaxOutputTokens),
	}
}
// IsStream reports whether the client requested a streaming response.
func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
	return r.Stream
}

// SetModelName overrides the request model name when a non-empty name is given.
func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
	if modelName != "" {
		r.Model = modelName
	}
}

// GetToolsMap decodes the raw tools payload into a slice of generic maps.
func (r *OpenAIResponsesRequest) GetToolsMap() []map[string]any {
	var toolsMap []map[string]any
	if len(r.Tools) > 0 {
		_ = common.Unmarshal(r.Tools, &toolsMap)
	}
	return toolsMap
}
type Reasoning struct {
	Effort  string `json:"effort,omitempty"`
	Summary string `json:"summary,omitempty"`
}

type MediaInput struct {
	Type     string `json:"type"`
	Text     string `json:"text,omitempty"`
	FileUrl  string `json:"file_url,omitempty"`
	ImageUrl string `json:"image_url,omitempty"`
	Detail   string `json:"detail,omitempty"` // only meaningful for input_image
}
// ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
// The implementation mirrors Message.ParseContent:
//   - input can be a string, treated as an input_text item
//   - input can be an array of objects with a `type` field
//     (supported types: input_text, input_image, input_file)
func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
	if r.Input == nil {
		return nil
	}
	var inputs []MediaInput
	// Try a plain string first.
	if common.GetJsonType(r.Input) == "string" {
		var str string
		_ = common.Unmarshal(r.Input, &str)
		inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
		return inputs
	}
	// Then try an array of parts.
	if common.GetJsonType(r.Input) == "array" {
		var array []any
		_ = common.Unmarshal(r.Input, &array)
		for _, itemAny := range array {
			// Already parsed MediaInput
			if media, ok := itemAny.(MediaInput); ok {
				inputs = append(inputs, media)
				continue
			}
			// Generic map decoded from JSON
			item, ok := itemAny.(map[string]any)
			if !ok {
				continue
			}
			typeVal, ok := item["type"].(string)
			if !ok {
				continue
			}
			switch typeVal {
			case "input_text":
				text, _ := item["text"].(string)
				inputs = append(inputs, MediaInput{Type: "input_text", Text: text})
			case "input_image":
				// image_url may be a string or an object with a url field
				var imageUrl string
				switch v := item["image_url"].(type) {
				case string:
					imageUrl = v
				case map[string]any:
					if url, ok := v["url"].(string); ok {
						imageUrl = url
					}
				}
				inputs = append(inputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
			case "input_file":
				// file_url may be a string or an object with a url field
				var fileUrl string
				switch v := item["file_url"].(type) {
				case string:
					fileUrl = v
				case map[string]any:
					if url, ok := v["url"].(string); ok {
						fileUrl = url
					}
				}
				inputs = append(inputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
			}
		}
	}
	return inputs
}
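
// Illustrative sketch (not part of the original file): ParseInput for the Responses API
// handles both the string form and the array-of-parts form of `input`. The JSON payload
// below is an assumed example, and the expected output assumes common.GetJsonType and
// common.Unmarshal behave as they are used above.
func exampleResponsesParseInput() {
	r := &OpenAIResponsesRequest{
		Input: json.RawMessage(`[{"type":"input_text","text":"hi"},{"type":"input_image","image_url":"https://example.com/a.png"}]`),
	}
	for _, in := range r.ParseInput() {
		fmt.Println(in.Type) // input_text, then input_image
	}
}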