openai_request.go

package dto

import (
	"encoding/json"
	"fmt"
	"strings"

	"github.com/QuantumNous/new-api/common"
	"github.com/QuantumNous/new-api/types"
	"github.com/gin-gonic/gin"
)
type ResponseFormat struct {
	Type string `json:"type,omitempty"`
	JsonSchema json.RawMessage `json:"json_schema,omitempty"`
}

type FormatJsonSchema struct {
	Description string `json:"description,omitempty"`
	Name string `json:"name"`
	Schema any `json:"schema,omitempty"`
	Strict json.RawMessage `json:"strict,omitempty"`
}
type GeneralOpenAIRequest struct {
	Model string `json:"model,omitempty"`
	Messages []Message `json:"messages,omitempty"`
	Prompt any `json:"prompt,omitempty"`
	Prefix any `json:"prefix,omitempty"`
	Suffix any `json:"suffix,omitempty"`
	Stream bool `json:"stream,omitempty"`
	StreamOptions *StreamOptions `json:"stream_options,omitempty"`
	MaxTokens uint `json:"max_tokens,omitempty"`
	MaxCompletionTokens uint `json:"max_completion_tokens,omitempty"`
	ReasoningEffort string `json:"reasoning_effort,omitempty"`
	Verbosity json.RawMessage `json:"verbosity,omitempty"` // gpt-5
	Temperature *float64 `json:"temperature,omitempty"`
	TopP float64 `json:"top_p,omitempty"`
	TopK int `json:"top_k,omitempty"`
	Stop any `json:"stop,omitempty"`
	N int `json:"n,omitempty"`
	Input any `json:"input,omitempty"`
	Instruction string `json:"instruction,omitempty"`
	Size string `json:"size,omitempty"`
	Functions json.RawMessage `json:"functions,omitempty"`
	FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
	PresencePenalty float64 `json:"presence_penalty,omitempty"`
	ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
	EncodingFormat json.RawMessage `json:"encoding_format,omitempty"`
	Seed float64 `json:"seed,omitempty"`
	ParallelToolCalls *bool `json:"parallel_tool_calls,omitempty"`
	Tools []ToolCallRequest `json:"tools,omitempty"`
	ToolChoice any `json:"tool_choice,omitempty"`
	User string `json:"user,omitempty"`
	LogProbs bool `json:"logprobs,omitempty"`
	TopLogProbs int `json:"top_logprobs,omitempty"`
	Dimensions int `json:"dimensions,omitempty"`
	Modalities json.RawMessage `json:"modalities,omitempty"`
	Audio json.RawMessage `json:"audio,omitempty"`
	// Safety identifier used to help OpenAI detect application users who may violate its usage policies.
	// Note: this field sends user-identifying information to OpenAI, so it is filtered by default to protect user privacy.
	SafetyIdentifier string `json:"safety_identifier,omitempty"`
	// Whether or not to store the output of this chat completion request for use in our model distillation or evals products.
	// Note: filtered by default to protect user privacy, but filtering it may prevent Codex from working properly.
	Store json.RawMessage `json:"store,omitempty"`
	// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field.
	PromptCacheKey string `json:"prompt_cache_key,omitempty"`
	LogitBias json.RawMessage `json:"logit_bias,omitempty"`
	Metadata json.RawMessage `json:"metadata,omitempty"`
	Prediction json.RawMessage `json:"prediction,omitempty"`
	// gemini
	ExtraBody json.RawMessage `json:"extra_body,omitempty"`
	// xai
	SearchParameters json.RawMessage `json:"search_parameters,omitempty"`
	// claude
	WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
	// OpenRouter params
	Usage json.RawMessage `json:"usage,omitempty"`
	Reasoning json.RawMessage `json:"reasoning,omitempty"`
	// Ali Qwen params
	VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
	EnableThinking any `json:"enable_thinking,omitempty"`
	// Ollama params
	Think json.RawMessage `json:"think,omitempty"`
	// Baidu v2
	WebSearch json.RawMessage `json:"web_search,omitempty"`
	// Doubao, Zhipu v4
	THINKING json.RawMessage `json:"thinking,omitempty"`
	// Perplexity (pplx) params
	SearchDomainFilter json.RawMessage `json:"search_domain_filter,omitempty"`
	SearchRecencyFilter string `json:"search_recency_filter,omitempty"`
	ReturnImages bool `json:"return_images,omitempty"`
	ReturnRelatedQuestions bool `json:"return_related_questions,omitempty"`
	SearchMode string `json:"search_mode,omitempty"`
}
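// GetTokenCountMeta walks the prompt, input, messages, and tools of a chat/completions
// request, collecting their text into one combined string along with metadata for any
// image, audio, file, or video attachments, for pre-request token estimation.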
func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
	var tokenCountMeta types.TokenCountMeta
	var texts = make([]string, 0)
	var fileMeta = make([]*types.FileMeta, 0)
	if r.Prompt != nil {
		switch v := r.Prompt.(type) {
		case string:
			texts = append(texts, v)
		case []any:
			for _, item := range v {
				if str, ok := item.(string); ok {
					texts = append(texts, str)
				}
			}
		default:
			texts = append(texts, fmt.Sprintf("%v", r.Prompt))
		}
	}
	if r.Input != nil {
		inputs := r.ParseInput()
		texts = append(texts, inputs...)
	}
	if r.MaxCompletionTokens > r.MaxTokens {
		tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
	} else {
		tokenCountMeta.MaxTokens = int(r.MaxTokens)
	}
	for _, message := range r.Messages {
		tokenCountMeta.MessagesCount++
		texts = append(texts, message.Role)
		if message.Content != nil {
			if message.Name != nil {
				tokenCountMeta.NameCount++
				texts = append(texts, *message.Name)
			}
			arrayContent := message.ParseContent()
			for _, m := range arrayContent {
				if m.Type == ContentTypeImageURL {
					imageUrl := m.GetImageMedia()
					if imageUrl != nil {
						if imageUrl.Url != "" {
							meta := &types.FileMeta{
								FileType: types.FileTypeImage,
							}
							meta.OriginData = imageUrl.Url
							meta.Detail = imageUrl.Detail
							fileMeta = append(fileMeta, meta)
						}
					}
				} else if m.Type == ContentTypeInputAudio {
					inputAudio := m.GetInputAudio()
					if inputAudio != nil {
						meta := &types.FileMeta{
							FileType: types.FileTypeAudio,
						}
						meta.OriginData = inputAudio.Data
						fileMeta = append(fileMeta, meta)
					}
				} else if m.Type == ContentTypeFile {
					file := m.GetFile()
					if file != nil {
						meta := &types.FileMeta{
							FileType: types.FileTypeFile,
						}
						meta.OriginData = file.FileData
						fileMeta = append(fileMeta, meta)
					}
				} else if m.Type == ContentTypeVideoUrl {
					videoUrl := m.GetVideoUrl()
					if videoUrl != nil && videoUrl.Url != "" {
						meta := &types.FileMeta{
							FileType: types.FileTypeVideo,
						}
						meta.OriginData = videoUrl.Url
						fileMeta = append(fileMeta, meta)
					}
				} else {
					texts = append(texts, m.Text)
				}
			}
		}
	}
	if r.Tools != nil {
		openaiTools := r.Tools
		for _, tool := range openaiTools {
			tokenCountMeta.ToolsCount++
			texts = append(texts, tool.Function.Name)
			if tool.Function.Description != "" {
				texts = append(texts, tool.Function.Description)
			}
			if tool.Function.Parameters != nil {
				texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
			}
		}
		//toolTokens := CountTokenInput(countStr, request.Model)
		//tkm += 8
		//tkm += toolTokens
	}
	tokenCountMeta.CombineText = strings.Join(texts, "\n")
	tokenCountMeta.Files = fileMeta
	return &tokenCountMeta
}
func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
	return r.Stream
}

func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
	if modelName != "" {
		r.Model = modelName
	}
}
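// ToMap round-trips the request through JSON into a generic map; fields whose
// omitempty zero values are skipped by Marshal do not appear in the result.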
func (r *GeneralOpenAIRequest) ToMap() map[string]any {
	result := make(map[string]any)
	data, _ := common.Marshal(r)
	_ = common.Unmarshal(data, &result)
	return result
}
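// GetSystemRoleName returns the role name to use for system-level messages:
// "developer" for o-series models (except o1-mini and o1-preview) and for gpt-5
// models, otherwise "system".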
func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
	if strings.HasPrefix(r.Model, "o") {
		if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
			return "developer"
		}
	} else if strings.HasPrefix(r.Model, "gpt-5") {
		return "developer"
	}
	return "system"
}
type ToolCallRequest struct {
	ID string `json:"id,omitempty"`
	Type string `json:"type"`
	Function FunctionRequest `json:"function"`
}

type FunctionRequest struct {
	Description string `json:"description,omitempty"`
	Name string `json:"name"`
	Parameters any `json:"parameters,omitempty"`
	Arguments string `json:"arguments,omitempty"`
}

type StreamOptions struct {
	IncludeUsage bool `json:"include_usage,omitempty"`
}
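// GetMaxTokens prefers max_completion_tokens when it is set and falls back to
// the legacy max_tokens field otherwise.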
func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
	if r.MaxCompletionTokens != 0 {
		return r.MaxCompletionTokens
	}
	return r.MaxTokens
}
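// ParseInput flattens the `input` field (a plain string, or an array whose string
// elements are kept) into a slice of strings; non-string array elements are ignored.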
func (r *GeneralOpenAIRequest) ParseInput() []string {
	if r.Input == nil {
		return nil
	}
	var input []string
	switch v := r.Input.(type) {
	case string:
		input = []string{v}
	case []any:
		input = make([]string, 0, len(v))
		for _, item := range v {
			if str, ok := item.(string); ok {
				input = append(input, str)
			}
		}
	}
	return input
}
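// Message is a single chat message. Content may be a plain string or an array of
// multimodal parts; ParseContent normalizes it and caches the result in parsedContent.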
type Message struct {
	Role string `json:"role"`
	Content any `json:"content"`
	Name *string `json:"name,omitempty"`
	Prefix *bool `json:"prefix,omitempty"`
	ReasoningContent string `json:"reasoning_content,omitempty"`
	Reasoning string `json:"reasoning,omitempty"`
	ToolCalls json.RawMessage `json:"tool_calls,omitempty"`
	ToolCallId string `json:"tool_call_id,omitempty"`
	parsedContent []MediaContent
	//parsedStringContent *string
}
type MediaContent struct {
	Type string `json:"type"`
	Text string `json:"text,omitempty"`
	ImageUrl any `json:"image_url,omitempty"`
	InputAudio any `json:"input_audio,omitempty"`
	File any `json:"file,omitempty"`
	VideoUrl any `json:"video_url,omitempty"`
	// OpenRouter params
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
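// GetImageMedia, GetInputAudio, GetFile, and GetVideoUrl normalize their respective
// media fields, which may hold either an already-typed pointer or a generic
// map[string]any decoded from JSON; each returns nil when the field is unset.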
func (m *MediaContent) GetImageMedia() *MessageImageUrl {
	if m.ImageUrl != nil {
		if v, ok := m.ImageUrl.(*MessageImageUrl); ok {
			return v
		}
		if itemMap, ok := m.ImageUrl.(map[string]any); ok {
			out := &MessageImageUrl{
				Url: common.Interface2String(itemMap["url"]),
				Detail: common.Interface2String(itemMap["detail"]),
				MimeType: common.Interface2String(itemMap["mime_type"]),
			}
			return out
		}
	}
	return nil
}

func (m *MediaContent) GetInputAudio() *MessageInputAudio {
	if m.InputAudio != nil {
		if v, ok := m.InputAudio.(*MessageInputAudio); ok {
			return v
		}
		if itemMap, ok := m.InputAudio.(map[string]any); ok {
			out := &MessageInputAudio{
				Data: common.Interface2String(itemMap["data"]),
				Format: common.Interface2String(itemMap["format"]),
			}
			return out
		}
	}
	return nil
}

func (m *MediaContent) GetFile() *MessageFile {
	if m.File != nil {
		if v, ok := m.File.(*MessageFile); ok {
			return v
		}
		if itemMap, ok := m.File.(map[string]any); ok {
			out := &MessageFile{
				FileName: common.Interface2String(itemMap["file_name"]),
				FileData: common.Interface2String(itemMap["file_data"]),
				FileId: common.Interface2String(itemMap["file_id"]),
			}
			return out
		}
	}
	return nil
}

func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
	if m.VideoUrl != nil {
		if v, ok := m.VideoUrl.(*MessageVideoUrl); ok {
			return v
		}
		if itemMap, ok := m.VideoUrl.(map[string]any); ok {
			out := &MessageVideoUrl{
				Url: common.Interface2String(itemMap["url"]),
			}
			return out
		}
	}
	return nil
}
type MessageImageUrl struct {
	Url string `json:"url"`
	Detail string `json:"detail"`
	MimeType string
}

func (m *MessageImageUrl) IsRemoteImage() bool {
	return strings.HasPrefix(m.Url, "http")
}

type MessageInputAudio struct {
	Data string `json:"data"` // base64
	Format string `json:"format"`
}

type MessageFile struct {
	FileName string `json:"filename,omitempty"`
	FileData string `json:"file_data,omitempty"`
	FileId string `json:"file_id,omitempty"`
}

type MessageVideoUrl struct {
	Url string `json:"url"`
}
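// Content part types accepted in multimodal message content arrays.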
const (
	ContentTypeText = "text"
	ContentTypeImageURL = "image_url"
	ContentTypeInputAudio = "input_audio"
	ContentTypeFile = "file"
	ContentTypeVideoUrl = "video_url" // Alibaba Bailian video recognition
	//ContentTypeAudioUrl = "audio_url"
)
func (m *Message) GetPrefix() bool {
	if m.Prefix == nil {
		return false
	}
	return *m.Prefix
}

func (m *Message) SetPrefix(prefix bool) {
	m.Prefix = &prefix
}
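// ParseToolCalls decodes the raw tool_calls JSON into []ToolCallRequest; decode
// errors are ignored and yield a nil slice.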
func (m *Message) ParseToolCalls() []ToolCallRequest {
	if m.ToolCalls == nil {
		return nil
	}
	var toolCalls []ToolCallRequest
	_ = json.Unmarshal(m.ToolCalls, &toolCalls)
	return toolCalls
}

func (m *Message) SetToolCalls(toolCalls any) {
	toolCallsJson, _ := json.Marshal(toolCalls)
	m.ToolCalls = toolCallsJson
}
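// StringContent returns Content when it is a plain string, or the concatenation of
// the text of every "text" part when it is a multimodal array; anything else yields "".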
func (m *Message) StringContent() string {
	switch content := m.Content.(type) {
	case string:
		return content
	case []any:
		var contentStr string
		for _, contentItem := range content {
			contentMap, ok := contentItem.(map[string]any)
			if !ok {
				continue
			}
			if contentMap["type"] == ContentTypeText {
				if subStr, ok := contentMap["text"].(string); ok {
					contentStr += subStr
				}
			}
		}
		return contentStr
	}
	return ""
}
func (m *Message) SetNullContent() {
	m.Content = nil
	m.parsedContent = nil
}

func (m *Message) SetStringContent(content string) {
	m.Content = content
	m.parsedContent = nil
}

func (m *Message) SetMediaContent(content []MediaContent) {
	m.Content = content
	m.parsedContent = content
}

func (m *Message) IsStringContent() bool {
	_, ok := m.Content.(string)
	return ok
}
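// ParseContent normalizes Content into a slice of MediaContent and caches the result.
// A plain string becomes a single text part; an array may contain MediaContent values
// or generic maps keyed by "type" (text, image_url, input_audio, file, video_url),
// e.g. {"type":"image_url","image_url":{"url":"...","detail":"high"}}.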
func (m *Message) ParseContent() []MediaContent {
	if m.Content == nil {
		return nil
	}
	if len(m.parsedContent) > 0 {
		return m.parsedContent
	}
	var contentList []MediaContent
	// Try to parse the content as a plain string first
	content, ok := m.Content.(string)
	if ok {
		contentList = []MediaContent{{
			Type: ContentTypeText,
			Text: content,
		}}
		m.parsedContent = contentList
		return contentList
	}
	// Otherwise try to parse it as an array of parts
	//var arrayContent []map[string]interface{}
	arrayContent, ok := m.Content.([]any)
	if !ok {
		return contentList
	}
	for _, contentItemAny := range arrayContent {
		mediaItem, ok := contentItemAny.(MediaContent)
		if ok {
			contentList = append(contentList, mediaItem)
			continue
		}
		contentItem, ok := contentItemAny.(map[string]any)
		if !ok {
			continue
		}
		contentType, ok := contentItem["type"].(string)
		if !ok {
			continue
		}
		switch contentType {
		case ContentTypeText:
			if text, ok := contentItem["text"].(string); ok {
				contentList = append(contentList, MediaContent{
					Type: ContentTypeText,
					Text: text,
				})
			}
		case ContentTypeImageURL:
			imageUrl := contentItem["image_url"]
			temp := &MessageImageUrl{
				Detail: "high",
			}
			switch v := imageUrl.(type) {
			case string:
				temp.Url = v
			case map[string]interface{}:
				url, ok1 := v["url"].(string)
				detail, ok2 := v["detail"].(string)
				if ok2 {
					temp.Detail = detail
				}
				if ok1 {
					temp.Url = url
				}
			}
			contentList = append(contentList, MediaContent{
				Type: ContentTypeImageURL,
				ImageUrl: temp,
			})
		case ContentTypeInputAudio:
			if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
				data, ok1 := audioData["data"].(string)
				format, ok2 := audioData["format"].(string)
				if ok1 && ok2 {
					temp := &MessageInputAudio{
						Data: data,
						Format: format,
					}
					contentList = append(contentList, MediaContent{
						Type: ContentTypeInputAudio,
						InputAudio: temp,
					})
				}
			}
		case ContentTypeFile:
			if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
				fileId, ok3 := fileData["file_id"].(string)
				if ok3 {
					contentList = append(contentList, MediaContent{
						Type: ContentTypeFile,
						File: &MessageFile{
							FileId: fileId,
						},
					})
				} else {
					fileName, ok1 := fileData["filename"].(string)
					fileDataStr, ok2 := fileData["file_data"].(string)
					if ok1 && ok2 {
						contentList = append(contentList, MediaContent{
							Type: ContentTypeFile,
							File: &MessageFile{
								FileName: fileName,
								FileData: fileDataStr,
							},
						})
					}
				}
			}
		case ContentTypeVideoUrl:
			if videoUrl, ok := contentItem["video_url"].(string); ok {
				contentList = append(contentList, MediaContent{
					Type: ContentTypeVideoUrl,
					VideoUrl: &MessageVideoUrl{
						Url: videoUrl,
					},
				})
			}
		}
	}
	if len(contentList) > 0 {
		m.parsedContent = contentList
	}
	return contentList
}
// old code
/*func (m *Message) StringContent() string {
	if m.parsedStringContent != nil {
		return *m.parsedStringContent
	}
	var stringContent string
	if err := json.Unmarshal(m.Content, &stringContent); err == nil {
		m.parsedStringContent = &stringContent
		return stringContent
	}
	contentStr := new(strings.Builder)
	arrayContent := m.ParseContent()
	for _, content := range arrayContent {
		if content.Type == ContentTypeText {
			contentStr.WriteString(content.Text)
		}
	}
	stringContent = contentStr.String()
	m.parsedStringContent = &stringContent
	return stringContent
}

func (m *Message) SetNullContent() {
	m.Content = nil
	m.parsedStringContent = nil
	m.parsedContent = nil
}

func (m *Message) SetStringContent(content string) {
	jsonContent, _ := json.Marshal(content)
	m.Content = jsonContent
	m.parsedStringContent = &content
	m.parsedContent = nil
}

func (m *Message) SetMediaContent(content []MediaContent) {
	jsonContent, _ := json.Marshal(content)
	m.Content = jsonContent
	m.parsedContent = nil
	m.parsedStringContent = nil
}

func (m *Message) IsStringContent() bool {
	if m.parsedStringContent != nil {
		return true
	}
	var stringContent string
	if err := json.Unmarshal(m.Content, &stringContent); err == nil {
		m.parsedStringContent = &stringContent
		return true
	}
	return false
}

func (m *Message) ParseContent() []MediaContent {
	if m.parsedContent != nil {
		return m.parsedContent
	}
	var contentList []MediaContent
	// Try to parse the content as a plain string first
	var stringContent string
	if err := json.Unmarshal(m.Content, &stringContent); err == nil {
		contentList = []MediaContent{{
			Type: ContentTypeText,
			Text: stringContent,
		}}
		m.parsedContent = contentList
		return contentList
	}
	// Otherwise try to parse it as an array of parts
	var arrayContent []map[string]interface{}
	if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
		for _, contentItem := range arrayContent {
			contentType, ok := contentItem["type"].(string)
			if !ok {
				continue
			}
			switch contentType {
			case ContentTypeText:
				if text, ok := contentItem["text"].(string); ok {
					contentList = append(contentList, MediaContent{
						Type: ContentTypeText,
						Text: text,
					})
				}
			case ContentTypeImageURL:
				imageUrl := contentItem["image_url"]
				temp := &MessageImageUrl{
					Detail: "high",
				}
				switch v := imageUrl.(type) {
				case string:
					temp.Url = v
				case map[string]interface{}:
					url, ok1 := v["url"].(string)
					detail, ok2 := v["detail"].(string)
					if ok2 {
						temp.Detail = detail
					}
					if ok1 {
						temp.Url = url
					}
				}
				contentList = append(contentList, MediaContent{
					Type: ContentTypeImageURL,
					ImageUrl: temp,
				})
			case ContentTypeInputAudio:
				if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
					data, ok1 := audioData["data"].(string)
					format, ok2 := audioData["format"].(string)
					if ok1 && ok2 {
						temp := &MessageInputAudio{
							Data: data,
							Format: format,
						}
						contentList = append(contentList, MediaContent{
							Type: ContentTypeInputAudio,
							InputAudio: temp,
						})
					}
				}
			case ContentTypeFile:
				if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
					fileId, ok3 := fileData["file_id"].(string)
					if ok3 {
						contentList = append(contentList, MediaContent{
							Type: ContentTypeFile,
							File: &MessageFile{
								FileId: fileId,
							},
						})
					} else {
						fileName, ok1 := fileData["filename"].(string)
						fileDataStr, ok2 := fileData["file_data"].(string)
						if ok1 && ok2 {
							contentList = append(contentList, MediaContent{
								Type: ContentTypeFile,
								File: &MessageFile{
									FileName: fileName,
									FileData: fileDataStr,
								},
							})
						}
					}
				}
			case ContentTypeVideoUrl:
				if videoUrl, ok := contentItem["video_url"].(string); ok {
					contentList = append(contentList, MediaContent{
						Type: ContentTypeVideoUrl,
						VideoUrl: &MessageVideoUrl{
							Url: videoUrl,
						},
					})
				}
			}
		}
	}
	if len(contentList) > 0 {
		m.parsedContent = contentList
	}
	return contentList
}*/
type WebSearchOptions struct {
	SearchContextSize string `json:"search_context_size,omitempty"`
	UserLocation json.RawMessage `json:"user_location,omitempty"`
}
// https://platform.openai.com/docs/api-reference/responses/create
type OpenAIResponsesRequest struct {
	Model string `json:"model"`
	Input json.RawMessage `json:"input,omitempty"`
	Include json.RawMessage `json:"include,omitempty"`
	Instructions json.RawMessage `json:"instructions,omitempty"`
	MaxOutputTokens uint `json:"max_output_tokens,omitempty"`
	Metadata json.RawMessage `json:"metadata,omitempty"`
	ParallelToolCalls json.RawMessage `json:"parallel_tool_calls,omitempty"`
	PreviousResponseID string `json:"previous_response_id,omitempty"`
	Reasoning *Reasoning `json:"reasoning,omitempty"`
	// Service tier field used to select the API service level. Passing it through may
	// lead to higher-than-expected billing, so it should be filtered by default.
	ServiceTier string `json:"service_tier,omitempty"`
	Store json.RawMessage `json:"store,omitempty"`
	PromptCacheKey json.RawMessage `json:"prompt_cache_key,omitempty"`
	Stream bool `json:"stream,omitempty"`
	Temperature float64 `json:"temperature,omitempty"`
	Text json.RawMessage `json:"text,omitempty"`
	ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
	Tools json.RawMessage `json:"tools,omitempty"` // Few tool parameters need special handling and MCP adds too many uncertain ones, so tools are handled as generic maps
	TopP float64 `json:"top_p,omitempty"`
	Truncation string `json:"truncation,omitempty"`
	User string `json:"user,omitempty"`
	MaxToolCalls uint `json:"max_tool_calls,omitempty"`
	Prompt json.RawMessage `json:"prompt,omitempty"`
}
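// GetTokenCountMeta collects text from the input, instructions, metadata, text,
// tool_choice, prompt, and tools fields of a Responses API request, plus metadata
// for input_image and input_file attachments, for pre-request token estimation.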
func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
	var fileMeta = make([]*types.FileMeta, 0)
	var texts = make([]string, 0)
	if r.Input != nil {
		inputs := r.ParseInput()
		for _, input := range inputs {
			if input.Type == "input_image" {
				if input.ImageUrl != "" {
					fileMeta = append(fileMeta, &types.FileMeta{
						FileType: types.FileTypeImage,
						OriginData: input.ImageUrl,
						Detail: input.Detail,
					})
				}
			} else if input.Type == "input_file" {
				if input.FileUrl != "" {
					fileMeta = append(fileMeta, &types.FileMeta{
						FileType: types.FileTypeFile,
						OriginData: input.FileUrl,
					})
				}
			} else {
				texts = append(texts, input.Text)
			}
		}
	}
	if len(r.Instructions) > 0 {
		texts = append(texts, string(r.Instructions))
	}
	if len(r.Metadata) > 0 {
		texts = append(texts, string(r.Metadata))
	}
	if len(r.Text) > 0 {
		texts = append(texts, string(r.Text))
	}
	if len(r.ToolChoice) > 0 {
		texts = append(texts, string(r.ToolChoice))
	}
	if len(r.Prompt) > 0 {
		texts = append(texts, string(r.Prompt))
	}
	if len(r.Tools) > 0 {
		texts = append(texts, string(r.Tools))
	}
	return &types.TokenCountMeta{
		CombineText: strings.Join(texts, "\n"),
		Files: fileMeta,
		MaxTokens: int(r.MaxOutputTokens),
	}
}
func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
	return r.Stream
}

func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
	if modelName != "" {
		r.Model = modelName
	}
}
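// GetToolsMap unmarshals the raw tools JSON into a slice of generic maps;
// empty or undecodable input yields nil.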
func (r *OpenAIResponsesRequest) GetToolsMap() []map[string]any {
	var toolsMap []map[string]any
	if len(r.Tools) > 0 {
		_ = common.Unmarshal(r.Tools, &toolsMap)
	}
	return toolsMap
}
type Reasoning struct {
	Effort string `json:"effort,omitempty"`
	Summary string `json:"summary,omitempty"`
}

type MediaInput struct {
	Type string `json:"type"`
	Text string `json:"text,omitempty"`
	FileUrl string `json:"file_url,omitempty"`
	ImageUrl string `json:"image_url,omitempty"`
	Detail string `json:"detail,omitempty"` // only meaningful for input_image
}
// ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
// Reference implementation mirrors Message.ParseContent:
// - input can be a string, treated as an input_text item
// - input can be an array of objects with a `type` field
//   (supported types: input_text, input_image, input_file)
func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
	if r.Input == nil {
		return nil
	}
	var inputs []MediaInput
	// Try string first
	// if str, ok := common.GetJsonType(r.Input); ok {
	// 	inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
	// 	return inputs
	// }
	if common.GetJsonType(r.Input) == "string" {
		var str string
		_ = common.Unmarshal(r.Input, &str)
		inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
		return inputs
	}
	// Try array of parts
	if common.GetJsonType(r.Input) == "array" {
		var array []any
		_ = common.Unmarshal(r.Input, &array)
		for _, itemAny := range array {
			// Already parsed MediaInput
			if media, ok := itemAny.(MediaInput); ok {
				inputs = append(inputs, media)
				continue
			}
			// Generic map
			item, ok := itemAny.(map[string]any)
			if !ok {
				continue
			}
			typeVal, ok := item["type"].(string)
			if !ok {
				continue
			}
			switch typeVal {
			case "input_text":
				text, _ := item["text"].(string)
				inputs = append(inputs, MediaInput{Type: "input_text", Text: text})
			case "input_image":
				// image_url may be a string or an object with a url field
				var imageUrl string
				switch v := item["image_url"].(type) {
				case string:
					imageUrl = v
				case map[string]any:
					if url, ok := v["url"].(string); ok {
						imageUrl = url
					}
				}
				inputs = append(inputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
			case "input_file":
				// file_url may be a string or an object with a url field
				var fileUrl string
				switch v := item["file_url"].(type) {
				case string:
					fileUrl = v
				case map[string]any:
					if url, ok := v["url"].(string); ok {
						fileUrl = url
					}
				}
				inputs = append(inputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
			}
		}
	}
	return inputs
}