openai_request.go 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982
  1. package dto
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "strings"
  6. "github.com/QuantumNous/new-api/common"
  7. "github.com/QuantumNous/new-api/types"
  8. "github.com/gin-gonic/gin"
  9. )
// ResponseFormat mirrors the `response_format` field of a request.
type ResponseFormat struct {
	// Type is the requested format type; it is omitted from JSON when empty.
	Type string `json:"type,omitempty"`
	// JsonSchema keeps the `json_schema` payload as raw, undecoded JSON.
	JsonSchema json.RawMessage `json:"json_schema,omitempty"`
}
// FormatJsonSchema describes a named JSON schema.
// NOTE(review): presumably the decoded form of ResponseFormat.JsonSchema —
// confirm against callers.
type FormatJsonSchema struct {
	Description string          `json:"description,omitempty"`
	Name        string          `json:"name"`
	Schema      any             `json:"schema,omitempty"`
	Strict      json.RawMessage `json:"strict,omitempty"` // kept as raw JSON
}
// GeneralOpenAIRequest is the JSON body of an OpenAI-style request. Alongside
// the common fields (model, messages, prompt, input, sampling parameters,
// tools, ...) it carries provider-specific fields, grouped by the comments
// below. Fields typed json.RawMessage are kept as raw, undecoded JSON.
type GeneralOpenAIRequest struct {
	Model               string            `json:"model,omitempty"`
	Messages            []Message         `json:"messages,omitempty"`
	Prompt              any               `json:"prompt,omitempty"`
	Prefix              any               `json:"prefix,omitempty"`
	Suffix              any               `json:"suffix,omitempty"`
	Stream              bool              `json:"stream,omitempty"`
	StreamOptions       *StreamOptions    `json:"stream_options,omitempty"`
	MaxTokens           uint              `json:"max_tokens,omitempty"`
	MaxCompletionTokens uint              `json:"max_completion_tokens,omitempty"`
	ReasoningEffort     string            `json:"reasoning_effort,omitempty"`
	Verbosity           json.RawMessage   `json:"verbosity,omitempty"` // gpt-5
	Temperature         *float64          `json:"temperature,omitempty"`
	TopP                float64           `json:"top_p,omitempty"`
	TopK                int               `json:"top_k,omitempty"`
	Stop                any               `json:"stop,omitempty"`
	N                   int               `json:"n,omitempty"`
	Input               any               `json:"input,omitempty"`
	Instruction         string            `json:"instruction,omitempty"`
	Size                string            `json:"size,omitempty"`
	Functions           json.RawMessage   `json:"functions,omitempty"`
	FrequencyPenalty    float64           `json:"frequency_penalty,omitempty"`
	PresencePenalty     float64           `json:"presence_penalty,omitempty"`
	ResponseFormat      *ResponseFormat   `json:"response_format,omitempty"`
	EncodingFormat      json.RawMessage   `json:"encoding_format,omitempty"`
	Seed                float64           `json:"seed,omitempty"`
	ParallelTooCalls    *bool             `json:"parallel_tool_calls,omitempty"`
	Tools               []ToolCallRequest `json:"tools,omitempty"`
	ToolChoice          any               `json:"tool_choice,omitempty"`
	User                string            `json:"user,omitempty"`
	LogProbs            bool              `json:"logprobs,omitempty"`
	TopLogProbs         int               `json:"top_logprobs,omitempty"`
	Dimensions          int               `json:"dimensions,omitempty"`
	Modalities          json.RawMessage   `json:"modalities,omitempty"`
	Audio               json.RawMessage   `json:"audio,omitempty"`
	// Safety identifier used to help OpenAI detect application users who may be violating its usage policies.
	// Note: this field sends user-identifying information to OpenAI; it is filtered out by default to protect user privacy.
	SafetyIdentifier string `json:"safety_identifier,omitempty"`
	// Whether or not to store the output of this chat completion request for use in our model distillation or evals products.
	// Whether to store this request's data for OpenAI to use in evaluating and improving its products.
	// Note: this field is filtered out by default to protect user privacy, but filtering it may prevent Codex from working properly.
	Store json.RawMessage `json:"store,omitempty"`
	// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field
	PromptCacheKey string          `json:"prompt_cache_key,omitempty"`
	LogitBias      json.RawMessage `json:"logit_bias,omitempty"`
	Metadata       json.RawMessage `json:"metadata,omitempty"`
	Prediction     json.RawMessage `json:"prediction,omitempty"`
	// gemini
	ExtraBody json.RawMessage `json:"extra_body,omitempty"`
	//xai
	SearchParameters json.RawMessage `json:"search_parameters,omitempty"`
	// claude
	WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
	// OpenRouter Params
	Usage     json.RawMessage `json:"usage,omitempty"`
	Reasoning json.RawMessage `json:"reasoning,omitempty"`
	// Ali Qwen Params
	VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
	EnableThinking         any             `json:"enable_thinking,omitempty"`
	// ollama Params
	Think json.RawMessage `json:"think,omitempty"`
	// baidu v2
	WebSearch json.RawMessage `json:"web_search,omitempty"`
	// doubao,zhipu_v4
	THINKING json.RawMessage `json:"thinking,omitempty"`
	// pplx Params
	SearchDomainFilter     json.RawMessage `json:"search_domain_filter,omitempty"`
	SearchRecencyFilter    string          `json:"search_recency_filter,omitempty"`
	ReturnImages           bool            `json:"return_images,omitempty"`
	ReturnRelatedQuestions bool            `json:"return_related_questions,omitempty"`
	SearchMode             string          `json:"search_mode,omitempty"`
}
  92. func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
  93. var tokenCountMeta types.TokenCountMeta
  94. var texts = make([]string, 0)
  95. var fileMeta = make([]*types.FileMeta, 0)
  96. if r.Prompt != nil {
  97. switch v := r.Prompt.(type) {
  98. case string:
  99. texts = append(texts, v)
  100. case []any:
  101. for _, item := range v {
  102. if str, ok := item.(string); ok {
  103. texts = append(texts, str)
  104. }
  105. }
  106. default:
  107. texts = append(texts, fmt.Sprintf("%v", r.Prompt))
  108. }
  109. }
  110. if r.Input != nil {
  111. inputs := r.ParseInput()
  112. texts = append(texts, inputs...)
  113. }
  114. if r.MaxCompletionTokens > r.MaxTokens {
  115. tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
  116. } else {
  117. tokenCountMeta.MaxTokens = int(r.MaxTokens)
  118. }
  119. for _, message := range r.Messages {
  120. tokenCountMeta.MessagesCount++
  121. texts = append(texts, message.Role)
  122. if message.Content != nil {
  123. if message.Name != nil {
  124. tokenCountMeta.NameCount++
  125. texts = append(texts, *message.Name)
  126. }
  127. arrayContent := message.ParseContent()
  128. for _, m := range arrayContent {
  129. if m.Type == ContentTypeImageURL {
  130. imageUrl := m.GetImageMedia()
  131. if imageUrl != nil {
  132. if imageUrl.Url != "" {
  133. meta := &types.FileMeta{
  134. FileType: types.FileTypeImage,
  135. }
  136. meta.OriginData = imageUrl.Url
  137. meta.Detail = imageUrl.Detail
  138. fileMeta = append(fileMeta, meta)
  139. }
  140. }
  141. } else if m.Type == ContentTypeInputAudio {
  142. inputAudio := m.GetInputAudio()
  143. if inputAudio != nil {
  144. meta := &types.FileMeta{
  145. FileType: types.FileTypeAudio,
  146. }
  147. meta.OriginData = inputAudio.Data
  148. fileMeta = append(fileMeta, meta)
  149. }
  150. } else if m.Type == ContentTypeFile {
  151. file := m.GetFile()
  152. if file != nil {
  153. meta := &types.FileMeta{
  154. FileType: types.FileTypeFile,
  155. }
  156. meta.OriginData = file.FileData
  157. fileMeta = append(fileMeta, meta)
  158. }
  159. } else if m.Type == ContentTypeVideoUrl {
  160. videoUrl := m.GetVideoUrl()
  161. if videoUrl != nil && videoUrl.Url != "" {
  162. meta := &types.FileMeta{
  163. FileType: types.FileTypeVideo,
  164. }
  165. meta.OriginData = videoUrl.Url
  166. fileMeta = append(fileMeta, meta)
  167. }
  168. } else {
  169. texts = append(texts, m.Text)
  170. }
  171. }
  172. }
  173. }
  174. if r.Tools != nil {
  175. openaiTools := r.Tools
  176. for _, tool := range openaiTools {
  177. tokenCountMeta.ToolsCount++
  178. texts = append(texts, tool.Function.Name)
  179. if tool.Function.Description != "" {
  180. texts = append(texts, tool.Function.Description)
  181. }
  182. if tool.Function.Parameters != nil {
  183. texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
  184. }
  185. }
  186. //toolTokens := CountTokenInput(countStr, request.Model)
  187. //tkm += 8
  188. //tkm += toolTokens
  189. }
  190. tokenCountMeta.CombineText = strings.Join(texts, "\n")
  191. tokenCountMeta.Files = fileMeta
  192. return &tokenCountMeta
  193. }
// IsStream reports whether the request's Stream field is set. The gin context
// argument is not used by this implementation.
func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
	return r.Stream
}
  197. func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
  198. if modelName != "" {
  199. r.Model = modelName
  200. }
  201. }
  202. func (r *GeneralOpenAIRequest) ToMap() map[string]any {
  203. result := make(map[string]any)
  204. data, _ := common.Marshal(r)
  205. _ = common.Unmarshal(data, &result)
  206. return result
  207. }
  208. func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
  209. if strings.HasPrefix(r.Model, "o") {
  210. if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
  211. return "developer"
  212. }
  213. } else if strings.HasPrefix(r.Model, "gpt-5") {
  214. return "developer"
  215. }
  216. return "system"
  217. }
// CustomType is the literal "custom".
// NOTE(review): presumably the ToolCallRequest.Type value for custom tools —
// confirm against callers.
const CustomType = "custom"
// ToolCallRequest is one entry of a request's `tools` list or of a message's
// `tool_calls` list.
type ToolCallRequest struct {
	ID       string          `json:"id,omitempty"`
	Type     string          `json:"type"`
	Function FunctionRequest `json:"function,omitempty"`
	Custom   json.RawMessage `json:"custom,omitempty"` // kept as raw JSON
}
// FunctionRequest is the `function` object of a ToolCallRequest: its name and
// description, plus the untyped parameters value and an arguments string.
type FunctionRequest struct {
	Description string `json:"description,omitempty"`
	Name        string `json:"name"`
	Parameters  any    `json:"parameters,omitempty"`
	Arguments   string `json:"arguments,omitempty"`
}
// StreamOptions mirrors the `stream_options` field of a request.
type StreamOptions struct {
	// IncludeUsage maps to `include_usage`; omitted from JSON when false.
	IncludeUsage bool `json:"include_usage,omitempty"`
}
  234. func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
  235. if r.MaxCompletionTokens != 0 {
  236. return r.MaxCompletionTokens
  237. }
  238. return r.MaxTokens
  239. }
  240. func (r *GeneralOpenAIRequest) ParseInput() []string {
  241. if r.Input == nil {
  242. return nil
  243. }
  244. var input []string
  245. switch r.Input.(type) {
  246. case string:
  247. input = []string{r.Input.(string)}
  248. case []any:
  249. input = make([]string, 0, len(r.Input.([]any)))
  250. for _, item := range r.Input.([]any) {
  251. if str, ok := item.(string); ok {
  252. input = append(input, str)
  253. }
  254. }
  255. }
  256. return input
  257. }
// Message is a single chat message. Content is untyped: the methods in this
// file treat it as either a plain string or a list of content parts.
// parsedContent is unexported, so it is skipped by encoding/json; it caches
// the result of ParseContent.
type Message struct {
	Role             string          `json:"role"`
	Content          any             `json:"content"`
	Name             *string         `json:"name,omitempty"`
	Prefix           *bool           `json:"prefix,omitempty"`
	ReasoningContent string          `json:"reasoning_content,omitempty"`
	Reasoning        string          `json:"reasoning,omitempty"`
	ToolCalls        json.RawMessage `json:"tool_calls,omitempty"` // raw JSON; see ParseToolCalls
	ToolCallId       string          `json:"tool_call_id,omitempty"`
	parsedContent    []MediaContent
	//parsedStringContent *string
}
// MediaContent is one part of a multi-part message content. Type selects
// which of the payload fields is meaningful. The payload fields are untyped;
// the Get* accessors accept either the matching *Message... pointer or a
// generic map[string]any.
type MediaContent struct {
	Type       string `json:"type"`
	Text       string `json:"text,omitempty"`
	ImageUrl   any    `json:"image_url,omitempty"`
	InputAudio any    `json:"input_audio,omitempty"`
	File       any    `json:"file,omitempty"`
	VideoUrl   any    `json:"video_url,omitempty"`
	// OpenRouter Params
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
  280. func (m *MediaContent) GetImageMedia() *MessageImageUrl {
  281. if m.ImageUrl != nil {
  282. if _, ok := m.ImageUrl.(*MessageImageUrl); ok {
  283. return m.ImageUrl.(*MessageImageUrl)
  284. }
  285. if itemMap, ok := m.ImageUrl.(map[string]any); ok {
  286. out := &MessageImageUrl{
  287. Url: common.Interface2String(itemMap["url"]),
  288. Detail: common.Interface2String(itemMap["detail"]),
  289. MimeType: common.Interface2String(itemMap["mime_type"]),
  290. }
  291. return out
  292. }
  293. }
  294. return nil
  295. }
  296. func (m *MediaContent) GetInputAudio() *MessageInputAudio {
  297. if m.InputAudio != nil {
  298. if _, ok := m.InputAudio.(*MessageInputAudio); ok {
  299. return m.InputAudio.(*MessageInputAudio)
  300. }
  301. if itemMap, ok := m.InputAudio.(map[string]any); ok {
  302. out := &MessageInputAudio{
  303. Data: common.Interface2String(itemMap["data"]),
  304. Format: common.Interface2String(itemMap["format"]),
  305. }
  306. return out
  307. }
  308. }
  309. return nil
  310. }
  311. func (m *MediaContent) GetFile() *MessageFile {
  312. if m.File != nil {
  313. if _, ok := m.File.(*MessageFile); ok {
  314. return m.File.(*MessageFile)
  315. }
  316. if itemMap, ok := m.File.(map[string]any); ok {
  317. out := &MessageFile{
  318. FileName: common.Interface2String(itemMap["file_name"]),
  319. FileData: common.Interface2String(itemMap["file_data"]),
  320. FileId: common.Interface2String(itemMap["file_id"]),
  321. }
  322. return out
  323. }
  324. }
  325. return nil
  326. }
  327. func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
  328. if m.VideoUrl != nil {
  329. if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
  330. return m.VideoUrl.(*MessageVideoUrl)
  331. }
  332. if itemMap, ok := m.VideoUrl.(map[string]any); ok {
  333. out := &MessageVideoUrl{
  334. Url: common.Interface2String(itemMap["url"]),
  335. }
  336. return out
  337. }
  338. }
  339. return nil
  340. }
  341. type MessageImageUrl struct {
  342. Url string `json:"url"`
  343. Detail string `json:"detail"`
  344. MimeType string
  345. }
  346. func (m *MessageImageUrl) IsRemoteImage() bool {
  347. return strings.HasPrefix(m.Url, "http")
  348. }
// MessageInputAudio is the payload of an input_audio content part.
type MessageInputAudio struct {
	Data   string `json:"data"` //base64
	Format string `json:"format"`
}
// MessageFile is the payload of a file content part. All fields are optional
// and omitted from JSON when empty.
type MessageFile struct {
	FileName string `json:"filename,omitempty"`
	FileData string `json:"file_data,omitempty"`
	FileId   string `json:"file_id,omitempty"`
}
// MessageVideoUrl is the payload of a video_url content part.
type MessageVideoUrl struct {
	Url string `json:"url"`
}
// Values of the `type` field of a message content part.
const (
	ContentTypeText       = "text"
	ContentTypeImageURL   = "image_url"
	ContentTypeInputAudio = "input_audio"
	ContentTypeFile       = "file"
	ContentTypeVideoUrl   = "video_url" // Alibaba Bailian video recognition
	//ContentTypeAudioUrl = "audio_url"
)
  369. func (m *Message) GetPrefix() bool {
  370. if m.Prefix == nil {
  371. return false
  372. }
  373. return *m.Prefix
  374. }
// SetPrefix stores the given value in the optional Prefix flag. The pointer
// refers to this call's own copy of the argument.
func (m *Message) SetPrefix(prefix bool) {
	m.Prefix = &prefix
}
  378. func (m *Message) ParseToolCalls() []ToolCallRequest {
  379. if m.ToolCalls == nil {
  380. return nil
  381. }
  382. var toolCalls []ToolCallRequest
  383. if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
  384. return toolCalls
  385. }
  386. return toolCalls
  387. }
  388. func (m *Message) SetToolCalls(toolCalls any) {
  389. toolCallsJson, _ := json.Marshal(toolCalls)
  390. m.ToolCalls = toolCallsJson
  391. }
  392. func (m *Message) StringContent() string {
  393. switch m.Content.(type) {
  394. case string:
  395. return m.Content.(string)
  396. case []any:
  397. var contentStr string
  398. for _, contentItem := range m.Content.([]any) {
  399. contentMap, ok := contentItem.(map[string]any)
  400. if !ok {
  401. continue
  402. }
  403. if contentMap["type"] == ContentTypeText {
  404. if subStr, ok := contentMap["text"].(string); ok {
  405. contentStr += subStr
  406. }
  407. }
  408. }
  409. return contentStr
  410. }
  411. return ""
  412. }
  413. func (m *Message) SetNullContent() {
  414. m.Content = nil
  415. m.parsedContent = nil
  416. }
  417. func (m *Message) SetStringContent(content string) {
  418. m.Content = content
  419. m.parsedContent = nil
  420. }
  421. func (m *Message) SetMediaContent(content []MediaContent) {
  422. m.Content = content
  423. m.parsedContent = content
  424. }
  425. func (m *Message) IsStringContent() bool {
  426. _, ok := m.Content.(string)
  427. if ok {
  428. return true
  429. }
  430. return false
  431. }
  432. func (m *Message) ParseContent() []MediaContent {
  433. if m.Content == nil {
  434. return nil
  435. }
  436. if len(m.parsedContent) > 0 {
  437. return m.parsedContent
  438. }
  439. var contentList []MediaContent
  440. // 先尝试解析为字符串
  441. content, ok := m.Content.(string)
  442. if ok {
  443. contentList = []MediaContent{{
  444. Type: ContentTypeText,
  445. Text: content,
  446. }}
  447. m.parsedContent = contentList
  448. return contentList
  449. }
  450. // 尝试解析为数组
  451. //var arrayContent []map[string]interface{}
  452. arrayContent, ok := m.Content.([]any)
  453. if !ok {
  454. return contentList
  455. }
  456. for _, contentItemAny := range arrayContent {
  457. mediaItem, ok := contentItemAny.(MediaContent)
  458. if ok {
  459. contentList = append(contentList, mediaItem)
  460. continue
  461. }
  462. contentItem, ok := contentItemAny.(map[string]any)
  463. if !ok {
  464. continue
  465. }
  466. contentType, ok := contentItem["type"].(string)
  467. if !ok {
  468. continue
  469. }
  470. switch contentType {
  471. case ContentTypeText:
  472. if text, ok := contentItem["text"].(string); ok {
  473. contentList = append(contentList, MediaContent{
  474. Type: ContentTypeText,
  475. Text: text,
  476. })
  477. }
  478. case ContentTypeImageURL:
  479. imageUrl := contentItem["image_url"]
  480. temp := &MessageImageUrl{
  481. Detail: "high",
  482. }
  483. switch v := imageUrl.(type) {
  484. case string:
  485. temp.Url = v
  486. case map[string]interface{}:
  487. url, ok1 := v["url"].(string)
  488. detail, ok2 := v["detail"].(string)
  489. if ok2 {
  490. temp.Detail = detail
  491. }
  492. if ok1 {
  493. temp.Url = url
  494. }
  495. }
  496. contentList = append(contentList, MediaContent{
  497. Type: ContentTypeImageURL,
  498. ImageUrl: temp,
  499. })
  500. case ContentTypeInputAudio:
  501. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  502. data, ok1 := audioData["data"].(string)
  503. format, ok2 := audioData["format"].(string)
  504. if ok1 && ok2 {
  505. temp := &MessageInputAudio{
  506. Data: data,
  507. Format: format,
  508. }
  509. contentList = append(contentList, MediaContent{
  510. Type: ContentTypeInputAudio,
  511. InputAudio: temp,
  512. })
  513. }
  514. }
  515. case ContentTypeFile:
  516. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  517. fileId, ok3 := fileData["file_id"].(string)
  518. if ok3 {
  519. contentList = append(contentList, MediaContent{
  520. Type: ContentTypeFile,
  521. File: &MessageFile{
  522. FileId: fileId,
  523. },
  524. })
  525. } else {
  526. fileName, ok1 := fileData["filename"].(string)
  527. fileDataStr, ok2 := fileData["file_data"].(string)
  528. if ok1 && ok2 {
  529. contentList = append(contentList, MediaContent{
  530. Type: ContentTypeFile,
  531. File: &MessageFile{
  532. FileName: fileName,
  533. FileData: fileDataStr,
  534. },
  535. })
  536. }
  537. }
  538. }
  539. case ContentTypeVideoUrl:
  540. if videoUrl, ok := contentItem["video_url"].(string); ok {
  541. contentList = append(contentList, MediaContent{
  542. Type: ContentTypeVideoUrl,
  543. VideoUrl: &MessageVideoUrl{
  544. Url: videoUrl,
  545. },
  546. })
  547. }
  548. }
  549. }
  550. if len(contentList) > 0 {
  551. m.parsedContent = contentList
  552. }
  553. return contentList
  554. }
  555. // old code
  556. /*func (m *Message) StringContent() string {
  557. if m.parsedStringContent != nil {
  558. return *m.parsedStringContent
  559. }
  560. var stringContent string
  561. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  562. m.parsedStringContent = &stringContent
  563. return stringContent
  564. }
  565. contentStr := new(strings.Builder)
  566. arrayContent := m.ParseContent()
  567. for _, content := range arrayContent {
  568. if content.Type == ContentTypeText {
  569. contentStr.WriteString(content.Text)
  570. }
  571. }
  572. stringContent = contentStr.String()
  573. m.parsedStringContent = &stringContent
  574. return stringContent
  575. }
  576. func (m *Message) SetNullContent() {
  577. m.Content = nil
  578. m.parsedStringContent = nil
  579. m.parsedContent = nil
  580. }
  581. func (m *Message) SetStringContent(content string) {
  582. jsonContent, _ := json.Marshal(content)
  583. m.Content = jsonContent
  584. m.parsedStringContent = &content
  585. m.parsedContent = nil
  586. }
  587. func (m *Message) SetMediaContent(content []MediaContent) {
  588. jsonContent, _ := json.Marshal(content)
  589. m.Content = jsonContent
  590. m.parsedContent = nil
  591. m.parsedStringContent = nil
  592. }
  593. func (m *Message) IsStringContent() bool {
  594. if m.parsedStringContent != nil {
  595. return true
  596. }
  597. var stringContent string
  598. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  599. m.parsedStringContent = &stringContent
  600. return true
  601. }
  602. return false
  603. }
  604. func (m *Message) ParseContent() []MediaContent {
  605. if m.parsedContent != nil {
  606. return m.parsedContent
  607. }
  608. var contentList []MediaContent
  609. // 先尝试解析为字符串
  610. var stringContent string
  611. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  612. contentList = []MediaContent{{
  613. Type: ContentTypeText,
  614. Text: stringContent,
  615. }}
  616. m.parsedContent = contentList
  617. return contentList
  618. }
  619. // 尝试解析为数组
  620. var arrayContent []map[string]interface{}
  621. if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
  622. for _, contentItem := range arrayContent {
  623. contentType, ok := contentItem["type"].(string)
  624. if !ok {
  625. continue
  626. }
  627. switch contentType {
  628. case ContentTypeText:
  629. if text, ok := contentItem["text"].(string); ok {
  630. contentList = append(contentList, MediaContent{
  631. Type: ContentTypeText,
  632. Text: text,
  633. })
  634. }
  635. case ContentTypeImageURL:
  636. imageUrl := contentItem["image_url"]
  637. temp := &MessageImageUrl{
  638. Detail: "high",
  639. }
  640. switch v := imageUrl.(type) {
  641. case string:
  642. temp.Url = v
  643. case map[string]interface{}:
  644. url, ok1 := v["url"].(string)
  645. detail, ok2 := v["detail"].(string)
  646. if ok2 {
  647. temp.Detail = detail
  648. }
  649. if ok1 {
  650. temp.Url = url
  651. }
  652. }
  653. contentList = append(contentList, MediaContent{
  654. Type: ContentTypeImageURL,
  655. ImageUrl: temp,
  656. })
  657. case ContentTypeInputAudio:
  658. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  659. data, ok1 := audioData["data"].(string)
  660. format, ok2 := audioData["format"].(string)
  661. if ok1 && ok2 {
  662. temp := &MessageInputAudio{
  663. Data: data,
  664. Format: format,
  665. }
  666. contentList = append(contentList, MediaContent{
  667. Type: ContentTypeInputAudio,
  668. InputAudio: temp,
  669. })
  670. }
  671. }
  672. case ContentTypeFile:
  673. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  674. fileId, ok3 := fileData["file_id"].(string)
  675. if ok3 {
  676. contentList = append(contentList, MediaContent{
  677. Type: ContentTypeFile,
  678. File: &MessageFile{
  679. FileId: fileId,
  680. },
  681. })
  682. } else {
  683. fileName, ok1 := fileData["filename"].(string)
  684. fileDataStr, ok2 := fileData["file_data"].(string)
  685. if ok1 && ok2 {
  686. contentList = append(contentList, MediaContent{
  687. Type: ContentTypeFile,
  688. File: &MessageFile{
  689. FileName: fileName,
  690. FileData: fileDataStr,
  691. },
  692. })
  693. }
  694. }
  695. }
  696. case ContentTypeVideoUrl:
  697. if videoUrl, ok := contentItem["video_url"].(string); ok {
  698. contentList = append(contentList, MediaContent{
  699. Type: ContentTypeVideoUrl,
  700. VideoUrl: &MessageVideoUrl{
  701. Url: videoUrl,
  702. },
  703. })
  704. }
  705. }
  706. }
  707. }
  708. if len(contentList) > 0 {
  709. m.parsedContent = contentList
  710. }
  711. return contentList
  712. }*/
// WebSearchOptions mirrors the `web_search_options` field of a request.
type WebSearchOptions struct {
	SearchContextSize string          `json:"search_context_size,omitempty"`
	UserLocation      json.RawMessage `json:"user_location,omitempty"` // kept as raw JSON
}
// OpenAIResponsesRequest is the JSON body of a Responses API request. Fields
// typed json.RawMessage are kept as raw, undecoded JSON.
//
// https://platform.openai.com/docs/api-reference/responses/create
type OpenAIResponsesRequest struct {
	Model              string          `json:"model"`
	Input              json.RawMessage `json:"input,omitempty"`
	Include            json.RawMessage `json:"include,omitempty"`
	Instructions       json.RawMessage `json:"instructions,omitempty"`
	MaxOutputTokens    uint            `json:"max_output_tokens,omitempty"`
	Metadata           json.RawMessage `json:"metadata,omitempty"`
	ParallelToolCalls  json.RawMessage `json:"parallel_tool_calls,omitempty"`
	PreviousResponseID string          `json:"previous_response_id,omitempty"`
	Reasoning          *Reasoning      `json:"reasoning,omitempty"`
	// Service tier field specifying the API service level. Passing it through may result in higher-than-expected billing, so it should be filtered by default.
	ServiceTier    string          `json:"service_tier,omitempty"`
	Store          json.RawMessage `json:"store,omitempty"`
	PromptCacheKey json.RawMessage `json:"prompt_cache_key,omitempty"`
	Stream         bool            `json:"stream,omitempty"`
	Temperature    float64         `json:"temperature,omitempty"`
	Text           json.RawMessage `json:"text,omitempty"`
	ToolChoice     json.RawMessage `json:"tool_choice,omitempty"`
	Tools          json.RawMessage `json:"tools,omitempty"` // Few parameters need handling and the MCP parameters are too numerous and uncertain, so a map is used
	TopP           float64         `json:"top_p,omitempty"`
	Truncation     string          `json:"truncation,omitempty"`
	User           string          `json:"user,omitempty"`
	MaxToolCalls   uint            `json:"max_tool_calls,omitempty"`
	Prompt         json.RawMessage `json:"prompt,omitempty"`
}
  743. func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
  744. var fileMeta = make([]*types.FileMeta, 0)
  745. var texts = make([]string, 0)
  746. if r.Input != nil {
  747. inputs := r.ParseInput()
  748. for _, input := range inputs {
  749. if input.Type == "input_image" {
  750. if input.ImageUrl != "" {
  751. fileMeta = append(fileMeta, &types.FileMeta{
  752. FileType: types.FileTypeImage,
  753. OriginData: input.ImageUrl,
  754. Detail: input.Detail,
  755. })
  756. }
  757. } else if input.Type == "input_file" {
  758. if input.FileUrl != "" {
  759. fileMeta = append(fileMeta, &types.FileMeta{
  760. FileType: types.FileTypeFile,
  761. OriginData: input.FileUrl,
  762. })
  763. }
  764. } else {
  765. texts = append(texts, input.Text)
  766. }
  767. }
  768. }
  769. if len(r.Instructions) > 0 {
  770. texts = append(texts, string(r.Instructions))
  771. }
  772. if len(r.Metadata) > 0 {
  773. texts = append(texts, string(r.Metadata))
  774. }
  775. if len(r.Text) > 0 {
  776. texts = append(texts, string(r.Text))
  777. }
  778. if len(r.ToolChoice) > 0 {
  779. texts = append(texts, string(r.ToolChoice))
  780. }
  781. if len(r.Prompt) > 0 {
  782. texts = append(texts, string(r.Prompt))
  783. }
  784. if len(r.Tools) > 0 {
  785. texts = append(texts, string(r.Tools))
  786. }
  787. return &types.TokenCountMeta{
  788. CombineText: strings.Join(texts, "\n"),
  789. Files: fileMeta,
  790. MaxTokens: int(r.MaxOutputTokens),
  791. }
  792. }
// IsStream reports whether the request's Stream field is set. The gin context
// argument is not used by this implementation.
func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
	return r.Stream
}
  796. func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
  797. if modelName != "" {
  798. r.Model = modelName
  799. }
  800. }
  801. func (r *OpenAIResponsesRequest) GetToolsMap() []map[string]any {
  802. var toolsMap []map[string]any
  803. if len(r.Tools) > 0 {
  804. _ = common.Unmarshal(r.Tools, &toolsMap)
  805. }
  806. return toolsMap
  807. }
// Reasoning mirrors the `reasoning` object of a Responses API request. Both
// fields are omitted from JSON when empty.
type Reasoning struct {
	Effort  string `json:"effort,omitempty"`
	Summary string `json:"summary,omitempty"`
}
// MediaInput is one normalized item of a Responses API `input`, as produced
// by OpenAIResponsesRequest.ParseInput. Type selects which of the other
// fields is meaningful.
type MediaInput struct {
	Type     string `json:"type"`
	Text     string `json:"text,omitempty"`
	FileUrl  string `json:"file_url,omitempty"`
	ImageUrl string `json:"image_url,omitempty"`
	Detail   string `json:"detail,omitempty"` // only valid for input_image
}
  819. // ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
  820. // Reference implementation mirrors Message.ParseContent:
  821. // - input can be a string, treated as an input_text item
  822. // - input can be an array of objects with a `type` field
  823. // supported types: input_text, input_image, input_file
  824. func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
  825. if r.Input == nil {
  826. return nil
  827. }
  828. var inputs []MediaInput
  829. // Try string first
  830. // if str, ok := common.GetJsonType(r.Input); ok {
  831. // inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  832. // return inputs
  833. // }
  834. if common.GetJsonType(r.Input) == "string" {
  835. var str string
  836. _ = common.Unmarshal(r.Input, &str)
  837. inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  838. return inputs
  839. }
  840. // Try array of parts
  841. if common.GetJsonType(r.Input) == "array" {
  842. var array []any
  843. _ = common.Unmarshal(r.Input, &array)
  844. for _, itemAny := range array {
  845. // Already parsed MediaInput
  846. if media, ok := itemAny.(MediaInput); ok {
  847. inputs = append(inputs, media)
  848. continue
  849. }
  850. // Generic map
  851. item, ok := itemAny.(map[string]any)
  852. if !ok {
  853. continue
  854. }
  855. typeVal, ok := item["type"].(string)
  856. if !ok {
  857. continue
  858. }
  859. switch typeVal {
  860. case "input_text":
  861. text, _ := item["text"].(string)
  862. inputs = append(inputs, MediaInput{Type: "input_text", Text: text})
  863. case "input_image":
  864. // image_url may be string or object with url field
  865. var imageUrl string
  866. switch v := item["image_url"].(type) {
  867. case string:
  868. imageUrl = v
  869. case map[string]any:
  870. if url, ok := v["url"].(string); ok {
  871. imageUrl = url
  872. }
  873. }
  874. inputs = append(inputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
  875. case "input_file":
  876. // file_url may be string or object with url field
  877. var fileUrl string
  878. switch v := item["file_url"].(type) {
  879. case string:
  880. fileUrl = v
  881. case map[string]any:
  882. if url, ok := v["url"].(string); ok {
  883. fileUrl = url
  884. }
  885. }
  886. inputs = append(inputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
  887. }
  888. }
  889. }
  890. return inputs
  891. }