openai_request.go 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004
  1. package dto
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "strings"
  6. "github.com/QuantumNous/new-api/common"
  7. "github.com/QuantumNous/new-api/types"
  8. "github.com/gin-gonic/gin"
  9. )
  // ResponseFormat is the value of the `response_format` request parameter.
  type ResponseFormat struct {
  	// Type names the requested output format.
  	Type string `json:"type,omitempty"`
  	// JsonSchema carries the `json_schema` payload as raw, undecoded JSON.
  	JsonSchema json.RawMessage `json:"json_schema,omitempty"`
  }
  // FormatJsonSchema is the decoded shape of a `json_schema` response-format payload.
  type FormatJsonSchema struct {
  	// Description is an optional human-readable description.
  	Description string `json:"description,omitempty"`
  	// Name is always serialized (no omitempty).
  	Name string `json:"name"`
  	// Schema holds the schema document in whatever form it was supplied.
  	Schema any `json:"schema,omitempty"`
  	// Strict is kept as raw JSON and passed through undecoded.
  	Strict json.RawMessage `json:"strict,omitempty"`
  }
  20. type GeneralOpenAIRequest struct {
  21. Model string `json:"model,omitempty"`
  22. Messages []Message `json:"messages,omitempty"`
  23. Prompt any `json:"prompt,omitempty"`
  24. Prefix any `json:"prefix,omitempty"`
  25. Suffix any `json:"suffix,omitempty"`
  26. Stream bool `json:"stream,omitempty"`
  27. StreamOptions *StreamOptions `json:"stream_options,omitempty"`
  28. MaxTokens uint `json:"max_tokens,omitempty"`
  29. MaxCompletionTokens uint `json:"max_completion_tokens,omitempty"`
  30. ReasoningEffort string `json:"reasoning_effort,omitempty"`
  31. Verbosity json.RawMessage `json:"verbosity,omitempty"` // gpt-5
  32. Temperature *float64 `json:"temperature,omitempty"`
  33. TopP float64 `json:"top_p,omitempty"`
  34. TopK int `json:"top_k,omitempty"`
  35. Stop any `json:"stop,omitempty"`
  36. N int `json:"n,omitempty"`
  37. Input any `json:"input,omitempty"`
  38. Instruction string `json:"instruction,omitempty"`
  39. Size string `json:"size,omitempty"`
  40. Functions json.RawMessage `json:"functions,omitempty"`
  41. FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
  42. PresencePenalty float64 `json:"presence_penalty,omitempty"`
  43. ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
  44. EncodingFormat json.RawMessage `json:"encoding_format,omitempty"`
  45. Seed float64 `json:"seed,omitempty"`
  46. ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
  47. Tools []ToolCallRequest `json:"tools,omitempty"`
  48. ToolChoice any `json:"tool_choice,omitempty"`
  49. User string `json:"user,omitempty"`
  50. LogProbs bool `json:"logprobs,omitempty"`
  51. TopLogProbs int `json:"top_logprobs,omitempty"`
  52. Dimensions int `json:"dimensions,omitempty"`
  53. Modalities json.RawMessage `json:"modalities,omitempty"`
  54. Audio json.RawMessage `json:"audio,omitempty"`
  55. // 安全标识符,用于帮助 OpenAI 检测可能违反使用政策的应用程序用户
  56. // 注意:此字段会向 OpenAI 发送用户标识信息,默认过滤以保护用户隐私
  57. SafetyIdentifier string `json:"safety_identifier,omitempty"`
  58. // Whether or not to store the output of this chat completion request for use in our model distillation or evals products.
  59. // 是否存储此次请求数据供 OpenAI 用于评估和优化产品
  60. // 注意:默认过滤此字段以保护用户隐私,但过滤后可能导致 Codex 无法正常使用
  61. Store json.RawMessage `json:"store,omitempty"`
  62. // Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field
  63. PromptCacheKey string `json:"prompt_cache_key,omitempty"`
  64. PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  65. LogitBias json.RawMessage `json:"logit_bias,omitempty"`
  66. Metadata json.RawMessage `json:"metadata,omitempty"`
  67. Prediction json.RawMessage `json:"prediction,omitempty"`
  68. // gemini
  69. ExtraBody json.RawMessage `json:"extra_body,omitempty"`
  70. //xai
  71. SearchParameters json.RawMessage `json:"search_parameters,omitempty"`
  72. // claude
  73. WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
  74. // OpenRouter Params
  75. Usage json.RawMessage `json:"usage,omitempty"`
  76. Reasoning json.RawMessage `json:"reasoning,omitempty"`
  77. // Ali Qwen Params
  78. VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
  79. EnableThinking any `json:"enable_thinking,omitempty"`
  80. // ollama Params
  81. Think json.RawMessage `json:"think,omitempty"`
  82. // baidu v2
  83. WebSearch json.RawMessage `json:"web_search,omitempty"`
  84. // doubao,zhipu_v4
  85. THINKING json.RawMessage `json:"thinking,omitempty"`
  86. // pplx Params
  87. SearchDomainFilter json.RawMessage `json:"search_domain_filter,omitempty"`
  88. SearchRecencyFilter string `json:"search_recency_filter,omitempty"`
  89. ReturnImages bool `json:"return_images,omitempty"`
  90. ReturnRelatedQuestions bool `json:"return_related_questions,omitempty"`
  91. SearchMode string `json:"search_mode,omitempty"`
  92. }
  93. func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
  94. var tokenCountMeta types.TokenCountMeta
  95. var texts = make([]string, 0)
  96. var fileMeta = make([]*types.FileMeta, 0)
  97. if r.Prompt != nil {
  98. switch v := r.Prompt.(type) {
  99. case string:
  100. texts = append(texts, v)
  101. case []any:
  102. for _, item := range v {
  103. if str, ok := item.(string); ok {
  104. texts = append(texts, str)
  105. }
  106. }
  107. default:
  108. texts = append(texts, fmt.Sprintf("%v", r.Prompt))
  109. }
  110. }
  111. if r.Input != nil {
  112. inputs := r.ParseInput()
  113. texts = append(texts, inputs...)
  114. }
  115. if r.MaxCompletionTokens > r.MaxTokens {
  116. tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
  117. } else {
  118. tokenCountMeta.MaxTokens = int(r.MaxTokens)
  119. }
  120. for _, message := range r.Messages {
  121. tokenCountMeta.MessagesCount++
  122. texts = append(texts, message.Role)
  123. if message.Content != nil {
  124. if message.Name != nil {
  125. tokenCountMeta.NameCount++
  126. texts = append(texts, *message.Name)
  127. }
  128. arrayContent := message.ParseContent()
  129. for _, m := range arrayContent {
  130. if m.Type == ContentTypeImageURL {
  131. imageUrl := m.GetImageMedia()
  132. if imageUrl != nil {
  133. if imageUrl.Url != "" {
  134. meta := &types.FileMeta{
  135. FileType: types.FileTypeImage,
  136. }
  137. meta.OriginData = imageUrl.Url
  138. meta.Detail = imageUrl.Detail
  139. fileMeta = append(fileMeta, meta)
  140. }
  141. }
  142. } else if m.Type == ContentTypeInputAudio {
  143. inputAudio := m.GetInputAudio()
  144. if inputAudio != nil {
  145. meta := &types.FileMeta{
  146. FileType: types.FileTypeAudio,
  147. }
  148. meta.OriginData = inputAudio.Data
  149. fileMeta = append(fileMeta, meta)
  150. }
  151. } else if m.Type == ContentTypeFile {
  152. file := m.GetFile()
  153. if file != nil {
  154. meta := &types.FileMeta{
  155. FileType: types.FileTypeFile,
  156. }
  157. meta.OriginData = file.FileData
  158. fileMeta = append(fileMeta, meta)
  159. }
  160. } else if m.Type == ContentTypeVideoUrl {
  161. videoUrl := m.GetVideoUrl()
  162. if videoUrl != nil && videoUrl.Url != "" {
  163. meta := &types.FileMeta{
  164. FileType: types.FileTypeVideo,
  165. }
  166. meta.OriginData = videoUrl.Url
  167. fileMeta = append(fileMeta, meta)
  168. }
  169. } else {
  170. texts = append(texts, m.Text)
  171. }
  172. }
  173. }
  174. }
  175. if r.Tools != nil {
  176. openaiTools := r.Tools
  177. for _, tool := range openaiTools {
  178. tokenCountMeta.ToolsCount++
  179. texts = append(texts, tool.Function.Name)
  180. if tool.Function.Description != "" {
  181. texts = append(texts, tool.Function.Description)
  182. }
  183. if tool.Function.Parameters != nil {
  184. texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
  185. }
  186. }
  187. //toolTokens := CountTokenInput(countStr, request.Model)
  188. //tkm += 8
  189. //tkm += toolTokens
  190. }
  191. tokenCountMeta.CombineText = strings.Join(texts, "\n")
  192. tokenCountMeta.Files = fileMeta
  193. return &tokenCountMeta
  194. }
  195. func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
  196. return r.Stream
  197. }
  198. func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
  199. if modelName != "" {
  200. r.Model = modelName
  201. }
  202. }
  203. func (r *GeneralOpenAIRequest) ToMap() map[string]any {
  204. result := make(map[string]any)
  205. data, _ := common.Marshal(r)
  206. _ = common.Unmarshal(data, &result)
  207. return result
  208. }
  209. func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
  210. if strings.HasPrefix(r.Model, "o") {
  211. if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
  212. return "developer"
  213. }
  214. } else if strings.HasPrefix(r.Model, "gpt-5") {
  215. return "developer"
  216. }
  217. return "system"
  218. }
  // CustomType is the literal "custom" type name; ToolCallRequest has a
  // matching `custom` payload field.
  const CustomType = "custom"
  220. type ToolCallRequest struct {
  221. ID string `json:"id,omitempty"`
  222. Type string `json:"type"`
  223. Function FunctionRequest `json:"function,omitempty"`
  224. Custom json.RawMessage `json:"custom,omitempty"`
  225. }
  // FunctionRequest is the function payload of a ToolCallRequest: a declaration
  // (description, name, parameters) and/or call arguments.
  type FunctionRequest struct {
  	Description string `json:"description,omitempty"`
  	// Name is always serialized (no omitempty).
  	Name string `json:"name"`
  	// Parameters holds the parameter schema in whatever form it was decoded.
  	Parameters any `json:"parameters,omitempty"`
  	// Arguments is carried as a plain string.
  	Arguments string `json:"arguments,omitempty"`
  }
  // StreamOptions is the value of the `stream_options` request parameter.
  type StreamOptions struct {
  	// IncludeUsage maps to `include_usage`; false is omitted when encoding.
  	IncludeUsage bool `json:"include_usage,omitempty"`
  }
  235. func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
  236. if r.MaxCompletionTokens != 0 {
  237. return r.MaxCompletionTokens
  238. }
  239. return r.MaxTokens
  240. }
  241. func (r *GeneralOpenAIRequest) ParseInput() []string {
  242. if r.Input == nil {
  243. return nil
  244. }
  245. var input []string
  246. switch r.Input.(type) {
  247. case string:
  248. input = []string{r.Input.(string)}
  249. case []any:
  250. input = make([]string, 0, len(r.Input.([]any)))
  251. for _, item := range r.Input.([]any) {
  252. if str, ok := item.(string); ok {
  253. input = append(input, str)
  254. }
  255. }
  256. }
  257. return input
  258. }
  259. type Message struct {
  260. Role string `json:"role"`
  261. Content any `json:"content"`
  262. Name *string `json:"name,omitempty"`
  263. Prefix *bool `json:"prefix,omitempty"`
  264. ReasoningContent string `json:"reasoning_content,omitempty"`
  265. Reasoning string `json:"reasoning,omitempty"`
  266. ToolCalls json.RawMessage `json:"tool_calls,omitempty"`
  267. ToolCallId string `json:"tool_call_id,omitempty"`
  268. parsedContent []MediaContent
  269. //parsedStringContent *string
  270. }
  // MediaContent is one part of a multi-part message content list. Which of the
  // payload fields is populated depends on Type (see the ContentType* constants).
  type MediaContent struct {
  	Type string `json:"type"`
  	Text string `json:"text,omitempty"`
  	// The payload fields are typed `any`; the Get* accessors on MediaContent
  	// accept both the typed pointer and a generic map.
  	ImageUrl   any `json:"image_url,omitempty"`
  	InputAudio any `json:"input_audio,omitempty"`
  	File       any `json:"file,omitempty"`
  	VideoUrl   any `json:"video_url,omitempty"`
  	// OpenRouter Params
  	CacheControl json.RawMessage `json:"cache_control,omitempty"`
  }
  281. func (m *MediaContent) GetImageMedia() *MessageImageUrl {
  282. if m.ImageUrl != nil {
  283. if _, ok := m.ImageUrl.(*MessageImageUrl); ok {
  284. return m.ImageUrl.(*MessageImageUrl)
  285. }
  286. if itemMap, ok := m.ImageUrl.(map[string]any); ok {
  287. out := &MessageImageUrl{
  288. Url: common.Interface2String(itemMap["url"]),
  289. Detail: common.Interface2String(itemMap["detail"]),
  290. MimeType: common.Interface2String(itemMap["mime_type"]),
  291. }
  292. return out
  293. }
  294. }
  295. return nil
  296. }
  297. func (m *MediaContent) GetInputAudio() *MessageInputAudio {
  298. if m.InputAudio != nil {
  299. if _, ok := m.InputAudio.(*MessageInputAudio); ok {
  300. return m.InputAudio.(*MessageInputAudio)
  301. }
  302. if itemMap, ok := m.InputAudio.(map[string]any); ok {
  303. out := &MessageInputAudio{
  304. Data: common.Interface2String(itemMap["data"]),
  305. Format: common.Interface2String(itemMap["format"]),
  306. }
  307. return out
  308. }
  309. }
  310. return nil
  311. }
  312. func (m *MediaContent) GetFile() *MessageFile {
  313. if m.File != nil {
  314. if _, ok := m.File.(*MessageFile); ok {
  315. return m.File.(*MessageFile)
  316. }
  317. if itemMap, ok := m.File.(map[string]any); ok {
  318. out := &MessageFile{
  319. FileName: common.Interface2String(itemMap["file_name"]),
  320. FileData: common.Interface2String(itemMap["file_data"]),
  321. FileId: common.Interface2String(itemMap["file_id"]),
  322. }
  323. return out
  324. }
  325. }
  326. return nil
  327. }
  328. func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
  329. if m.VideoUrl != nil {
  330. if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
  331. return m.VideoUrl.(*MessageVideoUrl)
  332. }
  333. if itemMap, ok := m.VideoUrl.(map[string]any); ok {
  334. out := &MessageVideoUrl{
  335. Url: common.Interface2String(itemMap["url"]),
  336. }
  337. return out
  338. }
  339. }
  340. return nil
  341. }
  // MessageImageUrl is the payload of an image_url content part.
  type MessageImageUrl struct {
  	Url    string `json:"url"`
  	Detail string `json:"detail"`
  	// MimeType has no JSON tag, so encoding/json serializes it under the key
  	// "MimeType".
  	MimeType string
  }
  347. func (m *MessageImageUrl) IsRemoteImage() bool {
  348. return strings.HasPrefix(m.Url, "http")
  349. }
  // MessageInputAudio is the payload of an input_audio content part.
  type MessageInputAudio struct {
  	// Data is the base64-encoded audio.
  	Data   string `json:"data"`
  	Format string `json:"format"`
  }
  // MessageFile is the payload of a file content part. All fields are optional
  // when encoding.
  type MessageFile struct {
  	// FileName is serialized under the key "filename" (no underscore).
  	FileName string `json:"filename,omitempty"`
  	FileData string `json:"file_data,omitempty"`
  	FileId   string `json:"file_id,omitempty"`
  }
  // MessageVideoUrl is the payload of a video_url content part.
  type MessageVideoUrl struct {
  	Url string `json:"url"`
  }
  // Values of the `type` field of a message content part.
  const (
  	ContentTypeText       = "text"
  	ContentTypeImageURL   = "image_url"
  	ContentTypeInputAudio = "input_audio"
  	ContentTypeFile       = "file"
  	ContentTypeVideoUrl   = "video_url" // Alibaba Bailian video recognition
  	//ContentTypeAudioUrl = "audio_url"
  )
  370. func (m *Message) GetPrefix() bool {
  371. if m.Prefix == nil {
  372. return false
  373. }
  374. return *m.Prefix
  375. }
  376. func (m *Message) SetPrefix(prefix bool) {
  377. m.Prefix = &prefix
  378. }
  379. func (m *Message) ParseToolCalls() []ToolCallRequest {
  380. if m.ToolCalls == nil {
  381. return nil
  382. }
  383. var toolCalls []ToolCallRequest
  384. if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
  385. return toolCalls
  386. }
  387. return toolCalls
  388. }
  389. func (m *Message) SetToolCalls(toolCalls any) {
  390. toolCallsJson, _ := json.Marshal(toolCalls)
  391. m.ToolCalls = toolCallsJson
  392. }
  393. func (m *Message) StringContent() string {
  394. switch m.Content.(type) {
  395. case string:
  396. return m.Content.(string)
  397. case []any:
  398. var contentStr string
  399. for _, contentItem := range m.Content.([]any) {
  400. contentMap, ok := contentItem.(map[string]any)
  401. if !ok {
  402. continue
  403. }
  404. if contentMap["type"] == ContentTypeText {
  405. if subStr, ok := contentMap["text"].(string); ok {
  406. contentStr += subStr
  407. }
  408. }
  409. }
  410. return contentStr
  411. }
  412. return ""
  413. }
  414. func (m *Message) SetNullContent() {
  415. m.Content = nil
  416. m.parsedContent = nil
  417. }
  418. func (m *Message) SetStringContent(content string) {
  419. m.Content = content
  420. m.parsedContent = nil
  421. }
  422. func (m *Message) SetMediaContent(content []MediaContent) {
  423. m.Content = content
  424. m.parsedContent = content
  425. }
  426. func (m *Message) IsStringContent() bool {
  427. _, ok := m.Content.(string)
  428. if ok {
  429. return true
  430. }
  431. return false
  432. }
  433. func (m *Message) ParseContent() []MediaContent {
  434. if m.Content == nil {
  435. return nil
  436. }
  437. if len(m.parsedContent) > 0 {
  438. return m.parsedContent
  439. }
  440. var contentList []MediaContent
  441. // 先尝试解析为字符串
  442. content, ok := m.Content.(string)
  443. if ok {
  444. contentList = []MediaContent{{
  445. Type: ContentTypeText,
  446. Text: content,
  447. }}
  448. m.parsedContent = contentList
  449. return contentList
  450. }
  451. // 尝试解析为数组
  452. //var arrayContent []map[string]interface{}
  453. arrayContent, ok := m.Content.([]any)
  454. if !ok {
  455. return contentList
  456. }
  457. for _, contentItemAny := range arrayContent {
  458. mediaItem, ok := contentItemAny.(MediaContent)
  459. if ok {
  460. contentList = append(contentList, mediaItem)
  461. continue
  462. }
  463. contentItem, ok := contentItemAny.(map[string]any)
  464. if !ok {
  465. continue
  466. }
  467. contentType, ok := contentItem["type"].(string)
  468. if !ok {
  469. continue
  470. }
  471. switch contentType {
  472. case ContentTypeText:
  473. if text, ok := contentItem["text"].(string); ok {
  474. contentList = append(contentList, MediaContent{
  475. Type: ContentTypeText,
  476. Text: text,
  477. })
  478. }
  479. case ContentTypeImageURL:
  480. imageUrl := contentItem["image_url"]
  481. temp := &MessageImageUrl{
  482. Detail: "high",
  483. }
  484. switch v := imageUrl.(type) {
  485. case string:
  486. temp.Url = v
  487. case map[string]interface{}:
  488. url, ok1 := v["url"].(string)
  489. detail, ok2 := v["detail"].(string)
  490. if ok2 {
  491. temp.Detail = detail
  492. }
  493. if ok1 {
  494. temp.Url = url
  495. }
  496. }
  497. contentList = append(contentList, MediaContent{
  498. Type: ContentTypeImageURL,
  499. ImageUrl: temp,
  500. })
  501. case ContentTypeInputAudio:
  502. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  503. data, ok1 := audioData["data"].(string)
  504. format, ok2 := audioData["format"].(string)
  505. if ok1 && ok2 {
  506. temp := &MessageInputAudio{
  507. Data: data,
  508. Format: format,
  509. }
  510. contentList = append(contentList, MediaContent{
  511. Type: ContentTypeInputAudio,
  512. InputAudio: temp,
  513. })
  514. }
  515. }
  516. case ContentTypeFile:
  517. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  518. fileId, ok3 := fileData["file_id"].(string)
  519. if ok3 {
  520. contentList = append(contentList, MediaContent{
  521. Type: ContentTypeFile,
  522. File: &MessageFile{
  523. FileId: fileId,
  524. },
  525. })
  526. } else {
  527. fileName, ok1 := fileData["filename"].(string)
  528. fileDataStr, ok2 := fileData["file_data"].(string)
  529. if ok1 && ok2 {
  530. contentList = append(contentList, MediaContent{
  531. Type: ContentTypeFile,
  532. File: &MessageFile{
  533. FileName: fileName,
  534. FileData: fileDataStr,
  535. },
  536. })
  537. }
  538. }
  539. }
  540. case ContentTypeVideoUrl:
  541. if videoUrl, ok := contentItem["video_url"].(string); ok {
  542. contentList = append(contentList, MediaContent{
  543. Type: ContentTypeVideoUrl,
  544. VideoUrl: &MessageVideoUrl{
  545. Url: videoUrl,
  546. },
  547. })
  548. }
  549. }
  550. }
  551. if len(contentList) > 0 {
  552. m.parsedContent = contentList
  553. }
  554. return contentList
  555. }
  556. // old code
  557. /*func (m *Message) StringContent() string {
  558. if m.parsedStringContent != nil {
  559. return *m.parsedStringContent
  560. }
  561. var stringContent string
  562. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  563. m.parsedStringContent = &stringContent
  564. return stringContent
  565. }
  566. contentStr := new(strings.Builder)
  567. arrayContent := m.ParseContent()
  568. for _, content := range arrayContent {
  569. if content.Type == ContentTypeText {
  570. contentStr.WriteString(content.Text)
  571. }
  572. }
  573. stringContent = contentStr.String()
  574. m.parsedStringContent = &stringContent
  575. return stringContent
  576. }
  577. func (m *Message) SetNullContent() {
  578. m.Content = nil
  579. m.parsedStringContent = nil
  580. m.parsedContent = nil
  581. }
  582. func (m *Message) SetStringContent(content string) {
  583. jsonContent, _ := json.Marshal(content)
  584. m.Content = jsonContent
  585. m.parsedStringContent = &content
  586. m.parsedContent = nil
  587. }
  588. func (m *Message) SetMediaContent(content []MediaContent) {
  589. jsonContent, _ := json.Marshal(content)
  590. m.Content = jsonContent
  591. m.parsedContent = nil
  592. m.parsedStringContent = nil
  593. }
  594. func (m *Message) IsStringContent() bool {
  595. if m.parsedStringContent != nil {
  596. return true
  597. }
  598. var stringContent string
  599. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  600. m.parsedStringContent = &stringContent
  601. return true
  602. }
  603. return false
  604. }
  605. func (m *Message) ParseContent() []MediaContent {
  606. if m.parsedContent != nil {
  607. return m.parsedContent
  608. }
  609. var contentList []MediaContent
  610. // 先尝试解析为字符串
  611. var stringContent string
  612. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  613. contentList = []MediaContent{{
  614. Type: ContentTypeText,
  615. Text: stringContent,
  616. }}
  617. m.parsedContent = contentList
  618. return contentList
  619. }
  620. // 尝试解析为数组
  621. var arrayContent []map[string]interface{}
  622. if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
  623. for _, contentItem := range arrayContent {
  624. contentType, ok := contentItem["type"].(string)
  625. if !ok {
  626. continue
  627. }
  628. switch contentType {
  629. case ContentTypeText:
  630. if text, ok := contentItem["text"].(string); ok {
  631. contentList = append(contentList, MediaContent{
  632. Type: ContentTypeText,
  633. Text: text,
  634. })
  635. }
  636. case ContentTypeImageURL:
  637. imageUrl := contentItem["image_url"]
  638. temp := &MessageImageUrl{
  639. Detail: "high",
  640. }
  641. switch v := imageUrl.(type) {
  642. case string:
  643. temp.Url = v
  644. case map[string]interface{}:
  645. url, ok1 := v["url"].(string)
  646. detail, ok2 := v["detail"].(string)
  647. if ok2 {
  648. temp.Detail = detail
  649. }
  650. if ok1 {
  651. temp.Url = url
  652. }
  653. }
  654. contentList = append(contentList, MediaContent{
  655. Type: ContentTypeImageURL,
  656. ImageUrl: temp,
  657. })
  658. case ContentTypeInputAudio:
  659. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  660. data, ok1 := audioData["data"].(string)
  661. format, ok2 := audioData["format"].(string)
  662. if ok1 && ok2 {
  663. temp := &MessageInputAudio{
  664. Data: data,
  665. Format: format,
  666. }
  667. contentList = append(contentList, MediaContent{
  668. Type: ContentTypeInputAudio,
  669. InputAudio: temp,
  670. })
  671. }
  672. }
  673. case ContentTypeFile:
  674. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  675. fileId, ok3 := fileData["file_id"].(string)
  676. if ok3 {
  677. contentList = append(contentList, MediaContent{
  678. Type: ContentTypeFile,
  679. File: &MessageFile{
  680. FileId: fileId,
  681. },
  682. })
  683. } else {
  684. fileName, ok1 := fileData["filename"].(string)
  685. fileDataStr, ok2 := fileData["file_data"].(string)
  686. if ok1 && ok2 {
  687. contentList = append(contentList, MediaContent{
  688. Type: ContentTypeFile,
  689. File: &MessageFile{
  690. FileName: fileName,
  691. FileData: fileDataStr,
  692. },
  693. })
  694. }
  695. }
  696. }
  697. case ContentTypeVideoUrl:
  698. if videoUrl, ok := contentItem["video_url"].(string); ok {
  699. contentList = append(contentList, MediaContent{
  700. Type: ContentTypeVideoUrl,
  701. VideoUrl: &MessageVideoUrl{
  702. Url: videoUrl,
  703. },
  704. })
  705. }
  706. }
  707. }
  708. }
  709. if len(contentList) > 0 {
  710. m.parsedContent = contentList
  711. }
  712. return contentList
  713. }*/
  // WebSearchOptions is the value of the `web_search_options` request parameter.
  type WebSearchOptions struct {
  	SearchContextSize string `json:"search_context_size,omitempty"`
  	// UserLocation is kept as raw JSON and passed through undecoded.
  	UserLocation json.RawMessage `json:"user_location,omitempty"`
  }
  718. // https://platform.openai.com/docs/api-reference/responses/create
  719. type OpenAIResponsesRequest struct {
  720. Model string `json:"model"`
  721. Input json.RawMessage `json:"input,omitempty"`
  722. Include json.RawMessage `json:"include,omitempty"`
  723. Instructions json.RawMessage `json:"instructions,omitempty"`
  724. MaxOutputTokens uint `json:"max_output_tokens,omitempty"`
  725. Metadata json.RawMessage `json:"metadata,omitempty"`
  726. ParallelToolCalls json.RawMessage `json:"parallel_tool_calls,omitempty"`
  727. PreviousResponseID string `json:"previous_response_id,omitempty"`
  728. Reasoning *Reasoning `json:"reasoning,omitempty"`
  729. // 服务层级字段,用于指定 API 服务等级。允许透传可能导致实际计费高于预期,默认应过滤
  730. ServiceTier string `json:"service_tier,omitempty"`
  731. Store json.RawMessage `json:"store,omitempty"`
  732. PromptCacheKey json.RawMessage `json:"prompt_cache_key,omitempty"`
  733. PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  734. Stream bool `json:"stream,omitempty"`
  735. Temperature float64 `json:"temperature,omitempty"`
  736. Text json.RawMessage `json:"text,omitempty"`
  737. ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
  738. Tools json.RawMessage `json:"tools,omitempty"` // 需要处理的参数很少,MCP 参数太多不确定,所以用 map
  739. TopP float64 `json:"top_p,omitempty"`
  740. Truncation string `json:"truncation,omitempty"`
  741. User string `json:"user,omitempty"`
  742. MaxToolCalls uint `json:"max_tool_calls,omitempty"`
  743. Prompt json.RawMessage `json:"prompt,omitempty"`
  744. }
  745. func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
  746. var fileMeta = make([]*types.FileMeta, 0)
  747. var texts = make([]string, 0)
  748. if r.Input != nil {
  749. inputs := r.ParseInput()
  750. for _, input := range inputs {
  751. if input.Type == "input_image" {
  752. if input.ImageUrl != "" {
  753. fileMeta = append(fileMeta, &types.FileMeta{
  754. FileType: types.FileTypeImage,
  755. OriginData: input.ImageUrl,
  756. Detail: input.Detail,
  757. })
  758. }
  759. } else if input.Type == "input_file" {
  760. if input.FileUrl != "" {
  761. fileMeta = append(fileMeta, &types.FileMeta{
  762. FileType: types.FileTypeFile,
  763. OriginData: input.FileUrl,
  764. })
  765. }
  766. } else {
  767. texts = append(texts, input.Text)
  768. }
  769. }
  770. }
  771. if len(r.Instructions) > 0 {
  772. texts = append(texts, string(r.Instructions))
  773. }
  774. if len(r.Metadata) > 0 {
  775. texts = append(texts, string(r.Metadata))
  776. }
  777. if len(r.Text) > 0 {
  778. texts = append(texts, string(r.Text))
  779. }
  780. if len(r.ToolChoice) > 0 {
  781. texts = append(texts, string(r.ToolChoice))
  782. }
  783. if len(r.Prompt) > 0 {
  784. texts = append(texts, string(r.Prompt))
  785. }
  786. if len(r.Tools) > 0 {
  787. texts = append(texts, string(r.Tools))
  788. }
  789. return &types.TokenCountMeta{
  790. CombineText: strings.Join(texts, "\n"),
  791. Files: fileMeta,
  792. MaxTokens: int(r.MaxOutputTokens),
  793. }
  794. }
  795. func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
  796. return r.Stream
  797. }
  798. func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
  799. if modelName != "" {
  800. r.Model = modelName
  801. }
  802. }
  803. func (r *OpenAIResponsesRequest) GetToolsMap() []map[string]any {
  804. var toolsMap []map[string]any
  805. if len(r.Tools) > 0 {
  806. _ = common.Unmarshal(r.Tools, &toolsMap)
  807. }
  808. return toolsMap
  809. }
  // Reasoning is the value of the Responses API `reasoning` parameter.
  type Reasoning struct {
  	Effort  string `json:"effort,omitempty"`
  	Summary string `json:"summary,omitempty"`
  }
  // Input is one element of the Responses API `input` array.
  type Input struct {
  	Type string `json:"type,omitempty"`
  	Role string `json:"role,omitempty"`
  	// Content is kept raw because it may be either a JSON string or a JSON
  	// array of parts; OpenAIResponsesRequest.ParseInput handles both.
  	Content json.RawMessage `json:"content,omitempty"`
  }
  // MediaInput is one normalized part of a Responses API input, as produced by
  // OpenAIResponsesRequest.ParseInput.
  type MediaInput struct {
  	Type     string `json:"type"`
  	Text     string `json:"text,omitempty"`
  	FileUrl  string `json:"file_url,omitempty"`
  	ImageUrl string `json:"image_url,omitempty"`
  	Detail   string `json:"detail,omitempty"` // only valid for input_image
  }
  826. // ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
  827. // Reference implementation mirrors Message.ParseContent:
  828. // - input can be a string, treated as an input_text item
  829. // - input can be an array of objects with a `type` field
  830. // supported types: input_text, input_image, input_file
  831. func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
  832. if r.Input == nil {
  833. return nil
  834. }
  835. var mediaInputs []MediaInput
  836. // Try string first
  837. // if str, ok := common.GetJsonType(r.Input); ok {
  838. // inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  839. // return inputs
  840. // }
  841. if common.GetJsonType(r.Input) == "string" {
  842. var str string
  843. _ = common.Unmarshal(r.Input, &str)
  844. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  845. return mediaInputs
  846. }
  847. // Try array of parts
  848. if common.GetJsonType(r.Input) == "array" {
  849. var inputs []Input
  850. _ = common.Unmarshal(r.Input, &inputs)
  851. for _, input := range inputs {
  852. if common.GetJsonType(input.Content) == "string" {
  853. var str string
  854. _ = common.Unmarshal(input.Content, &str)
  855. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  856. }
  857. if common.GetJsonType(input.Content) == "array" {
  858. var array []any
  859. _ = common.Unmarshal(input.Content, &array)
  860. for _, itemAny := range array {
  861. // Already parsed MediaContent
  862. if media, ok := itemAny.(MediaInput); ok {
  863. mediaInputs = append(mediaInputs, media)
  864. continue
  865. }
  866. // Generic map
  867. item, ok := itemAny.(map[string]any)
  868. if !ok {
  869. continue
  870. }
  871. typeVal, ok := item["type"].(string)
  872. if !ok {
  873. continue
  874. }
  875. switch typeVal {
  876. case "input_text":
  877. text, _ := item["text"].(string)
  878. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: text})
  879. case "input_image":
  880. // image_url may be string or object with url field
  881. var imageUrl string
  882. switch v := item["image_url"].(type) {
  883. case string:
  884. imageUrl = v
  885. case map[string]any:
  886. if url, ok := v["url"].(string); ok {
  887. imageUrl = url
  888. }
  889. }
  890. mediaInputs = append(mediaInputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
  891. case "input_file":
  892. // file_url may be string or object with url field
  893. var fileUrl string
  894. switch v := item["file_url"].(type) {
  895. case string:
  896. fileUrl = v
  897. case map[string]any:
  898. if url, ok := v["url"].(string); ok {
  899. fileUrl = url
  900. }
  901. }
  902. mediaInputs = append(mediaInputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
  903. }
  904. }
  905. }
  906. }
  907. }
  908. return mediaInputs
  909. }