openai_request.go 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005
  1. package dto
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "strings"
  6. "github.com/QuantumNous/new-api/common"
  7. "github.com/QuantumNous/new-api/types"
  8. "github.com/gin-gonic/gin"
  9. )
  // ResponseFormat is the value of the `response_format` field of
  // GeneralOpenAIRequest.
  type ResponseFormat struct {
  	// Type is serialized as "type" and omitted when empty.
  	Type string `json:"type,omitempty"`
  	// JsonSchema is kept as raw, undecoded JSON and serialized as "json_schema".
  	JsonSchema json.RawMessage `json:"json_schema,omitempty"`
  }
  // FormatJsonSchema describes a named JSON schema.
  // NOTE(review): presumably the decoded form of ResponseFormat.JsonSchema —
  // confirm against callers.
  type FormatJsonSchema struct {
  	Description string `json:"description,omitempty"`
  	// Name is always serialized (no omitempty).
  	Name string `json:"name"`
  	// Schema holds the schema body in whatever shape it was decoded to.
  	Schema any `json:"schema,omitempty"`
  	// Strict is kept as raw JSON rather than decoded into a Go type.
  	Strict json.RawMessage `json:"strict,omitempty"`
  }
  // GeneralOpenAIRequest is the request body shared by the OpenAI-style
  // endpoints handled by this package. Besides the common chat/completion
  // parameters it carries a number of vendor-specific extensions, grouped by
  // the comments below. Fields typed json.RawMessage are kept undecoded.
  type GeneralOpenAIRequest struct {
  	Model               string            `json:"model,omitempty"`
  	Messages            []Message         `json:"messages,omitempty"`
  	Prompt              any               `json:"prompt,omitempty"`
  	Prefix              any               `json:"prefix,omitempty"`
  	Suffix              any               `json:"suffix,omitempty"`
  	Stream              bool              `json:"stream,omitempty"`
  	StreamOptions       *StreamOptions    `json:"stream_options,omitempty"`
  	MaxTokens           uint              `json:"max_tokens,omitempty"`
  	MaxCompletionTokens uint              `json:"max_completion_tokens,omitempty"`
  	ReasoningEffort     string            `json:"reasoning_effort,omitempty"`
  	Verbosity           json.RawMessage   `json:"verbosity,omitempty"` // gpt-5
  	Temperature         *float64          `json:"temperature,omitempty"`
  	TopP                float64           `json:"top_p,omitempty"`
  	TopK                int               `json:"top_k,omitempty"`
  	Stop                any               `json:"stop,omitempty"`
  	N                   int               `json:"n,omitempty"`
  	Input               any               `json:"input,omitempty"`
  	Instruction         string            `json:"instruction,omitempty"`
  	Size                string            `json:"size,omitempty"`
  	Functions           json.RawMessage   `json:"functions,omitempty"`
  	FrequencyPenalty    float64           `json:"frequency_penalty,omitempty"`
  	PresencePenalty     float64           `json:"presence_penalty,omitempty"`
  	ResponseFormat      *ResponseFormat   `json:"response_format,omitempty"`
  	EncodingFormat      json.RawMessage   `json:"encoding_format,omitempty"`
  	Seed                float64           `json:"seed,omitempty"`
  	ParallelTooCalls    *bool             `json:"parallel_tool_calls,omitempty"`
  	Tools               []ToolCallRequest `json:"tools,omitempty"`
  	ToolChoice          any               `json:"tool_choice,omitempty"`
  	User                string            `json:"user,omitempty"`
  	LogProbs            bool              `json:"logprobs,omitempty"`
  	TopLogProbs         int               `json:"top_logprobs,omitempty"`
  	Dimensions          int               `json:"dimensions,omitempty"`
  	Modalities          json.RawMessage   `json:"modalities,omitempty"`
  	Audio               json.RawMessage   `json:"audio,omitempty"`
  	// Safety identifier used to help OpenAI detect application users who may
  	// be violating its usage policies.
  	// Note: this field sends user-identifying information to OpenAI; it is
  	// filtered out by default to protect user privacy.
  	SafetyIdentifier string `json:"safety_identifier,omitempty"`
  	// Whether or not to store the output of this chat completion request for use in our model distillation or evals products.
  	// Whether to store this request's data for OpenAI to use in evaluating and
  	// improving its products.
  	// Note: this field is filtered out by default to protect user privacy, but
  	// filtering it may prevent Codex from working properly.
  	Store json.RawMessage `json:"store,omitempty"`
  	// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field
  	PromptCacheKey       string          `json:"prompt_cache_key,omitempty"`
  	PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  	LogitBias            json.RawMessage `json:"logit_bias,omitempty"`
  	Metadata             json.RawMessage `json:"metadata,omitempty"`
  	Prediction           json.RawMessage `json:"prediction,omitempty"`
  	// gemini
  	ExtraBody json.RawMessage `json:"extra_body,omitempty"`
  	//xai
  	SearchParameters json.RawMessage `json:"search_parameters,omitempty"`
  	// claude
  	WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
  	// OpenRouter Params
  	Usage     json.RawMessage `json:"usage,omitempty"`
  	Reasoning json.RawMessage `json:"reasoning,omitempty"`
  	// Ali Qwen Params
  	VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
  	EnableThinking         any             `json:"enable_thinking,omitempty"`
  	ChatTemplateKwargs     json.RawMessage `json:"chat_template_kwargs,omitempty"`
  	// ollama Params
  	Think json.RawMessage `json:"think,omitempty"`
  	// baidu v2
  	WebSearch json.RawMessage `json:"web_search,omitempty"`
  	// doubao,zhipu_v4
  	THINKING json.RawMessage `json:"thinking,omitempty"`
  	// pplx Params
  	SearchDomainFilter     json.RawMessage `json:"search_domain_filter,omitempty"`
  	SearchRecencyFilter    string          `json:"search_recency_filter,omitempty"`
  	ReturnImages           bool            `json:"return_images,omitempty"`
  	ReturnRelatedQuestions bool            `json:"return_related_questions,omitempty"`
  	SearchMode             string          `json:"search_mode,omitempty"`
  }
  94. func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
  95. var tokenCountMeta types.TokenCountMeta
  96. var texts = make([]string, 0)
  97. var fileMeta = make([]*types.FileMeta, 0)
  98. if r.Prompt != nil {
  99. switch v := r.Prompt.(type) {
  100. case string:
  101. texts = append(texts, v)
  102. case []any:
  103. for _, item := range v {
  104. if str, ok := item.(string); ok {
  105. texts = append(texts, str)
  106. }
  107. }
  108. default:
  109. texts = append(texts, fmt.Sprintf("%v", r.Prompt))
  110. }
  111. }
  112. if r.Input != nil {
  113. inputs := r.ParseInput()
  114. texts = append(texts, inputs...)
  115. }
  116. if r.MaxCompletionTokens > r.MaxTokens {
  117. tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
  118. } else {
  119. tokenCountMeta.MaxTokens = int(r.MaxTokens)
  120. }
  121. for _, message := range r.Messages {
  122. tokenCountMeta.MessagesCount++
  123. texts = append(texts, message.Role)
  124. if message.Content != nil {
  125. if message.Name != nil {
  126. tokenCountMeta.NameCount++
  127. texts = append(texts, *message.Name)
  128. }
  129. arrayContent := message.ParseContent()
  130. for _, m := range arrayContent {
  131. if m.Type == ContentTypeImageURL {
  132. imageUrl := m.GetImageMedia()
  133. if imageUrl != nil {
  134. if imageUrl.Url != "" {
  135. meta := &types.FileMeta{
  136. FileType: types.FileTypeImage,
  137. }
  138. meta.OriginData = imageUrl.Url
  139. meta.Detail = imageUrl.Detail
  140. fileMeta = append(fileMeta, meta)
  141. }
  142. }
  143. } else if m.Type == ContentTypeInputAudio {
  144. inputAudio := m.GetInputAudio()
  145. if inputAudio != nil {
  146. meta := &types.FileMeta{
  147. FileType: types.FileTypeAudio,
  148. }
  149. meta.OriginData = inputAudio.Data
  150. fileMeta = append(fileMeta, meta)
  151. }
  152. } else if m.Type == ContentTypeFile {
  153. file := m.GetFile()
  154. if file != nil {
  155. meta := &types.FileMeta{
  156. FileType: types.FileTypeFile,
  157. }
  158. meta.OriginData = file.FileData
  159. fileMeta = append(fileMeta, meta)
  160. }
  161. } else if m.Type == ContentTypeVideoUrl {
  162. videoUrl := m.GetVideoUrl()
  163. if videoUrl != nil && videoUrl.Url != "" {
  164. meta := &types.FileMeta{
  165. FileType: types.FileTypeVideo,
  166. }
  167. meta.OriginData = videoUrl.Url
  168. fileMeta = append(fileMeta, meta)
  169. }
  170. } else {
  171. texts = append(texts, m.Text)
  172. }
  173. }
  174. }
  175. }
  176. if r.Tools != nil {
  177. openaiTools := r.Tools
  178. for _, tool := range openaiTools {
  179. tokenCountMeta.ToolsCount++
  180. texts = append(texts, tool.Function.Name)
  181. if tool.Function.Description != "" {
  182. texts = append(texts, tool.Function.Description)
  183. }
  184. if tool.Function.Parameters != nil {
  185. texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
  186. }
  187. }
  188. //toolTokens := CountTokenInput(countStr, request.Model)
  189. //tkm += 8
  190. //tkm += toolTokens
  191. }
  192. tokenCountMeta.CombineText = strings.Join(texts, "\n")
  193. tokenCountMeta.Files = fileMeta
  194. return &tokenCountMeta
  195. }
  // IsStream reports whether the request has its Stream flag set.
  // The gin context argument is not consulted by this implementation.
  func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
  	return r.Stream
  }
  199. func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
  200. if modelName != "" {
  201. r.Model = modelName
  202. }
  203. }
  204. func (r *GeneralOpenAIRequest) ToMap() map[string]any {
  205. result := make(map[string]any)
  206. data, _ := common.Marshal(r)
  207. _ = common.Unmarshal(data, &result)
  208. return result
  209. }
  210. func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
  211. if strings.HasPrefix(r.Model, "o") {
  212. if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
  213. return "developer"
  214. }
  215. } else if strings.HasPrefix(r.Model, "gpt-5") {
  216. return "developer"
  217. }
  218. return "system"
  219. }
  // CustomType is the literal "custom".
  // NOTE(review): presumably the ToolCallRequest.Type value that marks a custom
  // (non-function) tool — confirm against callers.
  const CustomType = "custom"
  // ToolCallRequest is one entry of a request's `tools` list and also the
  // element type produced by Message.ParseToolCalls.
  type ToolCallRequest struct {
  	ID string `json:"id,omitempty"`
  	// Type is always serialized (no omitempty).
  	Type string `json:"type"`
  	// Function is a struct value, so omitempty has no effect on it with
  	// encoding/json; it is always emitted.
  	Function FunctionRequest `json:"function,omitempty"`
  	// Custom is kept as raw, undecoded JSON.
  	Custom json.RawMessage `json:"custom,omitempty"`
  }
  // FunctionRequest is the `function` object of a ToolCallRequest.
  type FunctionRequest struct {
  	Description string `json:"description,omitempty"`
  	// Name is always serialized (no omitempty).
  	Name string `json:"name"`
  	// Parameters holds the parameter schema in whatever shape it was decoded to.
  	Parameters any `json:"parameters,omitempty"`
  	// Arguments is a plain string.
  	// NOTE(review): presumably the JSON-encoded call arguments — confirm.
  	Arguments string `json:"arguments,omitempty"`
  }
  // StreamOptions is the value of GeneralOpenAIRequest's `stream_options` field.
  type StreamOptions struct {
  	// IncludeUsage is serialized as "include_usage" and omitted when false.
  	IncludeUsage bool `json:"include_usage,omitempty"`
  }
  236. func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
  237. if r.MaxCompletionTokens != 0 {
  238. return r.MaxCompletionTokens
  239. }
  240. return r.MaxTokens
  241. }
  242. func (r *GeneralOpenAIRequest) ParseInput() []string {
  243. if r.Input == nil {
  244. return nil
  245. }
  246. var input []string
  247. switch r.Input.(type) {
  248. case string:
  249. input = []string{r.Input.(string)}
  250. case []any:
  251. input = make([]string, 0, len(r.Input.([]any)))
  252. for _, item := range r.Input.([]any) {
  253. if str, ok := item.(string); ok {
  254. input = append(input, str)
  255. }
  256. }
  257. }
  258. return input
  259. }
  // Message is one entry of a request's `messages` list.
  type Message struct {
  	Role string `json:"role"`
  	// Content is either a plain string or a list of content parts; see
  	// StringContent and ParseContent for the shapes that are understood.
  	Content          any             `json:"content"`
  	Name             *string         `json:"name,omitempty"`
  	Prefix           *bool           `json:"prefix,omitempty"`
  	ReasoningContent string          `json:"reasoning_content,omitempty"`
  	Reasoning        string          `json:"reasoning,omitempty"`
  	// ToolCalls is kept as raw JSON; decode it with ParseToolCalls.
  	ToolCalls  json.RawMessage `json:"tool_calls,omitempty"`
  	ToolCallId string          `json:"tool_call_id,omitempty"`
  	// parsedContent caches the result of ParseContent; it is unexported and
  	// therefore never serialized.
  	parsedContent []MediaContent
  	//parsedStringContent *string
  }
  // MediaContent is one part of a multi-part message content. Type selects
  // which of the payload fields is meaningful (see the ContentType* constants).
  //
  // The payload fields are typed `any` because they may hold either the typed
  // pointer built by Message.ParseContent (e.g. *MessageImageUrl) or a generic
  // map[string]any; use the Get* accessors to read them uniformly.
  type MediaContent struct {
  	Type       string `json:"type"`
  	Text       string `json:"text,omitempty"`
  	ImageUrl   any    `json:"image_url,omitempty"`
  	InputAudio any    `json:"input_audio,omitempty"`
  	File       any    `json:"file,omitempty"`
  	VideoUrl   any    `json:"video_url,omitempty"`
  	// OpenRouter Params
  	CacheControl json.RawMessage `json:"cache_control,omitempty"`
  }
  282. func (m *MediaContent) GetImageMedia() *MessageImageUrl {
  283. if m.ImageUrl != nil {
  284. if _, ok := m.ImageUrl.(*MessageImageUrl); ok {
  285. return m.ImageUrl.(*MessageImageUrl)
  286. }
  287. if itemMap, ok := m.ImageUrl.(map[string]any); ok {
  288. out := &MessageImageUrl{
  289. Url: common.Interface2String(itemMap["url"]),
  290. Detail: common.Interface2String(itemMap["detail"]),
  291. MimeType: common.Interface2String(itemMap["mime_type"]),
  292. }
  293. return out
  294. }
  295. }
  296. return nil
  297. }
  298. func (m *MediaContent) GetInputAudio() *MessageInputAudio {
  299. if m.InputAudio != nil {
  300. if _, ok := m.InputAudio.(*MessageInputAudio); ok {
  301. return m.InputAudio.(*MessageInputAudio)
  302. }
  303. if itemMap, ok := m.InputAudio.(map[string]any); ok {
  304. out := &MessageInputAudio{
  305. Data: common.Interface2String(itemMap["data"]),
  306. Format: common.Interface2String(itemMap["format"]),
  307. }
  308. return out
  309. }
  310. }
  311. return nil
  312. }
  313. func (m *MediaContent) GetFile() *MessageFile {
  314. if m.File != nil {
  315. if _, ok := m.File.(*MessageFile); ok {
  316. return m.File.(*MessageFile)
  317. }
  318. if itemMap, ok := m.File.(map[string]any); ok {
  319. out := &MessageFile{
  320. FileName: common.Interface2String(itemMap["file_name"]),
  321. FileData: common.Interface2String(itemMap["file_data"]),
  322. FileId: common.Interface2String(itemMap["file_id"]),
  323. }
  324. return out
  325. }
  326. }
  327. return nil
  328. }
  329. func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
  330. if m.VideoUrl != nil {
  331. if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
  332. return m.VideoUrl.(*MessageVideoUrl)
  333. }
  334. if itemMap, ok := m.VideoUrl.(map[string]any); ok {
  335. out := &MessageVideoUrl{
  336. Url: common.Interface2String(itemMap["url"]),
  337. }
  338. return out
  339. }
  340. }
  341. return nil
  342. }
  // MessageImageUrl is the typed payload of an image content part.
  type MessageImageUrl struct {
  	Url    string `json:"url"`
  	Detail string `json:"detail"`
  	// MimeType has no JSON tag, so encoding/json uses the Go field name
  	// ("MimeType") as its key.
  	MimeType string
  }
  // IsRemoteImage reports whether Url starts with "http" (which covers both
  // http:// and https:// URLs). The check is a case-sensitive prefix match.
  func (m *MessageImageUrl) IsRemoteImage() bool {
  	return strings.HasPrefix(m.Url, "http")
  }
  // MessageInputAudio is the typed payload of an audio content part.
  type MessageInputAudio struct {
  	Data   string `json:"data"` //base64
  	Format string `json:"format"`
  }
  // MessageFile is the typed payload of a file content part. Message.ParseContent
  // fills either FileId alone, or FileName together with FileData.
  type MessageFile struct {
  	FileName string `json:"filename,omitempty"`
  	FileData string `json:"file_data,omitempty"`
  	FileId   string `json:"file_id,omitempty"`
  }
  // MessageVideoUrl is the typed payload of a video content part.
  type MessageVideoUrl struct {
  	Url string `json:"url"`
  }
  // Values of MediaContent.Type recognized by Message.ParseContent.
  const (
  	ContentTypeText       = "text"
  	ContentTypeImageURL   = "image_url"
  	ContentTypeInputAudio = "input_audio"
  	ContentTypeFile       = "file"
  	ContentTypeVideoUrl   = "video_url" // Alibaba Bailian video recognition
  	//ContentTypeAudioUrl = "audio_url"
  )
  371. func (m *Message) GetPrefix() bool {
  372. if m.Prefix == nil {
  373. return false
  374. }
  375. return *m.Prefix
  376. }
  // SetPrefix sets the optional Prefix flag to prefix. The message stores a
  // pointer to its own copy of the value, so the flag is considered set even
  // when prefix is false.
  func (m *Message) SetPrefix(prefix bool) {
  	m.Prefix = &prefix
  }
  380. func (m *Message) ParseToolCalls() []ToolCallRequest {
  381. if m.ToolCalls == nil {
  382. return nil
  383. }
  384. var toolCalls []ToolCallRequest
  385. if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
  386. return toolCalls
  387. }
  388. return toolCalls
  389. }
  390. func (m *Message) SetToolCalls(toolCalls any) {
  391. toolCallsJson, _ := json.Marshal(toolCalls)
  392. m.ToolCalls = toolCallsJson
  393. }
  394. func (m *Message) StringContent() string {
  395. switch m.Content.(type) {
  396. case string:
  397. return m.Content.(string)
  398. case []any:
  399. var contentStr string
  400. for _, contentItem := range m.Content.([]any) {
  401. contentMap, ok := contentItem.(map[string]any)
  402. if !ok {
  403. continue
  404. }
  405. if contentMap["type"] == ContentTypeText {
  406. if subStr, ok := contentMap["text"].(string); ok {
  407. contentStr += subStr
  408. }
  409. }
  410. }
  411. return contentStr
  412. }
  413. return ""
  414. }
  // SetNullContent clears the message content and drops the cached parse
  // result so that ParseContent does not return stale parts.
  func (m *Message) SetNullContent() {
  	m.Content = nil
  	m.parsedContent = nil
  }
  // SetStringContent replaces the message content with a plain string and
  // drops the cached parse result so that ParseContent re-parses it.
  func (m *Message) SetStringContent(content string) {
  	m.Content = content
  	m.parsedContent = nil
  }
  // SetMediaContent replaces the message content with the given typed parts.
  // The same slice is also stored as the parse cache, so a later ParseContent
  // call returns it directly (when it is non-empty).
  func (m *Message) SetMediaContent(content []MediaContent) {
  	m.Content = content
  	m.parsedContent = content
  }
  427. func (m *Message) IsStringContent() bool {
  428. _, ok := m.Content.(string)
  429. if ok {
  430. return true
  431. }
  432. return false
  433. }
  434. func (m *Message) ParseContent() []MediaContent {
  435. if m.Content == nil {
  436. return nil
  437. }
  438. if len(m.parsedContent) > 0 {
  439. return m.parsedContent
  440. }
  441. var contentList []MediaContent
  442. // 先尝试解析为字符串
  443. content, ok := m.Content.(string)
  444. if ok {
  445. contentList = []MediaContent{{
  446. Type: ContentTypeText,
  447. Text: content,
  448. }}
  449. m.parsedContent = contentList
  450. return contentList
  451. }
  452. // 尝试解析为数组
  453. //var arrayContent []map[string]interface{}
  454. arrayContent, ok := m.Content.([]any)
  455. if !ok {
  456. return contentList
  457. }
  458. for _, contentItemAny := range arrayContent {
  459. mediaItem, ok := contentItemAny.(MediaContent)
  460. if ok {
  461. contentList = append(contentList, mediaItem)
  462. continue
  463. }
  464. contentItem, ok := contentItemAny.(map[string]any)
  465. if !ok {
  466. continue
  467. }
  468. contentType, ok := contentItem["type"].(string)
  469. if !ok {
  470. continue
  471. }
  472. switch contentType {
  473. case ContentTypeText:
  474. if text, ok := contentItem["text"].(string); ok {
  475. contentList = append(contentList, MediaContent{
  476. Type: ContentTypeText,
  477. Text: text,
  478. })
  479. }
  480. case ContentTypeImageURL:
  481. imageUrl := contentItem["image_url"]
  482. temp := &MessageImageUrl{
  483. Detail: "high",
  484. }
  485. switch v := imageUrl.(type) {
  486. case string:
  487. temp.Url = v
  488. case map[string]interface{}:
  489. url, ok1 := v["url"].(string)
  490. detail, ok2 := v["detail"].(string)
  491. if ok2 {
  492. temp.Detail = detail
  493. }
  494. if ok1 {
  495. temp.Url = url
  496. }
  497. }
  498. contentList = append(contentList, MediaContent{
  499. Type: ContentTypeImageURL,
  500. ImageUrl: temp,
  501. })
  502. case ContentTypeInputAudio:
  503. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  504. data, ok1 := audioData["data"].(string)
  505. format, ok2 := audioData["format"].(string)
  506. if ok1 && ok2 {
  507. temp := &MessageInputAudio{
  508. Data: data,
  509. Format: format,
  510. }
  511. contentList = append(contentList, MediaContent{
  512. Type: ContentTypeInputAudio,
  513. InputAudio: temp,
  514. })
  515. }
  516. }
  517. case ContentTypeFile:
  518. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  519. fileId, ok3 := fileData["file_id"].(string)
  520. if ok3 {
  521. contentList = append(contentList, MediaContent{
  522. Type: ContentTypeFile,
  523. File: &MessageFile{
  524. FileId: fileId,
  525. },
  526. })
  527. } else {
  528. fileName, ok1 := fileData["filename"].(string)
  529. fileDataStr, ok2 := fileData["file_data"].(string)
  530. if ok1 && ok2 {
  531. contentList = append(contentList, MediaContent{
  532. Type: ContentTypeFile,
  533. File: &MessageFile{
  534. FileName: fileName,
  535. FileData: fileDataStr,
  536. },
  537. })
  538. }
  539. }
  540. }
  541. case ContentTypeVideoUrl:
  542. if videoUrl, ok := contentItem["video_url"].(string); ok {
  543. contentList = append(contentList, MediaContent{
  544. Type: ContentTypeVideoUrl,
  545. VideoUrl: &MessageVideoUrl{
  546. Url: videoUrl,
  547. },
  548. })
  549. }
  550. }
  551. }
  552. if len(contentList) > 0 {
  553. m.parsedContent = contentList
  554. }
  555. return contentList
  556. }
  557. // old code
  558. /*func (m *Message) StringContent() string {
  559. if m.parsedStringContent != nil {
  560. return *m.parsedStringContent
  561. }
  562. var stringContent string
  563. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  564. m.parsedStringContent = &stringContent
  565. return stringContent
  566. }
  567. contentStr := new(strings.Builder)
  568. arrayContent := m.ParseContent()
  569. for _, content := range arrayContent {
  570. if content.Type == ContentTypeText {
  571. contentStr.WriteString(content.Text)
  572. }
  573. }
  574. stringContent = contentStr.String()
  575. m.parsedStringContent = &stringContent
  576. return stringContent
  577. }
  578. func (m *Message) SetNullContent() {
  579. m.Content = nil
  580. m.parsedStringContent = nil
  581. m.parsedContent = nil
  582. }
  583. func (m *Message) SetStringContent(content string) {
  584. jsonContent, _ := json.Marshal(content)
  585. m.Content = jsonContent
  586. m.parsedStringContent = &content
  587. m.parsedContent = nil
  588. }
  589. func (m *Message) SetMediaContent(content []MediaContent) {
  590. jsonContent, _ := json.Marshal(content)
  591. m.Content = jsonContent
  592. m.parsedContent = nil
  593. m.parsedStringContent = nil
  594. }
  595. func (m *Message) IsStringContent() bool {
  596. if m.parsedStringContent != nil {
  597. return true
  598. }
  599. var stringContent string
  600. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  601. m.parsedStringContent = &stringContent
  602. return true
  603. }
  604. return false
  605. }
  606. func (m *Message) ParseContent() []MediaContent {
  607. if m.parsedContent != nil {
  608. return m.parsedContent
  609. }
  610. var contentList []MediaContent
  611. // 先尝试解析为字符串
  612. var stringContent string
  613. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  614. contentList = []MediaContent{{
  615. Type: ContentTypeText,
  616. Text: stringContent,
  617. }}
  618. m.parsedContent = contentList
  619. return contentList
  620. }
  621. // 尝试解析为数组
  622. var arrayContent []map[string]interface{}
  623. if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
  624. for _, contentItem := range arrayContent {
  625. contentType, ok := contentItem["type"].(string)
  626. if !ok {
  627. continue
  628. }
  629. switch contentType {
  630. case ContentTypeText:
  631. if text, ok := contentItem["text"].(string); ok {
  632. contentList = append(contentList, MediaContent{
  633. Type: ContentTypeText,
  634. Text: text,
  635. })
  636. }
  637. case ContentTypeImageURL:
  638. imageUrl := contentItem["image_url"]
  639. temp := &MessageImageUrl{
  640. Detail: "high",
  641. }
  642. switch v := imageUrl.(type) {
  643. case string:
  644. temp.Url = v
  645. case map[string]interface{}:
  646. url, ok1 := v["url"].(string)
  647. detail, ok2 := v["detail"].(string)
  648. if ok2 {
  649. temp.Detail = detail
  650. }
  651. if ok1 {
  652. temp.Url = url
  653. }
  654. }
  655. contentList = append(contentList, MediaContent{
  656. Type: ContentTypeImageURL,
  657. ImageUrl: temp,
  658. })
  659. case ContentTypeInputAudio:
  660. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  661. data, ok1 := audioData["data"].(string)
  662. format, ok2 := audioData["format"].(string)
  663. if ok1 && ok2 {
  664. temp := &MessageInputAudio{
  665. Data: data,
  666. Format: format,
  667. }
  668. contentList = append(contentList, MediaContent{
  669. Type: ContentTypeInputAudio,
  670. InputAudio: temp,
  671. })
  672. }
  673. }
  674. case ContentTypeFile:
  675. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  676. fileId, ok3 := fileData["file_id"].(string)
  677. if ok3 {
  678. contentList = append(contentList, MediaContent{
  679. Type: ContentTypeFile,
  680. File: &MessageFile{
  681. FileId: fileId,
  682. },
  683. })
  684. } else {
  685. fileName, ok1 := fileData["filename"].(string)
  686. fileDataStr, ok2 := fileData["file_data"].(string)
  687. if ok1 && ok2 {
  688. contentList = append(contentList, MediaContent{
  689. Type: ContentTypeFile,
  690. File: &MessageFile{
  691. FileName: fileName,
  692. FileData: fileDataStr,
  693. },
  694. })
  695. }
  696. }
  697. }
  698. case ContentTypeVideoUrl:
  699. if videoUrl, ok := contentItem["video_url"].(string); ok {
  700. contentList = append(contentList, MediaContent{
  701. Type: ContentTypeVideoUrl,
  702. VideoUrl: &MessageVideoUrl{
  703. Url: videoUrl,
  704. },
  705. })
  706. }
  707. }
  708. }
  709. }
  710. if len(contentList) > 0 {
  711. m.parsedContent = contentList
  712. }
  713. return contentList
  714. }*/
  // WebSearchOptions is the value of GeneralOpenAIRequest's
  // `web_search_options` field.
  type WebSearchOptions struct {
  	SearchContextSize string `json:"search_context_size,omitempty"`
  	// UserLocation is kept as raw, undecoded JSON.
  	UserLocation json.RawMessage `json:"user_location,omitempty"`
  }
  // OpenAIResponsesRequest is the request body of the OpenAI Responses API.
  // Fields typed json.RawMessage are kept undecoded.
  //
  // https://platform.openai.com/docs/api-reference/responses/create
  type OpenAIResponsesRequest struct {
  	Model              string          `json:"model"`
  	Input              json.RawMessage `json:"input,omitempty"`
  	Include            json.RawMessage `json:"include,omitempty"`
  	Instructions       json.RawMessage `json:"instructions,omitempty"`
  	MaxOutputTokens    uint            `json:"max_output_tokens,omitempty"`
  	Metadata           json.RawMessage `json:"metadata,omitempty"`
  	ParallelToolCalls  json.RawMessage `json:"parallel_tool_calls,omitempty"`
  	PreviousResponseID string          `json:"previous_response_id,omitempty"`
  	Reasoning          *Reasoning      `json:"reasoning,omitempty"`
  	// Service tier field, used to select the API service level. Passing it
  	// through may cause actual billing to be higher than expected, so it
  	// should be filtered by default.
  	ServiceTier          string          `json:"service_tier,omitempty"`
  	Store                json.RawMessage `json:"store,omitempty"`
  	PromptCacheKey       json.RawMessage `json:"prompt_cache_key,omitempty"`
  	PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  	Stream               bool            `json:"stream,omitempty"`
  	Temperature          float64         `json:"temperature,omitempty"`
  	Text                 json.RawMessage `json:"text,omitempty"`
  	ToolChoice           json.RawMessage `json:"tool_choice,omitempty"`
  	Tools                json.RawMessage `json:"tools,omitempty"` // few parameters need handling and MCP has too many uncertain ones, so tools are decoded into maps (see GetToolsMap)
  	TopP                 float64         `json:"top_p,omitempty"`
  	Truncation           string          `json:"truncation,omitempty"`
  	User                 string          `json:"user,omitempty"`
  	MaxToolCalls         uint            `json:"max_tool_calls,omitempty"`
  	Prompt               json.RawMessage `json:"prompt,omitempty"`
  }
  746. func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
  747. var fileMeta = make([]*types.FileMeta, 0)
  748. var texts = make([]string, 0)
  749. if r.Input != nil {
  750. inputs := r.ParseInput()
  751. for _, input := range inputs {
  752. if input.Type == "input_image" {
  753. if input.ImageUrl != "" {
  754. fileMeta = append(fileMeta, &types.FileMeta{
  755. FileType: types.FileTypeImage,
  756. OriginData: input.ImageUrl,
  757. Detail: input.Detail,
  758. })
  759. }
  760. } else if input.Type == "input_file" {
  761. if input.FileUrl != "" {
  762. fileMeta = append(fileMeta, &types.FileMeta{
  763. FileType: types.FileTypeFile,
  764. OriginData: input.FileUrl,
  765. })
  766. }
  767. } else {
  768. texts = append(texts, input.Text)
  769. }
  770. }
  771. }
  772. if len(r.Instructions) > 0 {
  773. texts = append(texts, string(r.Instructions))
  774. }
  775. if len(r.Metadata) > 0 {
  776. texts = append(texts, string(r.Metadata))
  777. }
  778. if len(r.Text) > 0 {
  779. texts = append(texts, string(r.Text))
  780. }
  781. if len(r.ToolChoice) > 0 {
  782. texts = append(texts, string(r.ToolChoice))
  783. }
  784. if len(r.Prompt) > 0 {
  785. texts = append(texts, string(r.Prompt))
  786. }
  787. if len(r.Tools) > 0 {
  788. texts = append(texts, string(r.Tools))
  789. }
  790. return &types.TokenCountMeta{
  791. CombineText: strings.Join(texts, "\n"),
  792. Files: fileMeta,
  793. MaxTokens: int(r.MaxOutputTokens),
  794. }
  795. }
  // IsStream reports whether the request has its Stream flag set.
  // The gin context argument is not consulted by this implementation.
  func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
  	return r.Stream
  }
  799. func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
  800. if modelName != "" {
  801. r.Model = modelName
  802. }
  803. }
  804. func (r *OpenAIResponsesRequest) GetToolsMap() []map[string]any {
  805. var toolsMap []map[string]any
  806. if len(r.Tools) > 0 {
  807. _ = common.Unmarshal(r.Tools, &toolsMap)
  808. }
  809. return toolsMap
  810. }
  // Reasoning is the value of OpenAIResponsesRequest's `reasoning` field.
  type Reasoning struct {
  	Effort  string `json:"effort,omitempty"`
  	Summary string `json:"summary,omitempty"`
  }
  // Input is one element of the Responses API `input` array as decoded by
  // OpenAIResponsesRequest.ParseInput.
  type Input struct {
  	Type string `json:"type,omitempty"`
  	Role string `json:"role,omitempty"`
  	// Content is kept raw because it may be either a JSON string or a JSON
  	// array of parts; ParseInput inspects its JSON type before decoding.
  	Content json.RawMessage `json:"content,omitempty"`
  }
  // MediaInput is one normalized input part produced by
  // OpenAIResponsesRequest.ParseInput. Type is one of "input_text",
  // "input_image" or "input_file" and selects which other field is meaningful.
  type MediaInput struct {
  	Type     string `json:"type"`
  	Text     string `json:"text,omitempty"`
  	FileUrl  string `json:"file_url,omitempty"`
  	ImageUrl string `json:"image_url,omitempty"`
  	Detail   string `json:"detail,omitempty"` // only meaningful for input_image
  }
  827. // ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
  828. // Reference implementation mirrors Message.ParseContent:
  829. // - input can be a string, treated as an input_text item
  830. // - input can be an array of objects with a `type` field
  831. // supported types: input_text, input_image, input_file
  832. func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
  833. if r.Input == nil {
  834. return nil
  835. }
  836. var mediaInputs []MediaInput
  837. // Try string first
  838. // if str, ok := common.GetJsonType(r.Input); ok {
  839. // inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  840. // return inputs
  841. // }
  842. if common.GetJsonType(r.Input) == "string" {
  843. var str string
  844. _ = common.Unmarshal(r.Input, &str)
  845. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  846. return mediaInputs
  847. }
  848. // Try array of parts
  849. if common.GetJsonType(r.Input) == "array" {
  850. var inputs []Input
  851. _ = common.Unmarshal(r.Input, &inputs)
  852. for _, input := range inputs {
  853. if common.GetJsonType(input.Content) == "string" {
  854. var str string
  855. _ = common.Unmarshal(input.Content, &str)
  856. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  857. }
  858. if common.GetJsonType(input.Content) == "array" {
  859. var array []any
  860. _ = common.Unmarshal(input.Content, &array)
  861. for _, itemAny := range array {
  862. // Already parsed MediaContent
  863. if media, ok := itemAny.(MediaInput); ok {
  864. mediaInputs = append(mediaInputs, media)
  865. continue
  866. }
  867. // Generic map
  868. item, ok := itemAny.(map[string]any)
  869. if !ok {
  870. continue
  871. }
  872. typeVal, ok := item["type"].(string)
  873. if !ok {
  874. continue
  875. }
  876. switch typeVal {
  877. case "input_text":
  878. text, _ := item["text"].(string)
  879. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: text})
  880. case "input_image":
  881. // image_url may be string or object with url field
  882. var imageUrl string
  883. switch v := item["image_url"].(type) {
  884. case string:
  885. imageUrl = v
  886. case map[string]any:
  887. if url, ok := v["url"].(string); ok {
  888. imageUrl = url
  889. }
  890. }
  891. mediaInputs = append(mediaInputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
  892. case "input_file":
  893. // file_url may be string or object with url field
  894. var fileUrl string
  895. switch v := item["file_url"].(type) {
  896. case string:
  897. fileUrl = v
  898. case map[string]any:
  899. if url, ok := v["url"].(string); ok {
  900. fileUrl = url
  901. }
  902. }
  903. mediaInputs = append(mediaInputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
  904. }
  905. }
  906. }
  907. }
  908. }
  909. return mediaInputs
  910. }