2
0
Эх сурвалжийг харах

feat: gemini image support edit

feitianbubu 1 сар өмнө
parent
commit
cb5a37abed

+ 2 - 0
relay/channel/api_request.go

@@ -27,6 +27,8 @@ import (
 func SetupApiRequestHeader(info *common.RelayInfo, c *gin.Context, req *http.Header) {
 	if info.RelayMode == constant.RelayModeAudioTranscription || info.RelayMode == constant.RelayModeAudioTranslation {
 		// multipart/form-data
+	} else if info.RelayMode == constant.RelayModeImagesEdits {
+		// multipart/form-data
 	} else if info.RelayMode == constant.RelayModeRealtime {
 		// websocket
 	} else {

+ 20 - 2
relay/channel/gemini/adaptor.go

@@ -142,11 +142,29 @@ func processSizeParameters(size, quality string) ImageConfig {
 }
 
 func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
-	if strings.HasPrefix(info.UpstreamModelName, "gemini-3-pro-image") {
+	if model_setting.IsGeminiModelSupportImagine(info.UpstreamModelName) {
+		var content any
+		if base64Data, err := relaycommon.GetImageBase64sFromForm(c); err == nil {
+			content = []any{
+				dto.MediaContent{
+					Type: dto.ContentTypeText,
+					Text: request.Prompt,
+				},
+				dto.MediaContent{
+					Type: dto.ContentTypeFile,
+					File: &dto.MessageFile{
+						FileData: base64Data.String(),
+					},
+				},
+			}
+		} else {
+			content = request.Prompt
+		}
+
 		chatRequest := dto.GeneralOpenAIRequest{
 			Model: request.Model,
 			Messages: []dto.Message{
-				{Role: "user", Content: request.Prompt},
+				{Role: "user", Content: content},
 			},
 			N: int(request.N),
 		}

+ 4 - 3
relay/channel/gemini/relay-gemini.go

@@ -183,7 +183,7 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel
 }
 
 // Setting safety to the lowest possible values since Gemini is already powerless enough
-func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) (*dto.GeminiChatRequest, error) {
+func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo, base64Data ...*relaycommon.Base64Data) (*dto.GeminiChatRequest, error) {
 
 	geminiRequest := dto.GeminiChatRequest{
 		Contents: make([]dto.GeminiChatContent, 0, len(textRequest.Messages)),
@@ -464,10 +464,11 @@ func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
 					})
 				}
 			} else if part.Type == dto.ContentTypeFile {
-				if part.GetFile().FileId != "" {
+				file := part.GetFile()
+				if file.FileId != "" {
 					return nil, fmt.Errorf("only base64 file is supported in gemini")
 				}
-				format, base64String, err := service.DecodeBase64FileData(part.GetFile().FileData)
+				format, base64String, err := service.DecodeBase64FileData(file.FileData)
 				if err != nil {
 					return nil, fmt.Errorf("decode base64 file data failed: %s", err.Error())
 				}

+ 54 - 0
relay/common/relay_utils.go

@@ -1,7 +1,10 @@
 package common
 
 import (
+	"encoding/base64"
+	"errors"
 	"fmt"
+	"io"
 	"net/http"
 	"strconv"
 	"strings"
@@ -226,3 +229,54 @@ func ValidateBasicTaskRequest(c *gin.Context, info *RelayInfo, action string) *d
 	storeTaskRequest(c, info, action, req)
 	return nil
 }
+// GetImagesBase64sFromForm returns every file uploaded under the "image"
+// multipart form field, base64-encoded. It is a convenience wrapper around
+// GetBase64sFromForm and returns that function's result unchanged.
+func GetImagesBase64sFromForm(c *gin.Context) ([]*Base64Data, error) {
+	const imageField = "image"
+	return GetBase64sFromForm(c, imageField)
+}
+// GetImageBase64sFromForm returns only the first file uploaded under the
+// "image" multipart form field, base64-encoded. Any additional files in that
+// field are ignored.
+//
+// It returns the error from GetImagesBase64sFromForm unchanged, and a separate
+// error when that call succeeds but yields no files.
+func GetImageBase64sFromForm(c *gin.Context) (*Base64Data, error) {
+	images, err := GetImagesBase64sFromForm(c)
+	if err != nil {
+		return nil, err
+	}
+	// Do not rely on the helper never returning an empty slice with a nil
+	// error: indexing [0] on an empty result would panic the request handler.
+	if len(images) == 0 {
+		return nil, errors.New("no image found in form")
+	}
+	return images[0], nil
+}
+
+// Base64Data pairs a base64-encoded payload with the MIME type of the
+// original bytes.
+type Base64Data struct {
+	// MimeType is the media type placed in the data URI, e.g. "image/png".
+	MimeType string
+	// Data is the base64 text of the payload, without any "data:" prefix.
+	Data string
+}
+
+// String renders the value as a data URI of the form
+// "data:<MimeType>;base64,<Data>".
+func (d Base64Data) String() string {
+	const dataURIFormat = "data:%s;base64,%s"
+	return fmt.Sprintf(dataURIFormat, d.MimeType, d.Data)
+}
+// GetBase64sFromForm reads every file uploaded under the multipart form field
+// fieldName and returns each one base64-encoded together with its sniffed MIME
+// type, in the order the files are listed for that field.
+//
+// The multipart form is parsed on demand when the request has not been parsed
+// yet. An error is returned when parsing fails, when the field is absent or
+// carries no files, or when any file cannot be opened or read; the underlying
+// cause is wrapped so callers can inspect it with errors.Is / errors.As.
+func GetBase64sFromForm(c *gin.Context, fieldName string) ([]*Base64Data, error) {
+	mf := c.Request.MultipartForm
+	if mf == nil {
+		if _, err := c.MultipartForm(); err != nil {
+			return nil, fmt.Errorf("failed to parse image edit form request: %w", err)
+		}
+		mf = c.Request.MultipartForm
+	}
+	// A missing key yields a nil slice, so one length check covers both
+	// "field absent" and "field present but empty".
+	imageFiles := mf.File[fieldName]
+	if len(imageFiles) == 0 {
+		return nil, errors.New("field \"" + fieldName + "\" is not found or empty")
+	}
+	imageBase64s := make([]*Base64Data, 0, len(imageFiles))
+	for _, file := range imageFiles {
+		image, err := file.Open()
+		if err != nil {
+			return nil, fmt.Errorf("failed to open image file: %w", err)
+		}
+		imageData, err := io.ReadAll(image)
+		// Close right after reading, before looking at the read error, so the
+		// handle is released on the failure path too. The file was only read,
+		// so a Close error carries nothing worth surfacing.
+		_ = image.Close()
+		if err != nil {
+			return nil, fmt.Errorf("failed to read image file: %w", err)
+		}
+		imageBase64s = append(imageBase64s, &Base64Data{
+			// The MIME type is sniffed from the content itself rather than
+			// taken from the client-supplied part headers.
+			MimeType: http.DetectContentType(imageData),
+			Data:     base64.StdEncoding.EncodeToString(imageData),
+		})
+	}
+	return imageBase64s, nil
+}

+ 1 - 0
relay/helper/valid_request.go

@@ -141,6 +141,7 @@ func GetAndValidOpenAIImageRequest(c *gin.Context, relayMode int) (*dto.ImageReq
 			imageRequest.N = uint(common.String2Int(formData.Get("n")))
 			imageRequest.Quality = formData.Get("quality")
 			imageRequest.Size = formData.Get("size")
+			imageRequest.ResponseFormat = formData.Get("response_format")
 			if imageValue := formData.Get("image"); imageValue != "" {
 				imageRequest.Image, _ = json.Marshal(imageValue)
 			}