
Remove truncation logic for now

Matt Rubens, 1 year ago
commit 25987dd40b
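
For context, the logic this commit removes estimated tokens from raw character counts and kept only the most recent messages that fit the context window. A minimal sketch of that behaviour, distilled from the deleted lines in src/api/providers/deepseek.ts below (truncateToBudget is a hypothetical wrapper name; the 0.3 tokens-per-character heuristic and the newest-first walk come straight from the diff):

// Rough character-based token estimate, copied from the deleted code.
const getTokenCount = (content: string) => Math.ceil(content.length * 0.3)

// Hypothetical wrapper around the deleted loop: reserve budget for the
// system prompt, then admit messages newest-first until the window fills.
// Everything older than the first message that doesn't fit is dropped.
function truncateToBudget(
	contextWindow: number,
	systemPrompt: string,
	messages: { role: string; content: string }[],
): { included: { role: string; content: string }[]; truncated: boolean } {
	let availableTokens = contextWindow - getTokenCount(systemPrompt)
	const included: { role: string; content: string }[] = []
	let truncated = false
	for (const msg of [...messages].reverse()) {
		const tokens = getTokenCount(msg.content)
		if (tokens > availableTokens) {
			truncated = true
			break
		}
		included.unshift(msg)
		availableTokens -= tokens
	}
	return { included, truncated }
}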

+ 1 - 1
.changeset/modern-carrots-applaud.md

@@ -2,4 +2,4 @@
 "roo-cline": patch
 ---
 
-Add the DeepSeek provider along with logic to trim messages when it hits the context window
+Add the DeepSeek provider

+ 1 - 1
README.md

@@ -13,7 +13,7 @@ A fork of Cline, an autonomous coding agent, with some additional experimental f
 - Includes current time in the system prompt
 - Uses a file system watcher to more reliably watch for file system changes
 - Language selection for Cline's communication (English, Japanese, Spanish, French, German, and more)
-- Support for DeepSeek V3 with logic to trim messages when it hits the context window
+- Support for DeepSeek V3
 - Support for Meta 3, 3.1, and 3.2 models via AWS Bedrock
 - Support for listing models from OpenAI-compatible providers
 - Per-tool MCP auto-approval

+ 4 - 94
src/api/providers/__tests__/deepseek.test.ts

@@ -5,19 +5,6 @@ import { Anthropic } from '@anthropic-ai/sdk'
 
 // Mock dependencies
 jest.mock('openai')
-jest.mock('../../../shared/api', () => ({
-    ...jest.requireActual('../../../shared/api'),
-    deepSeekModels: {
-        'deepseek-chat': {
-            maxTokens: 1000,
-            contextWindow: 2000,
-            supportsImages: false,
-            supportsPromptCache: false,
-            inputPrice: 0.014,
-            outputPrice: 0.28,
-        }
-    }
-}))
 
 describe('DeepSeekHandler', () => {
 
@@ -46,8 +33,8 @@ describe('DeepSeekHandler', () => {
         expect(result).toEqual({
             id: mockOptions.deepSeekModelId,
             info: expect.objectContaining({
-                maxTokens: 1000,
-                contextWindow: 2000,
+                maxTokens: 8192,
+                contextWindow: 64000,
                 supportsPromptCache: false,
                 supportsImages: false,
                 inputPrice: 0.014,
@@ -61,7 +48,7 @@ describe('DeepSeekHandler', () => {
         const result = handler.getModel()
         
         expect(result.id).toBe('deepseek-chat')
-        expect(result.info.maxTokens).toBe(1000)
+        expect(result.info.maxTokens).toBe(8192)
     })
 
     test('createMessage handles string content correctly', async () => {
@@ -109,7 +96,7 @@ describe('DeepSeekHandler', () => {
             ],
             temperature: 0,
             stream: true,
-            max_tokens: 1000,
+            max_tokens: 8192,
             stream_options: { include_usage: true }
         }))
     })
@@ -155,83 +142,6 @@ describe('DeepSeekHandler', () => {
         }))
     })
 
-    test('createMessage truncates messages when exceeding context window', async () => {
-        const handler = new DeepSeekHandler(mockOptions)
-        const longString = 'a'.repeat(1000) // ~300 tokens
-        const shortString = 'b'.repeat(100) // ~30 tokens
-        
-        const systemPrompt = 'test system prompt'
-        const messages: Anthropic.Messages.MessageParam[] = [
-            { role: 'user', content: longString }, // Old message
-            { role: 'assistant', content: 'short response' },
-            { role: 'user', content: shortString } // Recent message
-        ]
-
-        const mockStream = {
-            async *[Symbol.asyncIterator]() {
-                yield {
-                    choices: [{
-                        delta: {
-                            content: '(Note: Some earlier messages were truncated to fit within the model\'s context window)\n\n'
-                        }
-                    }]
-                }
-                yield {
-                    choices: [{
-                        delta: {
-                            content: 'test response'
-                        }
-                    }]
-                }
-            }
-        }
-
-        const mockCreate = jest.fn().mockResolvedValue(mockStream)
-        ;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
-            completions: { create: mockCreate }
-        } as any
-
-        const generator = handler.createMessage(systemPrompt, messages)
-        const chunks = []
-        for await (const chunk of generator) {
-            chunks.push(chunk)
-        }
-
-        // Should get two chunks: truncation notice and response
-        expect(chunks).toHaveLength(2)
-        expect(chunks[0]).toEqual({
-            type: 'text',
-            text: expect.stringContaining('truncated')
-        })
-        expect(chunks[1]).toEqual({
-            type: 'text',
-            text: 'test response'
-        })
-
-        // Verify API call includes system prompt and recent messages, but not old message
-        expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
-            messages: expect.arrayContaining([
-                { role: 'system', content: systemPrompt },
-                { role: 'assistant', content: 'short response' },
-                { role: 'user', content: shortString }
-            ])
-        }))
-        
-        // Verify truncation notice was included
-        expect(chunks[0]).toEqual({
-            type: 'text',
-            text: expect.stringContaining('truncated')
-        })
-
-        // Verify the messages array contains the expected messages
-        const calledMessages = mockCreate.mock.calls[0][0].messages
-        expect(calledMessages).toHaveLength(4)
-        expect(calledMessages[0]).toEqual({ role: 'system', content: systemPrompt })
-        expect(calledMessages[1]).toEqual({ role: 'user', content: longString })
-        expect(calledMessages[2]).toEqual({ role: 'assistant', content: 'short response' })
-        expect(calledMessages[3]).toEqual({ role: 'user', content: shortString })
-    })
-
     test('createMessage handles API errors', async () => {
         const handler = new DeepSeekHandler(mockOptions)
         const mockStream = {

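The deleted test above also documents the mocking pattern the remaining tests keep using: a plain async iterable stands in for the OpenAI streaming response, and jest swaps out chat.completions.create on the client prototype. A minimal sketch, with illustrative chunk values (the final usage-only chunk matches the handler's stream_options: { include_usage: true }):

import OpenAI from 'openai'

// An async iterable plays the streamed completion; the last chunk carries
// usage data with an empty choices array, as the real API does when
// include_usage is requested.
const mockStream = {
	async *[Symbol.asyncIterator]() {
		yield { choices: [{ delta: { content: 'test response' } }] }
		yield { choices: [], usage: { prompt_tokens: 10, completion_tokens: 5 } }
	}
}

const mockCreate = jest.fn().mockResolvedValue(mockStream)
;(OpenAI as jest.MockedClass<typeof OpenAI>).prototype.chat = {
	completions: { create: mockCreate }
} as any
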
+ 40 - 60
src/api/providers/deepseek.ts

@@ -20,64 +20,36 @@ export class DeepSeekHandler implements ApiHandler {
 	}
 
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-		// Convert messages to simple format that DeepSeek expects
-		const formattedMessages = messages.map(msg => {
+		const modelInfo = deepSeekModels[this.options.deepSeekModelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId]
+
+		// Format all messages
+		const messagesToInclude: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: 'system' as const, content: systemPrompt }
+		]
+
+		// Add the rest of the messages
+		for (const msg of messages) {
+			let messageContent = ""
 			if (typeof msg.content === "string") {
-				return { role: msg.role, content: msg.content }
-			}
-			// For array content, concatenate text parts
-			return {
-				role: msg.role,
-				content: msg.content.reduce((acc, part) => {
+				messageContent = msg.content
+			} else if (Array.isArray(msg.content)) {
+				messageContent = msg.content.reduce((acc, part) => {
 					if (part.type === "text") {
 						return acc + part.text
 					}
 					return acc
 				}, "")
 			}
-		})
-
-		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			...formattedMessages,
-		]
-		const modelInfo = deepSeekModels[this.options.deepSeekModelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId]
-		
-		const contextWindow = modelInfo.contextWindow || 64_000
-		const getTokenCount = (content: string) => Math.ceil(content.length * 0.3)
-
-		// Always keep system prompt
-		const systemMsg = openAiMessages[0]
-		let availableTokens = contextWindow - getTokenCount(typeof systemMsg.content === 'string' ? systemMsg.content : '')
-		
-		// Start with most recent messages and work backwards
-		const userMessages = openAiMessages.slice(1).reverse()
-		const includedMessages = []
-		let truncated = false
-
-		for (const msg of userMessages) {
-			const content = typeof msg.content === 'string' ? msg.content : ''
-			const tokens = getTokenCount(content)
 			
-			if (tokens <= availableTokens) {
-				includedMessages.unshift(msg)
-				availableTokens -= tokens
-			} else {
-				truncated = true
-				break
-			}
-		}
-
-		if (truncated) {
-			yield {
-				type: 'text',
-				text: '(Note: Some earlier messages were truncated to fit within the model\'s context window)\n\n'
-			}
+			messagesToInclude.push({
+				role: msg.role === 'user' ? 'user' as const : 'assistant' as const,
+				content: messageContent
+			})
 		}
 
 		const requestOptions: OpenAI.Chat.ChatCompletionCreateParamsStreaming = {
 			model: this.options.deepSeekModelId ?? "deepseek-chat",
-			messages: [systemMsg, ...includedMessages],
+			messages: messagesToInclude,
 			temperature: 0,
 			stream: true,
 			max_tokens: modelInfo.maxTokens,
@@ -87,22 +59,30 @@ export class DeepSeekHandler implements ApiHandler {
 			requestOptions.stream_options = { include_usage: true }
 		}
 
-		const stream = await this.client.chat.completions.create(requestOptions)
-		for await (const chunk of stream) {
-			const delta = chunk.choices[0]?.delta
-			if (delta?.content) {
-				yield {
-					type: "text",
-					text: delta.content,
+		let totalInputTokens = 0;
+		let totalOutputTokens = 0;
+
+		try {
+			const stream = await this.client.chat.completions.create(requestOptions)
+			for await (const chunk of stream) {
+				const delta = chunk.choices[0]?.delta
+				if (delta?.content) {
+					yield {
+						type: "text",
+						text: delta.content,
+					}
 				}
-			}
-			if (chunk.usage) {
-				yield {
-					type: "usage",
-					inputTokens: chunk.usage.prompt_tokens || 0,
-					outputTokens: chunk.usage.completion_tokens || 0,
+				if (chunk.usage) {
+					yield {
+						type: "usage",
+						inputTokens: chunk.usage.prompt_tokens || 0,
+						outputTokens: chunk.usage.completion_tokens || 0,
+					}
 				}
 			}
+		} catch (error) {
+			console.error("DeepSeek API Error:", error)
+			throw error
 		}
 	}
 
@@ -113,4 +93,4 @@ export class DeepSeekHandler implements ApiHandler {
 			info: deepSeekModels[modelId as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId],
 		}
 	}
-}
\ No newline at end of file
+}
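
After this change, createMessage flattens each Anthropic message to plain text and forwards the whole conversation; no client-side trimming remains. A minimal sketch of just the flattening step (flattenContent is a hypothetical helper name, not part of the diff; the reduce over text parts mirrors the loop in the hunk above):

import { Anthropic } from '@anthropic-ai/sdk'

// Hypothetical helper mirroring the new loop: string content passes
// through unchanged; array content keeps only its text parts, in order.
function flattenContent(content: Anthropic.Messages.MessageParam['content']): string {
	if (typeof content === 'string') {
		return content
	}
	return content.reduce((acc, part) => (part.type === 'text' ? acc + part.text : acc), '')
}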