Forráskód Böngészése

Add a way to stop TTS (#1787)

Chris Estreich 9 hónapja
szülő
commit
a15691dc1d

+ 8 - 2
src/core/webview/ClineProvider.ts

@@ -41,7 +41,7 @@ import { BrowserSession } from "../../services/browser/BrowserSession"
 import { discoverChromeInstances } from "../../services/browser/browserDiscovery"
 import { fileExistsAtPath } from "../../utils/fs"
 import { playSound, setSoundEnabled, setSoundVolume } from "../../utils/sound"
-import { playTts, setTtsEnabled, setTtsSpeed } from "../../utils/tts"
+import { playTts, setTtsEnabled, setTtsSpeed, stopTts } from "../../utils/tts"
 import { singleCompletionHandler } from "../../utils/single-completion-handler"
 import { searchCommits } from "../../utils/git"
 import { getDiffStrategy } from "../diff/DiffStrategy"
@@ -1281,9 +1281,15 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 						break
 					case "playTts":
 						if (message.text) {
-							playTts(message.text)
+							playTts(message.text, {
+								onStart: () => this.postMessageToWebview({ type: "ttsStart", text: message.text }),
+								onStop: () => this.postMessageToWebview({ type: "ttsStop", text: message.text }),
+							})
 						}
 						break
+					case "stopTts":
+						stopTts()
+						break
 					case "diffEnabled":
 						const diffEnabled = message.bool ?? true
 						await this.updateGlobalState("diffEnabled", diffEnabled)

+ 2 - 0
src/shared/ExtensionMessage.ts

@@ -54,6 +54,8 @@ export interface ExtensionMessage {
 		| "browserToolEnabled"
 		| "browserConnectionResult"
 		| "remoteBrowserEnabled"
+		| "ttsStart"
+		| "ttsStop"
 	text?: string
 	action?:
 		| "chatButtonClicked"

+ 1 - 0
src/shared/WebviewMessage.ts

@@ -52,6 +52,7 @@ export interface WebviewMessage {
 		| "alwaysAllowSubtasks"
 		| "playSound"
 		| "playTts"
+		| "stopTts"
 		| "soundEnabled"
 		| "ttsEnabled"
 		| "ttsSpeed"

+ 55 - 49
src/utils/tts.ts

@@ -1,75 +1,81 @@
-import * as vscode from "vscode"
+interface Say {
+	speak: (text: string, voice?: string, speed?: number, callback?: (err?: string) => void) => void
+	stop: () => void
+}
+
+type PlayTtsOptions = {
+	onStart?: () => void
+	onStop?: () => void
+}
+
+type QueueItem = {
+	message: string
+	options: PlayTtsOptions
+}
 
 let isTtsEnabled = false
+
+export const setTtsEnabled = (enabled: boolean) => (isTtsEnabled = enabled)
+
 let speed = 1.0
-let isSpeaking = false
-const utteranceQueue: string[] = []
-
-/**
- * Set tts configuration
- * @param enabled boolean
- */
-export const setTtsEnabled = (enabled: boolean): void => {
-	isTtsEnabled = enabled
+
+export const setTtsSpeed = (newSpeed: number) => (speed = newSpeed)
+
+let sayInstance: Say | undefined = undefined
+let queue: QueueItem[] = []
+
+export const playTts = async (message: string, options: PlayTtsOptions = {}) => {
+	if (!isTtsEnabled) {
+		return
+	}
+
+	try {
+		queue.push({ message, options })
+		await processQueue()
+	} catch (error) {}
 }
 
-/**
- * Set tts speed
- * @param speed number
- */
-export const setTtsSpeed = (newSpeed: number): void => {
-	speed = newSpeed
+export const stopTts = () => {
+	sayInstance?.stop()
+	sayInstance = undefined
+	queue = []
 }
 
-/**
- * Process the next item in the utterance queue
- */
 const processQueue = async (): Promise<void> => {
-	if (!isTtsEnabled || isSpeaking || utteranceQueue.length === 0) {
+	if (!isTtsEnabled || sayInstance) {
+		return
+	}
+
+	const item = queue.shift()
+
+	if (!item) {
 		return
 	}
 
 	try {
-		isSpeaking = true
-		const nextUtterance = utteranceQueue.shift()!
-		const say = require("say")
+		const { message: nextUtterance, options } = item
 
-		// Wrap say.speak in a promise to handle completion
 		await new Promise<void>((resolve, reject) => {
-			say.speak(nextUtterance, null, speed, (err: Error) => {
+			const say: Say = require("say")
+			sayInstance = say
+			options.onStart?.()
+
+			say.speak(nextUtterance, undefined, speed, (err) => {
+				options.onStop?.()
+
 				if (err) {
-					reject(err)
+					reject(new Error(err))
 				} else {
 					resolve()
 				}
+
+				sayInstance = undefined
 			})
 		})
 
-		isSpeaking = false
-		// Process next item in queue if any
 		await processQueue()
 	} catch (error: any) {
-		isSpeaking = false
-		//vscode.window.showErrorMessage(error.message)
-		// Try to continue with next item despite error
-		await processQueue()
-	}
-}
-
-/**
- * Queue a tts message to be spoken
- * @param message string
- * @return void
- */
-export const playTts = async (message: string): Promise<void> => {
-	if (!isTtsEnabled) {
-		return
-	}
-
-	try {
-		utteranceQueue.push(message)
+		sayInstance = undefined
 		await processQueue()
-	} catch (error: any) {
-		//vscode.window.showErrorMessage(error.message)
 	}
 }

+ 53 - 37
webview-ui/src/components/chat/ChatTextArea.tsx

@@ -1,11 +1,15 @@
 import React, { forwardRef, useCallback, useEffect, useLayoutEffect, useMemo, useRef, useState } from "react"
+import { useEvent } from "react-use"
 import DynamicTextArea from "react-textarea-autosize"
 
 import { mentionRegex, mentionRegexGlobal } from "../../../../src/shared/context-mentions"
 import { WebviewMessage } from "../../../../src/shared/WebviewMessage"
 import { Mode, getAllModes } from "../../../../src/shared/modes"
+import { ExtensionMessage } from "../../../../src/shared/ExtensionMessage"
 
 import { vscode } from "@/utils/vscode"
+import { useExtensionState } from "@/context/ExtensionStateContext"
+import { useAppTranslation } from "@/i18n/TranslationContext"
 import {
 	ContextMenuOptionType,
 	getContextMenuOptions,
@@ -13,14 +17,13 @@ import {
 	removeMention,
 	shouldShowContextMenu,
 } from "@/utils/context-mentions"
-import { SelectDropdown, DropdownOptionType } from "@/components/ui"
+import { convertToMentionPath } from "@/utils/path-mentions"
+import { SelectDropdown, DropdownOptionType, Button } from "@/components/ui"
 
-import { useExtensionState } from "../../context/ExtensionStateContext"
 import Thumbnails from "../common/Thumbnails"
-import { convertToMentionPath } from "../../utils/path-mentions"
 import { MAX_IMAGES_PER_MESSAGE } from "./ChatView"
 import ContextMenu from "./ContextMenu"
-import { useAppTranslation } from "../../i18n/TranslationContext"
+import { VolumeX } from "lucide-react"
 
 interface ChatTextAreaProps {
 	inputValue: string
@@ -62,7 +65,7 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 		const [gitCommits, setGitCommits] = useState<any[]>([])
 		const [showDropdown, setShowDropdown] = useState(false)
 
-		// Close dropdown when clicking outside
+		// Close dropdown when clicking outside.
 		useEffect(() => {
 			const handleClickOutside = (event: MouseEvent) => {
 				if (showDropdown) {
@@ -73,14 +76,16 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 			return () => document.removeEventListener("mousedown", handleClickOutside)
 		}, [showDropdown])
 
-		// Handle enhanced prompt response
+		// Handle enhanced prompt response.
 		useEffect(() => {
 			const messageHandler = (event: MessageEvent) => {
 				const message = event.data
+
 				if (message.type === "enhancedPrompt") {
 					if (message.text) {
 						setInputValue(message.text)
 					}
+
 					setIsEnhancingPrompt(false)
 				} else if (message.type === "commitSearchResults") {
 					const commits = message.commits.map((commit: any) => ({
@@ -90,9 +95,11 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 						description: `${commit.shortHash} by ${commit.author} on ${commit.date}`,
 						icon: "$(git-commit)",
 					}))
+
 					setGitCommits(commits)
 				}
 			}
+
 			window.addEventListener("message", messageHandler)
 			return () => window.removeEventListener("message", messageHandler)
 		}, [setInputValue])
@@ -113,7 +120,7 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 		const [isEnhancingPrompt, setIsEnhancingPrompt] = useState(false)
 		const [isFocused, setIsFocused] = useState(false)
 
-		// Fetch git commits when Git is selected or when typing a hash
+		// Fetch git commits when Git is selected or when typing a hash.
 		useEffect(() => {
 			if (selectedType === ContextMenuOptionType.Git || /^[a-f0-9]+$/i.test(searchQuery)) {
 				const message: WebviewMessage = {
@@ -188,14 +195,11 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 				}
 
 				if (type === ContextMenuOptionType.Mode && value) {
-					// Handle mode selection
+					// Handle mode selection.
 					setMode(value)
 					setInputValue("")
 					setShowContextMenu(false)
-					vscode.postMessage({
-						type: "mode",
-						text: value,
-					})
+					vscode.postMessage({ type: "mode", text: value })
 					return
 				}
 
@@ -214,8 +218,10 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 
 				setShowContextMenu(false)
 				setSelectedType(null)
+
 				if (textAreaRef.current) {
 					let insertValue = value || ""
+
 					if (type === ContextMenuOptionType.URL) {
 						insertValue = value || ""
 					} else if (type === ContextMenuOptionType.File || type === ContextMenuOptionType.Folder) {
@@ -239,7 +245,7 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 					setCursorPosition(newCursorPosition)
 					setIntendedCursorPosition(newCursorPosition)
 
-					// scroll to cursor
+					// Scroll to cursor.
 					setTimeout(() => {
 						if (textAreaRef.current) {
 							textAreaRef.current.blur()
@@ -378,7 +384,7 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 		useLayoutEffect(() => {
 			if (intendedCursorPosition !== null && textAreaRef.current) {
 				textAreaRef.current.setSelectionRange(intendedCursorPosition, intendedCursorPosition)
-				setIntendedCursorPosition(null) // Reset the state
+				setIntendedCursorPosition(null) // Reset the state.
 			}
 		}, [inputValue, intendedCursorPosition])
 
@@ -423,10 +429,11 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 		}, [showContextMenu])
 
 		const handleBlur = useCallback(() => {
-			// Only hide the context menu if the user didn't click on it
+			// Only hide the context menu if the user didn't click on it.
 			if (!isMouseDownOnMenu) {
 				setShowContextMenu(false)
 			}
+
 			setIsFocused(false)
 		}, [isMouseDownOnMenu])
 
@@ -435,7 +442,8 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 				const items = e.clipboardData.items
 
 				const pastedText = e.clipboardData.getData("text")
-				// Check if the pasted content is a URL, add space after so user can easily delete if they don't want it
+				// Check if the pasted content is a URL, add space after so user
+				// can easily delete if they don't want it.
 				const urlRegex = /^\S+:\/\/\S+$/
 				if (urlRegex.test(pastedText.trim())) {
 					e.preventDefault()
@@ -448,7 +456,7 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 					setIntendedCursorPosition(newCursorPosition)
 					setShowContextMenu(false)
 
-					// Scroll to new cursor position
+					// Scroll to new cursor position.
 					setTimeout(() => {
 						if (textAreaRef.current) {
 							textAreaRef.current.blur()
@@ -460,10 +468,12 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 				}
 
 				const acceptedTypes = ["png", "jpeg", "webp"]
+
 				const imageItems = Array.from(items).filter((item) => {
 					const [type, subtype] = item.type.split("/")
 					return type === "image" && acceptedTypes.includes(subtype)
 				})
+
 				if (!shouldDisableImages && imageItems.length > 0) {
 					e.preventDefault()
 					const imagePromises = imageItems.map((item) => {
@@ -498,9 +508,7 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 			[shouldDisableImages, setSelectedImages, cursorPosition, setInputValue, inputValue, t],
 		)
 
-		const handleThumbnailsHeightChange = useCallback((height: number) => {
-			setThumbnailsHeight(height)
-		}, [])
+		const handleThumbnailsHeightChange = useCallback((height: number) => setThumbnailsHeight(height), [])
 
 		useEffect(() => {
 			if (selectedImages.length === 0) {
@@ -545,6 +553,18 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 			[updateCursorPosition],
 		)
 
+		const [isTtsPlaying, setIsTtsPlaying] = useState(false)
+
+		useEvent("message", (event: MessageEvent) => {
+			const message: ExtensionMessage = event.data
+
+			if (message.type === "ttsStart") {
+				setIsTtsPlaying(true)
+			} else if (message.type === "ttsStop") {
+				setIsTtsPlaying(false)
+			}
+		})
+
 		return (
 			<div
 				className="chat-text-area"
@@ -566,6 +586,7 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 					e.preventDefault()
 					const files = Array.from(e.dataTransfer.files)
 					const text = e.dataTransfer.getData("text")
+
 					if (text) {
 						// Split text on newlines to handle multiple files
 						const lines = text.split(/\r?\n/).filter((line) => line.trim() !== "")
@@ -597,6 +618,7 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 							setCursorPosition(newCursorPosition)
 							setIntendedCursorPosition(newCursorPosition)
 						}
+
 						return
 					}
 
@@ -741,6 +763,15 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 						}}
 						onScroll={() => updateHighlights()}
 					/>
+					{isTtsPlaying && (
+						<Button
+							variant="ghost"
+							size="icon"
+							className="absolute top-0 right-0 opacity-25 hover:opacity-100 z-10"
+							onClick={() => vscode.postMessage({ type: "stopTts" })}>
+							<VolumeX className="size-4" />
+						</Button>
+					)}
 				</div>
 
 				{selectedImages.length > 0 && (
@@ -782,26 +813,22 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 								disabled={textAreaDisabled}
 								title={t("chat:selectMode")}
 								options={[
-									// Add the shortcut text as a disabled option at the top
 									{
 										value: "shortcut",
 										label: modeShortcutText,
 										disabled: true,
 										type: DropdownOptionType.SHORTCUT,
 									},
-									// Add all modes
 									...getAllModes(customModes).map((mode) => ({
 										value: mode.slug,
 										label: mode.name,
 										type: DropdownOptionType.ITEM,
 									})),
-									// Add separator
 									{
 										value: "sep-1",
 										label: t("chat:separator"),
 										type: DropdownOptionType.SEPARATOR,
 									},
-									// Add Edit option
 									{
 										value: "promptsButtonClicked",
 										label: t("chat:edit"),
@@ -810,10 +837,7 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 								]}
 								onChange={(value) => {
 									setMode(value as Mode)
-									vscode.postMessage({
-										type: "mode",
-										text: value,
-									})
+									vscode.postMessage({ type: "mode", text: value })
 								}}
 								shortcutText={modeShortcutText}
 								triggerClassName="w-full"
@@ -832,31 +856,23 @@ const ChatTextArea = forwardRef<HTMLTextAreaElement, ChatTextAreaProps>(
 								disabled={textAreaDisabled}
 								title={t("chat:selectApiConfig")}
 								options={[
-									// Add all API configurations
 									...(listApiConfigMeta || []).map((config) => ({
 										value: config.name,
 										label: config.name,
 										type: DropdownOptionType.ITEM,
 									})),
-									// Add separator
 									{
 										value: "sep-2",
 										label: t("chat:separator"),
 										type: DropdownOptionType.SEPARATOR,
 									},
-									// Add Edit option
 									{
 										value: "settingsButtonClicked",
 										label: t("chat:edit"),
 										type: DropdownOptionType.ACTION,
 									},
 								]}
-								onChange={(value) => {
-									vscode.postMessage({
-										type: "loadApiConfiguration",
-										text: value,
-									})
-								}}
+								onChange={(value) => vscode.postMessage({ type: "loadApiConfiguration", text: value })}
 								contentClassName="max-h-[300px] overflow-y-auto"
 								triggerClassName="w-full text-ellipsis overflow-hidden"
 							/>