Browse Source

Add text-to-speech functionality (#1412)

* Add text-to-speech functionality

* Add speed config option to text-to-speech

* Fix test case for tts speed slider

* Fix test case for tts speed slider (really)

* Disabled error message logging in tts.ts

* ignore markdown and mermaid diagrams in TTS

* add ttsEnabled and ttsSpeed to GlobalStateKey

* fix failing webview test for save button

* Translations

* Fix tests

---------

Co-authored-by: Matt Rubens <[email protected]>
Seth Miller 9 months ago
parent
commit
f16a49de87
32 changed files with 392 additions and 14 deletions
  1. 17 1
      package-lock.json
  2. 1 0
      package.json
  3. 29 0
      src/core/webview/ClineProvider.ts
  4. 20 0
      src/core/webview/__tests__/ClineProvider.test.ts
  5. 2 0
      src/exports/roo-code.d.ts
  6. 2 0
      src/shared/ExtensionMessage.ts
  7. 3 0
      src/shared/WebviewMessage.ts
  8. 2 0
      src/shared/globalState.ts
  9. 75 0
      src/utils/tts.ts
  10. 6 0
      webview-ui/package-lock.json
  11. 1 0
      webview-ui/package.json
  12. 35 1
      webview-ui/src/components/chat/ChatView.tsx
  13. 3 3
      webview-ui/src/components/history/CopyButton.tsx
  14. 40 9
      webview-ui/src/components/settings/NotificationSettings.tsx
  15. 6 0
      webview-ui/src/components/settings/SettingsView.tsx
  16. 68 0
      webview-ui/src/components/settings/__tests__/SettingsView.test.tsx
  17. 7 0
      webview-ui/src/context/ExtensionStateContext.tsx
  18. 5 0
      webview-ui/src/i18n/locales/ca/settings.json
  19. 5 0
      webview-ui/src/i18n/locales/de/settings.json
  20. 5 0
      webview-ui/src/i18n/locales/en/settings.json
  21. 5 0
      webview-ui/src/i18n/locales/es/settings.json
  22. 5 0
      webview-ui/src/i18n/locales/fr/settings.json
  23. 5 0
      webview-ui/src/i18n/locales/hi/settings.json
  24. 5 0
      webview-ui/src/i18n/locales/it/settings.json
  25. 5 0
      webview-ui/src/i18n/locales/ja/settings.json
  26. 5 0
      webview-ui/src/i18n/locales/ko/settings.json
  27. 5 0
      webview-ui/src/i18n/locales/pl/settings.json
  28. 5 0
      webview-ui/src/i18n/locales/pt-BR/settings.json
  29. 5 0
      webview-ui/src/i18n/locales/tr/settings.json
  30. 5 0
      webview-ui/src/i18n/locales/vi/settings.json
  31. 5 0
      webview-ui/src/i18n/locales/zh-CN/settings.json
  32. 5 0
      webview-ui/src/i18n/locales/zh-TW/settings.json

+ 17 - 1
package-lock.json

@@ -48,6 +48,7 @@
 				"puppeteer-chromium-resolver": "^23.0.0",
 				"puppeteer-core": "^23.4.0",
 				"reconnecting-eventsource": "^1.6.4",
+				"say": "^0.16.0",
 				"serialize-error": "^11.0.3",
 				"simple-git": "^3.27.0",
 				"sound-play": "^1.1.0",
@@ -12546,7 +12547,6 @@
 			"resolved": "https://registry.npmjs.org/npm-run-all/-/npm-run-all-4.1.5.tgz",
 			"integrity": "sha512-Oo82gJDAVcaMdi3nuoKFavkIHBRVqQ1qvMb+9LHk/cF4P6B2m8aP04hGf7oL6wZ9BuGwX1onlLhpuoofSyoQDQ==",
 			"dev": true,
-			"license": "MIT",
 			"dependencies": {
 				"ansi-styles": "^3.2.1",
 				"chalk": "^2.4.1",
@@ -12827,6 +12827,11 @@
 				"wrappy": "1"
 			}
 		},
+		"node_modules/one-time": {
+			"version": "0.0.4",
+			"resolved": "https://registry.npmjs.org/one-time/-/one-time-0.0.4.tgz",
+			"integrity": "sha512-qAMrwuk2xLEutlASoiPiAMW3EN3K96Ka/ilSXYr6qR1zSVXw2j7+yDSqGTC4T9apfLYxM3tLLjKvgPdAUK7kYQ=="
+		},
 		"node_modules/onetime": {
 			"version": "5.1.2",
 			"resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz",
@@ -14121,6 +14126,17 @@
 			"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
 			"integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
 		},
+		"node_modules/say": {
+			"version": "0.16.0",
+			"resolved": "https://registry.npmjs.org/say/-/say-0.16.0.tgz",
+			"integrity": "sha512-yEfncNu3I6lcZ6RIrXgE9DqbrEmvV5uQQ8ReM14u/DodlvJYpveqNphO55RLMSj77b06ZKNif/FLmhzQxcuUXg==",
+			"dependencies": {
+				"one-time": "0.0.4"
+			},
+			"engines": {
+				"node": ">=6.9"
+			}
+		},
 		"node_modules/semver": {
 			"version": "7.6.3",
 			"resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz",

+ 1 - 0
package.json

@@ -359,6 +359,7 @@
 		"puppeteer-chromium-resolver": "^23.0.0",
 		"puppeteer-core": "^23.4.0",
 		"reconnecting-eventsource": "^1.6.4",
+		"say": "^0.16.0",
 		"serialize-error": "^11.0.3",
 		"simple-git": "^3.27.0",
 		"sound-play": "^1.1.0",

+ 29 - 0
src/core/webview/ClineProvider.ts

@@ -39,6 +39,7 @@ import { BrowserSession } from "../../services/browser/BrowserSession"
 import { discoverChromeInstances } from "../../services/browser/browserDiscovery"
 import { fileExistsAtPath } from "../../utils/fs"
 import { playSound, setSoundEnabled, setSoundVolume } from "../../utils/sound"
+import { playTts, setTtsEnabled, setTtsSpeed } from "../../utils/tts"
 import { singleCompletionHandler } from "../../utils/single-completion-handler"
 import { searchCommits } from "../../utils/git"
 import { getDiffStrategy } from "../diff/DiffStrategy"
@@ -356,6 +357,11 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 			setSoundEnabled(soundEnabled ?? false)
 		})
 
+		// Initialize tts enabled state
+		this.getState().then(({ ttsEnabled }) => {
+			setTtsEnabled(ttsEnabled ?? false)
+		})
+
 		webviewView.webview.options = {
 			// Allow scripts in the webview
 			enableScripts: true,
@@ -1233,6 +1239,23 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 						setSoundVolume(soundVolume)
 						await this.postStateToWebview()
 						break
+					case "ttsEnabled":
+						const ttsEnabled = message.bool ?? true
+						await this.updateGlobalState("ttsEnabled", ttsEnabled)
+						setTtsEnabled(ttsEnabled) // Keep the tts utility in sync with the saved setting
+						await this.postStateToWebview()
+						break
+					case "ttsSpeed":
+						const ttsSpeed = message.value ?? 1.0
+						await this.updateGlobalState("ttsSpeed", ttsSpeed)
+						setTtsSpeed(ttsSpeed)
+						await this.postStateToWebview()
+						break
+					case "playTts":
+						if (message.text) {
+							playTts(message.text)
+						}
+						break
 					case "diffEnabled":
 						const diffEnabled = message.bool ?? true
 						await this.updateGlobalState("diffEnabled", diffEnabled)
@@ -2333,6 +2356,8 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 			alwaysAllowModeSwitch,
 			alwaysAllowSubtasks,
 			soundEnabled,
+			ttsEnabled,
+			ttsSpeed,
 			diffEnabled,
 			enableCheckpoints,
 			checkpointStorage,
@@ -2392,6 +2417,8 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 				.filter((item: HistoryItem) => item.ts && item.task)
 				.sort((a: HistoryItem, b: HistoryItem) => b.ts - a.ts),
 			soundEnabled: soundEnabled ?? false,
+			ttsEnabled: ttsEnabled ?? false,
+			ttsSpeed: ttsSpeed ?? 1.0,
 			diffEnabled: diffEnabled ?? true,
 			enableCheckpoints: enableCheckpoints ?? true,
 			checkpointStorage: checkpointStorage ?? "task",
@@ -2551,6 +2578,8 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
 			taskHistory: stateValues.taskHistory,
 			allowedCommands: stateValues.allowedCommands,
 			soundEnabled: stateValues.soundEnabled ?? false,
+			ttsEnabled: stateValues.ttsEnabled ?? false,
+			ttsSpeed: stateValues.ttsSpeed ?? 1.0,
 			diffEnabled: stateValues.diffEnabled ?? true,
 			enableCheckpoints: stateValues.enableCheckpoints ?? true,
 			checkpointStorage: stateValues.checkpointStorage ?? "task",

+ 20 - 0
src/core/webview/__tests__/ClineProvider.test.ts

@@ -7,6 +7,7 @@ import { ClineProvider } from "../ClineProvider"
 import { ExtensionMessage, ExtensionState } from "../../../shared/ExtensionMessage"
 import { GlobalStateKey, SecretKey } from "../../../shared/globalState"
 import { setSoundEnabled } from "../../../utils/sound"
+import { setTtsEnabled } from "../../../utils/tts"
 import { defaultModeSlug } from "../../../shared/modes"
 import { experimentDefault } from "../../../shared/experiments"
 import { Cline } from "../../Cline"
@@ -271,6 +272,11 @@ jest.mock("../../../utils/sound", () => ({
 	setSoundEnabled: jest.fn(),
 }))
 
+// Mock tts utility
+jest.mock("../../../utils/tts", () => ({
+	setTtsEnabled: jest.fn(),
+}))
+
 // Mock ESM modules
 jest.mock("p-wait-for", () => ({
 	__esModule: true,
@@ -506,6 +512,7 @@ describe("ClineProvider", () => {
 			alwaysAllowMcp: false,
 			uriScheme: "vscode",
 			soundEnabled: false,
+			ttsEnabled: false,
 			diffEnabled: false,
 			enableCheckpoints: false,
 			checkpointStorage: "task",
@@ -603,6 +610,7 @@ describe("ClineProvider", () => {
 		expect(state).toHaveProperty("alwaysAllowBrowser")
 		expect(state).toHaveProperty("taskHistory")
 		expect(state).toHaveProperty("soundEnabled")
+		expect(state).toHaveProperty("ttsEnabled")
 		expect(state).toHaveProperty("diffEnabled")
 		expect(state).toHaveProperty("writeDelayMs")
 	})
@@ -666,6 +674,18 @@ describe("ClineProvider", () => {
 		expect(setSoundEnabled).toHaveBeenCalledWith(false)
 		expect(mockContext.globalState.update).toHaveBeenCalledWith("soundEnabled", false)
 		expect(mockPostMessage).toHaveBeenCalled()
+
+		// Simulate setting tts to enabled
+		await messageHandler({ type: "ttsEnabled", bool: true })
+		expect(setTtsEnabled).toHaveBeenCalledWith(true)
+		expect(mockContext.globalState.update).toHaveBeenCalledWith("ttsEnabled", true)
+		expect(mockPostMessage).toHaveBeenCalled()
+
+		// Simulate setting tts to disabled
+		await messageHandler({ type: "ttsEnabled", bool: false })
+		expect(setTtsEnabled).toHaveBeenCalledWith(false)
+		expect(mockContext.globalState.update).toHaveBeenCalledWith("ttsEnabled", false)
+		expect(mockPostMessage).toHaveBeenCalled()
 	})
 
 	test("requestDelaySeconds defaults to 10 seconds", async () => {

+ 2 - 0
src/exports/roo-code.d.ts

@@ -207,6 +207,8 @@ export type GlobalStateKey =
 	| "openRouterUseMiddleOutTransform"
 	| "googleGeminiBaseUrl"
 	| "allowedCommands"
+	| "ttsEnabled"
+	| "ttsSpeed"
 	| "soundEnabled"
 	| "soundVolume"
 	| "diffEnabled"

+ 2 - 0
src/shared/ExtensionMessage.ts

@@ -124,6 +124,8 @@ export interface ExtensionState {
 	currentTaskItem?: HistoryItem
 	allowedCommands?: string[]
 	soundEnabled?: boolean
+	ttsEnabled?: boolean
+	ttsSpeed?: number
 	soundVolume?: number
 	diffEnabled?: boolean
 	enableCheckpoints: boolean

+ 3 - 0
src/shared/WebviewMessage.ts

@@ -50,7 +50,10 @@ export interface WebviewMessage {
 		| "alwaysAllowModeSwitch"
 		| "alwaysAllowSubtasks"
 		| "playSound"
+		| "playTts"
 		| "soundEnabled"
+		| "ttsEnabled"
+		| "ttsSpeed"
 		| "soundVolume"
 		| "diffEnabled"
 		| "enableCheckpoints"

+ 2 - 0
src/shared/globalState.ts

@@ -76,6 +76,8 @@ export const GLOBAL_STATE_KEYS = [
 	"googleGeminiBaseUrl",
 	"allowedCommands",
 	"soundEnabled",
+	"ttsEnabled",
+	"ttsSpeed",
 	"soundVolume",
 	"diffEnabled",
 	"enableCheckpoints",

+ 75 - 0
src/utils/tts.ts

@@ -0,0 +1,75 @@
+import * as vscode from "vscode"
+
+let isTtsEnabled = false
+let speed = 1.0
+let isSpeaking = false
+const utteranceQueue: string[] = []
+
+/**
+ * Enable or disable tts
+ * @param enabled boolean
+ */
+export const setTtsEnabled = (enabled: boolean): void => {
+	isTtsEnabled = enabled
+}
+
+/**
+ * Set tts speed
+ * @param newSpeed number
+ */
+export const setTtsSpeed = (newSpeed: number): void => {
+	speed = newSpeed
+}
+
+/**
+ * Process the next item in the utterance queue
+ */
+const processQueue = async (): Promise<void> => {
+	if (!isTtsEnabled || isSpeaking || utteranceQueue.length === 0) {
+		return
+	}
+
+	try {
+		isSpeaking = true
+		const nextUtterance = utteranceQueue.shift()!
+		const say = require("say")
+
+		// Wrap say.speak in a promise to handle completion
+		await new Promise<void>((resolve, reject) => {
+			say.speak(nextUtterance, null, speed, (err: Error) => {
+				if (err) {
+					reject(err)
+				} else {
+					resolve()
+				}
+			})
+		})
+
+		isSpeaking = false
+		// Process next item in queue if any
+		await processQueue()
+	} catch (error: any) {
+		isSpeaking = false
+		//vscode.window.showErrorMessage(error.message)
+		// Try to continue with next item despite error
+		await processQueue()
+	}
+}
+
+/**
+ * Queue a tts message to be spoken
+ * @param message string
+ * @return Promise<void>
+ */
+export const playTts = async (message: string): Promise<void> => {
+	if (!isTtsEnabled) {
+		return
+	}
+
+	try {
+		utteranceQueue.push(message)
+		await processQueue()
+	} catch (error: any) {
+		//vscode.window.showErrorMessage(error.message)
+	}
+}

+ 6 - 0
webview-ui/package-lock.json

@@ -44,6 +44,7 @@
 				"react-virtuoso": "^4.7.13",
 				"rehype-highlight": "^7.0.0",
 				"remark-gfm": "^4.0.1",
+				"remove-markdown": "^0.6.0",
 				"shell-quote": "^1.8.2",
 				"styled-components": "^6.1.13",
 				"tailwind-merge": "^2.6.0",
@@ -19478,6 +19479,11 @@
 				"url": "https://opencollective.com/unified"
 			}
 		},
+		"node_modules/remove-markdown": {
+			"version": "0.6.0",
+			"resolved": "https://registry.npmjs.org/remove-markdown/-/remove-markdown-0.6.0.tgz",
+			"integrity": "sha512-B9g8yo5Zp1wXfZ77M1RLpqI7xrBBERkp7+3/Btm9N/uZV5xhXZjzIxDbCKz7CSj141lWDuCnQuH12DKLUv4Ghw=="
+		},
 		"node_modules/require-directory": {
 			"version": "2.1.1",
 			"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",

+ 1 - 0
webview-ui/package.json

@@ -52,6 +52,7 @@
 		"react-virtuoso": "^4.7.13",
 		"rehype-highlight": "^7.0.0",
 		"remark-gfm": "^4.0.1",
+		"remove-markdown": "^0.6.0",
 		"shell-quote": "^1.8.2",
 		"styled-components": "^6.1.13",
 		"tailwind-merge": "^2.6.0",

+ 35 - 1
webview-ui/src/components/chat/ChatView.tsx

@@ -31,6 +31,7 @@ import { validateCommand } from "../../utils/command-validation"
 import { getAllModes } from "../../../../src/shared/modes"
 import TelemetryBanner from "../common/TelemetryBanner"
 import { useAppTranslation } from "@/i18n/TranslationContext"
+import removeMd from "remove-markdown"
 
 interface ChatViewProps {
 	isHidden: boolean
@@ -91,6 +92,7 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
 	const disableAutoScrollRef = useRef(false)
 	const [showScrollToBottom, setShowScrollToBottom] = useState(false)
 	const [isAtBottom, setIsAtBottom] = useState(false)
+	const lastTtsRef = useRef<string>("")
 
 	const [wasStreaming, setWasStreaming] = useState<boolean>(false)
 	const [showCheckpointWarning, setShowCheckpointWarning] = useState<boolean>(false)
@@ -104,6 +106,10 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
 		vscode.postMessage({ type: "playSound", audioType })
 	}
 
+	function playTts(text: string) {
+		vscode.postMessage({ type: "playTts", text })
+	}
+
 	useDeepCompareEffect(() => {
 		// if last message is an ask, show user ask UI
 		// if user finished a task, then start a new task with a new conversation history since in this moment that the extension is waiting for user response, the user could close the extension and the conversation history would be lost.
@@ -674,6 +680,34 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
 	)
 
 	useEffect(() => {
+		// Skip the first message so it is not read aloud; later user messages are labelled as user_feedback
+		if (lastMessage && messages.length > 1) {
+			//console.log(JSON.stringify(lastMessage))
+			if (
+				lastMessage.text && // has text
+				(lastMessage.say === "text" || lastMessage.say === "completion_result") && // is a text message
+				!lastMessage.partial && // not a partial message
+				!lastMessage.text.startsWith("{") // not a json object
+			) {
+				let text = lastMessage?.text || ""
+				const mermaidRegex = /```mermaid[\s\S]*?```/g
+				// remove mermaid diagrams from text
+				text = text.replace(mermaidRegex, "")
+				// remove markdown from text
+				text = removeMd(text)
+
+				// ensure message is not a duplicate of last read message
+				if (text !== lastTtsRef.current) {
+					try {
+						playTts(text)
+						lastTtsRef.current = text
+					} catch (error) {
+						console.error("Failed to execute text-to-speech:", error)
+					}
+				}
+			}
+		}
+
 		// Only execute when isStreaming changes from true to false
 		if (wasStreaming && !isStreaming && lastMessage) {
 			// Play appropriate sound based on lastMessage content
@@ -706,7 +740,7 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
 		}
 		// Update previous value
 		setWasStreaming(isStreaming)
-	}, [isStreaming, lastMessage, wasStreaming, isAutoApproved])
+	}, [isStreaming, lastMessage, wasStreaming, isAutoApproved, messages.length])
 
 	const isBrowserSessionMessage = (message: ClineMessage): boolean => {
 		// which of visible messages are browser session messages, see above

+ 3 - 3
webview-ui/src/components/history/CopyButton.tsx

@@ -16,9 +16,9 @@ export const CopyButton = ({ itemTask }: CopyButtonProps) => {
 	const onCopy = useCallback(
 		(e: React.MouseEvent) => {
 			e.stopPropagation()
-			const tempDiv = document.createElement('div');
-			tempDiv.innerHTML = itemTask;
-			const text = tempDiv.textContent || tempDiv.innerText || "";
+			const tempDiv = document.createElement("div")
+			tempDiv.innerHTML = itemTask
+			const text = tempDiv.textContent || tempDiv.innerText || ""
 			!isCopied && copy(text)
 		},
 		[isCopied, copy, itemTask],

+ 40 - 9
webview-ui/src/components/settings/NotificationSettings.tsx

@@ -8,12 +8,16 @@ import { SectionHeader } from "./SectionHeader"
 import { Section } from "./Section"
 
 type NotificationSettingsProps = HTMLAttributes<HTMLDivElement> & {
+	ttsEnabled?: boolean
+	ttsSpeed?: number
 	soundEnabled?: boolean
 	soundVolume?: number
-	setCachedStateField: SetCachedStateField<"soundEnabled" | "soundVolume">
+	setCachedStateField: SetCachedStateField<"ttsEnabled" | "ttsSpeed" | "soundEnabled" | "soundVolume">
 }
 
 export const NotificationSettings = ({
+	ttsEnabled,
+	ttsSpeed,
 	soundEnabled,
 	soundVolume,
 	setCachedStateField,
@@ -30,6 +34,38 @@ export const NotificationSettings = ({
 			</SectionHeader>
 
 			<Section>
+				<div>
+					<VSCodeCheckbox
+						checked={ttsEnabled}
+						onChange={(e: any) => setCachedStateField("ttsEnabled", e.target.checked)}
+						data-testid="tts-enabled-checkbox">
+						<span className="font-medium">{t("settings:notifications.tts.label")}</span>
+					</VSCodeCheckbox>
+					<p className="text-vscode-descriptionForeground text-sm mt-0">
+						{t("settings:notifications.tts.description")}
+					</p>
+					{ttsEnabled && (
+						<div className="pl-[10px] ml-0 border-l-2 border-l-vscode-button-background">
+							<div className="flex items-center gap-[5px]">
+								<input
+									type="range"
+									min="0.1"
+									max="2.0"
+									step="0.01"
+									value={ttsSpeed ?? 1.0}
+									onChange={(e) => setCachedStateField("ttsSpeed", parseFloat(e.target.value))}
+									className="h-2 focus:outline-0 w-4/5 accent-vscode-button-background"
+									aria-label="Speed"
+									data-testid="tts-speed-slider"
+								/>
+								<span className="min-w-[35px] text-left">{((ttsSpeed ?? 1.0) * 100).toFixed(0)}%</span>
+							</div>
+							<p className="text-vscode-descriptionForeground text-sm mt-1">
+								{t("settings:notifications.tts.speedLabel")}
+							</p>
+						</div>
+					)}
+				</div>
 				<div>
 					<VSCodeCheckbox
 						checked={soundEnabled}
@@ -41,13 +77,8 @@ export const NotificationSettings = ({
 						{t("settings:notifications.sound.description")}
 					</p>
 					{soundEnabled && (
-						<div
-							style={{
-								marginLeft: 0,
-								paddingLeft: 10,
-								borderLeft: "2px solid var(--vscode-button-background)",
-							}}>
-							<div style={{ display: "flex", alignItems: "center", gap: "5px" }}>
+						<div className="pl-[10px] ml-0 border-l-2 border-l-vscode-button-background">
+							<div className="flex items-center gap-[5px]">
 								<input
 									type="range"
 									min="0"
@@ -59,7 +90,7 @@ export const NotificationSettings = ({
 									aria-label="Volume"
 									data-testid="sound-volume-slider"
 								/>
-								<span style={{ minWidth: "35px", textAlign: "left" }}>
+								<span className="min-w-[35px] text-left">
 									{((soundVolume ?? 0.5) * 100).toFixed(0)}%
 								</span>
 							</div>

+ 6 - 0
webview-ui/src/components/settings/SettingsView.tsx

@@ -95,6 +95,8 @@ const SettingsView = forwardRef<SettingsViewRef, SettingsViewProps>(({ onDone },
 		remoteBrowserHost,
 		screenshotQuality,
 		soundEnabled,
+		ttsEnabled,
+		ttsSpeed,
 		soundVolume,
 		telemetrySetting,
 		terminalOutputLineLimit,
@@ -185,6 +187,8 @@ const SettingsView = forwardRef<SettingsViewRef, SettingsViewProps>(({ onDone },
 			vscode.postMessage({ type: "allowedCommands", commands: allowedCommands ?? [] })
 			vscode.postMessage({ type: "browserToolEnabled", bool: browserToolEnabled })
 			vscode.postMessage({ type: "soundEnabled", bool: soundEnabled })
+			vscode.postMessage({ type: "ttsEnabled", bool: ttsEnabled })
+			vscode.postMessage({ type: "ttsSpeed", value: ttsSpeed })
 			vscode.postMessage({ type: "soundVolume", value: soundVolume })
 			vscode.postMessage({ type: "diffEnabled", bool: diffEnabled })
 			vscode.postMessage({ type: "enableCheckpoints", bool: enableCheckpoints })
@@ -430,6 +434,8 @@ const SettingsView = forwardRef<SettingsViewRef, SettingsViewProps>(({ onDone },
 
 				<div ref={notificationsRef}>
 					<NotificationSettings
+						ttsEnabled={ttsEnabled}
+						ttsSpeed={ttsSpeed}
 						soundEnabled={soundEnabled}
 						soundVolume={soundVolume}
 						setCachedStateField={setCachedStateField}

+ 68 - 0
webview-ui/src/components/settings/__tests__/SettingsView.test.tsx

@@ -116,6 +116,8 @@ const mockPostMessage = (state: any) => {
 				shouldShowAnnouncement: false,
 				allowedCommands: [],
 				alwaysAllowExecute: false,
+				ttsEnabled: false,
+				ttsSpeed: 1,
 				soundEnabled: false,
 				soundVolume: 0.5,
 				...state,
@@ -148,6 +150,16 @@ describe("SettingsView - Sound Settings", () => {
 		jest.clearAllMocks()
 	})
 
+	it("initializes with tts disabled by default", () => {
+		renderSettingsView()
+
+		const ttsCheckbox = screen.getByTestId("tts-enabled-checkbox")
+		expect(ttsCheckbox).not.toBeChecked()
+
+		// Speed slider should not be visible when tts is disabled
+		expect(screen.queryByTestId("tts-speed-slider")).not.toBeInTheDocument()
+	})
+
 	it("initializes with sound disabled by default", () => {
 		renderSettingsView()
 
@@ -158,6 +170,27 @@ describe("SettingsView - Sound Settings", () => {
 		expect(screen.queryByTestId("sound-volume-slider")).not.toBeInTheDocument()
 	})
 
+	it("toggles tts setting and sends message to VSCode", () => {
+		renderSettingsView()
+
+		const ttsCheckbox = screen.getByTestId("tts-enabled-checkbox")
+
+		// Enable tts
+		fireEvent.click(ttsCheckbox)
+		expect(ttsCheckbox).toBeChecked()
+
+		// Click Save to save settings
+		const saveButton = screen.getByTestId("save-button")
+		fireEvent.click(saveButton)
+
+		expect(vscode.postMessage).toHaveBeenCalledWith(
+			expect.objectContaining({
+				type: "ttsEnabled",
+				bool: true,
+			}),
+		)
+	})
+
 	it("toggles sound setting and sends message to VSCode", () => {
 		renderSettingsView()
 
@@ -179,6 +212,19 @@ describe("SettingsView - Sound Settings", () => {
 		)
 	})
 
+	it("shows tts slider when tts is enabled", () => {
+		renderSettingsView()
+
+		// Enable tts
+		const ttsCheckbox = screen.getByTestId("tts-enabled-checkbox")
+		fireEvent.click(ttsCheckbox)
+
+		// Speed slider should be visible
+		const speedSlider = screen.getByTestId("tts-speed-slider")
+		expect(speedSlider).toBeInTheDocument()
+		expect(speedSlider).toHaveValue("1")
+	})
+
 	it("shows volume slider when sound is enabled", () => {
 		renderSettingsView()
 
@@ -192,6 +238,28 @@ describe("SettingsView - Sound Settings", () => {
 		expect(volumeSlider).toHaveValue("0.5")
 	})
 
+	it("updates speed and sends message to VSCode when slider changes", () => {
+		renderSettingsView()
+
+		// Enable tts
+		const ttsCheckbox = screen.getByTestId("tts-enabled-checkbox")
+		fireEvent.click(ttsCheckbox)
+
+		// Change speed
+		const speedSlider = screen.getByTestId("tts-speed-slider")
+		fireEvent.change(speedSlider, { target: { value: "0.75" } })
+
+		// Click Save to save settings
+		const saveButton = screen.getByTestId("save-button")
+		fireEvent.click(saveButton)
+
+		// Verify message sent to VSCode
+		expect(vscode.postMessage).toHaveBeenCalledWith({
+			type: "ttsSpeed",
+			value: 0.75,
+		})
+	})
+
 	it("updates volume and sends message to VSCode when slider changes", () => {
 		renderSettingsView()
 

+ 7 - 0
webview-ui/src/context/ExtensionStateContext.tsx

@@ -35,6 +35,8 @@ export interface ExtensionStateContextType extends ExtensionState {
 	setAllowedCommands: (value: string[]) => void
 	setSoundEnabled: (value: boolean) => void
 	setSoundVolume: (value: number) => void
+	setTtsEnabled: (value: boolean) => void
+	setTtsSpeed: (value: number) => void
 	setDiffEnabled: (value: boolean) => void
 	setEnableCheckpoints: (value: boolean) => void
 	setBrowserViewportSize: (value: string) => void
@@ -115,6 +117,8 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode
 		allowedCommands: [],
 		soundEnabled: false,
 		soundVolume: 0.5,
+		ttsEnabled: false,
+		ttsSpeed: 1.0,
 		diffEnabled: false,
 		enableCheckpoints: true,
 		checkpointStorage: "task",
@@ -232,6 +236,7 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode
 		filePaths,
 		openedTabs,
 		soundVolume: state.soundVolume,
+		ttsSpeed: state.ttsSpeed,
 		fuzzyMatchThreshold: state.fuzzyMatchThreshold,
 		writeDelayMs: state.writeDelayMs,
 		screenshotQuality: state.screenshotQuality,
@@ -257,6 +262,8 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode
 		setAllowedCommands: (value) => setState((prevState) => ({ ...prevState, allowedCommands: value })),
 		setSoundEnabled: (value) => setState((prevState) => ({ ...prevState, soundEnabled: value })),
 		setSoundVolume: (value) => setState((prevState) => ({ ...prevState, soundVolume: value })),
+		setTtsEnabled: (value) => setState((prevState) => ({ ...prevState, ttsEnabled: value })),
+		setTtsSpeed: (value) => setState((prevState) => ({ ...prevState, ttsSpeed: value })),
 		setDiffEnabled: (value) => setState((prevState) => ({ ...prevState, diffEnabled: value })),
 		setEnableCheckpoints: (value) => setState((prevState) => ({ ...prevState, enableCheckpoints: value })),
 		setBrowserViewportSize: (value: string) =>

+ 5 - 0
webview-ui/src/i18n/locales/ca/settings.json

@@ -241,6 +241,11 @@
 			"label": "Habilitar efectes de so",
 			"description": "Quan està habilitat, Roo reproduirà efectes de so per a notificacions i esdeveniments.",
 			"volumeLabel": "Volum"
+		},
+		"tts": {
+			"label": "Habilitar text a veu",
+			"description": "Quan està habilitat, Roo llegirà en veu alta les seves respostes utilitzant text a veu.",
+			"speedLabel": "Velocitat"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/de/settings.json

@@ -241,6 +241,11 @@
 			"label": "Soundeffekte aktivieren",
 			"description": "Wenn aktiviert, spielt Roo Soundeffekte für Benachrichtigungen und Ereignisse ab.",
 			"volumeLabel": "Lautstärke"
+		},
+		"tts": {
+			"label": "Text-zu-Sprache aktivieren",
+			"description": "Wenn aktiviert, liest Roo seine Antworten mit Text-zu-Sprache laut vor.",
+			"speedLabel": "Geschwindigkeit"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/en/settings.json

@@ -241,6 +241,11 @@
 			"label": "Enable sound effects",
 			"description": "When enabled, Roo will play sound effects for notifications and events.",
 			"volumeLabel": "Volume"
+		},
+		"tts": {
+			"label": "Enable text-to-speech",
+			"description": "When enabled, Roo will read aloud its responses using text-to-speech.",
+			"speedLabel": "Speed"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/es/settings.json

@@ -241,6 +241,11 @@
 			"label": "Habilitar efectos de sonido",
 			"description": "Cuando está habilitado, Roo reproducirá efectos de sonido para notificaciones y eventos.",
 			"volumeLabel": "Volumen"
+		},
+		"tts": {
+			"label": "Habilitar texto a voz",
+			"description": "Cuando está habilitado, Roo leerá en voz alta sus respuestas usando texto a voz.",
+			"speedLabel": "Velocidad"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/fr/settings.json

@@ -241,6 +241,11 @@
 			"label": "Activer les effets sonores",
 			"description": "Lorsque cette option est activée, Roo jouera des effets sonores pour les notifications et les événements.",
 			"volumeLabel": "Volume"
+		},
+		"tts": {
+			"label": "Activer la synthèse vocale",
+			"description": "Lorsque cette option est activée, Roo lira ses réponses à haute voix en utilisant la synthèse vocale.",
+			"speedLabel": "Vitesse"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/hi/settings.json

@@ -241,6 +241,11 @@
 			"label": "ध्वनि प्रभाव सक्षम करें",
 			"description": "जब सक्षम होता है, तो Roo सूचनाओं और घटनाओं के लिए ध्वनि प्रभाव चलाएगा।",
 			"volumeLabel": "वॉल्यूम"
+		},
+		"tts": {
+			"label": "टेक्स्ट-टू-स्पीच सक्षम करें",
+			"description": "जब सक्षम होता है, तो Roo टेक्स्ट-टू-स्पीच का उपयोग करके अपनी प्रतिक्रियाओं को बोलकर पढ़ेगा।",
+			"speedLabel": "गति"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/it/settings.json

@@ -241,6 +241,11 @@
 			"label": "Abilita effetti sonori",
 			"description": "Quando abilitato, Roo riprodurrà effetti sonori per notifiche ed eventi.",
 			"volumeLabel": "Volume"
+		},
+		"tts": {
+			"label": "Abilita sintesi vocale",
+			"description": "Quando abilitato, Roo leggerà ad alta voce le sue risposte utilizzando la sintesi vocale.",
+			"speedLabel": "Velocità"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/ja/settings.json

@@ -241,6 +241,11 @@
 			"label": "サウンドエフェクトを有効化",
 			"description": "有効にすると、Rooは通知やイベントのためにサウンドエフェクトを再生します。",
 			"volumeLabel": "音量"
+		},
+		"tts": {
+			"label": "音声合成を有効化",
+			"description": "有効にすると、Rooは音声合成を使用して応答を音声で読み上げます。",
+			"speedLabel": "速度"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/ko/settings.json

@@ -241,6 +241,11 @@
 			"label": "사운드 효과 활성화",
 			"description": "활성화되면 Roo는 알림 및 이벤트에 대한 사운드 효과를 재생합니다.",
 			"volumeLabel": "볼륨"
+		},
+		"tts": {
+			"label": "음성 합성 활성화",
+			"description": "활성화되면 Roo는 음성 합성을 사용하여 응답을 소리내어 읽습니다.",
+			"speedLabel": "속도"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/pl/settings.json

@@ -241,6 +241,11 @@
 			"label": "Włącz efekty dźwiękowe",
 			"description": "Gdy włączone, Roo będzie odtwarzać efekty dźwiękowe dla powiadomień i zdarzeń.",
 			"volumeLabel": "Głośność"
+		},
+		"tts": {
+			"label": "Włącz syntezę mowy",
+			"description": "Gdy włączone, Roo będzie czytać na głos swoje odpowiedzi za pomocą syntezy mowy.",
+			"speedLabel": "Szybkość"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/pt-BR/settings.json

@@ -241,6 +241,11 @@
 			"label": "Ativar efeitos sonoros",
 			"description": "Quando ativado, o Roo reproduzirá efeitos sonoros para notificações e eventos.",
 			"volumeLabel": "Volume"
+		},
+		"tts": {
+			"label": "Ativar texto para fala",
+			"description": "Quando ativado, o Roo lerá em voz alta suas respostas usando texto para fala.",
+			"speedLabel": "Velocidade"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/tr/settings.json

@@ -241,6 +241,11 @@
 			"label": "Ses efektlerini etkinleştir",
 			"description": "Etkinleştirildiğinde, Roo bildirimler ve olaylar için ses efektleri çalacaktır.",
 			"volumeLabel": "Ses Düzeyi"
+		},
+		"tts": {
+			"label": "Metinden sese özelliğini etkinleştir",
+			"description": "Etkinleştirildiğinde, Roo yanıtlarını metinden sese teknolojisi kullanarak sesli okuyacaktır.",
+			"speedLabel": "Hız"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/vi/settings.json

@@ -241,6 +241,11 @@
 			"label": "Bật hiệu ứng âm thanh",
 			"description": "Khi được bật, Roo sẽ phát hiệu ứng âm thanh cho thông báo và sự kiện.",
 			"volumeLabel": "Âm lượng"
+		},
+		"tts": {
+			"label": "Bật chuyển văn bản thành giọng nói",
+			"description": "Khi được bật, Roo sẽ đọc to các phản hồi của nó bằng chức năng chuyển văn bản thành giọng nói.",
+			"speedLabel": "Tốc độ"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/zh-CN/settings.json

@@ -241,6 +241,11 @@
 			"label": "启用音效",
 			"description": "启用后,Roo 将为通知和事件播放音效。",
 			"volumeLabel": "音量"
+		},
+		"tts": {
+			"label": "启用文本转语音",
+			"description": "启用后,Roo 将使用文本转语音功能朗读其响应。",
+			"speedLabel": "速度"
 		}
 	},
 	"contextManagement": {

+ 5 - 0
webview-ui/src/i18n/locales/zh-TW/settings.json

@@ -241,6 +241,11 @@
 			"label": "啟用音效",
 			"description": "啟用後,Roo 將為通知和事件播放音效。",
 			"volumeLabel": "音量"
+		},
+		"tts": {
+			"label": "啟用文字轉語音",
+			"description": "啟用後,Roo 將使用文字轉語音功能朗讀其回應。",
+			"speedLabel": "速度"
 		}
 	},
 	"contextManagement": {