Browse Source

fix(ui): use oniguruma wasm engine for markdown code highlighting

The JS regex engine in @pierre/diffs can cause catastrophic backtracking
on certain TextMate grammars (e.g. powershell), freezing the main thread.
Switch to a dedicated oniguruma WASM-based highlighter for markdown code
blocks. Diff components still use the existing @pierre/diffs highlighter.
Add try/catch fallback and regression tests.
Jack 2 tháng trước cách đây
mục cha
commit
96a2ca3c7e

+ 1 - 0
packages/ui/package.json

@@ -24,6 +24,7 @@
   },
   "scripts": {
     "typecheck": "tsgo --noEmit",
+    "test": "bun test ./src",
     "dev": "vite",
     "generate:tailwind": "bun run script/tailwind.ts"
   },

+ 145 - 0
packages/ui/src/context/marked.test.ts

@@ -0,0 +1,145 @@
+import { describe, test, expect } from "bun:test"
+import { getMarkdownHighlighter, highlightCodeBlocks } from "./marked"
+
+// Contract checks for the lazily created highlighter that markdown code blocks use.
+describe("getMarkdownHighlighter", () => {
+  test("creates a highlighter with Oniguruma engine", async () => {
+    const instance = await getMarkdownHighlighter()
+    expect(instance).toBeDefined()
+    // Only the public surface is probed here; the engine itself is not inspected.
+    expect(typeof instance.codeToHtml).toBe("function")
+  })
+
+  test("returns the same instance on subsequent calls", async () => {
+    // Two sequential lookups must resolve to one and the same object.
+    const first = await getMarkdownHighlighter()
+    const second = await getMarkdownHighlighter()
+    expect(second).toBe(first)
+  })
+
+  test("has OpenCode theme loaded", async () => {
+    const loadedThemes = (await getMarkdownHighlighter()).getLoadedThemes()
+    expect(loadedThemes).toContain("OpenCode")
+  })
+})
+
+// Behavioural tests for highlightCodeBlocks: pass-through of non-code HTML, per-language
+// highlighting, fallbacks, the powershell freeze regressions, and failure isolation.
+describe("highlightCodeBlocks", () => {
+  // Escapes &, <, > and " as HTML entities so raw code can be embedded in a <code> element.
+  const escapeHtml = (text: string) =>
+    text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;")
+
+  // Counts highlighted blocks in `html` by their class="shiki..." marker.
+  const countShikiBlocks = (html: string) => (html.match(/class="shiki/g) || []).length
+
+  test("returns html unchanged when no code blocks exist", async () => {
+    const html = "<p>hello world</p>"
+    const result = await highlightCodeBlocks(html)
+    expect(result).toBe(html)
+  })
+
+  test("highlights a javascript code block", async () => {
+    const html = '<pre><code class="language-javascript">const x = 1</code></pre>'
+    const result = await highlightCodeBlocks(html)
+    expect(result).toContain("shiki")
+    expect(result).not.toBe(html)
+  })
+
+  test("highlights a typescript code block", async () => {
+    const html = '<pre><code class="language-typescript">const x: number = 1</code></pre>'
+    const result = await highlightCodeBlocks(html)
+    expect(result).toContain("shiki")
+  })
+
+  test("highlights multiple code blocks with different languages", async () => {
+    const html = [
+      "<p>some text</p>",
+      '<pre><code class="language-javascript">const x = 1</code></pre>',
+      "<p>more text</p>",
+      '<pre><code class="language-python">x = 1</code></pre>',
+    ].join("")
+    const result = await highlightCodeBlocks(html)
+    expect(result).toContain("some text")
+    expect(result).toContain("more text")
+    // Both blocks should be highlighted
+    expect(countShikiBlocks(result)).toBe(2)
+  })
+
+  test("falls back to text for unknown languages", async () => {
+    const html = '<pre><code class="language-notareallanguage">hello</code></pre>'
+    const result = await highlightCodeBlocks(html)
+    // Should still produce shiki output (as "text" language)
+    expect(result).toContain("shiki")
+  })
+
+  test("handles code block without language class", async () => {
+    const html = "<pre><code>plain code</code></pre>"
+    const result = await highlightCodeBlocks(html)
+    expect(result).toContain("shiki")
+  })
+
+  test("decodes HTML entities in code content", async () => {
+    const html = '<pre><code class="language-javascript">if (a &lt; b &amp;&amp; c &gt; d) {}</code></pre>'
+    const result = await highlightCodeBlocks(html)
+    expect(result).toContain("shiki")
+    // The decoded content should not contain raw HTML entities
+    expect(result).not.toContain("&lt;")
+    expect(result).not.toContain("&amp;")
+  })
+
+  test("preserves content outside code blocks", async () => {
+    const html = "<h1>Title</h1><pre><code>code</code></pre><p>Footer</p>"
+    const result = await highlightCodeBlocks(html)
+    expect(result).toContain("<h1>Title</h1>")
+    expect(result).toContain("<p>Footer</p>")
+  })
+
+  test(
+    "highlights powershell code without hanging (regression test)",
+    async () => {
+      // This is the exact code that caused the desktop app to freeze
+      // when using the JS regex engine due to catastrophic backtracking
+      const powershellCode = [
+        "# PowerShell",
+        'Remove-Item -Recurse -Force "$env:APPDATA\\opencode" -ErrorAction SilentlyContinue',
+        'Remove-Item -Recurse -Force "$env:LOCALAPPDATA\\opencode" -ErrorAction SilentlyContinue',
+        'Remove-Item -Recurse -Force "$env:APPDATA\\OpenCode Desktop" -ErrorAction SilentlyContinue',
+        'Remove-Item -Recurse -Force "$env:LOCALAPPDATA\\OpenCode Desktop" -ErrorAction SilentlyContinue',
+      ].join("\n")
+
+      const html = `<pre><code class="language-powershell">${escapeHtml(powershellCode)}</code></pre>`
+      const result = await highlightCodeBlocks(html)
+      expect(result).toContain("shiki")
+    },
+    // The timeout turns a hang into a test failure instead of a stuck run.
+    { timeout: 10_000 },
+  )
+
+  test(
+    "highlights powershell with env variable interpolation without hanging",
+    async () => {
+      // Additional powershell patterns that could trigger backtracking
+      const code = `$path = "$env:USERPROFILE\\.config\\opencode"
+if (Test-Path $path) {
+    Remove-Item -Recurse -Force "$path" -ErrorAction SilentlyContinue
+}
+Write-Host "Cleaned: $path"`
+
+      const html = `<pre><code class="language-powershell">${escapeHtml(code)}</code></pre>`
+      const result = await highlightCodeBlocks(html)
+      expect(result).toContain("shiki")
+    },
+    { timeout: 10_000 },
+  )
+
+  test("continues highlighting other blocks if one fails", async () => {
+    // Make codeToHtml throw for python only, so exactly one of the two blocks fails.
+    // highlightCodeBlocks obtains its highlighter from getMarkdownHighlighter(), which
+    // hands back this same cached instance, so the patch is visible to it.
+    const highlighter = await getMarkdownHighlighter()
+    const originalCodeToHtml = highlighter.codeToHtml
+    highlighter.codeToHtml = (code, options) => {
+      if (options.lang === "python") throw new Error("simulated highlight failure")
+      return originalCodeToHtml.call(highlighter, code, options)
+    }
+
+    // The failing block comes first to prove that processing carries on after the error.
+    const pythonBlock = '<pre><code class="language-python">x = 2</code></pre>'
+    const html = [pythonBlock, '<pre><code class="language-javascript">const a = 1</code></pre>'].join("")
+
+    try {
+      const result = await highlightCodeBlocks(html)
+      // The javascript block is still highlighted...
+      expect(countShikiBlocks(result)).toBe(1)
+      // ...and the failing block is left exactly as it came in.
+      expect(result).toContain(pythonBlock)
+    } finally {
+      // Restore the shared instance so other tests are unaffected.
+      highlighter.codeToHtml = originalCodeToHtml
+    }
+  })
+})

+ 38 - 13
packages/ui/src/context/marked.tsx

@@ -2,7 +2,8 @@ import { marked } from "marked"
 import markedKatex from "marked-katex-extension"
 import markedShiki from "marked-shiki"
 import katex from "katex"
-import { bundledLanguages, type BundledLanguage } from "shiki"
+import { bundledLanguages, type BundledLanguage, createHighlighter, type HighlighterGeneric } from "shiki"
+import { createOnigurumaEngine } from "shiki/engine/oniguruma"
 import { createSimpleContext } from "./helper"
 import { getSharedHighlighter, registerCustomTheme, ThemeRegistrationResolved } from "@pierre/diffs"
 
@@ -376,6 +377,26 @@ registerCustomTheme("OpenCode", () => {
   } as unknown as ThemeRegistrationResolved)
 })
 
+/**
+ * Cached promise for the markdown highlighter. The promise itself (rather than
+ * the resolved instance) is stored so that callers arriving while creation is
+ * still in flight share the same highlighter.
+ */
+let markdownHighlighter: Promise<HighlighterGeneric<any, any>> | undefined
+
+/**
+ * Returns the highlighter used for markdown code blocks, creating it on first use.
+ *
+ * The instance is built with the Oniguruma WASM engine and the "OpenCode" theme
+ * taken from the shared `@pierre/diffs` highlighter; only the "text" language is
+ * loaded up front.
+ *
+ * @returns A promise for the highlighter; every call resolves to the same instance.
+ * @throws Rejects if the shared highlighter or the new highlighter cannot be created;
+ *   the cache is cleared in that case so a later call retries.
+ */
+export async function getMarkdownHighlighter() {
+  // Assign the cache synchronously, before the first await, so overlapping calls
+  // cannot each pass the "not created yet" check and build their own instance.
+  markdownHighlighter ??= (async () => {
+    const shared = await getSharedHighlighter({ themes: ["OpenCode"], langs: [] })
+    return createHighlighter({
+      themes: [shared.getTheme("OpenCode")],
+      langs: ["text"],
+      engine: createOnigurumaEngine(import("shiki/wasm")),
+    })
+  })().catch((err: unknown) => {
+    // Do not keep a rejected promise around, otherwise one failure would be permanent.
+    markdownHighlighter = undefined
+    throw err
+  })
+  return markdownHighlighter
+}
+
 function renderMathInText(text: string): string {
   let result = text
 
@@ -423,12 +444,12 @@ function renderMathExpressions(html: string): string {
     .join("")
 }
 
-async function highlightCodeBlocks(html: string): Promise<string> {
+export async function highlightCodeBlocks(html: string): Promise<string> {
   const codeBlockRegex = /<pre><code(?:\s+class="language-([^"]*)")?>([\s\S]*?)<\/code><\/pre>/g
   const matches = [...html.matchAll(codeBlockRegex)]
   if (matches.length === 0) return html
 
-  const highlighter = await getSharedHighlighter({ themes: ["OpenCode"], langs: [] })
+  const highlighter = await getMarkdownHighlighter()
 
   let result = html
   for (const match of matches) {
@@ -444,16 +465,20 @@ async function highlightCodeBlocks(html: string): Promise<string> {
     if (!(language in bundledLanguages)) {
       language = "text"
     }
-    if (!highlighter.getLoadedLanguages().includes(language)) {
-      await highlighter.loadLanguage(language as BundledLanguage)
-    }
 
-    const highlighted = highlighter.codeToHtml(code, {
-      lang: language,
-      theme: "OpenCode",
-      tabindex: false,
-    })
-    result = result.replace(fullMatch, () => highlighted)
+    try {
+      if (!highlighter.getLoadedLanguages().includes(language)) {
+        await highlighter.loadLanguage(language as BundledLanguage)
+      }
+      const highlighted = highlighter.codeToHtml(code, {
+        lang: language,
+        theme: "OpenCode",
+        tabindex: false,
+      })
+      result = result.replace(fullMatch, () => highlighted)
+    } catch (err) {
+      console.warn("[markdown] highlight failed for lang=%s, falling back to plain text:", language, err)
+    }
   }
 
   return result
@@ -479,7 +504,7 @@ export const { use: useMarked, provider: MarkedProvider } = createSimpleContext(
       }),
       markedShiki({
         async highlight(code, lang) {
-          const highlighter = await getSharedHighlighter({ themes: ["OpenCode"], langs: [] })
+          const highlighter = await getMarkdownHighlighter()
           if (!(lang in bundledLanguages)) {
             lang = "text"
           }