#!/usr/bin/env bun
/**
 * Extracts user-facing URLs from the VS Code extension and CLI source code,
 * then writes them to a markdown file that the docs link-checker validates.
 *
 * Usage:
 *   bun run script/extract-source-links.ts          # Generate / update the committed file
 *   bun run script/extract-source-links.ts --check  # CI mode — exit 1 if the file is stale
 */
import { Glob } from "bun"
import path from "path"

// Repository root: this script lives one directory below it (script/).
const ROOT = path.resolve(import.meta.dir, "..")
// The committed, generated markdown file that the docs link-checker reads.
const OUTPUT = path.join(ROOT, "packages/kilo-docs/source-links.md")
// --check flag: verify the committed file is current instead of rewriting it.
const check = process.argv.includes("--check")

// Source trees that are scanned for URLs.
const DIRS = [
  path.join(ROOT, "packages/kilo-vscode/src"),
  path.join(ROOT, "packages/kilo-vscode/webview-ui"),
  path.join(ROOT, "packages/opencode/src"),
]

// File extensions included in the scan.
const EXTENSIONS = ["ts", "tsx", "js", "jsx"]

// Matches http:// and https:// URLs in string literals or comments
const URL_RE = /https?:\/\/[^\s"'`)\]},;*\\<>]+/g

// URLs to exclude — only genuinely non-checkable URLs (API endpoints, localhost,
// examples, dynamic templates, namespaces). Real external URLs should be extracted
// and validated by lychee; add lychee.toml exclusions for sites that block bots.
const EXCLUDE_PATTERNS = [ // Localhost and internal /^https?:\/\/(localhost|127\.0\.0\.1|0\.0\.0\.0)/, /^https?:\/\/kilo\.internal/, /^https?:\/\/dev\.kilo\.ai/, /^https?:\/\/tauri\.localhost/, // Example/placeholder URLs /^https?:\/\/example\.com/, /^https?:\/\/api\.myprovider\.com/, /^https?:\/\/synthetic\.new/, // API endpoints (not user-facing) /^https?:\/\/api\.kilo\.ai\/api\//, /^https?:\/\/ingest\.kilosessions\.ai/, /^https?:\/\/api\.openai\.com/, /^https?:\/\/api\.github\.com/, /^https?:\/\/api\.cloudflare\.com/, /^https?:\/\/api\.releases\.hashicorp\.com/, /^https?:\/\/auth\.openai\.com/, /^https?:\/\/chatgpt\.com\/backend-api/, /^https?:\/\/mcp\.exa\.ai/, /^https?:\/\/registry\.npmjs\.org/, /^https?:\/\/formulae\.brew\.sh\/api/, /^https?:\/\/community\.chocolatey\.org\/api/, /^https?:\/\/download-cdn\.jetbrains\.com/, /^https?:\/\/raw\.githubusercontent\.com/, // XML/SVG namespace URIs /^https?:\/\/www\.w3\.org\//, // URLs that are templates with interpolation (contain ${ after stripping) /\$\{/, // Truncated/placeholder URLs (e.g., https://…) or bare protocols /^https?:\/\/[\W]*$/, // GHE example domains /^https?:\/\/company\.ghe\.com/, // Example/placeholder GitHub URLs used in docs/comments /^https?:\/\/github\.com\/owner\//, /^https?:\/\/github\.com\/\.extraheader/, /^https?:\/\/github\.com\/user-attachments\/assets\/xxxx/, /^https?:\/\/github\.com\/user-attachments\/files\/\d+\/api\.json/, // Example/template session URLs with placeholders /\/s\/abc123$/, // Truncated URL paths (e.g., /s/ with no ID) /\/s\/$/, ] // Directories to skip entirely const SKIP_DIRS = ["node_modules", ".storybook", "stories", "test", "tests", "__tests__", "__mocks__"] // Subdirectories containing vendored/third-party code const SKIP_PATH_SEGMENTS = ["continuedev"] // Individual files to skip (data files full of non-user-facing URLs) const SKIP_FILES = ["models-snapshot.ts", "models-snapshot.js"] function shouldExclude(url: string): boolean { return 
EXCLUDE_PATTERNS.some((re) => re.test(url)) } function shouldSkipFile(filepath: string): boolean { const rel = path.relative(ROOT, filepath) const parts = rel.split(path.sep) if (parts.some((p) => SKIP_DIRS.includes(p))) return true if (SKIP_PATH_SEGMENTS.some((seg) => rel.includes(seg))) return true if (/\.test\.[jt]sx?$/.test(filepath)) return true if (/\.spec\.[jt]sx?$/.test(filepath)) return true if (/\.stories\.[jt]sx?$/.test(filepath)) return true if (/\/i18n\//.test(filepath) && !filepath.endsWith("en.ts")) return true const basename = path.basename(filepath) if (SKIP_FILES.includes(basename)) return true return false } function clean(url: string): string { return url.replace(/[.),:;]+$/, "").replace(/<\/?\w+>$/, "") } async function extract(): Promise>> { const links = new Map>() for (const dir of DIRS) { for (const ext of EXTENSIONS) { const glob = new Glob(`**/*.${ext}`) for await (const entry of glob.scan({ cwd: dir, absolute: true })) { if (shouldSkipFile(entry)) continue const content = await Bun.file(entry).text() for (const line of content.split("\n")) { for (const match of line.matchAll(URL_RE)) { const url = clean(match[0]) if (shouldExclude(url)) continue if (!links.has(url)) links.set(url, new Set()) links.get(url)!.add(path.relative(ROOT, entry)) } } } } } return links } function render(sorted: [string, Set][]): string { const parts = [ "# Source Code Links", "", "", ``, "", ] for (const [url, files] of sorted) { parts.push(`- <${url}>`) for (const file of [...files].sort()) { parts.push(` `) } } parts.push("") return parts.join("\n") } const links = await extract() const sorted = [...links.entries()].sort(([a], [b]) => a.localeCompare(b)) const output = render(sorted) if (check) { const committed = await Bun.file(OUTPUT) .text() .catch(() => "") if (committed === output) { console.log("packages/kilo-docs/source-links.md is up to date.") process.exit(0) } console.error( [ "ERROR: packages/kilo-docs/source-links.md is out of date.", "", "Run the 
following command locally and commit the result:", "", " bun run script/extract-source-links.ts", "", ].join("\n"), ) process.exit(1) } await Bun.write(OUTPUT, output) console.log(`Wrote ${sorted.length} unique URLs to packages/kilo-docs/source-links.md`)