| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179 |
- #!/usr/bin/env bun
- /**
- * Extracts user-facing URLs from the VS Code extension and CLI source code,
- * then writes them to a markdown file that the docs link-checker validates.
- *
- * Usage:
- * bun run script/extract-source-links.ts # Generate / update the committed file
- * bun run script/extract-source-links.ts --check # CI mode — exit 1 if the file is stale
- */
- import { Glob } from "bun"
- import path from "path"
- const ROOT = path.resolve(import.meta.dir, "..")
- const OUTPUT = path.join(ROOT, "packages/kilo-docs/source-links.md")
- const check = process.argv.includes("--check")
- const DIRS = [
- path.join(ROOT, "packages/kilo-vscode/src"),
- path.join(ROOT, "packages/kilo-vscode/webview-ui"),
- path.join(ROOT, "packages/opencode/src"),
- ]
- const EXTENSIONS = ["ts", "tsx", "js", "jsx"]
- // Matches http:// and https:// URLs in string literals or comments
- const URL_RE = /https?:\/\/[^\s"'`)\]},;*\\<>]+/g
- // URLs to exclude — only genuinely non-checkable URLs (API endpoints, localhost,
- // examples, dynamic templates, namespaces). Real external URLs should be extracted
- // and validated by lychee; add lychee.toml exclusions for sites that block bots.
- const EXCLUDE_PATTERNS = [
- // Localhost and internal
- /^https?:\/\/(localhost|127\.0\.0\.1|0\.0\.0\.0)/,
- /^https?:\/\/kilo\.internal/,
- /^https?:\/\/dev\.kilo\.ai/,
- /^https?:\/\/tauri\.localhost/,
- // Example/placeholder URLs
- /^https?:\/\/example\.com/,
- /^https?:\/\/api\.myprovider\.com/,
- /^https?:\/\/synthetic\.new/,
- // API endpoints (not user-facing)
- /^https?:\/\/api\.kilo\.ai\/api\//,
- /^https?:\/\/ingest\.kilosessions\.ai/,
- /^https?:\/\/api\.openai\.com/,
- /^https?:\/\/api\.github\.com/,
- /^https?:\/\/api\.cloudflare\.com/,
- /^https?:\/\/api\.releases\.hashicorp\.com/,
- /^https?:\/\/auth\.openai\.com/,
- /^https?:\/\/chatgpt\.com\/backend-api/,
- /^https?:\/\/mcp\.exa\.ai/,
- /^https?:\/\/registry\.npmjs\.org/,
- /^https?:\/\/formulae\.brew\.sh\/api/,
- /^https?:\/\/community\.chocolatey\.org\/api/,
- /^https?:\/\/download-cdn\.jetbrains\.com/,
- /^https?:\/\/raw\.githubusercontent\.com/,
- // XML/SVG namespace URIs
- /^https?:\/\/www\.w3\.org\//,
- // URLs that are templates with interpolation (contain ${ after stripping)
- /\$\{/,
- // Truncated/placeholder URLs (e.g., https://…) or bare protocols
- /^https?:\/\/[\W]*$/,
- // GHE example domains
- /^https?:\/\/company\.ghe\.com/,
- // Example/placeholder GitHub URLs used in docs/comments
- /^https?:\/\/github\.com\/owner\//,
- /^https?:\/\/github\.com\/\.extraheader/,
- /^https?:\/\/github\.com\/user-attachments\/assets\/xxxx/,
- /^https?:\/\/github\.com\/user-attachments\/files\/\d+\/api\.json/,
- // Example/template session URLs with placeholders
- /\/s\/abc123$/,
- // Truncated URL paths (e.g., /s/ with no ID)
- /\/s\/$/,
- ]
- // Directories to skip entirely
- const SKIP_DIRS = ["node_modules", ".storybook", "stories", "test", "tests", "__tests__", "__mocks__"]
- // Subdirectories containing vendored/third-party code
- const SKIP_PATH_SEGMENTS = ["continuedev"]
- // Individual files to skip (data files full of non-user-facing URLs)
- const SKIP_FILES = ["models-snapshot.ts", "models-snapshot.js"]
- function shouldExclude(url: string): boolean {
- return EXCLUDE_PATTERNS.some((re) => re.test(url))
- }
- function shouldSkipFile(filepath: string): boolean {
- const rel = path.relative(ROOT, filepath)
- const parts = rel.split(path.sep)
- if (parts.some((p) => SKIP_DIRS.includes(p))) return true
- if (SKIP_PATH_SEGMENTS.some((seg) => rel.includes(seg))) return true
- if (/\.test\.[jt]sx?$/.test(filepath)) return true
- if (/\.spec\.[jt]sx?$/.test(filepath)) return true
- if (/\.stories\.[jt]sx?$/.test(filepath)) return true
- if (/\/i18n\//.test(filepath) && !filepath.endsWith("en.ts")) return true
- const basename = path.basename(filepath)
- if (SKIP_FILES.includes(basename)) return true
- return false
- }
- function clean(url: string): string {
- return url.replace(/[.),:;]+$/, "").replace(/<\/?\w+>$/, "")
- }
- async function extract(): Promise<Map<string, Set<string>>> {
- const links = new Map<string, Set<string>>()
- for (const dir of DIRS) {
- for (const ext of EXTENSIONS) {
- const glob = new Glob(`**/*.${ext}`)
- for await (const entry of glob.scan({ cwd: dir, absolute: true })) {
- if (shouldSkipFile(entry)) continue
- const content = await Bun.file(entry).text()
- for (const line of content.split("\n")) {
- for (const match of line.matchAll(URL_RE)) {
- const url = clean(match[0])
- if (shouldExclude(url)) continue
- if (!links.has(url)) links.set(url, new Set())
- links.get(url)!.add(path.relative(ROOT, entry))
- }
- }
- }
- }
- }
- return links
- }
- function render(sorted: [string, Set<string>][]): string {
- const parts = [
- "# Source Code Links",
- "",
- "<!-- Auto-generated by script/extract-source-links.ts — DO NOT EDIT -->",
- `<!-- ${sorted.length} unique URLs extracted from extension and CLI source -->`,
- "",
- ]
- for (const [url, files] of sorted) {
- parts.push(`- <${url}>`)
- for (const file of [...files].sort()) {
- parts.push(` <!-- ${file} -->`)
- }
- }
- parts.push("")
- return parts.join("\n")
- }
- const links = await extract()
- const sorted = [...links.entries()].sort(([a], [b]) => a.localeCompare(b))
- const output = render(sorted)
- if (check) {
- const committed = await Bun.file(OUTPUT)
- .text()
- .catch(() => "")
- if (committed === output) {
- console.log("packages/kilo-docs/source-links.md is up to date.")
- process.exit(0)
- }
- console.error(
- [
- "ERROR: packages/kilo-docs/source-links.md is out of date.",
- "",
- "Run the following command locally and commit the result:",
- "",
- " bun run script/extract-source-links.ts",
- "",
- ].join("\n"),
- )
- process.exit(1)
- }
- await Bun.write(OUTPUT, output)
- console.log(`Wrote ${sorted.length} unique URLs to packages/kilo-docs/source-links.md`)
|