extract-source-links.ts 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. #!/usr/bin/env bun
  2. /**
  3. * Extracts user-facing URLs from the VS Code extension and CLI source code,
  4. * then writes them to a markdown file that the docs link-checker validates.
  5. *
  6. * Usage:
  7. * bun run script/extract-source-links.ts # Generate / update the committed file
  8. * bun run script/extract-source-links.ts --check # CI mode — exit 1 if the file is stale
  9. */
  10. import { Glob } from "bun"
  11. import path from "path"
  // Base directory for every path below: one level above import.meta.dir.
  const ROOT = path.resolve(import.meta.dir, "..")

  // Markdown file this script generates (or verifies in --check mode).
  const OUTPUT = path.join(ROOT, "packages/kilo-docs/source-links.md")

  // True when the script was invoked with the --check flag.
  const check = process.argv.includes("--check")

  // Source trees that are scanned for URLs, resolved against ROOT.
  const DIRS = ["packages/kilo-vscode/src", "packages/kilo-vscode/webview-ui", "packages/opencode/src"].map((rel) =>
    path.join(ROOT, rel),
  )

  // File extensions (without the dot) that are scanned inside each directory.
  const EXTENSIONS = ["ts", "tsx", "js", "jsx"]
  // Finds http:// and https:// URLs wherever they appear in a line of source.
  // A match stops at whitespace, quotes, backticks, closing brackets/braces,
  // commas, semicolons, asterisks, backslashes and angle brackets.
  const URL_RE: RegExp = /https?:\/\/[^\s"'`)\]},;*\\<>]+/g

  // A URL matching ANY of these patterns is dropped from the generated file.
  // Keep this list to URLs that genuinely cannot be link-checked (API endpoints,
  // localhost, examples, dynamic templates, namespaces). Real external URLs
  // should be extracted and validated by lychee; sites that block bots belong
  // in lychee.toml exclusions instead.
  const EXCLUDE_PATTERNS: RegExp[] = [
    // --- Localhost and internal hosts ---
    /^https?:\/\/(localhost|127\.0\.0\.1|0\.0\.0\.0)/,
    /^https?:\/\/kilo\.internal/,
    /^https?:\/\/dev\.kilo\.ai/,
    /^https?:\/\/tauri\.localhost/,

    // --- Example / placeholder URLs ---
    /^https?:\/\/example\.com/,
    /^https?:\/\/api\.myprovider\.com/,
    /^https?:\/\/synthetic\.new/,

    // --- API endpoints (not user-facing) ---
    /^https?:\/\/api\.kilo\.ai\/api\//,
    /^https?:\/\/ingest\.kilosessions\.ai/,
    /^https?:\/\/api\.openai\.com/,
    /^https?:\/\/api\.github\.com/,
    /^https?:\/\/api\.cloudflare\.com/,
    /^https?:\/\/api\.releases\.hashicorp\.com/,
    /^https?:\/\/auth\.openai\.com/,
    /^https?:\/\/chatgpt\.com\/backend-api/,
    /^https?:\/\/mcp\.exa\.ai/,
    /^https?:\/\/registry\.npmjs\.org/,
    /^https?:\/\/formulae\.brew\.sh\/api/,
    /^https?:\/\/community\.chocolatey\.org\/api/,
    /^https?:\/\/download-cdn\.jetbrains\.com/,
    /^https?:\/\/raw\.githubusercontent\.com/,

    // --- XML/SVG namespace URIs ---
    /^https?:\/\/www\.w3\.org\//,

    // --- Template strings: anything still containing a "${" interpolation ---
    /\$\{/,

    // --- Truncated URLs (e.g. https://…) or a bare protocol with no host ---
    /^https?:\/\/[\W]*$/,

    // --- GitHub Enterprise example domain ---
    /^https?:\/\/company\.ghe\.com/,

    // --- Placeholder GitHub URLs used in docs/comments ---
    /^https?:\/\/github\.com\/owner\//,
    /^https?:\/\/github\.com\/\.extraheader/,
    /^https?:\/\/github\.com\/user-attachments\/assets\/xxxx/,
    /^https?:\/\/github\.com\/user-attachments\/files\/\d+\/api\.json/,

    // --- Session URLs with a placeholder ID ---
    /\/s\/abc123$/,

    // --- Session URLs cut off before the ID (path ends in /s/) ---
    /\/s\/$/,
  ]
  // Any file with one of these directory names in its ROOT-relative path is ignored.
  const SKIP_DIRS = [
    "node_modules",
    ".storybook",
    "stories",
    "test",
    "tests",
    "__tests__",
    "__mocks__",
  ]

  // Markers for vendored/third-party code; a path containing one is ignored.
  const SKIP_PATH_SEGMENTS = ["continuedev"]

  // File names ignored wherever they occur (data files full of non-user-facing URLs).
  const SKIP_FILES = ["models-snapshot.ts"]
  /**
   * Reports whether a URL must be left out of the generated link list.
   *
   * @param url - Candidate URL, already passed through `clean`.
   * @returns `true` as soon as one entry of `EXCLUDE_PATTERNS` matches, otherwise `false`.
   */
  function shouldExclude(url: string): boolean {
    for (const pattern of EXCLUDE_PATTERNS) {
      if (pattern.test(url)) return true
    }
    return false
  }
  /**
   * Decides whether a source file is left out of URL extraction.
   *
   * A file is skipped when any of the following holds:
   * - a segment of its ROOT-relative path is listed in `SKIP_DIRS`;
   * - its ROOT-relative path contains an entry of `SKIP_PATH_SEGMENTS`;
   * - it is a `.test.`, `.spec.` or `.stories.` file (js/jsx/ts/tsx);
   * - it lives under an `i18n` directory and its path does not end in `en.ts`;
   * - its base name is listed in `SKIP_FILES`.
   *
   * @param filepath - Path of the candidate file (absolute paths are what `extract` passes).
   * @returns `true` when the file must be ignored.
   */
  function shouldSkipFile(filepath: string): boolean {
    const rel = path.relative(ROOT, filepath)
    const parts = rel.split(path.sep)
    if (parts.some((p) => SKIP_DIRS.includes(p))) return true
    // Substring match on the relative path, not an exact segment match.
    if (SKIP_PATH_SEGMENTS.some((seg) => rel.includes(seg))) return true
    if (/\.(?:test|spec|stories)\.[jt]sx?$/.test(filepath)) return true
    // Normalise separators first: the i18n pattern is written with "/" and
    // would otherwise never match paths that use "\" as separator.
    const normalized = filepath.split(path.sep).join("/")
    if (/\/i18n\//.test(normalized) && !filepath.endsWith("en.ts")) return true
    return SKIP_FILES.includes(path.basename(filepath))
  }
  /**
   * Trims trailing noise that the URL matcher may have captured.
   *
   * Removes, from the end of the string, any run made of the punctuation
   * characters `.`, `)`, `,`, `:`, `;` and simple HTML tags such as `</b>`,
   * in whatever order they appear. Stripping both in one pass means a tag
   * that follows punctuation (e.g. `"….</b>"`) no longer leaves the
   * punctuation behind.
   *
   * @param url - Raw text matched as a URL.
   * @returns The URL without trailing punctuation or tags; unchanged when there is none.
   */
  function clean(url: string): string {
    return url.replace(/(?:[.),:;]|<\/?\w+>)+$/, "")
  }
  /**
   * Scans every file with an extension from `EXTENSIONS` under each directory
   * in `DIRS` and collects the URLs found in them.
   *
   * Files rejected by `shouldSkipFile` are not read. Each match of `URL_RE` is
   * passed through `clean` and dropped when `shouldExclude` accepts it.
   *
   * @returns A map from URL to the set of ROOT-relative files that contain it.
   *          File paths always use "/" as separator so the generated output
   *          does not depend on the platform the script runs on.
   */
  async function extract(): Promise<Map<string, Set<string>>> {
    const links = new Map<string, Set<string>>()
    for (const dir of DIRS) {
      for (const ext of EXTENSIONS) {
        const glob = new Glob(`**/*.${ext}`)
        for await (const entry of glob.scan({ cwd: dir, absolute: true })) {
          if (shouldSkipFile(entry)) continue
          // path.relative uses the platform separator; normalise to "/".
          const source = path.relative(ROOT, entry).split(path.sep).join("/")
          const content = await Bun.file(entry).text()
          // URL_RE cannot match across whitespace, so scanning the whole text
          // finds the same URLs as scanning it line by line.
          for (const match of content.matchAll(URL_RE)) {
            const url = clean(match[0])
            if (shouldExclude(url)) continue
            let files = links.get(url)
            if (files === undefined) {
              files = new Set<string>()
              links.set(url, files)
            }
            files.add(source)
          }
        }
      }
    }
    return links
  }
  /**
   * Builds the markdown document from the collected links.
   *
   * The output starts with a title and two HTML comments (the second states
   * the number of URLs), followed by one `- <url>` bullet per entry. Under
   * each bullet, every file of that entry is listed in sorted order as an
   * HTML comment. The document ends with a newline.
   *
   * @param sorted - `[url, files]` pairs, rendered in the order given.
   * @returns The complete markdown text.
   */
  function render(sorted: [string, Set<string>][]): string {
    const header = [
      "# Source Code Links",
      "",
      "<!-- Auto-generated by script/extract-source-links.ts — DO NOT EDIT -->",
      `<!-- ${sorted.length} unique URLs extracted from extension and CLI source -->`,
      "",
    ]
    const body = sorted.flatMap(([url, files]) => {
      const origins = Array.from(files)
      origins.sort()
      return [`- <${url}>`, ...origins.map((file) => ` <!-- ${file} -->`)]
    })
    // The trailing empty element makes the joined text end with "\n".
    return [...header, ...body, ""].join("\n")
  }
  // Entry point: collect the links, order them by URL and render the document.
  const links = await extract()
  const sorted = Array.from(links.entries()).sort((left, right) => left[0].localeCompare(right[0]))
  const output = render(sorted)

  if (!check) {
    // Default mode: (re)write the committed file.
    await Bun.write(OUTPUT, output)
    console.log(`Wrote ${sorted.length} unique URLs to packages/kilo-docs/source-links.md`)
  } else {
    // Check mode: compare against the committed file; a file that cannot be
    // read is treated as empty and therefore reported as out of date.
    const committedFile = Bun.file(OUTPUT)
    let committed: string
    try {
      committed = await committedFile.text()
    } catch {
      committed = ""
    }

    if (committed !== output) {
      const message = [
        "ERROR: packages/kilo-docs/source-links.md is out of date.",
        "",
        "Run the following command locally and commit the result:",
        "",
        " bun run script/extract-source-links.ts",
        "",
      ].join("\n")
      console.error(message)
      process.exit(1)
    }

    console.log("packages/kilo-docs/source-links.md is up to date.")
    process.exit(0)
  }