// cli.ts — command-line entry point for the benchmark runner.

  1. import * as fs from "fs"
  2. import * as path from "path"
  3. import { build, filesystem, GluegunPrompt } from "gluegun"
  4. import { runTests } from "@vscode/test-electron"
  // All locations below are resolved relative to this file's directory.
  // `__dirname` is `<...>/Roo-Code/benchmark/src`.

  /** Two levels above `__dirname`; handed to `runTests` as the extension development path. */
  const extensionDevelopmentPath = path.resolve(__dirname, "..", "..")

  /** Test entry point handed to `runTests` as the extension tests path. */
  const extensionTestsPath = path.resolve(__dirname, "..", "out", "runExercise")

  /** Directory holding one `<language>.md` prompt file per language. */
  const promptsPath = path.resolve(__dirname, "..", "prompts")

  /** Directory holding one subdirectory of exercises per language. */
  const exercisesPath = path.resolve(__dirname, "..", "..", "..", "exercises")

  /** Languages that `runAll` iterates over, in this order. */
  const languages = [
    "cpp",
    "go",
    "java",
    "javascript",
    "python",
    "rust",
  ]
  /**
   * Runs the benchmark for every entry in `languages`.
   *
   * Languages are processed sequentially: each `runLanguage` call is awaited
   * before the next one starts.
   *
   * @param options - `runId` and `model`, both forwarded unchanged to `runLanguage`.
   */
  async function runAll(options: { runId: number; model: string }) {
    const { runId, model } = options

    for (const currentLanguage of languages) {
      await runLanguage({ runId, model, language: currentLanguage })
    }
  }
  /**
   * Runs every exercise of a single language, one exercise at a time.
   *
   * The exercises are the subdirectories that gluegun's
   * `filesystem.subdirectories` reports under `<exercisesPath>/<language>`,
   * reduced to their base names and excluding names that start with ".".
   *
   * If the language directory does not exist, an error is written to stderr
   * and the process exits with status 1.
   *
   * @param options - `runId` and `model` are forwarded to `runExercise`;
   *   `language` selects the directory under `exercisesPath`.
   */
  async function runLanguage(options: { runId: number; model: string; language: string }) {
    const { runId, model, language } = options
    const languagePath = path.resolve(exercisesPath, language)

    const languageDirExists = fs.existsSync(languagePath)
    if (!languageDirExists) {
      console.error(`Language directory ${languagePath} does not exist`)
      process.exit(1)
    }

    // Collect visible exercise names up front, then run them in order.
    const exerciseNames: string[] = []
    for (const exerciseDir of filesystem.subdirectories(languagePath)) {
      const name = path.basename(exerciseDir)
      if (!name.startsWith(".")) {
        exerciseNames.push(name)
      }
    }

    for (const name of exerciseNames) {
      await runExercise({ runId, model, language, exercise: name })
    }
  }
  /**
   * Runs one exercise by launching the extension test host on the exercise's
   * directory.
   *
   * The exercise is skipped (with a log line) when a `usage.json` file already
   * exists in its directory. Otherwise `runTests` is started with the exercise
   * directory as the workspace and with the prompt path, workspace path, model
   * id and run id exposed through the test environment.
   *
   * @param options - `runId`, `model`, `language` and `exercise` identifying
   *   what to run; `language` also selects the `<language>.md` prompt file.
   */
  async function runExercise(options: {
    runId: number
    model: string
    language: string
    exercise: string
  }) {
    const { runId, model, language, exercise } = options
    const workspacePath = path.resolve(exercisesPath, language, exercise)

    // Environment variables made available to the extension tests.
    const extensionTestsEnv = {
      PROMPT_PATH: path.resolve(promptsPath, `${language}.md`),
      WORKSPACE_PATH: workspacePath,
      OPENROUTER_MODEL_ID: model,
      RUN_ID: runId.toString(),
    }

    const usageFile = path.resolve(workspacePath, "usage.json")
    const resultExists = fs.existsSync(usageFile)
    if (resultExists) {
      console.log(`Test result exists for ${language} / ${exercise}, skipping`)
      return
    }

    console.log(`Running ${language} / ${exercise}`)

    const launchArgs = [workspacePath, "--disable-extensions"]
    await runTests({
      extensionDevelopmentPath,
      extensionTestsPath,
      launchArgs,
      extensionTestsEnv,
    })
  }
  /**
   * Interactively asks which language to run.
   *
   * The choices are the base names of the subdirectories of `exercisesPath`,
   * excluding names that start with ".".
   *
   * @param prompt - gluegun prompt used to display the selection.
   * @returns The selected language name.
   * @throws Error when no selectable (non-hidden) language directory exists.
   */
  async function askLanguage(prompt: GluegunPrompt): Promise<string> {
    // Filter before checking for emptiness, so that a directory containing only
    // hidden folders is reported as an error rather than an empty prompt.
    const choices = filesystem
      .subdirectories(exercisesPath)
      .map((languageDir) => path.basename(languageDir))
      .filter((name) => !name.startsWith("."))

    if (choices.length === 0) {
      throw new Error(`No languages found in ${exercisesPath}`)
    }

    const { language } = await prompt.ask<{ language: string }>({
      type: "select",
      name: "language",
      message: "Which language?",
      choices,
    })

    return language
  }
  /**
   * Interactively asks which exercise of `language` to run.
   *
   * The choices are the base names of the subdirectories of
   * `<exercisesPath>/<language>`, excluding names that start with "." — the
   * same rule `runLanguage` uses when it enumerates exercises.
   *
   * @param prompt - gluegun prompt used to display the selection.
   * @param language - Language whose exercises are listed.
   * @returns The selected exercise name.
   * @throws Error when the language has no selectable (non-hidden) exercise.
   */
  async function askExercise(prompt: GluegunPrompt, language: string): Promise<string> {
    const choices = filesystem
      .subdirectories(path.join(exercisesPath, language))
      .map((exerciseDir) => path.basename(exerciseDir))
      // Hidden directories are not exercises; keep this in line with runLanguage.
      .filter((name) => !name.startsWith("."))

    if (choices.length === 0) {
      throw new Error(`No exercises found for ${language}`)
    }

    const { exercise } = await prompt.ask<{ exercise: string }>({
      type: "select",
      name: "exercise",
      message: "Which exercise?",
      choices,
    })

    return exercise
  }
  /**
   * Registers a new benchmark run by POSTing the model name to the local
   * runs API.
   *
   * @param model - Model identifier stored with the run.
   * @returns The first entry of the `run` array in the JSON response.
   * @throws Error when the request is not successful, or when the response
   *   body does not contain a non-empty `run` array.
   */
  async function createRun({ model }: { model: string }): Promise<{ id: number; model: string }> {
    const response = await fetch("http://localhost:3000/api/runs", {
      method: "POST",
      // Declare the payload type; without it a string body is sent as text/plain.
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ model }),
    })

    if (!response.ok) {
      throw new Error(`Failed to create run: ${response.statusText}`)
    }

    // Validate the shape instead of destructuring blindly, so a malformed
    // response fails here with a clear message rather than later as a TypeError.
    const payload: unknown = await response.json()
    const runs = typeof payload === "object" && payload !== null ? (payload as { run?: unknown }).run : undefined

    if (!Array.isArray(runs) || runs.length === 0 || runs[0] == null) {
      throw new Error("Failed to create run: response did not contain a run")
    }

    return runs[0] as { id: number; model: string }
  }
  /**
   * CLI entry point.
   *
   * The `run` command stores its first positional parameter as the language,
   * its second as the exercise, and the `--runId` option (when given) as the
   * run id. After the command has run:
   *
   * - language `all` runs every exercise of every language;
   * - exercise `all` runs every exercise of the given language;
   * - otherwise a single exercise is run, prompting for whichever of
   *   language / exercise was not supplied.
   *
   * When no run id is supplied, a new run is created through `createRun`.
   * The process exits with status 0 on success and 1 on any error.
   */
  async function main() {
    const cli = build()
      .brand("benchmark-runner")
      .src(__dirname)
      .help()
      .version()
      .command({
        name: "run",
        run: ({ config, parameters }) => {
          config.language = parameters.first
          config.exercise = parameters.second

          if (parameters.options["runId"]) {
            config.runId = parameters.options["runId"]
          }
        },
      })
      .defaultCommand() // Use the default command if no args.
      .create()

    const { print, prompt, config } = await cli.run(process.argv)

    try {
      const model = "anthropic/claude-3.7-sonnet"

      let runId: number
      if (config.runId) {
        // A bare `--runId` flag would otherwise coerce to 1 via Number(true),
        // and a non-numeric value to NaN; reject both instead of running with
        // a bogus id.
        runId = typeof config.runId === "boolean" ? Number.NaN : Number(config.runId)
        if (!Number.isInteger(runId)) {
          throw new Error(`Invalid --runId value: ${config.runId}`)
        }
      } else {
        runId = (await createRun({ model })).id
      }

      if (config.language === "all") {
        console.log("Running all exercises for all languages")
        await runAll({ runId, model })
      } else if (config.exercise === "all") {
        console.log(`Running all exercises for ${config.language}`)
        await runLanguage({ runId, model, language: config.language })
      } else {
        const language = config.language || (await askLanguage(prompt))
        const exercise = config.exercise || (await askExercise(prompt, language))
        await runExercise({ runId, model, language, exercise })
      }

      process.exit(0)
    } catch (error) {
      print.error(error)
      process.exit(1)
    }
  }

  void main()