language.go 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. package discovery
  2. import (
  3. "os"
  4. "path/filepath"
  5. "strings"
  6. "sync"
  7. "github.com/sst/opencode/internal/lsp"
  8. "log/slog"
  9. )
  // LanguageInfo summarizes what was found for a single language in a project.
  type LanguageInfo struct {
  	ID           string   // language identifier, e.g. "go", "typescript", "python"
  	FileCount    int      // number of files detected for this language
  	ProjectFiles []string // project files tied to this language, e.g. go.mod, package.json
  	IsPrimary    bool     // true when this is likely a primary language of the project
  }
  // ProjectFile ties a project configuration file to the language it indicates.
  type ProjectFile struct {
  	Name       string // file name or pattern to match
  	LanguageID string // ID of the language this file is associated with
  	IsPrimary  bool   // true when the file strongly indicates a primary language
  }
  30. // Common project files that indicate specific languages
  31. var projectFilePatterns = []ProjectFile{
  32. {Name: "go.mod", LanguageID: "go", IsPrimary: true},
  33. {Name: "go.sum", LanguageID: "go", IsPrimary: false},
  34. {Name: "package.json", LanguageID: "javascript", IsPrimary: true}, // Could be TypeScript too
  35. {Name: "tsconfig.json", LanguageID: "typescript", IsPrimary: true},
  36. {Name: "jsconfig.json", LanguageID: "javascript", IsPrimary: true},
  37. {Name: "pyproject.toml", LanguageID: "python", IsPrimary: true},
  38. {Name: "setup.py", LanguageID: "python", IsPrimary: true},
  39. {Name: "requirements.txt", LanguageID: "python", IsPrimary: true},
  40. {Name: "Cargo.toml", LanguageID: "rust", IsPrimary: true},
  41. {Name: "Cargo.lock", LanguageID: "rust", IsPrimary: false},
  42. {Name: "CMakeLists.txt", LanguageID: "cmake", IsPrimary: true},
  43. {Name: "pom.xml", LanguageID: "java", IsPrimary: true},
  44. {Name: "build.gradle", LanguageID: "java", IsPrimary: true},
  45. {Name: "build.gradle.kts", LanguageID: "kotlin", IsPrimary: true},
  46. {Name: "composer.json", LanguageID: "php", IsPrimary: true},
  47. {Name: "Gemfile", LanguageID: "ruby", IsPrimary: true},
  48. {Name: "Rakefile", LanguageID: "ruby", IsPrimary: true},
  49. {Name: "mix.exs", LanguageID: "elixir", IsPrimary: true},
  50. {Name: "rebar.config", LanguageID: "erlang", IsPrimary: true},
  51. {Name: "dune-project", LanguageID: "ocaml", IsPrimary: true},
  52. {Name: "stack.yaml", LanguageID: "haskell", IsPrimary: true},
  53. {Name: "cabal.project", LanguageID: "haskell", IsPrimary: true},
  54. {Name: "Makefile", LanguageID: "make", IsPrimary: false},
  55. {Name: "Dockerfile", LanguageID: "dockerfile", IsPrimary: false},
  56. }
  // extensionToLanguage maps a lowercase file extension, including the leading
  // dot, to a language ID.
  var extensionToLanguage = map[string]string{
  	// Systems and general-purpose languages
  	".go":    "go",
  	".rs":    "rust",
  	".c":     "c",
  	".h":     "c",
  	".cpp":   "cpp",
  	".hpp":   "cpp",
  	".swift": "swift",
  	".dart":  "dart",

  	// JVM and .NET
  	".java":  "java",
  	".kt":    "kotlin",
  	".scala": "scala",
  	".cs":    "csharp",
  	".fs":    "fsharp",

  	// JavaScript and TypeScript
  	".js":  "javascript",
  	".jsx": "javascript",
  	".ts":  "typescript",
  	".tsx": "typescript",

  	// Scripting
  	".py":   "python",
  	".rb":   "ruby",
  	".php":  "php",
  	".lua":  "lua",
  	".r":    "r",
  	".sh":   "shell",
  	".bash": "shell",
  	".zsh":  "shell",

  	// Functional
  	".hs":  "haskell",
  	".ml":  "ocaml",
  	".ex":  "elixir",
  	".exs": "elixir",
  	".erl": "erlang",

  	// Web, styling, markup and data
  	".html": "html",
  	".css":  "css",
  	".scss": "scss",
  	".sass": "sass",
  	".less": "less",
  	".json": "json",
  	".xml":  "xml",
  	".yaml": "yaml",
  	".yml":  "yaml",
  	".md":   "markdown",
  }
  // excludedDirs is the set of directory names that are left out of scanning.
  var excludedDirs = map[string]bool{
  	// Version control and hosting metadata
  	".git":    true,
  	".github": true,
  	".gitlab": true,

  	// Editor settings
  	".idea":   true,
  	".vscode": true,

  	// Dependencies and virtual environments
  	"node_modules": true,
  	"vendor":       true,
  	"venv":         true,
  	"env":          true,
  	".env":         true,

  	// Build output and caches
  	"dist":        true,
  	"build":       true,
  	"target":      true,
  	"__pycache__": true,
  	".next":       true,
  	".nuxt":       true,
  }
  119. // DetectLanguages scans a directory to identify programming languages used in the project
  120. func DetectLanguages(rootDir string) (map[string]LanguageInfo, error) {
  121. languages := make(map[string]LanguageInfo)
  122. var mutex sync.Mutex
  123. // Walk the directory tree
  124. err := filepath.Walk(rootDir, func(path string, info os.FileInfo, err error) error {
  125. if err != nil {
  126. return nil // Skip files that can't be accessed
  127. }
  128. // Skip excluded directories
  129. if info.IsDir() {
  130. if excludedDirs[info.Name()] || strings.HasPrefix(info.Name(), ".") {
  131. return filepath.SkipDir
  132. }
  133. return nil
  134. }
  135. // Skip hidden files
  136. if strings.HasPrefix(info.Name(), ".") {
  137. return nil
  138. }
  139. // Check for project files
  140. for _, pattern := range projectFilePatterns {
  141. if info.Name() == pattern.Name {
  142. mutex.Lock()
  143. lang, exists := languages[pattern.LanguageID]
  144. if !exists {
  145. lang = LanguageInfo{
  146. ID: pattern.LanguageID,
  147. FileCount: 0,
  148. ProjectFiles: []string{},
  149. IsPrimary: pattern.IsPrimary,
  150. }
  151. }
  152. lang.ProjectFiles = append(lang.ProjectFiles, path)
  153. if pattern.IsPrimary {
  154. lang.IsPrimary = true
  155. }
  156. languages[pattern.LanguageID] = lang
  157. mutex.Unlock()
  158. break
  159. }
  160. }
  161. // Check file extension
  162. ext := strings.ToLower(filepath.Ext(path))
  163. if langID, ok := extensionToLanguage[ext]; ok {
  164. mutex.Lock()
  165. lang, exists := languages[langID]
  166. if !exists {
  167. lang = LanguageInfo{
  168. ID: langID,
  169. FileCount: 0,
  170. ProjectFiles: []string{},
  171. }
  172. }
  173. lang.FileCount++
  174. languages[langID] = lang
  175. mutex.Unlock()
  176. }
  177. return nil
  178. })
  179. if err != nil {
  180. return nil, err
  181. }
  182. // Determine primary languages based on file count if not already marked
  183. determinePrimaryLanguages(languages)
  184. // Log detected languages
  185. for id, info := range languages {
  186. if info.IsPrimary {
  187. slog.Debug("Detected primary language", "language", id, "files", info.FileCount, "projectFiles", len(info.ProjectFiles))
  188. } else {
  189. slog.Debug("Detected secondary language", "language", id, "files", info.FileCount)
  190. }
  191. }
  192. return languages, nil
  193. }
  194. // determinePrimaryLanguages marks languages as primary based on file count
  195. func determinePrimaryLanguages(languages map[string]LanguageInfo) {
  196. // Find the language with the most files
  197. var maxFiles int
  198. for _, info := range languages {
  199. if info.FileCount > maxFiles {
  200. maxFiles = info.FileCount
  201. }
  202. }
  203. // Mark languages with at least 20% of the max files as primary
  204. threshold := max(maxFiles/5, 5) // At least 5 files to be considered primary
  205. for id, info := range languages {
  206. if !info.IsPrimary && info.FileCount >= threshold {
  207. info.IsPrimary = true
  208. languages[id] = info
  209. }
  210. }
  211. }
  212. // GetLanguageIDFromExtension returns the language ID for a given file extension
  213. func GetLanguageIDFromExtension(ext string) string {
  214. ext = strings.ToLower(ext)
  215. if langID, ok := extensionToLanguage[ext]; ok {
  216. return langID
  217. }
  218. return ""
  219. }
  // GetLanguageIDFromProtocol converts a protocol language kind into the
  // language ID used by this package.
  //
  // The kind is lowercased before matching, so "TypeScriptReact" and
  // "typescriptreact" are treated alike. The React variants are folded into
  // their base language ("typescriptreact" becomes "typescript",
  // "javascriptreact" becomes "javascript"); every other kind is returned
  // lowercased and otherwise unchanged.
  func GetLanguageIDFromProtocol(langKind string) string {
  	switch kind := strings.ToLower(langKind); kind {
  	case "typescriptreact":
  		return "typescript"
  	case "javascriptreact":
  		return "javascript"
  	default:
  		// Covers "go", "python", "rust" and the rest, which map to themselves.
  		return kind
  	}
  }
  249. // GetLanguageIDFromPath determines the language ID from a file path
  250. func GetLanguageIDFromPath(path string) string {
  251. // Check file extension first
  252. ext := filepath.Ext(path)
  253. if langID := GetLanguageIDFromExtension(ext); langID != "" {
  254. return langID
  255. }
  256. // Check if it's a known project file
  257. filename := filepath.Base(path)
  258. for _, pattern := range projectFilePatterns {
  259. if filename == pattern.Name {
  260. return pattern.LanguageID
  261. }
  262. }
  263. // Use LSP's detection as a fallback
  264. uri := "file://" + path
  265. langKind := lsp.DetectLanguageID(uri)
  266. return GetLanguageIDFromProtocol(string(langKind))
  267. }