sourcegraph.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. package tools
  2. import (
  3. "bytes"
  4. "context"
  5. "encoding/json"
  6. "fmt"
  7. "io"
  8. "net/http"
  9. "strings"
  10. "time"
  11. )
  12. const (
  13. SourcegraphToolName = "sourcegraph"
  14. sourcegraphToolDescription = `Search code across public repositories using Sourcegraph's GraphQL API.
  15. WHEN TO USE THIS TOOL:
  16. - Use when you need to find code examples or implementations across public repositories
  17. - Helpful for researching how others have solved similar problems
  18. - Useful for discovering patterns and best practices in open source code
  19. HOW TO USE:
  20. - Provide a search query using Sourcegraph's query syntax
  21. - Optionally specify the number of results to return (default: 10)
  22. - Optionally set a timeout for the request
  23. QUERY SYNTAX:
  24. - Basic search: "fmt.Println" searches for exact matches
  25. - File filters: "file:.go fmt.Println" limits to Go files
  26. - Repository filters: "repo:^github\.com/golang/go$ fmt.Println" limits to specific repos
  27. - Language filters: "lang:go fmt.Println" limits to Go code
  28. - Boolean operators: "fmt.Println AND log.Fatal" for combined terms
  29. - Regular expressions: "fmt\.(Print|Printf|Println)" for pattern matching
  30. - Quoted strings: "\"exact phrase\"" for exact phrase matching
  31. - Exclude filters: "-file:test" or "-repo:forks" to exclude matches
  32. ADVANCED FILTERS:
  33. - Repository filters:
  34. * "repo:name" - Match repositories with name containing "name"
  35. * "repo:^github\.com/org/repo$" - Exact repository match
  36. * "repo:org/repo@branch" - Search specific branch
  37. * "repo:org/repo rev:branch" - Alternative branch syntax
  38. * "-repo:name" - Exclude repositories
  39. * "fork:yes" or "fork:only" - Include or only show forks
  40. * "archived:yes" or "archived:only" - Include or only show archived repos
  41. * "visibility:public" or "visibility:private" - Filter by visibility
  42. - File filters:
  43. * "file:\.js$" - Files with .js extension
  44. * "file:internal/" - Files in internal directory
  45. * "-file:test" - Exclude test files
  46. * "file:has.content(Copyright)" - Files containing "Copyright"
  47. * "file:has.contributor([email protected])" - Files with specific contributor
  48. - Content filters:
  49. * "content:\"exact string\"" - Search for exact string
  50. * "-content:\"unwanted\"" - Exclude files with unwanted content
  51. * "case:yes" - Case-sensitive search
  52. - Type filters:
  53. * "type:symbol" - Search for symbols (functions, classes, etc.)
  54. * "type:file" - Search file content only
  55. * "type:path" - Search filenames only
  56. * "type:diff" - Search code changes
  57. * "type:commit" - Search commit messages
  58. - Commit/diff search:
  59. * "after:\"1 month ago\"" - Commits after date
  60. * "before:\"2023-01-01\"" - Commits before date
  61. * "author:name" - Commits by author
  62. * "message:\"fix bug\"" - Commits with message
  63. - Result selection:
  64. * "select:repo" - Show only repository names
  65. * "select:file" - Show only file paths
  66. * "select:content" - Show only matching content
  67. * "select:symbol" - Show only matching symbols
  68. - Result control:
  69. * "count:100" - Return up to 100 results
  70. * "count:all" - Return all results
  71. * "timeout:30s" - Set search timeout
  72. EXAMPLES:
  73. - "file:.go context.WithTimeout" - Find Go code using context.WithTimeout
  74. - "lang:typescript useState type:symbol" - Find TypeScript React useState hooks
  75. - "repo:^github\.com/kubernetes/kubernetes$ pod list type:file" - Find Kubernetes files related to pod listing
  76. - "repo:sourcegraph/sourcegraph$ after:\"3 months ago\" type:diff database" - Recent changes to database code
  77. - "file:Dockerfile (alpine OR ubuntu) -content:alpine:latest" - Dockerfiles with specific base images
  78. - "repo:has.path(\.py) file:requirements.txt tensorflow" - Python projects using TensorFlow
  79. BOOLEAN OPERATORS:
  80. - "term1 AND term2" - Results containing both terms
  81. - "term1 OR term2" - Results containing either term
  82. - "term1 NOT term2" - Results with term1 but not term2
  83. - "term1 and (term2 or term3)" - Grouping with parentheses
  84. LIMITATIONS:
  85. - Only searches public repositories
  86. - Rate limits may apply
  87. - Complex queries may take longer to execute
  88. - Maximum of 20 results per query
  89. TIPS:
  90. - Use specific file extensions to narrow results
  91. - Add repo: filters for more targeted searches
  92. - Use type:symbol to find function/method definitions
  93. - Use type:file to find relevant files
  94. - For more details on query syntax, visit: https://docs.sourcegraph.com/code_search/queries`
  95. )
// SourcegraphParams holds the arguments of a sourcegraph tool call, decoded
// from the call's JSON input by Run.
type SourcegraphParams struct {
	// Query is the Sourcegraph search query. Run rejects an empty query.
	Query string `json:"query"`
	// Count is the requested number of results. Run replaces non-positive
	// values with 10 and caps the value at 20.
	Count int `json:"count,omitempty"`
	// ContextWindow is the number of lines shown before and after each match.
	// Run replaces non-positive values with 10.
	ContextWindow int `json:"context_window,omitempty"`
	// Timeout is the request timeout in seconds. Run caps it at 120; when it
	// is not positive the tool's default HTTP client is used.
	Timeout int `json:"timeout,omitempty"`
}
// sourcegraphTool is the tool implementation that sends search queries to
// the Sourcegraph GraphQL API.
type sourcegraphTool struct {
	// client is the HTTP client Run uses unless the call supplies its own
	// timeout.
	client *http.Client
}
  105. func NewSourcegraphTool() BaseTool {
  106. return &sourcegraphTool{
  107. client: &http.Client{
  108. Timeout: 30 * time.Second,
  109. },
  110. }
  111. }
  112. func (t *sourcegraphTool) Info() ToolInfo {
  113. return ToolInfo{
  114. Name: SourcegraphToolName,
  115. Description: sourcegraphToolDescription,
  116. Parameters: map[string]any{
  117. "query": map[string]any{
  118. "type": "string",
  119. "description": "The Sourcegraph search query",
  120. },
  121. "count": map[string]any{
  122. "type": "number",
  123. "description": "Optional number of results to return (default: 10, max: 20)",
  124. },
  125. "context_window": map[string]any{
  126. "type": "number",
  127. "description": "The context around the match to return (default: 10 lines)",
  128. },
  129. "timeout": map[string]any{
  130. "type": "number",
  131. "description": "Optional timeout in seconds (max 120)",
  132. },
  133. },
  134. Required: []string{"query"},
  135. }
  136. }
  137. func (t *sourcegraphTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
  138. var params SourcegraphParams
  139. if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
  140. return NewTextErrorResponse("Failed to parse sourcegraph parameters: " + err.Error()), nil
  141. }
  142. if params.Query == "" {
  143. return NewTextErrorResponse("Query parameter is required"), nil
  144. }
  145. // Set default count if not specified
  146. if params.Count <= 0 {
  147. params.Count = 10
  148. } else if params.Count > 20 {
  149. params.Count = 20 // Limit to 20 results
  150. }
  151. if params.ContextWindow <= 0 {
  152. params.ContextWindow = 10 // Default context window
  153. }
  154. client := t.client
  155. if params.Timeout > 0 {
  156. maxTimeout := 120 // 2 minutes
  157. if params.Timeout > maxTimeout {
  158. params.Timeout = maxTimeout
  159. }
  160. client = &http.Client{
  161. Timeout: time.Duration(params.Timeout) * time.Second,
  162. }
  163. }
  164. // GraphQL query for Sourcegraph search
  165. // Create a properly escaped JSON structure
  166. type graphqlRequest struct {
  167. Query string `json:"query"`
  168. Variables struct {
  169. Query string `json:"query"`
  170. } `json:"variables"`
  171. }
  172. request := graphqlRequest{
  173. Query: "query Search($query: String!) { search(query: $query, version: V2, patternType: keyword ) { results { matchCount, limitHit, resultCount, approximateResultCount, missing { name }, timedout { name }, indexUnavailable, results { __typename, ... on FileMatch { repository { name }, file { path, url, content }, lineMatches { preview, lineNumber, offsetAndLengths } } } } } }",
  174. }
  175. request.Variables.Query = params.Query
  176. // Marshal to JSON to ensure proper escaping
  177. graphqlQueryBytes, err := json.Marshal(request)
  178. if err != nil {
  179. return NewTextErrorResponse("Failed to create GraphQL request: " + err.Error()), nil
  180. }
  181. graphqlQuery := string(graphqlQueryBytes)
  182. // Create request to Sourcegraph API
  183. req, err := http.NewRequestWithContext(
  184. ctx,
  185. "POST",
  186. "https://sourcegraph.com/.api/graphql",
  187. bytes.NewBuffer([]byte(graphqlQuery)),
  188. )
  189. if err != nil {
  190. return NewTextErrorResponse("Failed to create request: " + err.Error()), nil
  191. }
  192. req.Header.Set("Content-Type", "application/json")
  193. req.Header.Set("User-Agent", "termai/1.0")
  194. resp, err := client.Do(req)
  195. if err != nil {
  196. return NewTextErrorResponse("Failed to execute request: " + err.Error()), nil
  197. }
  198. defer resp.Body.Close()
  199. if resp.StatusCode != http.StatusOK {
  200. // log the error response
  201. body, _ := io.ReadAll(resp.Body)
  202. if len(body) > 0 {
  203. return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d, response: %s", resp.StatusCode, string(body))), nil
  204. }
  205. return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
  206. }
  207. body, err := io.ReadAll(resp.Body)
  208. if err != nil {
  209. return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
  210. }
  211. // Parse the GraphQL response
  212. var result map[string]any
  213. if err = json.Unmarshal(body, &result); err != nil {
  214. return NewTextErrorResponse("Failed to parse response: " + err.Error()), nil
  215. }
  216. // Format the results in a readable way
  217. formattedResults, err := formatSourcegraphResults(result, params.ContextWindow)
  218. if err != nil {
  219. return NewTextErrorResponse("Failed to format results: " + err.Error()), nil
  220. }
  221. return NewTextResponse(formattedResults), nil
  222. }
  223. func formatSourcegraphResults(result map[string]any, contextWindow int) (string, error) {
  224. var buffer strings.Builder
  225. // Check for errors in the GraphQL response
  226. if errors, ok := result["errors"].([]any); ok && len(errors) > 0 {
  227. buffer.WriteString("## Sourcegraph API Error\n\n")
  228. for _, err := range errors {
  229. if errMap, ok := err.(map[string]any); ok {
  230. if message, ok := errMap["message"].(string); ok {
  231. buffer.WriteString(fmt.Sprintf("- %s\n", message))
  232. }
  233. }
  234. }
  235. return buffer.String(), nil
  236. }
  237. // Extract data from the response
  238. data, ok := result["data"].(map[string]any)
  239. if !ok {
  240. return "", fmt.Errorf("invalid response format: missing data field")
  241. }
  242. search, ok := data["search"].(map[string]any)
  243. if !ok {
  244. return "", fmt.Errorf("invalid response format: missing search field")
  245. }
  246. searchResults, ok := search["results"].(map[string]any)
  247. if !ok {
  248. return "", fmt.Errorf("invalid response format: missing results field")
  249. }
  250. // Write search metadata
  251. matchCount, _ := searchResults["matchCount"].(float64)
  252. resultCount, _ := searchResults["resultCount"].(float64)
  253. limitHit, _ := searchResults["limitHit"].(bool)
  254. buffer.WriteString("# Sourcegraph Search Results\n\n")
  255. buffer.WriteString(fmt.Sprintf("Found %d matches across %d results\n", int(matchCount), int(resultCount)))
  256. if limitHit {
  257. buffer.WriteString("(Result limit reached, try a more specific query)\n")
  258. }
  259. buffer.WriteString("\n")
  260. // Process results
  261. results, ok := searchResults["results"].([]any)
  262. if !ok || len(results) == 0 {
  263. buffer.WriteString("No results found. Try a different query.\n")
  264. return buffer.String(), nil
  265. }
  266. // Limit to 10 results
  267. maxResults := 10
  268. if len(results) > maxResults {
  269. results = results[:maxResults]
  270. }
  271. // Process each result
  272. for i, res := range results {
  273. fileMatch, ok := res.(map[string]any)
  274. if !ok {
  275. continue
  276. }
  277. // Skip non-FileMatch results
  278. typeName, _ := fileMatch["__typename"].(string)
  279. if typeName != "FileMatch" {
  280. continue
  281. }
  282. // Extract repository and file information
  283. repo, _ := fileMatch["repository"].(map[string]any)
  284. file, _ := fileMatch["file"].(map[string]any)
  285. lineMatches, _ := fileMatch["lineMatches"].([]any)
  286. if repo == nil || file == nil {
  287. continue
  288. }
  289. repoName, _ := repo["name"].(string)
  290. filePath, _ := file["path"].(string)
  291. fileURL, _ := file["url"].(string)
  292. fileContent, _ := file["content"].(string)
  293. buffer.WriteString(fmt.Sprintf("## Result %d: %s/%s\n\n", i+1, repoName, filePath))
  294. if fileURL != "" {
  295. buffer.WriteString(fmt.Sprintf("URL: %s\n\n", fileURL))
  296. }
  297. // Show line matches with context
  298. if len(lineMatches) > 0 {
  299. for _, lm := range lineMatches {
  300. lineMatch, ok := lm.(map[string]any)
  301. if !ok {
  302. continue
  303. }
  304. lineNumber, _ := lineMatch["lineNumber"].(float64)
  305. preview, _ := lineMatch["preview"].(string)
  306. // Extract context from file content if available
  307. if fileContent != "" {
  308. lines := strings.Split(fileContent, "\n")
  309. buffer.WriteString("```\n")
  310. // Display context before the match (up to 10 lines)
  311. startLine := max(1, int(lineNumber)-contextWindow)
  312. for j := startLine - 1; j < int(lineNumber)-1 && j < len(lines); j++ {
  313. if j >= 0 {
  314. buffer.WriteString(fmt.Sprintf("%d| %s\n", j+1, lines[j]))
  315. }
  316. }
  317. // Display the matching line (highlighted)
  318. buffer.WriteString(fmt.Sprintf("%d| %s\n", int(lineNumber), preview))
  319. // Display context after the match (up to 10 lines)
  320. endLine := int(lineNumber) + contextWindow
  321. for j := int(lineNumber); j < endLine && j < len(lines); j++ {
  322. if j < len(lines) {
  323. buffer.WriteString(fmt.Sprintf("%d| %s\n", j+1, lines[j]))
  324. }
  325. }
  326. buffer.WriteString("```\n\n")
  327. } else {
  328. // If file content is not available, just show the preview
  329. buffer.WriteString("```\n")
  330. buffer.WriteString(fmt.Sprintf("%d| %s\n", int(lineNumber), preview))
  331. buffer.WriteString("```\n\n")
  332. }
  333. }
  334. }
  335. }
  336. return buffer.String(), nil
  337. }