fetch.go 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. package tools
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "io"
  7. "net/http"
  8. "strings"
  9. "time"
  10. md "github.com/JohannesKaufmann/html-to-markdown"
  11. "github.com/PuerkitoBio/goquery"
  12. "github.com/sst/opencode/internal/config"
  13. "github.com/sst/opencode/internal/permission"
  14. )
  15. type FetchParams struct {
  16. URL string `json:"url"`
  17. Format string `json:"format"`
  18. Timeout int `json:"timeout,omitempty"`
  19. }
  20. type FetchPermissionsParams struct {
  21. URL string `json:"url"`
  22. Format string `json:"format"`
  23. Timeout int `json:"timeout,omitempty"`
  24. }
  25. type fetchTool struct {
  26. client *http.Client
  27. permissions permission.Service
  28. }
  29. const (
  30. FetchToolName = "fetch"
  31. fetchToolDescription = `Fetches content from a URL and returns it in the specified format.
  32. WHEN TO USE THIS TOOL:
  33. - Use when you need to download content from a URL
  34. - Helpful for retrieving documentation, API responses, or web content
  35. - Useful for getting external information to assist with tasks
  36. HOW TO USE:
  37. - Provide the URL to fetch content from
  38. - Specify the desired output format (text, markdown, or html)
  39. - Optionally set a timeout for the request
  40. FEATURES:
  41. - Supports three output formats: text, markdown, and html
  42. - Automatically handles HTTP redirects
  43. - Sets reasonable timeouts to prevent hanging
  44. - Validates input parameters before making requests
  45. LIMITATIONS:
  46. - Maximum response size is 5MB
  47. - Only supports HTTP and HTTPS protocols
  48. - Cannot handle authentication or cookies
  49. - Some websites may block automated requests
  50. TIPS:
  51. - Use text format for plain text content or simple API responses
  52. - Use markdown format for content that should be rendered with formatting
  53. - Use html format when you need the raw HTML structure
  54. - Set appropriate timeouts for potentially slow websites`
  55. )
  56. func NewFetchTool(permissions permission.Service) BaseTool {
  57. return &fetchTool{
  58. client: &http.Client{
  59. Timeout: 30 * time.Second,
  60. },
  61. permissions: permissions,
  62. }
  63. }
  64. func (t *fetchTool) Info() ToolInfo {
  65. return ToolInfo{
  66. Name: FetchToolName,
  67. Description: fetchToolDescription,
  68. Parameters: map[string]any{
  69. "url": map[string]any{
  70. "type": "string",
  71. "description": "The URL to fetch content from",
  72. },
  73. "format": map[string]any{
  74. "type": "string",
  75. "description": "The format to return the content in (text, markdown, or html)",
  76. "enum": []string{"text", "markdown", "html"},
  77. },
  78. "timeout": map[string]any{
  79. "type": "number",
  80. "description": "Optional timeout in seconds (max 120)",
  81. },
  82. },
  83. Required: []string{"url", "format"},
  84. }
  85. }
  86. func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
  87. var params FetchParams
  88. if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
  89. return NewTextErrorResponse("Failed to parse fetch parameters: " + err.Error()), nil
  90. }
  91. if params.URL == "" {
  92. return NewTextErrorResponse("URL parameter is required"), nil
  93. }
  94. format := strings.ToLower(params.Format)
  95. if format != "text" && format != "markdown" && format != "html" {
  96. return NewTextErrorResponse("Format must be one of: text, markdown, html"), nil
  97. }
  98. if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
  99. return NewTextErrorResponse("URL must start with http:// or https://"), nil
  100. }
  101. sessionID, messageID := GetContextValues(ctx)
  102. if sessionID == "" || messageID == "" {
  103. return ToolResponse{}, fmt.Errorf("session ID and message ID are required for creating a new file")
  104. }
  105. p := t.permissions.Request(
  106. ctx,
  107. permission.CreatePermissionRequest{
  108. SessionID: sessionID,
  109. Path: config.WorkingDirectory(),
  110. ToolName: FetchToolName,
  111. Action: "fetch",
  112. Description: fmt.Sprintf("Fetch content from URL: %s", params.URL),
  113. Params: FetchPermissionsParams(params),
  114. },
  115. )
  116. if !p {
  117. return ToolResponse{}, permission.ErrorPermissionDenied
  118. }
  119. client := t.client
  120. if params.Timeout > 0 {
  121. maxTimeout := 120 // 2 minutes
  122. if params.Timeout > maxTimeout {
  123. params.Timeout = maxTimeout
  124. }
  125. client = &http.Client{
  126. Timeout: time.Duration(params.Timeout) * time.Second,
  127. }
  128. }
  129. req, err := http.NewRequestWithContext(ctx, "GET", params.URL, nil)
  130. if err != nil {
  131. return ToolResponse{}, fmt.Errorf("failed to create request: %w", err)
  132. }
  133. req.Header.Set("User-Agent", "opencode/1.0")
  134. resp, err := client.Do(req)
  135. if err != nil {
  136. return ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err)
  137. }
  138. defer resp.Body.Close()
  139. if resp.StatusCode != http.StatusOK {
  140. return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
  141. }
  142. maxSize := int64(5 * 1024 * 1024) // 5MB
  143. body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
  144. if err != nil {
  145. return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
  146. }
  147. content := string(body)
  148. contentType := resp.Header.Get("Content-Type")
  149. switch format {
  150. case "text":
  151. if strings.Contains(contentType, "text/html") {
  152. text, err := extractTextFromHTML(content)
  153. if err != nil {
  154. return NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil
  155. }
  156. return NewTextResponse(text), nil
  157. }
  158. return NewTextResponse(content), nil
  159. case "markdown":
  160. if strings.Contains(contentType, "text/html") {
  161. markdown, err := convertHTMLToMarkdown(content)
  162. if err != nil {
  163. return NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil
  164. }
  165. return NewTextResponse(markdown), nil
  166. }
  167. return NewTextResponse("```\n" + content + "\n```"), nil
  168. case "html":
  169. return NewTextResponse(content), nil
  170. default:
  171. return NewTextResponse(content), nil
  172. }
  173. }
  174. func extractTextFromHTML(html string) (string, error) {
  175. doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
  176. if err != nil {
  177. return "", err
  178. }
  179. text := doc.Text()
  180. text = strings.Join(strings.Fields(text), " ")
  181. return text, nil
  182. }
  183. func convertHTMLToMarkdown(html string) (string, error) {
  184. converter := md.NewConverter("", true, nil)
  185. markdown, err := converter.ConvertString(html)
  186. if err != nil {
  187. return "", err
  188. }
  189. return markdown, nil
  190. }