fetch.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. package tools
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "io"
  7. "net/http"
  8. "strings"
  9. "time"
  10. md "github.com/JohannesKaufmann/html-to-markdown"
  11. "github.com/PuerkitoBio/goquery"
  12. "github.com/kujtimiihoxha/termai/internal/config"
  13. "github.com/kujtimiihoxha/termai/internal/permission"
  14. )
  const (
  	// FetchToolName is the identifier of the fetch tool. It is reported as
  	// the tool name in Info and as the ToolName of the permission request
  	// issued by Run.
  	FetchToolName = "fetch"
  	// fetchToolDescription is the usage text returned as the Description
  	// field of the tool's ToolInfo.
  	fetchToolDescription = `Fetches content from a URL and returns it in the specified format.
WHEN TO USE THIS TOOL:
- Use when you need to download content from a URL
- Helpful for retrieving documentation, API responses, or web content
- Useful for getting external information to assist with tasks
HOW TO USE:
- Provide the URL to fetch content from
- Specify the desired output format (text, markdown, or html)
- Optionally set a timeout for the request
FEATURES:
- Supports three output formats: text, markdown, and html
- Automatically handles HTTP redirects
- Sets reasonable timeouts to prevent hanging
- Validates input parameters before making requests
LIMITATIONS:
- Maximum response size is 5MB
- Only supports HTTP and HTTPS protocols
- Cannot handle authentication or cookies
- Some websites may block automated requests
TIPS:
- Use text format for plain text content or simple API responses
- Use markdown format for content that should be rendered with formatting
- Use html format when you need the raw HTML structure
- Set appropriate timeouts for potentially slow websites`
  )
  // FetchParams is the JSON payload that Run decodes from a tool call's input.
  type FetchParams struct {
  	// URL is the address to fetch. Run rejects an empty value and any value
  	// that does not start with http:// or https://.
  	URL string `json:"url"`
  	// Format selects the output representation. Run lowercases it and
  	// accepts "text", "markdown" or "html".
  	Format string `json:"format"`
  	// Timeout is an optional request timeout in seconds. Run only applies
  	// values greater than zero and caps them at 120.
  	Timeout int `json:"timeout,omitempty"`
  }
  // FetchPermissionsParams carries the fetch arguments that Run attaches as
  // Params to the permission request it issues before fetching.
  type FetchPermissionsParams struct {
  	// URL is the address the tool was asked to fetch.
  	URL string `json:"url"`
  	// Format is the requested output format exactly as supplied in the call.
  	Format string `json:"format"`
  	// Timeout is the requested timeout in seconds as supplied in the call.
  	Timeout int `json:"timeout,omitempty"`
  }
  // fetchTool is the tool implementation behind FetchToolName.
  type fetchTool struct {
  	// client performs the HTTP request whenever a call does not supply its
  	// own timeout.
  	client *http.Client
  }
  55. func NewFetchTool() BaseTool {
  56. return &fetchTool{
  57. client: &http.Client{
  58. Timeout: 30 * time.Second,
  59. },
  60. }
  61. }
  62. func (t *fetchTool) Info() ToolInfo {
  63. return ToolInfo{
  64. Name: FetchToolName,
  65. Description: fetchToolDescription,
  66. Parameters: map[string]any{
  67. "url": map[string]any{
  68. "type": "string",
  69. "description": "The URL to fetch content from",
  70. },
  71. "format": map[string]any{
  72. "type": "string",
  73. "description": "The format to return the content in (text, markdown, or html)",
  74. },
  75. "timeout": map[string]any{
  76. "type": "number",
  77. "description": "Optional timeout in seconds (max 120)",
  78. },
  79. },
  80. Required: []string{"url", "format"},
  81. }
  82. }
  83. func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
  84. var params FetchParams
  85. if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
  86. return NewTextErrorResponse("Failed to parse fetch parameters: " + err.Error()), nil
  87. }
  88. if params.URL == "" {
  89. return NewTextErrorResponse("URL parameter is required"), nil
  90. }
  91. format := strings.ToLower(params.Format)
  92. if format != "text" && format != "markdown" && format != "html" {
  93. return NewTextErrorResponse("Format must be one of: text, markdown, html"), nil
  94. }
  95. if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
  96. return NewTextErrorResponse("URL must start with http:// or https://"), nil
  97. }
  98. p := permission.Default.Request(
  99. permission.CreatePermissionRequest{
  100. Path: config.WorkingDirectory(),
  101. ToolName: FetchToolName,
  102. Action: "fetch",
  103. Description: fmt.Sprintf("Fetch content from URL: %s", params.URL),
  104. Params: FetchPermissionsParams{
  105. URL: params.URL,
  106. Format: params.Format,
  107. Timeout: params.Timeout,
  108. },
  109. },
  110. )
  111. if !p {
  112. return NewTextErrorResponse("Permission denied to fetch from URL: " + params.URL), nil
  113. }
  114. client := t.client
  115. if params.Timeout > 0 {
  116. maxTimeout := 120 // 2 minutes
  117. if params.Timeout > maxTimeout {
  118. params.Timeout = maxTimeout
  119. }
  120. client = &http.Client{
  121. Timeout: time.Duration(params.Timeout) * time.Second,
  122. }
  123. }
  124. req, err := http.NewRequestWithContext(ctx, "GET", params.URL, nil)
  125. if err != nil {
  126. return NewTextErrorResponse("Failed to create request: " + err.Error()), nil
  127. }
  128. req.Header.Set("User-Agent", "termai/1.0")
  129. resp, err := client.Do(req)
  130. if err != nil {
  131. return NewTextErrorResponse("Failed to execute request: " + err.Error()), nil
  132. }
  133. defer resp.Body.Close()
  134. if resp.StatusCode != http.StatusOK {
  135. return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
  136. }
  137. maxSize := int64(5 * 1024 * 1024) // 5MB
  138. body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
  139. if err != nil {
  140. return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
  141. }
  142. content := string(body)
  143. contentType := resp.Header.Get("Content-Type")
  144. switch format {
  145. case "text":
  146. if strings.Contains(contentType, "text/html") {
  147. text, err := extractTextFromHTML(content)
  148. if err != nil {
  149. return NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil
  150. }
  151. return NewTextResponse(text), nil
  152. }
  153. return NewTextResponse(content), nil
  154. case "markdown":
  155. if strings.Contains(contentType, "text/html") {
  156. markdown, err := convertHTMLToMarkdown(content)
  157. if err != nil {
  158. return NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil
  159. }
  160. return NewTextResponse(markdown), nil
  161. }
  162. return NewTextResponse("```\n" + content + "\n```"), nil
  163. case "html":
  164. return NewTextResponse(content), nil
  165. default:
  166. return NewTextResponse(content), nil
  167. }
  168. }
  169. func extractTextFromHTML(html string) (string, error) {
  170. doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
  171. if err != nil {
  172. return "", err
  173. }
  174. text := doc.Text()
  175. text = strings.Join(strings.Fields(text), " ")
  176. return text, nil
  177. }
  178. func convertHTMLToMarkdown(html string) (string, error) {
  179. converter := md.NewConverter("", true, nil)
  180. markdown, err := converter.ConvertString(html)
  181. if err != nil {
  182. return "", err
  183. }
  184. return markdown, nil
  185. }