| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227 |
- package tools
- import (
- "context"
- "encoding/json"
- "fmt"
- "io"
- "net/http"
- "strings"
- "time"
- md "github.com/JohannesKaufmann/html-to-markdown"
- "github.com/PuerkitoBio/goquery"
- "github.com/opencode-ai/opencode/internal/config"
- "github.com/opencode-ai/opencode/internal/permission"
- )
// FetchParams is the JSON payload accepted by the fetch tool.
type FetchParams struct {
	// URL is the address to fetch. Run rejects an empty value and any value
	// that does not start with "http://" or "https://".
	URL string `json:"url"`
	// Format selects the output representation. Run lower-cases it and
	// accepts "text", "markdown", or "html".
	Format string `json:"format"`
	// Timeout is an optional per-request timeout in seconds. Values <= 0
	// leave the tool's default client in use.
	Timeout int `json:"timeout,omitempty"`
}
// FetchPermissionsParams carries the fetch parameters attached to a
// permission request. Its fields mirror FetchParams exactly, which is what
// allows the direct conversion FetchPermissionsParams(params) in Run.
type FetchPermissionsParams struct {
	// URL is the address the tool is asking permission to fetch.
	URL string `json:"url"`
	// Format is the requested output format.
	Format string `json:"format"`
	// Timeout is the requested timeout in seconds, if any.
	Timeout int `json:"timeout,omitempty"`
}
- type fetchTool struct {
- client *http.Client
- permissions permission.Service
- }
const (
	// FetchToolName is the name under which the fetch tool is registered and
	// reported in permission requests.
	FetchToolName = "fetch"

	// fetchToolDescription is the usage text returned by Info.
	fetchToolDescription = `Fetches content from a URL and returns it in the specified format.
WHEN TO USE THIS TOOL:
- Use when you need to download content from a URL
- Helpful for retrieving documentation, API responses, or web content
- Useful for getting external information to assist with tasks
HOW TO USE:
- Provide the URL to fetch content from
- Specify the desired output format (text, markdown, or html)
- Optionally set a timeout for the request
FEATURES:
- Supports three output formats: text, markdown, and html
- Automatically handles HTTP redirects
- Sets reasonable timeouts to prevent hanging
- Validates input parameters before making requests
LIMITATIONS:
- Maximum response size is 5MB
- Only supports HTTP and HTTPS protocols
- Cannot handle authentication or cookies
- Some websites may block automated requests
TIPS:
- Use text format for plain text content or simple API responses
- Use markdown format for content that should be rendered with formatting
- Use html format when you need the raw HTML structure
- Set appropriate timeouts for potentially slow websites`
)
- func NewFetchTool(permissions permission.Service) BaseTool {
- return &fetchTool{
- client: &http.Client{
- Timeout: 30 * time.Second,
- },
- permissions: permissions,
- }
- }
- func (t *fetchTool) Info() ToolInfo {
- return ToolInfo{
- Name: FetchToolName,
- Description: fetchToolDescription,
- Parameters: map[string]any{
- "url": map[string]any{
- "type": "string",
- "description": "The URL to fetch content from",
- },
- "format": map[string]any{
- "type": "string",
- "description": "The format to return the content in (text, markdown, or html)",
- "enum": []string{"text", "markdown", "html"},
- },
- "timeout": map[string]any{
- "type": "number",
- "description": "Optional timeout in seconds (max 120)",
- },
- },
- Required: []string{"url", "format"},
- }
- }
- func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
- var params FetchParams
- if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
- return NewTextErrorResponse("Failed to parse fetch parameters: " + err.Error()), nil
- }
- if params.URL == "" {
- return NewTextErrorResponse("URL parameter is required"), nil
- }
- format := strings.ToLower(params.Format)
- if format != "text" && format != "markdown" && format != "html" {
- return NewTextErrorResponse("Format must be one of: text, markdown, html"), nil
- }
- if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
- return NewTextErrorResponse("URL must start with http:// or https://"), nil
- }
- sessionID, messageID := GetContextValues(ctx)
- if sessionID == "" || messageID == "" {
- return ToolResponse{}, fmt.Errorf("session ID and message ID are required for creating a new file")
- }
- p := t.permissions.Request(
- permission.CreatePermissionRequest{
- SessionID: sessionID,
- Path: config.WorkingDirectory(),
- ToolName: FetchToolName,
- Action: "fetch",
- Description: fmt.Sprintf("Fetch content from URL: %s", params.URL),
- Params: FetchPermissionsParams(params),
- },
- )
- if !p {
- return ToolResponse{}, permission.ErrorPermissionDenied
- }
- client := t.client
- if params.Timeout > 0 {
- maxTimeout := 120 // 2 minutes
- if params.Timeout > maxTimeout {
- params.Timeout = maxTimeout
- }
- client = &http.Client{
- Timeout: time.Duration(params.Timeout) * time.Second,
- }
- }
- req, err := http.NewRequestWithContext(ctx, "GET", params.URL, nil)
- if err != nil {
- return ToolResponse{}, fmt.Errorf("failed to create request: %w", err)
- }
- req.Header.Set("User-Agent", "opencode/1.0")
- resp, err := client.Do(req)
- if err != nil {
- return ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err)
- }
- defer resp.Body.Close()
- if resp.StatusCode != http.StatusOK {
- return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
- }
- maxSize := int64(5 * 1024 * 1024) // 5MB
- body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
- if err != nil {
- return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
- }
- content := string(body)
- contentType := resp.Header.Get("Content-Type")
- switch format {
- case "text":
- if strings.Contains(contentType, "text/html") {
- text, err := extractTextFromHTML(content)
- if err != nil {
- return NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil
- }
- return NewTextResponse(text), nil
- }
- return NewTextResponse(content), nil
- case "markdown":
- if strings.Contains(contentType, "text/html") {
- markdown, err := convertHTMLToMarkdown(content)
- if err != nil {
- return NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil
- }
- return NewTextResponse(markdown), nil
- }
- return NewTextResponse("```\n" + content + "\n```"), nil
- case "html":
- return NewTextResponse(content), nil
- default:
- return NewTextResponse(content), nil
- }
- }
- func extractTextFromHTML(html string) (string, error) {
- doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
- if err != nil {
- return "", err
- }
- text := doc.Text()
- text = strings.Join(strings.Fields(text), " ")
- return text, nil
- }
- func convertHTMLToMarkdown(html string) (string, error) {
- converter := md.NewConverter("", true, nil)
- markdown, err := converter.ConvertString(html)
- if err != nil {
- return "", err
- }
- return markdown, nil
- }
|