package tools import ( "context" "encoding/json" "fmt" "io" "net/http" "strings" "time" md "github.com/JohannesKaufmann/html-to-markdown" "github.com/PuerkitoBio/goquery" "github.com/sst/opencode/internal/config" "github.com/sst/opencode/internal/permission" ) type FetchParams struct { URL string `json:"url"` Format string `json:"format"` Timeout int `json:"timeout,omitempty"` } type FetchPermissionsParams struct { URL string `json:"url"` Format string `json:"format"` Timeout int `json:"timeout,omitempty"` } type fetchTool struct { client *http.Client permissions permission.Service } const ( FetchToolName = "fetch" fetchToolDescription = `Fetches content from a URL and returns it in the specified format. WHEN TO USE THIS TOOL: - Use when you need to download content from a URL - Helpful for retrieving documentation, API responses, or web content - Useful for getting external information to assist with tasks HOW TO USE: - Provide the URL to fetch content from - Specify the desired output format (text, markdown, or html) - Optionally set a timeout for the request FEATURES: - Supports three output formats: text, markdown, and html - Automatically handles HTTP redirects - Sets reasonable timeouts to prevent hanging - Validates input parameters before making requests LIMITATIONS: - Maximum response size is 5MB - Only supports HTTP and HTTPS protocols - Cannot handle authentication or cookies - Some websites may block automated requests TIPS: - Use text format for plain text content or simple API responses - Use markdown format for content that should be rendered with formatting - Use html format when you need the raw HTML structure - Set appropriate timeouts for potentially slow websites` ) func NewFetchTool(permissions permission.Service) BaseTool { return &fetchTool{ client: &http.Client{ Timeout: 30 * time.Second, }, permissions: permissions, } } func (t *fetchTool) Info() ToolInfo { return ToolInfo{ Name: FetchToolName, Description: fetchToolDescription, Parameters: map[string]any{ "url": map[string]any{ "type": "string", "description": "The URL to fetch content from", }, "format": map[string]any{ "type": "string", "description": "The format to return the content in (text, markdown, or html)", "enum": []string{"text", "markdown", "html"}, }, "timeout": map[string]any{ "type": "number", "description": "Optional timeout in seconds (max 120)", }, }, Required: []string{"url", "format"}, } } func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) { var params FetchParams if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil { return NewTextErrorResponse("Failed to parse fetch parameters: " + err.Error()), nil } if params.URL == "" { return NewTextErrorResponse("URL parameter is required"), nil } format := strings.ToLower(params.Format) if format != "text" && format != "markdown" && format != "html" { return NewTextErrorResponse("Format must be one of: text, markdown, html"), nil } if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") { return NewTextErrorResponse("URL must start with http:// or https://"), nil } sessionID, messageID := GetContextValues(ctx) if sessionID == "" || messageID == "" { return ToolResponse{}, fmt.Errorf("session ID and message ID are required for creating a new file") } p := t.permissions.Request( ctx, permission.CreatePermissionRequest{ SessionID: sessionID, Path: config.WorkingDirectory(), ToolName: FetchToolName, Action: "fetch", Description: fmt.Sprintf("Fetch content from URL: %s", params.URL), Params: FetchPermissionsParams(params), }, ) if !p { return ToolResponse{}, permission.ErrorPermissionDenied } client := t.client if params.Timeout > 0 { maxTimeout := 120 // 2 minutes if params.Timeout > maxTimeout { params.Timeout = maxTimeout } client = &http.Client{ Timeout: time.Duration(params.Timeout) * time.Second, } } req, err := http.NewRequestWithContext(ctx, "GET", params.URL, nil) if err != nil { return ToolResponse{}, fmt.Errorf("failed to create request: %w", err) } req.Header.Set("User-Agent", "opencode/1.0") resp, err := client.Do(req) if err != nil { return ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil } maxSize := int64(5 * 1024 * 1024) // 5MB body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize)) if err != nil { return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil } content := string(body) contentType := resp.Header.Get("Content-Type") switch format { case "text": if strings.Contains(contentType, "text/html") { text, err := extractTextFromHTML(content) if err != nil { return NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil } return NewTextResponse(text), nil } return NewTextResponse(content), nil case "markdown": if strings.Contains(contentType, "text/html") { markdown, err := convertHTMLToMarkdown(content) if err != nil { return NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil } return NewTextResponse(markdown), nil } return NewTextResponse("```\n" + content + "\n```"), nil case "html": return NewTextResponse(content), nil default: return NewTextResponse(content), nil } } func extractTextFromHTML(html string) (string, error) { doc, err := goquery.NewDocumentFromReader(strings.NewReader(html)) if err != nil { return "", err } text := doc.Text() text = strings.Join(strings.Fields(text), " ") return text, nil } func convertHTMLToMarkdown(html string) (string, error) { converter := md.NewConverter("", true, nil) markdown, err := converter.ConvertString(html) if err != nil { return "", err } return markdown, nil }