opencode/internal/llm/tools/fetch.go
2025-05-13 10:02:39 -05:00

228 lines
6 KiB
Go

package tools
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
md "github.com/JohannesKaufmann/html-to-markdown"
"github.com/PuerkitoBio/goquery"
"github.com/sst/opencode/internal/config"
"github.com/sst/opencode/internal/permission"
)
type FetchParams struct {
URL string `json:"url"`
Format string `json:"format"`
Timeout int `json:"timeout,omitempty"`
}
type FetchPermissionsParams struct {
URL string `json:"url"`
Format string `json:"format"`
Timeout int `json:"timeout,omitempty"`
}
type fetchTool struct {
client *http.Client
permissions permission.Service
}
const (
FetchToolName = "fetch"
fetchToolDescription = `Fetches content from a URL and returns it in the specified format.
WHEN TO USE THIS TOOL:
- Use when you need to download content from a URL
- Helpful for retrieving documentation, API responses, or web content
- Useful for getting external information to assist with tasks
HOW TO USE:
- Provide the URL to fetch content from
- Specify the desired output format (text, markdown, or html)
- Optionally set a timeout for the request
FEATURES:
- Supports three output formats: text, markdown, and html
- Automatically handles HTTP redirects
- Sets reasonable timeouts to prevent hanging
- Validates input parameters before making requests
LIMITATIONS:
- Maximum response size is 5MB
- Only supports HTTP and HTTPS protocols
- Cannot handle authentication or cookies
- Some websites may block automated requests
TIPS:
- Use text format for plain text content or simple API responses
- Use markdown format for content that should be rendered with formatting
- Use html format when you need the raw HTML structure
- Set appropriate timeouts for potentially slow websites`
)
func NewFetchTool(permissions permission.Service) BaseTool {
return &fetchTool{
client: &http.Client{
Timeout: 30 * time.Second,
},
permissions: permissions,
}
}
func (t *fetchTool) Info() ToolInfo {
return ToolInfo{
Name: FetchToolName,
Description: fetchToolDescription,
Parameters: map[string]any{
"url": map[string]any{
"type": "string",
"description": "The URL to fetch content from",
},
"format": map[string]any{
"type": "string",
"description": "The format to return the content in (text, markdown, or html)",
"enum": []string{"text", "markdown", "html"},
},
"timeout": map[string]any{
"type": "number",
"description": "Optional timeout in seconds (max 120)",
},
},
Required: []string{"url", "format"},
}
}
func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
var params FetchParams
if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
return NewTextErrorResponse("Failed to parse fetch parameters: " + err.Error()), nil
}
if params.URL == "" {
return NewTextErrorResponse("URL parameter is required"), nil
}
format := strings.ToLower(params.Format)
if format != "text" && format != "markdown" && format != "html" {
return NewTextErrorResponse("Format must be one of: text, markdown, html"), nil
}
if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
return NewTextErrorResponse("URL must start with http:// or https://"), nil
}
sessionID, messageID := GetContextValues(ctx)
if sessionID == "" || messageID == "" {
return ToolResponse{}, fmt.Errorf("session ID and message ID are required for creating a new file")
}
p := t.permissions.Request(
ctx,
permission.CreatePermissionRequest{
SessionID: sessionID,
Path: config.WorkingDirectory(),
ToolName: FetchToolName,
Action: "fetch",
Description: fmt.Sprintf("Fetch content from URL: %s", params.URL),
Params: FetchPermissionsParams(params),
},
)
if !p {
return ToolResponse{}, permission.ErrorPermissionDenied
}
client := t.client
if params.Timeout > 0 {
maxTimeout := 120 // 2 minutes
if params.Timeout > maxTimeout {
params.Timeout = maxTimeout
}
client = &http.Client{
Timeout: time.Duration(params.Timeout) * time.Second,
}
}
req, err := http.NewRequestWithContext(ctx, "GET", params.URL, nil)
if err != nil {
return ToolResponse{}, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("User-Agent", "opencode/1.0")
resp, err := client.Do(req)
if err != nil {
return ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
}
maxSize := int64(5 * 1024 * 1024) // 5MB
body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
if err != nil {
return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
}
content := string(body)
contentType := resp.Header.Get("Content-Type")
switch format {
case "text":
if strings.Contains(contentType, "text/html") {
text, err := extractTextFromHTML(content)
if err != nil {
return NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil
}
return NewTextResponse(text), nil
}
return NewTextResponse(content), nil
case "markdown":
if strings.Contains(contentType, "text/html") {
markdown, err := convertHTMLToMarkdown(content)
if err != nil {
return NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil
}
return NewTextResponse(markdown), nil
}
return NewTextResponse("```\n" + content + "\n```"), nil
case "html":
return NewTextResponse(content), nil
default:
return NewTextResponse(content), nil
}
}
func extractTextFromHTML(html string) (string, error) {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
if err != nil {
return "", err
}
text := doc.Text()
text = strings.Join(strings.Fields(text), " ")
return text, nil
}
func convertHTMLToMarkdown(html string) (string, error) {
converter := md.NewConverter("", true, nil)
markdown, err := converter.ConvertString(html)
if err != nil {
return "", err
}
return markdown, nil
}