opencode/internal/llm/tools/glob.go
2025-05-13 10:02:39 -05:00

298 lines
7.1 KiB
Go

package tools
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io/fs"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"time"
"github.com/bmatcuk/doublestar/v4"
"github.com/sst/opencode/internal/config"
)
const (
GlobToolName = "glob"
globDescription = `Fast file pattern matching tool that finds files by name and pattern, returning matching paths sorted by modification time (newest first).
WHEN TO USE THIS TOOL:
- Use when you need to find files by name patterns or extensions
- Great for finding specific file types across a directory structure
- Useful for discovering files that match certain naming conventions
HOW TO USE:
- Provide a glob pattern to match against file paths
- Optionally specify a starting directory (defaults to current working directory)
- Results are sorted with most recently modified files first
GLOB PATTERN SYNTAX:
- '*' matches any sequence of non-separator characters
- '**' matches any sequence of characters, including separators
- '?' matches any single non-separator character
- '[...]' matches any character in the brackets
- '[!...]' matches any character not in the brackets
COMMON PATTERN EXAMPLES:
- '*.js' - Find all JavaScript files in the current directory
- '**/*.js' - Find all JavaScript files in any subdirectory
- 'src/**/*.{ts,tsx}' - Find all TypeScript files in the src directory
- '*.{html,css,js}' - Find all HTML, CSS, and JS files
LIMITATIONS:
- Results are limited to 100 files (newest first)
- Does not search file contents (use Grep tool for that)
- Hidden files (starting with '.') are skipped
TIPS:
- For the most useful results, combine with the Grep tool: first find files with Glob, then search their contents with Grep
- When doing iterative exploration that may require multiple rounds of searching, consider using the Agent tool instead
- Always check if results are truncated and refine your search pattern if needed`
)
type fileInfo struct {
path string
modTime time.Time
}
type GlobParams struct {
Pattern string `json:"pattern"`
Path string `json:"path"`
}
type GlobResponseMetadata struct {
NumberOfFiles int `json:"number_of_files"`
Truncated bool `json:"truncated"`
}
type globTool struct{}
func NewGlobTool() BaseTool {
return &globTool{}
}
func (g *globTool) Info() ToolInfo {
return ToolInfo{
Name: GlobToolName,
Description: globDescription,
Parameters: map[string]any{
"pattern": map[string]any{
"type": "string",
"description": "The glob pattern to match files against",
},
"path": map[string]any{
"type": "string",
"description": "The directory to search in. Defaults to the current working directory.",
},
},
Required: []string{"pattern"},
}
}
func (g *globTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
var params GlobParams
if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
return NewTextErrorResponse(fmt.Sprintf("error parsing parameters: %s", err)), nil
}
if params.Pattern == "" {
return NewTextErrorResponse("pattern is required"), nil
}
searchPath := params.Path
if searchPath == "" {
searchPath = config.WorkingDirectory()
}
files, truncated, err := globFiles(params.Pattern, searchPath, 100)
if err != nil {
return ToolResponse{}, fmt.Errorf("error finding files: %w", err)
}
var output string
if len(files) == 0 {
output = "No files found"
} else {
output = strings.Join(files, "\n")
if truncated {
output += "\n\n(Results are truncated. Consider using a more specific path or pattern.)"
}
}
return WithResponseMetadata(
NewTextResponse(output),
GlobResponseMetadata{
NumberOfFiles: len(files),
Truncated: truncated,
},
), nil
}
func globFiles(pattern, searchPath string, limit int) ([]string, bool, error) {
matches, err := globWithRipgrep(pattern, searchPath, limit)
if err == nil {
return matches, len(matches) >= limit, nil
}
return globWithDoublestar(pattern, searchPath, limit)
}
func globWithRipgrep(
pattern, searchRoot string,
limit int,
) ([]string, error) {
if searchRoot == "" {
searchRoot = "."
}
rgBin, err := exec.LookPath("rg")
if err != nil {
return nil, fmt.Errorf("ripgrep not found in $PATH: %w", err)
}
if !filepath.IsAbs(pattern) && !strings.HasPrefix(pattern, "/") {
pattern = "/" + pattern
}
args := []string{
"--files",
"--null",
"--glob", pattern,
"-L",
}
cmd := exec.Command(rgBin, args...)
cmd.Dir = searchRoot
out, err := cmd.CombinedOutput()
if err != nil {
if ee, ok := err.(*exec.ExitError); ok && ee.ExitCode() == 1 {
return nil, nil
}
return nil, fmt.Errorf("ripgrep: %w\n%s", err, out)
}
var matches []string
for _, p := range bytes.Split(out, []byte{0}) {
if len(p) == 0 {
continue
}
abs := filepath.Join(searchRoot, string(p))
if skipHidden(abs) {
continue
}
matches = append(matches, abs)
}
sort.SliceStable(matches, func(i, j int) bool {
return len(matches[i]) < len(matches[j])
})
if len(matches) > limit {
matches = matches[:limit]
}
return matches, nil
}
func globWithDoublestar(pattern, searchPath string, limit int) ([]string, bool, error) {
fsys := os.DirFS(searchPath)
relPattern := strings.TrimPrefix(pattern, "/")
var matches []fileInfo
err := doublestar.GlobWalk(fsys, relPattern, func(path string, d fs.DirEntry) error {
if d.IsDir() {
return nil
}
if skipHidden(path) {
return nil
}
info, err := d.Info()
if err != nil {
return nil // Skip files we can't access
}
absPath := path // Restore absolute path
if !strings.HasPrefix(absPath, searchPath) {
absPath = filepath.Join(searchPath, absPath)
}
matches = append(matches, fileInfo{
path: absPath,
modTime: info.ModTime(),
})
if len(matches) >= limit*2 { // Collect more than needed for sorting
return fs.SkipAll
}
return nil
})
if err != nil {
return nil, false, fmt.Errorf("glob walk error: %w", err)
}
sort.Slice(matches, func(i, j int) bool {
return matches[i].modTime.After(matches[j].modTime)
})
truncated := len(matches) > limit
if truncated {
matches = matches[:limit]
}
results := make([]string, len(matches))
for i, m := range matches {
results[i] = m.path
}
return results, truncated, nil
}
func skipHidden(path string) bool {
// Check for hidden files (starting with a dot)
base := filepath.Base(path)
if base != "." && strings.HasPrefix(base, ".") {
return true
}
// List of commonly ignored directories in development projects
commonIgnoredDirs := map[string]bool{
"node_modules": true,
"vendor": true,
"dist": true,
"build": true,
"target": true,
".git": true,
".idea": true,
".vscode": true,
"__pycache__": true,
"bin": true,
"obj": true,
"out": true,
"coverage": true,
"tmp": true,
"temp": true,
"logs": true,
"generated": true,
"bower_components": true,
"jspm_packages": true,
}
// Check if any path component is in our ignore list
parts := strings.SplitSeq(path, string(os.PathSeparator))
for part := range parts {
if commonIgnoredDirs[part] {
return true
}
}
return false
}