mirror of
https://github.com/sst/opencode.git
synced 2025-12-23 10:11:41 +00:00
pdf support in read tool (#5222)
Co-authored-by: ammi1378 <ammi1378@users.noreply.github.com>
This commit is contained in:
parent
06ba1f76dc
commit
a3bb4a3c85
3 changed files with 40 additions and 169 deletions
|
|
@ -2,6 +2,17 @@ import type { APICallError, ModelMessage } from "ai"
|
|||
import { unique } from "remeda"
|
||||
import type { JSONSchema } from "zod/v4/core"
|
||||
import type { Provider } from "./provider"
|
||||
import type { ModelsDev } from "./models"
|
||||
|
||||
/** Element type of a model's `modalities.input` list; `NonNullable` strips the case where `modalities` is absent. */
type Modality = NonNullable<ModelsDev.Model["modalities"]>["input"][number]
|
||||
|
||||
/**
 * Maps a MIME type to the model input modality it belongs to.
 *
 * Media types are case-insensitive and may be followed by `;`-separated
 * parameters, so the input is reduced to its lower-cased `type/subtype`
 * before matching. For example `"Application/PDF; charset=binary"` resolves
 * to `"pdf"`.
 *
 * @param mime - MIME type string, optionally carrying parameters.
 * @returns The matching modality, or `undefined` when the type is not an
 *   image, audio, video or PDF type.
 */
function mimeToModality(mime: string): Modality | undefined {
  // Drop any parameters ("; charset=...") and normalise case/whitespace.
  const [essence = ""] = mime.split(";", 1)
  const normalized = essence.trim().toLowerCase()

  if (normalized.startsWith("image/")) return "image"
  if (normalized.startsWith("audio/")) return "audio"
  if (normalized.startsWith("video/")) return "video"
  if (normalized === "application/pdf") return "pdf"
  return undefined
}
|
||||
|
||||
export namespace ProviderTransform {
|
||||
function normalizeMessages(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
|
||||
|
|
@ -148,7 +159,32 @@ export namespace ProviderTransform {
|
|||
return msgs
|
||||
}
|
||||
|
||||
/**
 * Replaces user attachments the model cannot accept with an explanatory text part.
 *
 * Only user messages whose content is an array are inspected. Each `file` or
 * `image` part is mapped to an input modality via `mimeToModality`; when
 * `model.capabilities.input` does not enable that modality the part is
 * swapped for a text part telling the model to inform the user. All other
 * parts and messages are returned unchanged.
 *
 * @param msgs - Messages about to be sent to the model.
 * @param model - Target model whose `capabilities.input` flags are consulted.
 * @returns A new message array; the input array is not mutated.
 */
function unsupportedParts(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
  return msgs.map((msg) => {
    if (msg.role !== "user" || !Array.isArray(msg.content)) return msg

    const filtered = msg.content.map((part) => {
      if (part.type !== "file" && part.type !== "image") return part

      // Prefer the declared media type; image parts may omit it, in which case
      // it can only be recovered from an inline `data:` URL.
      const mime = part.mediaType ?? (part.type === "image" ? dataUrlMime(part.image) : undefined)
      // An image part is an image even when no MIME type can be determined
      // (binary payload, raw base64, remote URL), so it must still be checked
      // against the model's image capability rather than passed through.
      const fallback = part.type === "image" ? ("image" as const) : undefined
      const modality = mime === undefined ? fallback : mimeToModality(mime)
      if (!modality) return part
      if (model.capabilities.input[modality]) return part

      const filename = part.type === "file" ? part.filename : undefined
      const name = filename ? `"${filename}"` : modality
      return {
        type: "text" as const,
        text: `ERROR: Cannot read ${name} (this model does not support ${modality} input). Inform the user.`,
      }
    })

    return { ...msg, content: filtered }
  })
}

/**
 * Extracts the lower-cased media type from a `data:` URL.
 *
 * Only strings and `URL` objects are inspected; binary payloads are never
 * stringified. Returns `undefined` when the source is not a `data:` URL.
 */
function dataUrlMime(source: unknown): string | undefined {
  const text = typeof source === "string" ? source : source instanceof URL ? source.href : undefined
  if (text === undefined) return undefined
  // The media type ends at the first parameter (";") or at the payload (",").
  return /^data:([^;,]*)/i.exec(text)?.[1]?.toLowerCase()
}
|
||||
|
||||
export function message(msgs: ModelMessage[], model: Provider.Model) {
|
||||
msgs = unsupportedParts(msgs, model)
|
||||
msgs = normalizeMessages(msgs, model)
|
||||
if (model.providerID === "anthropic" || model.api.id.includes("anthropic") || model.api.id.includes("claude")) {
|
||||
msgs = applyCaching(msgs, model.providerID)
|
||||
|
|
|
|||
|
|
@ -411,147 +411,6 @@ export namespace MessageV2 {
|
|||
})
|
||||
export type WithParts = z.infer<typeof WithParts>
|
||||
|
||||
/**
 * Converts a legacy (v1) message into the current `{ info, parts }` shape.
 *
 * - Assistant messages keep their text, step-start and tool-invocation parts;
 *   tool invocations become `tool` parts whose state is derived from the
 *   legacy invocation state plus the per-call entry in `v1.metadata.tool`.
 * - User messages keep their text and file parts.
 * - Any other legacy part type is dropped.
 *
 * @param v1 - Legacy message to convert.
 * @returns The converted message info together with its converted parts.
 * @throws Error when the message role is neither `"assistant"` nor `"user"`,
 *   or when a tool invocation is in an unrecognised state.
 */
export function fromV1(v1: Message.Info) {
  // Identity fields shared by every converted part. Invoked once per legacy
  // part, including parts that end up being dropped, so an id is allocated
  // for each of them.
  const partBase = () => ({
    id: Identifier.ascending("part"),
    messageID: v1.id,
    sessionID: v1.metadata.sessionID,
  })

  switch (v1.role) {
    case "assistant": {
      const assistant = v1.metadata.assistant
      const info: Assistant = {
        id: v1.id,
        parentID: "",
        sessionID: v1.metadata.sessionID,
        role: "assistant",
        time: {
          created: v1.metadata.time.created,
          completed: v1.metadata.time.completed,
        },
        cost: assistant!.cost,
        path: assistant!.path,
        summary: assistant!.summary,
        tokens: assistant!.tokens,
        modelID: assistant!.modelID,
        providerID: assistant!.providerID,
        mode: "build",
        error: v1.metadata.error,
      }

      const parts = v1.parts.flatMap((legacy): Part[] => {
        const base = partBase()
        switch (legacy.type) {
          case "text":
            return [{ ...base, type: "text", text: legacy.text }]

          case "step-start":
            return [{ ...base, type: "step-start" }]

          case "tool-invocation": {
            const invocation = legacy.toolInvocation
            return [
              {
                ...base,
                type: "tool",
                callID: invocation.toolCallId,
                tool: invocation.toolName,
                state: (() => {
                  // A partial call has no usable arguments yet.
                  if (invocation.state === "partial-call") {
                    return { status: "pending", input: {}, raw: "" }
                  }

                  // Everything recorded for this call besides title/time is carried over as metadata.
                  const { title, time, ...metadata } = v1.metadata.tool[invocation.toolCallId] ?? {}
                  switch (invocation.state) {
                    case "call":
                      return {
                        status: "running",
                        input: invocation.args,
                        time: { start: time?.start },
                      }
                    case "result":
                      return {
                        status: "completed",
                        input: invocation.args,
                        output: invocation.result,
                        title,
                        time,
                        metadata,
                      }
                    default:
                      throw new Error("unknown tool invocation state")
                  }
                })(),
              },
            ]
          }

          default:
            return []
        }
      })

      return { info, parts }
    }

    case "user": {
      const info: User = {
        id: v1.id,
        sessionID: v1.metadata.sessionID,
        role: "user",
        time: {
          created: v1.metadata.time.created,
        },
        agent: "build",
        model: {
          providerID: "opencode",
          modelID: "opencode",
        },
      }

      const parts = v1.parts.flatMap((legacy): Part[] => {
        const base = partBase()
        switch (legacy.type) {
          case "text":
            return [{ ...base, type: "text", text: legacy.text }]

          case "file":
            return [
              {
                ...base,
                type: "file",
                mime: legacy.mediaType,
                filename: legacy.filename,
                url: legacy.url,
              },
            ]

          default:
            return []
        }
      })

      return { info, parts }
    }

    default:
      throw new Error("unknown message type")
  }
}
|
||||
|
||||
export function toModelMessage(
|
||||
input: {
|
||||
info: Info
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import { FileTime } from "../file/time"
|
|||
import DESCRIPTION from "./read.txt"
|
||||
import { Filesystem } from "../util/filesystem"
|
||||
import { Instance } from "../project/instance"
|
||||
import { Provider } from "../provider/provider"
|
||||
import { Identifier } from "../id/id"
|
||||
import { Permission } from "../permission"
|
||||
import { Agent } from "@/agent/agent"
|
||||
|
|
@ -94,15 +93,11 @@ export const ReadTool = Tool.define("read", {
|
|||
throw new Error(`File not found: ${filepath}`)
|
||||
}
|
||||
|
||||
const isImage = isImageFile(filepath)
|
||||
const model = ctx.extra?.model as Provider.Model | undefined
|
||||
const supportsImages = model?.capabilities.input.image ?? false
|
||||
if (isImage) {
|
||||
if (!supportsImages) {
|
||||
throw new Error(`Failed to read image: ${filepath}, model may not be able to read images`)
|
||||
}
|
||||
const isImage = file.type.startsWith("image/")
|
||||
const isPdf = file.type === "application/pdf"
|
||||
if (isImage || isPdf) {
|
||||
const mime = file.type
|
||||
const msg = "Image read successfully"
|
||||
const msg = `${isImage ? "Image" : "PDF"} read successfully`
|
||||
return {
|
||||
title,
|
||||
output: msg,
|
||||
|
|
@ -164,25 +159,6 @@ export const ReadTool = Tool.define("read", {
|
|||
},
|
||||
})
|
||||
|
||||
/**
 * Identifies a recognised image format from a path's file extension.
 *
 * The extension is compared case-insensitively; file contents are not read.
 *
 * @param filePath - Path whose extension is inspected.
 * @returns The format label ("JPEG", "PNG", "GIF", "BMP" or "WebP"), or
 *   `false` when the extension is not a recognised image extension.
 */
function isImageFile(filePath: string): string | false {
  const labelsByExtension = new Map<string, string>([
    [".jpg", "JPEG"],
    [".jpeg", "JPEG"],
    [".png", "PNG"],
    [".gif", "GIF"],
    [".bmp", "BMP"],
    [".webp", "WebP"],
  ])

  const extension = path.extname(filePath).toLowerCase()
  return labelsByExtension.get(extension) ?? false
}
|
||||
|
||||
async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
|
||||
const ext = path.extname(filepath).toLowerCase()
|
||||
// binary check for common non-text extensions
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue