diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
index 09dfd69a3..17fbf18f5 100644
--- a/packages/opencode/src/provider/transform.ts
+++ b/packages/opencode/src/provider/transform.ts
@@ -2,6 +2,37 @@ import type { APICallError, ModelMessage } from "ai"
 import { unique } from "remeda"
 import type { JSONSchema } from "zod/v4/core"
 import type { Provider } from "./provider"
+import type { ModelsDev } from "./models"
+
+/** Element type of a models.dev model's `modalities.input` list. */
+type Modality = NonNullable<ModelsDev.Model["modalities"]>["input"][number]
+
+/**
+ * Maps a MIME type to the input modality a model must support to accept it.
+ *
+ * @param mime - MIME type such as `image/png` or `application/pdf`.
+ * @returns The modality, or `undefined` when the MIME type matches none of the known kinds.
+ */
+function mimeToModality(mime: string): Modality | undefined {
+  if (mime.startsWith("image/")) return "image"
+  if (mime.startsWith("audio/")) return "audio"
+  if (mime.startsWith("video/")) return "video"
+  if (mime === "application/pdf") return "pdf"
+  return undefined
+}
+
+/**
+ * Reads the MIME type out of a `data:` URL.
+ *
+ * Only strings and `URL` objects are inspected, so raw byte payloads are never stringified.
+ *
+ * @param data - Payload of an image part.
+ * @returns The MIME type, or `undefined` when `data` is not a `data:` URL that names one.
+ */
+function dataUrlMime(data: unknown): string | undefined {
+  if (typeof data !== "string" && !(data instanceof URL)) return undefined
+  return /^data:([^;,]+)/.exec(data.toString())?.[1]
+}
 
 export namespace ProviderTransform {
   function normalizeMessages(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
@@ -148,7 +179,44 @@ export namespace ProviderTransform {
     return msgs
   }
 
+  /**
+   * Swaps user attachments the model cannot ingest for a text part describing the problem.
+   *
+   * Only `file` and `image` parts of user messages with array content are examined; a part is
+   * replaced when its modality is known and `model.capabilities.input` does not enable it.
+   *
+   * @param msgs - Conversation about to be sent to the provider.
+   * @param model - Target model whose input capabilities are consulted.
+   * @returns A new message array; the input messages are not mutated.
+   */
+  function unsupportedParts(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
+    return msgs.map((msg) => {
+      if (msg.role !== "user" || !Array.isArray(msg.content)) return msg
+
+      const filtered = msg.content.map((part) => {
+        if (part.type !== "file" && part.type !== "image") return part
+
+        const mime = part.type === "image" ? (part.mediaType ?? dataUrlMime(part.image)) : part.mediaType
+        const filename = part.type === "file" ? part.filename : undefined
+        // An `image` part is image input even when no MIME type can be recovered (raw bytes, bare
+        // base64, remote URL), so it must not slip past the capability check.
+        const modality = (mime ? mimeToModality(mime) : undefined) ?? (part.type === "image" ? "image" : undefined)
+        if (!modality) return part
+        if (model.capabilities.input[modality]) return part
+
+        const name = filename ? `"${filename}"` : modality
+        return {
+          type: "text" as const,
+          text: `ERROR: Cannot read ${name} (this model does not support ${modality} input). Inform the user.`,
+        }
+      })
+
+      return { ...msg, content: filtered }
+    })
+  }
+
   export function message(msgs: ModelMessage[], model: Provider.Model) {
+    msgs = unsupportedParts(msgs, model)
     msgs = normalizeMessages(msgs, model)
     if (model.providerID === "anthropic" || model.api.id.includes("anthropic") || model.api.id.includes("claude")) {
       msgs = applyCaching(msgs, model.providerID)
diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts
index 955d11e94..50a480626 100644
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@@ -411,147 +411,6 @@ export namespace MessageV2 {
   })
   export type WithParts = z.infer<typeof WithParts>
 
-  export function fromV1(v1: Message.Info) {
-    if (v1.role === "assistant") {
-      const info: Assistant = {
-        id: v1.id,
-        parentID: "",
-        sessionID: v1.metadata.sessionID,
-        role: "assistant",
-        time: {
-          created: v1.metadata.time.created,
-          completed: v1.metadata.time.completed,
-        },
-        cost: v1.metadata.assistant!.cost,
-        path: v1.metadata.assistant!.path,
-        summary: v1.metadata.assistant!.summary,
-        tokens: v1.metadata.assistant!.tokens,
-        modelID: v1.metadata.assistant!.modelID,
-        providerID: v1.metadata.assistant!.providerID,
-        mode: "build",
-        error: v1.metadata.error,
-      }
-      const parts = v1.parts.flatMap((part): Part[] => {
-        const base = {
-          id: Identifier.ascending("part"),
-          messageID: v1.id,
-          sessionID: v1.metadata.sessionID,
-        }
-        if (part.type === "text") {
-          return [
-            {
-              ...base,
-              type: "text",
-              text: part.text,
-            },
-          ]
-        }
-        if (part.type === "step-start") {
-          return [
-            {
-              ...base,
-              type: "step-start",
-            },
-          ]
-        }
-        if (part.type === "tool-invocation") {
-          return [
-            {
-              ...base,
-              type: "tool",
-              callID: part.toolInvocation.toolCallId,
-              tool: part.toolInvocation.toolName,
-              state: (() => {
-                if (part.toolInvocation.state === "partial-call") {
-                  return {
-                    status: "pending",
-                    input: {},
-                    raw: "",
-                  }
-                }
-
-                const { title, time, ...metadata } = v1.metadata.tool[part.toolInvocation.toolCallId] ?? {}
-                if (part.toolInvocation.state === "call") {
-                  return {
-                    status: "running",
-                    input: part.toolInvocation.args,
-                    time: {
-                      start: time?.start,
-                    },
-                  }
-                }
-
-                if (part.toolInvocation.state === "result") {
-                  return {
-                    status: "completed",
-                    input: part.toolInvocation.args,
-                    output: part.toolInvocation.result,
-                    title,
-                    time,
-                    metadata,
-                  }
-                }
-                throw new Error("unknown tool invocation state")
-              })(),
-            },
-          ]
-        }
-        return []
-      })
-      return {
-        info,
-        parts,
-      }
-    }
-
-    if (v1.role === "user") {
-      const info: User = {
-        id: v1.id,
-        sessionID: v1.metadata.sessionID,
-        role: "user",
-        time: {
-          created: v1.metadata.time.created,
-        },
-        agent: "build",
-        model: {
-          providerID: "opencode",
-          modelID: "opencode",
-        },
-      }
-      const parts = v1.parts.flatMap((part): Part[] => {
-        const base = {
-          id: Identifier.ascending("part"),
-          messageID: v1.id,
-          sessionID: v1.metadata.sessionID,
-        }
-        if (part.type === "text") {
-          return [
-            {
-              ...base,
-              type: "text",
-              text: part.text,
-            },
-          ]
-        }
-        if (part.type === "file") {
-          return [
-            {
-              ...base,
-              type: "file",
-              mime: part.mediaType,
-              filename: part.filename,
-              url: part.url,
-            },
-          ]
-        }
-        return []
-      })
-      return { info, parts }
-    }
-
-    throw new Error("unknown message type")
-  }
-
   export function toModelMessage(
     input: {
       info: Info
diff --git a/packages/opencode/src/tool/read.ts b/packages/opencode/src/tool/read.ts
index 7d01a1981..e1ba84524 100644
--- a/packages/opencode/src/tool/read.ts
+++ b/packages/opencode/src/tool/read.ts
@@ -7,7 +7,6 @@ import { FileTime } from "../file/time"
 import DESCRIPTION from "./read.txt"
 import { Filesystem } from "../util/filesystem"
 import { Instance } from "../project/instance"
-import { Provider } from "../provider/provider"
 import { Identifier } from "../id/id"
 import { Permission } from "../permission"
 import { Agent } from "@/agent/agent"
@@ -94,15 +93,13 @@ export const ReadTool = Tool.define("read", {
       throw new Error(`File not found: ${filepath}`)
     }
 
-    const isImage = isImageFile(filepath)
-    const model = ctx.extra?.model as Provider.Model | undefined
-    const supportsImages = model?.capabilities.input.image ?? false
-    if (isImage) {
-      if (!supportsImages) {
-        throw new Error(`Failed to read image: ${filepath}, model may not be able to read images`)
-      }
+    // SVG is XML text rather than a raster image, so it is not attached as an image; the
+    // extension allow-list this check replaces never treated it as one either.
+    const isImage = file.type.startsWith("image/") && !file.type.startsWith("image/svg")
+    const isPdf = file.type === "application/pdf"
+    if (isImage || isPdf) {
       const mime = file.type
-      const msg = "Image read successfully"
+      const msg = `${isImage ? "Image" : "PDF"} read successfully`
       return {
         title,
         output: msg,
@@ -164,25 +161,6 @@ export const ReadTool = Tool.define("read", {
   },
 })
 
-function isImageFile(filePath: string): string | false {
-  const ext = path.extname(filePath).toLowerCase()
-  switch (ext) {
-    case ".jpg":
-    case ".jpeg":
-      return "JPEG"
-    case ".png":
-      return "PNG"
-    case ".gif":
-      return "GIF"
-    case ".bmp":
-      return "BMP"
-    case ".webp":
-      return "WebP"
-    default:
-      return false
-  }
-}
-
 async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
   const ext = path.extname(filepath).toLowerCase()
   // binary check for common non-text extensions