wip

2025-12-23 10:11:41 +00:00 · 2025-10-08 00:06:16 -05:00 · 2025-10-08 00:06:16 -05:00 · b6ea506c0b
commit b6ea506c0b
parent df759be27e
6 changed files with 53 additions and 24 deletions
--- a/packages/opencode/src/provider/models.ts
+++ b/packages/opencode/src/provider/models.ts
@ -28,6 +28,12 @@ export namespace ModelsDev {
        context: z.number(),
        output: z.number(),
      }),
+      modalities: z
+        .object({
+          input: z.array(z.enum(["text", "audio", "image", "video", "pdf"])),
+          output: z.array(z.enum(["text", "audio", "image", "video", "pdf"])),
+        })
+        .optional(),
      experimental: z.boolean().optional(),
      options: z.record(z.string(), z.any()),
      provider: z.object({ npm: z.string() }).optional(),
--- a/packages/opencode/src/provider/provider.ts
+++ b/packages/opencode/src/provider/provider.ts
@ -245,6 +245,11 @@ export namespace Provider {
              context: 0,
              output: 0,
            },
+          modalities: model.modalities ??
+            existing?.modalities ?? {
+              input: ["text"],
+              output: ["text"],
+            },
          provider: model.provider ?? existing?.provider,
        }
        parsed.models[modelID] = parsedModel
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@ -453,6 +453,10 @@ export namespace SessionPrompt {
            abort: options.abortSignal!,
            messageID: input.processor.message.id,
            callID: options.toolCallId,
+            extra: {
+              modelID: input.modelID,
+              providerID: input.providerID,
+            },
            agent: input.agent.name,
            metadata: async (val) => {
              const match = input.processor.partFromToolCall(options.toolCallId)
@ -485,22 +489,24 @@ export namespace SessionPrompt {
        },
        toModelOutput: (result: any) => {
          const res = result as Tool.ExecuteResult
-          if (res.part) {
-            if (res.part.type === "text") {
-              return {
-                type: "text",
-                value: res.part.text,
+          if (res.parts) {
+            const parts = res.parts.map((part) => {
+              if (part.type === "text") {
+                return {
+                  type: "text",
+                  text: part.text,
+                } as const
              }
-            }
+              return {
+                type: "media",
+                mediaType: part.mime,
+                data: part.url,
+              } as const
+            })
+
            return {
              type: "content",
-              value: [
-                {
-                  type: "media",
-                  mediaType: res.part.mime,
-                  data: res.part.url,
-                },
-              ],
+              value: parts,
            }
          }

--- a/packages/opencode/src/tool/read.ts
+++ b/packages/opencode/src/tool/read.ts
@ -7,6 +7,7 @@ import { FileTime } from "../file/time"
 import DESCRIPTION from "./read.txt"
 import { Filesystem } from "../util/filesystem"
 import { Instance } from "../project/instance"
+import { Provider } from "../provider/provider"

 const DEFAULT_READ_LIMIT = 2000
 const MAX_LINE_LENGTH = 2000
@ -51,18 +52,30 @@ export const ReadTool = Tool.define("read", {
    }

    const isImage = isImageFile(filepath)
+    const supportsImages = await (async () => {
+      if (!ctx.extra?.["providerID"] || !ctx.extra?.["modelID"]) return false
+      const providerID = ctx.extra["providerID"] as string
+      const modelID = ctx.extra["modelID"] as string
+      const model = await Provider.getModel(providerID, modelID).catch(() => undefined)
+      if (!model) return false
+      return model.info.modalities?.input?.includes("image") ?? false
+    })()
    if (isImage) {
+      if (!supportsImages) {
+        throw new Error(`Model may not be able to read images`)
+      }
      const mime = file.type
-      const msg = `Image read successfully`
+      const msg = "Image read successfully"
      return {
        title,
        output: msg,
-        part: {
-          type: "file",
-          url: Buffer.from(await file.bytes()).toString("base64"),
-          mime,
-          filename: filepath,
-        },
+        parts: [
+          {
+            type: "file",
+            url: Buffer.from(await file.bytes()).toString("base64"),
+            mime,
+          },
+        ],
        metadata: {
          preview: msg,
        },
--- a/packages/opencode/src/tool/read.txt
+++ b/packages/opencode/src/tool/read.txt
@ -9,3 +9,4 @@ Usage:
 - Results are returned using cat -n format, with line numbers starting at 1
 - You have the capability to call multiple tools in a single response. It is always better to speculatively read multiple files as a batch that are potentially useful.
 - If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents.
+- You can read image files using this tool.
--- a/packages/opencode/src/tool/tool.ts
+++ b/packages/opencode/src/tool/tool.ts
@ -5,15 +5,13 @@ export namespace Tool {
    [key: string]: any
  }

-  export type PartOutput =
-    | { type: "text"; text: string }
-    | { type: "file"; url: string; mime: string; filename?: string }
+  export type PartOutput = { type: "text"; text: string } | { type: "file"; url: string; mime: string }

  export type ExecuteResult<M extends Metadata = Metadata> = {
    title: string
    metadata: M
    output: string
-    part?: PartOutput
+    parts?: PartOutput[]
  }

  export type Context<M extends Metadata = Metadata> = {