This commit is contained in:
rekram1-node 2025-10-08 00:06:16 -05:00
parent df759be27e
commit b6ea506c0b
6 changed files with 53 additions and 24 deletions

View file

@ -28,6 +28,12 @@ export namespace ModelsDev {
context: z.number(),
output: z.number(),
}),
modalities: z
.object({
input: z.array(z.enum(["text", "audio", "image", "video", "pdf"])),
output: z.array(z.enum(["text", "audio", "image", "video", "pdf"])),
})
.optional(),
experimental: z.boolean().optional(),
options: z.record(z.string(), z.any()),
provider: z.object({ npm: z.string() }).optional(),

View file

@ -245,6 +245,11 @@ export namespace Provider {
context: 0,
output: 0,
},
modalities: model.modalities ??
existing?.modalities ?? {
input: ["text"],
output: ["text"],
},
provider: model.provider ?? existing?.provider,
}
parsed.models[modelID] = parsedModel

View file

@ -453,6 +453,10 @@ export namespace SessionPrompt {
abort: options.abortSignal!,
messageID: input.processor.message.id,
callID: options.toolCallId,
extra: {
modelID: input.modelID,
providerID: input.providerID,
},
agent: input.agent.name,
metadata: async (val) => {
const match = input.processor.partFromToolCall(options.toolCallId)
@ -485,22 +489,24 @@ export namespace SessionPrompt {
},
toModelOutput: (result: any) => {
const res = result as Tool.ExecuteResult
if (res.part) {
if (res.part.type === "text") {
return {
type: "text",
value: res.part.text,
if (res.parts) {
const parts = res.parts.map((part) => {
if (part.type === "text") {
return {
type: "text",
text: part.text,
} as const
}
}
return {
type: "media",
mediaType: part.mime,
data: part.url,
} as const
})
return {
type: "content",
value: [
{
type: "media",
mediaType: res.part.mime,
data: res.part.url,
},
],
value: parts,
}
}

View file

@ -7,6 +7,7 @@ import { FileTime } from "../file/time"
import DESCRIPTION from "./read.txt"
import { Filesystem } from "../util/filesystem"
import { Instance } from "../project/instance"
import { Provider } from "../provider/provider"
const DEFAULT_READ_LIMIT = 2000
const MAX_LINE_LENGTH = 2000
@ -51,18 +52,30 @@ export const ReadTool = Tool.define("read", {
}
const isImage = isImageFile(filepath)
// Whether the model behind this tool call declares "image" as an input
// modality. False when the provider/model identifiers are missing from
// `ctx.extra`, when the model lookup fails, or when no modalities are declared.
const supportsImages = await (async (): Promise<boolean> => {
  // The flag is only consulted on the image path, so skip the provider/model
  // lookup for every other read.
  // NOTE(review): confirm nothing further down reads it for non-image files.
  if (!isImage) return false
  const providerID = ctx.extra?.["providerID"]
  const modelID = ctx.extra?.["modelID"]
  // Check the values at runtime instead of asserting `as string`.
  if (typeof providerID !== "string" || providerID === "") return false
  if (typeof modelID !== "string" || modelID === "") return false
  // Best effort: a failed lookup is treated the same as "no image support".
  const model = await Provider.getModel(providerID, modelID).catch(() => undefined)
  if (!model) return false
  return model.info.modalities?.input?.includes("image") ?? false
})()
if (isImage) {
if (!supportsImages) {
throw new Error(`Model may not be able to read images`)
}
const mime = file.type
const msg = `Image read successfully`
const msg = "Image read successfully"
return {
title,
output: msg,
part: {
type: "file",
url: Buffer.from(await file.bytes()).toString("base64"),
mime,
filename: filepath,
},
parts: [
{
type: "file",
url: Buffer.from(await file.bytes()).toString("base64"),
mime,
},
],
metadata: {
preview: msg,
},

View file

@ -9,3 +9,4 @@ Usage:
- Results are returned using cat -n format, with line numbers starting at 1
- You have the capability to call multiple tools in a single response. It is always better to speculatively read multiple potentially useful files as a batch.
- If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents.
- You can read image files using this tool.

View file

@ -5,15 +5,13 @@ export namespace Tool {
[key: string]: any
}
// A structured output part: either a text part carrying `text`, or a file part
// carrying `url` and `mime` (plus an optional `filename` in this declaration).
export type PartOutput =
| { type: "text"; text: string }
| { type: "file"; url: string; mime: string; filename?: string }
// NOTE(review): second declaration of the same alias; it differs from the one
// above only in that the file variant has no `filename`. This looks like the
// before/after pair of a change — confirm that only one declaration remains.
export type PartOutput = { type: "text"; text: string } | { type: "file"; url: string; mime: string }
/**
 * Result object of a tool execution, generic over its metadata type `M`
 * (defaults to `Metadata`).
 *
 * NOTE(review): both `part` and `parts` are listed here; this looks like the
 * old single-part field next to its array replacement — confirm which one the
 * final type is meant to keep.
 */
export type ExecuteResult<M extends Metadata = Metadata> = {
// Title string for the result.
title: string
// Tool-specific metadata of type `M`.
metadata: M
// Plain-text output of the tool.
output: string
// Optional single structured part.
part?: PartOutput
// Optional list of structured parts.
parts?: PartOutput[]
}
export type Context<M extends Metadata = Metadata> = {