From 46671ecdc344efac4f952a50ffc0750010631db9 Mon Sep 17 00:00:00 2001 From: Christian Stewart Date: Sun, 7 Dec 2025 02:26:28 -0800 Subject: [PATCH] feat: /knowledge to extract knowledge files Adds a command /knowledge which creates a sub-agent to take any new "knowledge" in the session and format it neatly into markdown docs in ./.opencode/knowledge/ This is particularly useful both for human-readable documentation for getting familiarized with a codebase, and for providing an extended version of AGENTS.md which does not pollute the context. A follow-on commit adds a system to automatically load files from the knowledge directory according to topics mentioned in the conversation. Try running /knowledge to create the sub-agent. Signed-off-by: Christian Stewart --- packages/opencode/src/agent/agent.ts | 90 ++++++++++++++++ .../cmd/tui/component/prompt/autocomplete.tsx | 5 + .../src/cli/cmd/tui/routes/session/index.tsx | 23 ++++ packages/opencode/src/server/server.ts | 102 ++++++++++++++++++ packages/opencode/src/session/prompt.ts | 2 +- packages/sdk/js/src/v2/gen/sdk.gen.ts | 43 ++++++++ packages/sdk/js/src/v2/gen/types.gen.ts | 39 +++++++ packages/sdk/openapi.json | 80 ++++++++++++++ 8 files changed, 383 insertions(+), 1 deletion(-) diff --git a/packages/opencode/src/agent/agent.ts b/packages/opencode/src/agent/agent.ts index add120f91..feba54ec7 100644 --- a/packages/opencode/src/agent/agent.ts +++ b/packages/opencode/src/agent/agent.ts @@ -187,6 +187,96 @@ export namespace Agent { prompt: PROMPT_SUMMARY, tools: {}, }, + "knowledge-extractor": { + name: "knowledge-extractor", + description: "Extracts reusable knowledge from sessions into markdown files", + tools: { + read: true, + write: true, + edit: true, + glob: true, + grep: true, + list: true, + bash: true, + webfetch: false, + task: false, + todoread: false, + todowrite: false, + websearch: false, + codesearch: false, + }, + prompt: [ + `You are a knowledge extraction specialist. Your role is to preserve valuable planning and insights from session transcripts.`, + ``, + `## Context`, + `Sessions often contain rich planning content: architectural decisions, clarifying questions with answers, research findings, and reasoning. This content is already well-structured by the assistant in plan() mode.`, + ``, + `## Your Process`, + `1. Read the session transcript to assess if it contains valuable knowledge`, + `2. If worth preserving, use \`cp\` to copy the transcript to knowledge/ with a descriptive name`, + `3. Check existing knowledge files - if an exact topic match exists, append there instead`, + `4. Use Edit to delete noise: tool outputs, debugging, session metadata, ephemeral details`, + `5. Use Edit to add YAML frontmatter`, + `6. Make minimal edits for flow`, + `7. Return a summary of files created/updated`, + ``, + `## IMPORTANT: Use the Bash Tool with cp`, + `ALWAYS use the Bash tool to run \`cp /.md\` first.`, + `This saves tokens vs using the Write tool to output the entire file contents.`, + `Then use Edit to remove noise and add frontmatter.`, + ``, + `## Key Principle: Preserve, Don't Rewrite`, + `The session transcript often contains beautifully structured plans and reasoning. Your job is to:`, + `- Copy the file with Bash cp command (NOT Write)`, + `- Delete the noise with Edit`, + `- Keep valuable content largely verbatim`, + `- Make minimal edits for flow`, + ``, + `Do NOT rewrite or heavily summarize good content. The assistant already did the hard work.`, + ``, + `## What to Keep`, + `- Architectural decisions and rationale`, + `- Planning sections with reasoning`, + `- Clarifying questions and answers`, + `- Non-obvious patterns and conventions`, + `- Bug root causes and prevention strategies`, + `- Gotchas that would otherwise be re-discovered`, + ``, + `## What to Delete`, + `- Raw tool outputs (file contents, grep results, etc.)`, + `- Debugging back-and-forth that led nowhere`, + `- Session-specific implementation logs`, + `- Generic programming knowledge`, + `- Ephemeral details`, + `- Session metadata header (ID, timestamps)`, + ``, + `## When to Skip Entirely`, + `If the session has no planning/decision content worth preserving, respond "No knowledge"`, + ``, + `## File Format`, + `After copying, use Edit to add YAML frontmatter at the top:`, + `\`\`\`markdown`, + `---`, + `created: YYYY-MM-DD`, + `source_sessions:`, + ` - `, + `---`, + `\`\`\``, + ``, + `## Naming Convention`, + `Use descriptive kebab-case: draggable-tabs-design.md, api-patterns.md, auth-flow.md`, + ].join("\n"), + options: {}, + permission: { + edit: "allow", + bash: { "cp *": "allow", "*": "deny" }, + webfetch: "deny", + doom_loop: "deny", + external_directory: "ask", + }, + mode: "subagent", + native: true, + }, } for (const [key, value] of Object.entries(cfg.agent ?? {})) { if (value.disable) { diff --git a/packages/opencode/src/cli/cmd/tui/component/prompt/autocomplete.tsx b/packages/opencode/src/cli/cmd/tui/component/prompt/autocomplete.tsx index 6fde66944..99b966131 100644 --- a/packages/opencode/src/cli/cmd/tui/component/prompt/autocomplete.tsx +++ b/packages/opencode/src/cli/cmd/tui/component/prompt/autocomplete.tsx @@ -240,6 +240,11 @@ export function Autocomplete(props: { description: "compact the session", onSelect: () => command.trigger("session.compact"), }, + { + display: "/knowledge", + description: "extract knowledge from conversation", + onSelect: () => command.trigger("session.knowledge"), + }, { display: "/unshare", disabled: !s.share, diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx index 48f7db054..b255ad8c2 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx @@ -317,6 +317,29 @@ export function Session() { dialog.clear() }, }, + { + title: "Extract knowledge", + value: "session.knowledge", + keybind: "session_knowledge", + category: "Session", + onSelect: (dialog) => { + const selectedModel = local.model.current() + if (!selectedModel) { + toast.show({ + variant: "warning", + message: "Connect a provider to extract knowledge", + duration: 3000, + }) + return + } + sdk.client.session.extractKnowledge({ + id: route.sessionID, + modelID: selectedModel.modelID, + providerID: selectedModel.providerID, + }) + dialog.clear() + }, + }, { title: "Unshare session", value: "session.unshare", diff --git a/packages/opencode/src/server/server.ts b/packages/opencode/src/server/server.ts index 0cbea9658..4674b7167 100644 --- a/packages/opencode/src/server/server.ts +++ b/packages/opencode/src/server/server.ts @@ -2,6 +2,9 @@ import { BusEvent } from "@/bus/bus-event" import { Bus } from "@/bus" import { GlobalBus } from "@/bus/global" import { Log } from "../util/log" +import path from "path" +import fs from "fs/promises" +import { Identifier } from "../id/id" import { describeRoute, generateSpecs, validator, resolver, openAPIRouteHandler } from "hono-openapi" import { Hono } from "hono" import { cors } from "hono/cors" @@ -1068,6 +1071,105 @@ export namespace Server { return c.json(true) }, ) + .post( + "/session/:id/extract-knowledge", + describeRoute({ + description: "Extract knowledge from the session", + operationId: "session.extractKnowledge", + responses: { + 200: { + description: "Knowledge extraction initiated", + content: { + "application/json": { + schema: resolver(z.boolean()), + }, + }, + }, + ...errors(400, 404), + }, + }), + validator( + "param", + z.object({ + id: z.string().meta({ description: "Session ID" }), + }), + ), + validator( + "json", + z.object({ + providerID: z.string(), + modelID: z.string(), + }), + ), + async (c) => { + const id = c.req.valid("param").id + const body = c.req.valid("json") + const msgs = await Session.messages({ sessionID: id }) + let currentAgent = "build" + for (let i = msgs.length - 1; i >= 0; i--) { + const info = msgs[i].info + if (info.role === "user") { + currentAgent = info.agent || "build" + break + } + } + + const session = await Session.get(id) + const sessDir = path.join(Instance.directory, ".opencode", "sess") + await fs.mkdir(sessDir, { recursive: true }) + const transcriptPath = path.join(sessDir, `${id}.md`) + + let transcript = `# ${session.title}\n\n` + transcript += `**Session ID:** ${session.id}\n` + transcript += `**Created:** ${new Date(session.time.created).toLocaleString()}\n\n---\n\n` + + for (const msg of msgs) { + const role = msg.info.role === "user" ? "User" : "Assistant" + transcript += `## ${role}\n\n` + for (const part of msg.parts) { + if (part.type === "text" && !part.synthetic) { + transcript += `${part.text}\n\n` + } else if (part.type === "tool" && part.state.status === "completed") { + transcript += `\`\`\`\nTool: ${part.tool}\n\`\`\`\n\n` + } + } + transcript += `---\n\n` + } + + await Bun.write(transcriptPath, transcript) + + const knowledgeDir = path.join(Instance.directory, ".opencode", "knowledge") + await fs.mkdir(knowledgeDir, { recursive: true }) + + const prompt = [ + `Session transcript: ${transcriptPath}`, + `Knowledge directory: ${knowledgeDir}`, + `Session ID: ${id}`, + ].join("\n") + + const msg = await Session.updateMessage({ + id: Identifier.ascending("message"), + role: "user", + model: { providerID: body.providerID, modelID: body.modelID }, + sessionID: id, + agent: currentAgent, + time: { created: Date.now() }, + }) + + await Session.updatePart({ + id: Identifier.ascending("part"), + messageID: msg.id, + sessionID: msg.sessionID, + type: "subtask", + prompt, + description: "Extract knowledge", + agent: "knowledge-extractor", + }) + + await SessionPrompt.loop(id) + return c.json(true) + }, + ) .get( "/session/:sessionID/message", describeRoute({ diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index e71162d0b..8dd6800ce 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -391,7 +391,7 @@ export namespace SessionPrompt { start: part.state.status === "running" ? part.state.time.start : Date.now(), end: Date.now(), }, - metadata: part.metadata, + metadata: part.state.status === "running" ? part.state.metadata : undefined, input: part.state.input, }, } satisfies MessageV2.ToolPart) diff --git a/packages/sdk/js/src/v2/gen/sdk.gen.ts b/packages/sdk/js/src/v2/gen/sdk.gen.ts index 16fe07ae4..1129f7e25 100644 --- a/packages/sdk/js/src/v2/gen/sdk.gen.ts +++ b/packages/sdk/js/src/v2/gen/sdk.gen.ts @@ -83,6 +83,8 @@ import type { SessionDeleteResponses, SessionDiffErrors, SessionDiffResponses, + SessionExtractKnowledgeErrors, + SessionExtractKnowledgeResponses, SessionForkResponses, SessionGetErrors, SessionGetResponses, @@ -1155,6 +1157,47 @@ export class Session extends HeyApiClient { }) } + /** + * Extract knowledge from the session + */ + public extractKnowledge( + parameters: { + id: string + directory?: string + providerID?: string + modelID?: string + }, + options?: Options, + ) { + const params = buildClientParams( + [parameters], + [ + { + args: [ + { in: "path", key: "id" }, + { in: "query", key: "directory" }, + { in: "body", key: "providerID" }, + { in: "body", key: "modelID" }, + ], + }, + ], + ) + return (options?.client ?? this.client).post< + SessionExtractKnowledgeResponses, + SessionExtractKnowledgeErrors, + ThrowOnError + >({ + url: "/session/{id}/extract-knowledge", + ...options, + ...params, + headers: { + "Content-Type": "application/json", + ...options?.headers, + ...params.headers, + }, + }) + } + /** * Get session messages * diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index ca8d25fd5..df5735bea 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -2766,6 +2766,45 @@ export type SessionSummarizeResponses = { export type SessionSummarizeResponse = SessionSummarizeResponses[keyof SessionSummarizeResponses] +export type SessionExtractKnowledgeData = { + body?: { + providerID: string + modelID: string + } + path: { + /** + * Session ID + */ + id: string + } + query?: { + directory?: string + } + url: "/session/{id}/extract-knowledge" +} + +export type SessionExtractKnowledgeErrors = { + /** + * Bad request + */ + 400: BadRequestError + /** + * Not found + */ + 404: NotFoundError +} + +export type SessionExtractKnowledgeError = SessionExtractKnowledgeErrors[keyof SessionExtractKnowledgeErrors] + +export type SessionExtractKnowledgeResponses = { + /** + * Knowledge extraction initiated + */ + 200: boolean +} + +export type SessionExtractKnowledgeResponse = SessionExtractKnowledgeResponses[keyof SessionExtractKnowledgeResponses] + export type SessionMessagesData = { body?: never path: { diff --git a/packages/sdk/openapi.json b/packages/sdk/openapi.json index 3be96c599..a65ad5f5a 100644 --- a/packages/sdk/openapi.json +++ b/packages/sdk/openapi.json @@ -1819,6 +1819,86 @@ ] } }, + "/session/{id}/extract-knowledge": { + "post": { + "operationId": "session.extractKnowledge", + "parameters": [ + { + "in": "query", + "name": "directory", + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "id", + "schema": { + "type": "string" + }, + "required": true, + "description": "Session ID" + } + ], + "description": "Extract knowledge from the session", + "responses": { + "200": { + "description": "Knowledge extraction initiated", + "content": { + "application/json": { + "schema": { + "type": "boolean" + } + } + } + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BadRequestError" + } + } + } + }, + "404": { + "description": "Not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/NotFoundError" + } + } + } + } + }, + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "providerID": { + "type": "string" + }, + "modelID": { + "type": "string" + } + }, + "required": ["providerID", "modelID"] + } + } + } + }, + "x-codeSamples": [ + { + "lang": "js", + "source": "import { createOpencodeClient } from \"@opencode-ai/sdk\n\nconst client = createOpencodeClient()\nawait client.session.extractKnowledge({\n ...\n})" + } + ] + } + }, "/session/{sessionID}/message": { "get": { "operationId": "session.messages",