From 46671ecdc344efac4f952a50ffc0750010631db9 Mon Sep 17 00:00:00 2001
From: Christian Stewart <christian@aperture.us>
Date: Sun, 7 Dec 2025 02:26:28 -0800
Subject: [PATCH] feat: /knowledge to extract knowledge files

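Adds a /knowledge command that spawns a sub-agent to take any new "knowledge"
from the session and format it neatly into markdown docs in
./.opencode/knowledge/. The server first writes the session transcript to
./.opencode/sess/<session-id>.md; the sub-agent copies it and trims the noise.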

This is particularly useful both for human-readable documentation for getting
familiarized with a codebase, and for providing an extended version of AGENTS.md
which does not pollute the context.

A follow-on commit adds a system to automatically load files from the knowledge
directory according to topics mentioned in the conversation.

Try running /knowledge to create the sub-agent.

Signed-off-by: Christian Stewart <christian@aperture.us>
---
 packages/opencode/src/agent/agent.ts          |  90 ++++++++++++++++
 .../cmd/tui/component/prompt/autocomplete.tsx |   5 +
 .../src/cli/cmd/tui/routes/session/index.tsx  |  23 ++++
 packages/opencode/src/server/server.ts        | 102 ++++++++++++++++++
 packages/opencode/src/session/prompt.ts       |   2 +-
 packages/sdk/js/src/v2/gen/sdk.gen.ts         |  43 ++++++++
 packages/sdk/js/src/v2/gen/types.gen.ts       |  39 +++++++
 packages/sdk/openapi.json                     |  80 ++++++++++++++
 8 files changed, 383 insertions(+), 1 deletion(-)
diff --git a/packages/opencode/src/agent/agent.ts b/packages/opencode/src/agent/agent.ts
index add120f91..feba54ec7 100644
--- a/packages/opencode/src/agent/agent.ts
+++ b/packages/opencode/src/agent/agent.ts
@@ -187,6 +187,96 @@ export namespace Agent {
         prompt: PROMPT_SUMMARY,
         tools: {},
       },
+      "knowledge-extractor": { // sub-agent (mode: "subagent") that curates a session transcript into knowledge markdown
+        name: "knowledge-extractor",
+        description: "Extracts reusable knowledge from sessions into markdown files",
+        tools: { // file tools plus bash are enabled; web, task, todo and search tools are disabled
+          read: true,
+          write: true,
+          edit: true,
+          glob: true,
+          grep: true,
+          list: true,
+          bash: true, // the prompt below instructs the agent to copy the transcript with `cp`
+          webfetch: false,
+          task: false,
+          todoread: false,
+          todowrite: false,
+          websearch: false,
+          codesearch: false,
+        },
+        prompt: [
+          `You are a knowledge extraction specialist. Your role is to preserve valuable planning and insights from session transcripts.`,
+          ``,
+          `## Context`,
+          `Sessions often contain rich planning content: architectural decisions, clarifying questions with answers, research findings, and reasoning. This content is already well-structured by the assistant in plan() mode.`,
+          ``,
+          `## Your Process`,
+          `1. Read the session transcript to assess if it contains valuable knowledge`,
+          `2. If worth preserving, use \`cp\` to copy the transcript to knowledge/ with a descriptive name`,
+          `3. Check existing knowledge files - if an exact topic match exists, append there instead`,
+          `4. Use Edit to delete noise: tool outputs, debugging, session metadata, ephemeral details`,
+          `5. Use Edit to add YAML frontmatter`,
+          `6. Make minimal edits for flow`,
+          `7. Return a summary of files created/updated`,
+          ``,
+          `## IMPORTANT: Use the Bash Tool with cp`,
+          `ALWAYS use the Bash tool to run \`cp <transcript> <knowledge-dir>/<name>.md\` first.`,
+          `This saves tokens vs using the Write tool to output the entire file contents.`,
+          `Then use Edit to remove noise and add frontmatter.`,
+          ``,
+          `## Key Principle: Preserve, Don't Rewrite`,
+          `The session transcript often contains beautifully structured plans and reasoning. Your job is to:`,
+          `- Copy the file with Bash cp command (NOT Write)`,
+          `- Delete the noise with Edit`,
+          `- Keep valuable content largely verbatim`,
+          `- Make minimal edits for flow`,
+          ``,
+          `Do NOT rewrite or heavily summarize good content. The assistant already did the hard work.`,
+          ``,
+          `## What to Keep`,
+          `- Architectural decisions and rationale`,
+          `- Planning sections with reasoning`,
+          `- Clarifying questions and answers`,
+          `- Non-obvious patterns and conventions`,
+          `- Bug root causes and prevention strategies`,
+          `- Gotchas that would otherwise be re-discovered`,
+          ``,
+          `## What to Delete`,
+          `- Raw tool outputs (file contents, grep results, etc.)`,
+          `- Debugging back-and-forth that led nowhere`,
+          `- Session-specific implementation logs`,
+          `- Generic programming knowledge`,
+          `- Ephemeral details`,
+          `- Session metadata header (ID, timestamps)`,
+          ``,
+          `## When to Skip Entirely`,
+          `If the session has no planning/decision content worth preserving, respond "No knowledge"`,
+          ``,
+          `## File Format`,
+          `After copying, use Edit to add YAML frontmatter at the top:`,
+          `\`\`\`markdown`,
+          `---`,
+          `created: YYYY-MM-DD`,
+          `source_sessions:`,
+          `  - <session-id>`,
+          `---`,
+          `\`\`\``,
+          ``,
+          `## Naming Convention`,
+          `Use descriptive kebab-case: draggable-tabs-design.md, api-patterns.md, auth-flow.md`,
+        ].join("\n"),
+        options: {},
+        permission: {
+          edit: "allow",
+          bash: { "cp *": "allow", "*": "deny" }, // intent: only `cp ...` may run; assumes the more specific pattern wins over "*" — TODO confirm against the permission matcher
+          webfetch: "deny",
+          doom_loop: "deny",
+          external_directory: "ask",
+        },
+        mode: "subagent",
+        native: true,
+      },
     }
     for (const [key, value] of Object.entries(cfg.agent ?? {})) {
       if (value.disable) {
diff --git a/packages/opencode/src/cli/cmd/tui/component/prompt/autocomplete.tsx b/packages/opencode/src/cli/cmd/tui/component/prompt/autocomplete.tsx
index 6fde66944..99b966131 100644
--- a/packages/opencode/src/cli/cmd/tui/component/prompt/autocomplete.tsx
+++ b/packages/opencode/src/cli/cmd/tui/component/prompt/autocomplete.tsx
@@ -240,6 +240,11 @@ export function Autocomplete(props: {
           description: "compact the session",
           onSelect: () => command.trigger("session.compact"),
         },
+        { // "/knowledge" slash-command entry
+          display: "/knowledge",
+          description: "extract knowledge from conversation",
+          onSelect: () => command.trigger("session.knowledge"), // fires the command registered with value "session.knowledge"
+        },
         {
           display: "/unshare",
           disabled: !s.share,
diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index 48f7db054..b255ad8c2 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -317,6 +317,29 @@ export function Session() {
         dialog.clear()
       },
     },
+    { // Session command that asks the server to extract knowledge from the current session
+      title: "Extract knowledge",
+      value: "session.knowledge",
+      keybind: "session_knowledge", // NOTE(review): this patch's diffstat touches no keybind/config schema file — confirm "session_knowledge" is a known keybind
+      category: "Session",
+      onSelect: (dialog) => {
+        const selectedModel = local.model.current()
+        if (!selectedModel) { // no model selected: warn and bail out before calling the server
+          toast.show({
+            variant: "warning",
+            message: "Connect a provider to extract knowledge",
+            duration: 3000,
+          })
+          return
+        }
+        sdk.client.session.extractKnowledge({ // fire-and-forget: the returned promise is neither awaited nor checked for errors
+          id: route.sessionID,
+          modelID: selectedModel.modelID,
+          providerID: selectedModel.providerID,
+        })
+        dialog.clear() // runs immediately, without waiting for the request above
+      },
+    },
     {
       title: "Unshare session",
       value: "session.unshare",
diff --git a/packages/opencode/src/server/server.ts b/packages/opencode/src/server/server.ts
index 0cbea9658..4674b7167 100644
--- a/packages/opencode/src/server/server.ts
+++ b/packages/opencode/src/server/server.ts
@@ -2,6 +2,9 @@ import { BusEvent } from "@/bus/bus-event"
 import { Bus } from "@/bus"
 import { GlobalBus } from "@/bus/global"
 import { Log } from "../util/log"
+import path from "path"
+import fs from "fs/promises"
+import { Identifier } from "../id/id"
 import { describeRoute, generateSpecs, validator, resolver, openAPIRouteHandler } from "hono-openapi"
 import { Hono } from "hono"
 import { cors } from "hono/cors"
@@ -1068,6 +1071,105 @@ export namespace Server {
           return c.json(true)
         },
       )
+      .post(
+        "/session/:id/extract-knowledge",
+        describeRoute({
+          description: "Extract knowledge from the session",
+          operationId: "session.extractKnowledge",
+          responses: {
+            200: {
+              description: "Knowledge extraction initiated",
+              content: {
+                "application/json": {
+                  schema: resolver(z.boolean()),
+                },
+              },
+            },
+            ...errors(400, 404),
+          },
+        }),
+        validator(
+          "param",
+          z.object({
+            id: z.string().meta({ description: "Session ID" }),
+          }),
+        ),
+        validator(
+          "json",
+          z.object({
+            providerID: z.string(),
+            modelID: z.string(),
+          }),
+        ),
+        async (c) => { // handler: write a transcript file, queue a knowledge-extractor subtask, then run the session loop
+          const id = c.req.valid("param").id
+          const body = c.req.valid("json")
+          const msgs = await Session.messages({ sessionID: id })
+          let currentAgent = "build" // fallback when no user message (or no agent on it) is found
+          for (let i = msgs.length - 1; i >= 0; i--) { // scan backwards for the most recent user message
+            const info = msgs[i].info
+            if (info.role === "user") {
+              currentAgent = info.agent || "build"
+              break
+            }
+          }
+          // Dump a markdown transcript to .opencode/sess/<id>.md for the sub-agent to copy and trim.
+          const session = await Session.get(id)
+          const sessDir = path.join(Instance.directory, ".opencode", "sess")
+          await fs.mkdir(sessDir, { recursive: true })
+          const transcriptPath = path.join(sessDir, `${id}.md`)
+          // Metadata header. The timestamp is ISO 8601 so it does not vary with the server's locale or timezone.
+          let transcript = `# ${session.title}\n\n`
+          transcript += `**Session ID:** ${session.id}\n`
+          transcript += `**Created:** ${new Date(session.time.created).toISOString()}\n\n---\n\n`
+          // One "## User" / "## Assistant" section per message: non-synthetic text verbatim, completed tools by name only.
+          for (const msg of msgs) {
+            const role = msg.info.role === "user" ? "User" : "Assistant"
+            transcript += `## ${role}\n\n`
+            for (const part of msg.parts) {
+              if (part.type === "text" && !part.synthetic) {
+                transcript += `${part.text}\n\n`
+              } else if (part.type === "tool" && part.state.status === "completed") {
+                transcript += `\`\`\`\nTool: ${part.tool}\n\`\`\`\n\n`
+              }
+            }
+            transcript += `---\n\n`
+          }
+          // Persist the transcript before the sub-agent is pointed at it.
+          await Bun.write(transcriptPath, transcript)
+          // Make sure the knowledge output directory exists.
+          const knowledgeDir = path.join(Instance.directory, ".opencode", "knowledge")
+          await fs.mkdir(knowledgeDir, { recursive: true })
+          // The subtask prompt carries only the transcript path, the output directory and the session ID.
+          const prompt = [
+            `Session transcript: ${transcriptPath}`,
+            `Knowledge directory: ${knowledgeDir}`,
+            `Session ID: ${id}`,
+          ].join("\n")
+          // Insert a user message, attributed to the agent found above, to carry the subtask part.
+          const msg = await Session.updateMessage({
+            id: Identifier.ascending("message"),
+            role: "user",
+            model: { providerID: body.providerID, modelID: body.modelID },
+            sessionID: id,
+            agent: currentAgent,
+            time: { created: Date.now() },
+          })
+          // Attach a subtask part that targets the "knowledge-extractor" agent.
+          await Session.updatePart({
+            id: Identifier.ascending("part"),
+            messageID: msg.id,
+            sessionID: msg.sessionID,
+            type: "subtask",
+            prompt,
+            description: "Extract knowledge",
+            agent: "knowledge-extractor",
+          })
+          // The loop call is awaited, so the JSON response is sent only after it returns.
+          await SessionPrompt.loop(id)
+          return c.json(true)
+        },
+      )
       .get(
         "/session/:sessionID/message",
         describeRoute({
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index e71162d0b..8dd6800ce 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -391,7 +391,7 @@ export namespace SessionPrompt {
                 start: part.state.status === "running" ? part.state.time.start : Date.now(),
                 end: Date.now(),
               },
-              metadata: part.metadata,
+              metadata: part.state.status === "running" ? part.state.metadata : undefined,
               input: part.state.input,
             },
           } satisfies MessageV2.ToolPart)
diff --git a/packages/sdk/js/src/v2/gen/sdk.gen.ts b/packages/sdk/js/src/v2/gen/sdk.gen.ts
index 16fe07ae4..1129f7e25 100644
--- a/packages/sdk/js/src/v2/gen/sdk.gen.ts
+++ b/packages/sdk/js/src/v2/gen/sdk.gen.ts
@@ -83,6 +83,8 @@ import type {
   SessionDeleteResponses,
   SessionDiffErrors,
   SessionDiffResponses,
+  SessionExtractKnowledgeErrors,
+  SessionExtractKnowledgeResponses,
   SessionForkResponses,
   SessionGetErrors,
   SessionGetResponses,
@@ -1155,6 +1157,47 @@ export class Session extends HeyApiClient {
     })
   }
 
+  /**
+   * Extract knowledge from the session (POST /session/{id}/extract-knowledge).
+   */
+  public extractKnowledge<ThrowOnError extends boolean = false>(
+    parameters: {
+      id: string
+      directory?: string
+      providerID?: string
+      modelID?: string
+    },
+    options?: Options<never, ThrowOnError>,
+  ) {
+    const params = buildClientParams(
+      [parameters],
+      [
+        {
+          args: [
+            { in: "path", key: "id" },
+            { in: "query", key: "directory" },
+            { in: "body", key: "providerID" },
+            { in: "body", key: "modelID" },
+          ],
+        },
+      ],
+    )
+    return (options?.client ?? this.client).post<
+      SessionExtractKnowledgeResponses,
+      SessionExtractKnowledgeErrors,
+      ThrowOnError
+    >({
+      url: "/session/{id}/extract-knowledge",
+      ...options,
+      ...params,
+      headers: {
+        "Content-Type": "application/json",
+        ...options?.headers, // spread after the default, so caller-supplied headers can override Content-Type
+        ...params.headers,
+      },
+    })
+  }
+
   /**
    * Get session messages
    *
diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts
index ca8d25fd5..df5735bea 100644
--- a/packages/sdk/js/src/v2/gen/types.gen.ts
+++ b/packages/sdk/js/src/v2/gen/types.gen.ts
@@ -2766,6 +2766,45 @@ export type SessionSummarizeResponses = {
 
 export type SessionSummarizeResponse = SessionSummarizeResponses[keyof SessionSummarizeResponses]
 
+export type SessionExtractKnowledgeData = { // request shape for POST /session/{id}/extract-knowledge
+  body?: { // both fields are required whenever a body is supplied
+    providerID: string
+    modelID: string
+  }
+  path: {
+    /**
+     * Session ID
+     */
+    id: string
+  }
+  query?: {
+    directory?: string
+  }
+  url: "/session/{id}/extract-knowledge"
+}
+
+export type SessionExtractKnowledgeErrors = { // error payloads keyed by HTTP status code
+  /**
+   * Bad request
+   */
+  400: BadRequestError
+  /**
+   * Not found
+   */
+  404: NotFoundError
+}
+
+export type SessionExtractKnowledgeError = SessionExtractKnowledgeErrors[keyof SessionExtractKnowledgeErrors] // union of the error payload types above
+
+export type SessionExtractKnowledgeResponses = { // success payloads keyed by HTTP status code
+  /**
+   * Knowledge extraction initiated
+   */
+  200: boolean
+}
+
+export type SessionExtractKnowledgeResponse = SessionExtractKnowledgeResponses[keyof SessionExtractKnowledgeResponses] // union of the success payload types above (only boolean here)
+
 export type SessionMessagesData = {
   body?: never
   path: {
diff --git a/packages/sdk/openapi.json b/packages/sdk/openapi.json
index 3be96c599..a65ad5f5a 100644
--- a/packages/sdk/openapi.json
+++ b/packages/sdk/openapi.json
@@ -1819,6 +1819,86 @@
         ]
       }
     },
+    "/session/{id}/extract-knowledge": {
+      "post": {
+        "operationId": "session.extractKnowledge",
+        "parameters": [
+          {
+            "in": "query",
+            "name": "directory",
+            "schema": {
+              "type": "string"
+            }
+          },
+          {
+            "in": "path",
+            "name": "id",
+            "schema": {
+              "type": "string"
+            },
+            "required": true,
+            "description": "Session ID"
+          }
+        ],
+        "description": "Extract knowledge from the session",
+        "responses": {
+          "200": {
+            "description": "Knowledge extraction initiated",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "type": "boolean"
+                }
+              }
+            }
+          },
+          "400": {
+            "description": "Bad request",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/BadRequestError"
+                }
+              }
+            }
+          },
+          "404": {
+            "description": "Not found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/NotFoundError"
+                }
+              }
+            }
+          }
+        },
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "type": "object",
+                "properties": {
+                  "providerID": {
+                    "type": "string"
+                  },
+                  "modelID": {
+                    "type": "string"
+                  }
+                },
+                "required": ["providerID", "modelID"]
+              }
+            }
+          }
+        },
+        "x-codeSamples": [
+          {
+            "lang": "js",
+            "source": "import { createOpencodeClient } from \"@opencode-ai/sdk\"\n\nconst client = createOpencodeClient()\nawait client.session.extractKnowledge({\n  ...\n})"
+          }
+        ]
+      }
+    },
     "/session/{sessionID}/message": {
       "get": {
         "operationId": "session.messages",