feat: /knowledge to extract knowledge files

Adds a /knowledge command that spawns a sub-agent to collect any new "knowledge"
from the session and format it neatly into Markdown docs in ./.opencode/knowledge/.

This is particularly useful both as human-readable documentation for getting
familiar with a codebase and as an extended version of AGENTS.md that does not
pollute the context.

A follow-on commit adds a system to automatically load files from the knowledge
directory according to topics mentioned in the conversation.

Try running /knowledge to create the sub-agent.

Signed-off-by: Christian Stewart <christian@aperture.us>
This commit is contained in:
Christian Stewart 2025-12-07 02:26:28 -08:00
parent efac8cebb3
commit 46671ecdc3
8 changed files with 383 additions and 1 deletions

View file

@ -187,6 +187,96 @@ export namespace Agent {
prompt: PROMPT_SUMMARY,
tools: {},
},
"knowledge-extractor": {
name: "knowledge-extractor",
description: "Extracts reusable knowledge from sessions into markdown files",
tools: {
read: true,
write: true,
edit: true,
glob: true,
grep: true,
list: true,
bash: true,
webfetch: false,
task: false,
todoread: false,
todowrite: false,
websearch: false,
codesearch: false,
},
prompt: [
`You are a knowledge extraction specialist. Your role is to preserve valuable planning and insights from session transcripts.`,
``,
`## Context`,
`Sessions often contain rich planning content: architectural decisions, clarifying questions with answers, research findings, and reasoning. This content is already well-structured by the assistant in plan() mode.`,
``,
`## Your Process`,
`1. Read the session transcript to assess if it contains valuable knowledge`,
`2. If worth preserving, use \`cp\` to copy the transcript to knowledge/ with a descriptive name`,
`3. Check existing knowledge files - if an exact topic match exists, append there instead`,
`4. Use Edit to delete noise: tool outputs, debugging, session metadata, ephemeral details`,
`5. Use Edit to add YAML frontmatter`,
`6. Make minimal edits for flow`,
`7. Return a summary of files created/updated`,
``,
`## IMPORTANT: Use the Bash Tool with cp`,
`ALWAYS use the Bash tool to run \`cp <transcript> <knowledge-dir>/<name>.md\` first.`,
`This saves tokens vs using the Write tool to output the entire file contents.`,
`Then use Edit to remove noise and add frontmatter.`,
``,
`## Key Principle: Preserve, Don't Rewrite`,
`The session transcript often contains beautifully structured plans and reasoning. Your job is to:`,
`- Copy the file with Bash cp command (NOT Write)`,
`- Delete the noise with Edit`,
`- Keep valuable content largely verbatim`,
`- Make minimal edits for flow`,
``,
`Do NOT rewrite or heavily summarize good content. The assistant already did the hard work.`,
``,
`## What to Keep`,
`- Architectural decisions and rationale`,
`- Planning sections with reasoning`,
`- Clarifying questions and answers`,
`- Non-obvious patterns and conventions`,
`- Bug root causes and prevention strategies`,
`- Gotchas that would otherwise be re-discovered`,
``,
`## What to Delete`,
`- Raw tool outputs (file contents, grep results, etc.)`,
`- Debugging back-and-forth that led nowhere`,
`- Session-specific implementation logs`,
`- Generic programming knowledge`,
`- Ephemeral details`,
`- Session metadata header (ID, timestamps)`,
``,
`## When to Skip Entirely`,
`If the session has no planning/decision content worth preserving, respond "No knowledge"`,
``,
`## File Format`,
`After copying, use Edit to add YAML frontmatter at the top:`,
`\`\`\`markdown`,
`---`,
`created: YYYY-MM-DD`,
`source_sessions:`,
` - <session-id>`,
`---`,
`\`\`\``,
``,
`## Naming Convention`,
`Use descriptive kebab-case: draggable-tabs-design.md, api-patterns.md, auth-flow.md`,
].join("\n"),
options: {},
permission: {
edit: "allow",
bash: { "cp *": "allow", "*": "deny" },
webfetch: "deny",
doom_loop: "deny",
external_directory: "ask",
},
mode: "subagent",
native: true,
},
}
for (const [key, value] of Object.entries(cfg.agent ?? {})) {
if (value.disable) {

View file

@ -240,6 +240,11 @@ export function Autocomplete(props: {
description: "compact the session",
onSelect: () => command.trigger("session.compact"),
},
{
// Slash-command entry: choosing "/knowledge" fires the "session.knowledge" command.
display: "/knowledge",
description: "extract knowledge from conversation",
onSelect: () => command.trigger("session.knowledge"),
},
{
display: "/unshare",
disabled: !s.share,

View file

@ -317,6 +317,29 @@ export function Session() {
dialog.clear()
},
},
{
  // "Extract knowledge" command: asks the server to run knowledge extraction for
  // the current session with the currently selected model. When no model is
  // selected it only shows a warning toast and leaves the dialog open.
  title: "Extract knowledge",
  value: "session.knowledge",
  keybind: "session_knowledge",
  category: "Session",
  onSelect: (dialog) => {
    const model = local.model.current()
    if (model) {
      // The request is fired without awaiting; the dialog closes immediately.
      sdk.client.session.extractKnowledge({
        id: route.sessionID,
        modelID: model.modelID,
        providerID: model.providerID,
      })
      dialog.clear()
      return
    }
    toast.show({
      variant: "warning",
      message: "Connect a provider to extract knowledge",
      duration: 3000,
    })
  },
},
{
title: "Unshare session",
value: "session.unshare",

View file

@ -2,6 +2,9 @@ import { BusEvent } from "@/bus/bus-event"
import { Bus } from "@/bus"
import { GlobalBus } from "@/bus/global"
import { Log } from "../util/log"
import path from "path"
import fs from "fs/promises"
import { Identifier } from "../id/id"
import { describeRoute, generateSpecs, validator, resolver, openAPIRouteHandler } from "hono-openapi"
import { Hono } from "hono"
import { cors } from "hono/cors"
@ -1068,6 +1071,105 @@ export namespace Server {
return c.json(true)
},
)
// POST /session/:id/extract-knowledge
// Writes a markdown transcript of the session to <project>/.opencode/sess/<id>.md,
// makes sure <project>/.opencode/knowledge exists, then queues a "subtask" part for
// the "knowledge-extractor" agent on a new user message and awaits
// SessionPrompt.loop before answering `true`.
.post(
  "/session/:id/extract-knowledge",
  describeRoute({
    description: "Extract knowledge from the session",
    operationId: "session.extractKnowledge",
    responses: {
      200: {
        description: "Knowledge extraction initiated",
        content: {
          "application/json": { schema: resolver(z.boolean()) },
        },
      },
      ...errors(400, 404),
    },
  }),
  validator("param", z.object({ id: z.string().meta({ description: "Session ID" }) })),
  validator("json", z.object({ providerID: z.string(), modelID: z.string() })),
  async (c) => {
    const { id } = c.req.valid("param")
    const { providerID, modelID } = c.req.valid("json")
    const msgs = await Session.messages({ sessionID: id })

    // Attribute the new message to the agent of the most recent user message;
    // "build" is the fallback when there is none or its agent is empty.
    let currentAgent = "build"
    for (let idx = msgs.length; idx-- > 0; ) {
      const { info } = msgs[idx]
      if (info.role !== "user") continue
      currentAgent = info.agent || "build"
      break
    }

    const session = await Session.get(id)
    const sessDir = path.join(Instance.directory, ".opencode", "sess")
    await fs.mkdir(sessDir, { recursive: true })
    const transcriptPath = path.join(sessDir, `${id}.md`)

    // Transcript = metadata header, then one "## User"/"## Assistant" section per
    // message. Only non-synthetic text parts and the names of completed tool
    // calls are included.
    const chunks = [
      `# ${session.title}\n\n`,
      `**Session ID:** ${session.id}\n`,
      `**Created:** ${new Date(session.time.created).toLocaleString()}\n\n---\n\n`,
    ]
    for (const { info, parts } of msgs) {
      chunks.push(`## ${info.role === "user" ? "User" : "Assistant"}\n\n`)
      for (const part of parts) {
        switch (part.type) {
          case "text":
            if (!part.synthetic) chunks.push(`${part.text}\n\n`)
            break
          case "tool":
            if (part.state.status === "completed") chunks.push(`\`\`\`\nTool: ${part.tool}\n\`\`\`\n\n`)
            break
        }
      }
      chunks.push(`---\n\n`)
    }
    await Bun.write(transcriptPath, chunks.join(""))

    const knowledgeDir = path.join(Instance.directory, ".opencode", "knowledge")
    await fs.mkdir(knowledgeDir, { recursive: true })

    // The sub-agent only receives these three facts; its instructions live in
    // the agent's own prompt.
    const prompt = `Session transcript: ${transcriptPath}\nKnowledge directory: ${knowledgeDir}\nSession ID: ${id}`

    const msg = await Session.updateMessage({
      id: Identifier.ascending("message"),
      role: "user",
      model: { providerID, modelID },
      sessionID: id,
      agent: currentAgent,
      time: { created: Date.now() },
    })
    await Session.updatePart({
      id: Identifier.ascending("part"),
      messageID: msg.id,
      sessionID: msg.sessionID,
      type: "subtask",
      prompt,
      description: "Extract knowledge",
      agent: "knowledge-extractor",
    })
    // The response is sent only after the loop call resolves.
    await SessionPrompt.loop(id)
    return c.json(true)
  },
)
.get(
"/session/:sessionID/message",
describeRoute({

View file

@ -391,7 +391,7 @@ export namespace SessionPrompt {
start: part.state.status === "running" ? part.state.time.start : Date.now(),
end: Date.now(),
},
metadata: part.metadata,
metadata: part.state.status === "running" ? part.state.metadata : undefined,
input: part.state.input,
},
} satisfies MessageV2.ToolPart)

View file

@ -83,6 +83,8 @@ import type {
SessionDeleteResponses,
SessionDiffErrors,
SessionDiffResponses,
SessionExtractKnowledgeErrors,
SessionExtractKnowledgeResponses,
SessionForkResponses,
SessionGetErrors,
SessionGetResponses,
@ -1155,6 +1157,47 @@ export class Session extends HeyApiClient {
})
}
/**
 * Extract knowledge from the session
 *
 * Issues `POST /session/{id}/extract-knowledge`. `id` is sent as the path
 * parameter, `directory` as a query parameter, and `providerID` / `modelID`
 * in the JSON body.
 */
public extractKnowledge<ThrowOnError extends boolean = false>(
  parameters: {
    id: string
    directory?: string
    providerID?: string
    modelID?: string
  },
  options?: Options<never, ThrowOnError>,
) {
  // Route each flat argument to its place in the request (path / query / body).
  const built = buildClientParams(
    [parameters],
    [
      {
        args: [
          { in: "path", key: "id" },
          { in: "query", key: "directory" },
          { in: "body", key: "providerID" },
          { in: "body", key: "modelID" },
        ],
      },
    ],
  )
  // A per-call client override wins over the instance's own client.
  const transport = options?.client ?? this.client
  return transport.post<SessionExtractKnowledgeResponses, SessionExtractKnowledgeErrors, ThrowOnError>({
    url: "/session/{id}/extract-knowledge",
    ...options,
    ...built,
    headers: {
      "Content-Type": "application/json",
      ...options?.headers,
      ...built.headers,
    },
  })
}
/**
* Get session messages
*

View file

@ -2766,6 +2766,45 @@ export type SessionSummarizeResponses = {
export type SessionSummarizeResponse = SessionSummarizeResponses[keyof SessionSummarizeResponses]
/**
 * Request shape for `POST /session/{id}/extract-knowledge`: a JSON body with the
 * provider and model IDs, the session ID as a path parameter, and an optional
 * `directory` query parameter.
 */
export type SessionExtractKnowledgeData = {
body?: {
providerID: string
modelID: string
}
path: {
/**
 * Session ID
 */
id: string
}
query?: {
directory?: string
}
url: "/session/{id}/extract-knowledge"
}
/**
 * Error payloads of the extract-knowledge endpoint, keyed by HTTP status code.
 */
export type SessionExtractKnowledgeErrors = {
/**
 * Bad request
 */
400: BadRequestError
/**
 * Not found
 */
404: NotFoundError
}
/**
 * Union of every error payload in `SessionExtractKnowledgeErrors`.
 */
export type SessionExtractKnowledgeError = SessionExtractKnowledgeErrors[keyof SessionExtractKnowledgeErrors]
/**
 * Success payloads of the extract-knowledge endpoint, keyed by HTTP status code.
 */
export type SessionExtractKnowledgeResponses = {
/**
 * Knowledge extraction initiated
 */
200: boolean
}
/**
 * Union of every success payload in `SessionExtractKnowledgeResponses`.
 */
export type SessionExtractKnowledgeResponse = SessionExtractKnowledgeResponses[keyof SessionExtractKnowledgeResponses]
export type SessionMessagesData = {
body?: never
path: {

View file

@ -1819,6 +1819,86 @@
]
}
},
"/session/{id}/extract-knowledge": {
"post": {
"operationId": "session.extractKnowledge",
"parameters": [
{
"in": "query",
"name": "directory",
"schema": {
"type": "string"
}
},
{
"in": "path",
"name": "id",
"schema": {
"type": "string"
},
"required": true,
"description": "Session ID"
}
],
"description": "Extract knowledge from the session",
"responses": {
"200": {
"description": "Knowledge extraction initiated",
"content": {
"application/json": {
"schema": {
"type": "boolean"
}
}
}
},
"400": {
"description": "Bad request",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/BadRequestError"
}
}
}
},
"404": {
"description": "Not found",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/NotFoundError"
}
}
}
}
},
"requestBody": {
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"providerID": {
"type": "string"
},
"modelID": {
"type": "string"
}
},
"required": ["providerID", "modelID"]
}
}
}
},
"x-codeSamples": [
{
"lang": "js",
"source": "import { createOpencodeClient } from \"@opencode-ai/sdk\n\nconst client = createOpencodeClient()\nawait client.session.extractKnowledge({\n ...\n})"
}
]
}
},
"/session/{sessionID}/message": {
"get": {
"operationId": "session.messages",