mirror of
https://github.com/sst/opencode.git
synced 2025-12-23 10:11:41 +00:00
feat: display tokens per second for assistant messages
Track the firstToken timestamp when streaming begins and display the tok/s rate next to the response duration for completed text responses.
- Add firstToken timestamp to the AssistantMessage.time schema
- Track first/last output delta timestamps during streaming
- Accumulate tokens instead of overwriting them for multi-part responses
- New utility module with calculateTokensPerSecond validation
- Minimum 250ms elapsed-time threshold to avoid noisy metrics
- Comprehensive test coverage for the token utilities

Closes #5374
This commit is contained in:
parent
974a24ba02
commit
613147cddf
6 changed files with 202 additions and 3 deletions
|
|
@ -58,6 +58,7 @@ import { Sidebar } from "./sidebar"
|
|||
import { LANGUAGE_EXTENSIONS } from "@/lsp/language"
|
||||
import parsers from "../../../../../../parsers-config.ts"
|
||||
import { Clipboard } from "../../util/clipboard"
|
||||
import { calculateTokensPerSecond, isValidForTokensPerSecond, totalGeneratedTokens } from "../../util/tokens"
|
||||
import { Toast, useToast } from "../../ui/toast"
|
||||
import { useKV } from "../../context/kv.tsx"
|
||||
import { Editor } from "../../util/editor"
|
||||
|
|
@ -1096,6 +1097,15 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
|
|||
return props.message.time.completed - user.time.created
|
||||
})
|
||||
|
||||
// Rounded output rate (tokens per second) for this assistant message, or
// undefined when the message does not qualify for a rate.
const tokensPerSecond = createMemo(() => {
  const message = props.message
  if (!isValidForTokensPerSecond(message)) return undefined
  const { completed, firstToken } = message.time
  // The validity check above already rejects missing timestamps; repeating the
  // check here narrows the types without resorting to non-null assertions.
  if (completed === undefined || firstToken === undefined) return undefined
  return calculateTokensPerSecond({
    totalTokens: totalGeneratedTokens(message.tokens),
    elapsedMs: completed - firstToken,
  })
})
|
||||
|
||||
return (
|
||||
<>
|
||||
<For each={props.parts}>
|
||||
|
|
@ -1137,6 +1147,9 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
|
|||
<Show when={duration()}>
|
||||
<span style={{ fg: theme.textMuted }}> · {Locale.duration(duration())}</span>
|
||||
</Show>
|
||||
<Show when={tokensPerSecond() !== undefined}>
|
||||
<span style={{ fg: theme.textMuted }}> · {tokensPerSecond()?.toLocaleString()} tok/s</span>
|
||||
</Show>
|
||||
</text>
|
||||
</box>
|
||||
</Match>
|
||||
|
|
|
|||
127
packages/opencode/src/cli/cmd/tui/util/tokens.test.ts
Normal file
127
packages/opencode/src/cli/cmd/tui/util/tokens.test.ts
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
import { describe, expect, test } from "bun:test"
|
||||
import {
|
||||
MIN_TOKENS_PER_SECOND_ELAPSED_MS,
|
||||
totalGeneratedTokens,
|
||||
isValidForTokensPerSecond,
|
||||
calculateTokensPerSecond,
|
||||
} from "./tokens"
|
||||
|
||||
// totalGeneratedTokens adds the output and reasoning token counts.
describe("totalGeneratedTokens", () => {
  test("sums output and reasoning tokens", () => {
    expect(totalGeneratedTokens({ output: 100, reasoning: 50 })).toBe(150)
  })

  test("handles zero tokens", () => {
    expect(totalGeneratedTokens({ output: 0, reasoning: 0 })).toBe(0)
  })
})
|
||||
|
||||
// isValidForTokensPerSecond gates whether a tok/s figure is shown at all.
describe("isValidForTokensPerSecond", () => {
  // Baseline message that satisfies every requirement; each test overrides one field.
  const validMessage = {
    finish: "stop",
    tokens: { output: 100, reasoning: 50 },
    time: { firstToken: 1000, completed: 2000 },
  }

  test("returns true for valid message", () => {
    expect(isValidForTokensPerSecond(validMessage)).toBe(true)
  })

  test("returns false for summary messages", () => {
    expect(isValidForTokensPerSecond({ ...validMessage, summary: true })).toBe(false)
  })

  test("returns false for tool-calls finish reason", () => {
    expect(isValidForTokensPerSecond({ ...validMessage, finish: "tool-calls" })).toBe(false)
  })

  test("returns false for unknown finish reason", () => {
    expect(isValidForTokensPerSecond({ ...validMessage, finish: "unknown" })).toBe(false)
  })

  test("returns false for null/undefined finish", () => {
    expect(isValidForTokensPerSecond({ ...validMessage, finish: null })).toBe(false)
    expect(isValidForTokensPerSecond({ ...validMessage, finish: undefined })).toBe(false)
  })

  test("returns false for zero tokens", () => {
    expect(
      isValidForTokensPerSecond({
        ...validMessage,
        tokens: { output: 0, reasoning: 0 },
      }),
    ).toBe(false)
  })

  test("returns false for missing timestamps", () => {
    expect(
      isValidForTokensPerSecond({
        ...validMessage,
        time: { firstToken: undefined, completed: 2000 },
      }),
    ).toBe(false)
    expect(
      isValidForTokensPerSecond({
        ...validMessage,
        time: { firstToken: 1000, completed: undefined },
      }),
    ).toBe(false)
  })

  test("returns false for elapsed time below threshold", () => {
    expect(
      isValidForTokensPerSecond({
        ...validMessage,
        time: { firstToken: 1000, completed: 1000 + MIN_TOKENS_PER_SECOND_ELAPSED_MS - 1 },
      }),
    ).toBe(false)
  })

  test("returns true for elapsed time at threshold", () => {
    expect(
      isValidForTokensPerSecond({
        ...validMessage,
        time: { firstToken: 1000, completed: 1000 + MIN_TOKENS_PER_SECOND_ELAPSED_MS },
      }),
    ).toBe(true)
  })
})
|
||||
|
||||
// calculateTokensPerSecond turns a token count and an elapsed window into a rounded rate.
describe("calculateTokensPerSecond", () => {
  test("calculates correct rate", () => {
    expect(calculateTokensPerSecond({ totalTokens: 100, elapsedMs: 1000 })).toBe(100)
    expect(calculateTokensPerSecond({ totalTokens: 50, elapsedMs: 500 })).toBe(100)
    expect(calculateTokensPerSecond({ totalTokens: 150, elapsedMs: 1000 })).toBe(150)
  })

  test("rounds to nearest integer", () => {
    // 100 / 0.333 s = 300.3 tok/s
    expect(calculateTokensPerSecond({ totalTokens: 100, elapsedMs: 333 })).toBe(300)
  })

  test("returns undefined for zero tokens", () => {
    expect(calculateTokensPerSecond({ totalTokens: 0, elapsedMs: 1000 })).toBe(undefined)
  })

  test("returns undefined for elapsed time below default threshold", () => {
    expect(
      calculateTokensPerSecond({
        totalTokens: 100,
        elapsedMs: MIN_TOKENS_PER_SECOND_ELAPSED_MS - 1,
      }),
    ).toBe(undefined)
  })

  test("respects custom minElapsedMs", () => {
    expect(
      calculateTokensPerSecond({
        totalTokens: 100,
        elapsedMs: 100,
        minElapsedMs: 50,
      }),
    ).toBe(1000)
  })

  test("returns undefined for non-finite results", () => {
    // A zero-length window would divide by zero.
    expect(calculateTokensPerSecond({ totalTokens: 100, elapsedMs: 0, minElapsedMs: 0 })).toBe(undefined)
  })
})
|
||||
34
packages/opencode/src/cli/cmd/tui/util/tokens.ts
Normal file
34
packages/opencode/src/cli/cmd/tui/util/tokens.ts
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
/**
 * Minimum elapsed window, in milliseconds, required before a tokens-per-second
 * figure is reported. Shorter windows are rejected to avoid noisy metrics.
 */
export const MIN_TOKENS_PER_SECOND_ELAPSED_MS = 250

/** Finish reasons for which no rate is reported. Hoisted so it is built once. */
const EXCLUDED_FINISH_REASONS: ReadonlySet<string> = new Set(["tool-calls", "unknown"])

/**
 * Total number of tokens the model generated: output plus reasoning tokens.
 *
 * @param tokens Token counters of a message.
 * @returns `tokens.output + tokens.reasoning`.
 */
export function totalGeneratedTokens(tokens: { output: number; reasoning: number }) {
  return tokens.output + tokens.reasoning
}

/**
 * Decides whether a tokens-per-second figure can be reported for a message.
 *
 * A message qualifies when it is not a summary, has a finish reason other than
 * "tool-calls" / "unknown", carries both `firstToken` and `completed`
 * timestamps, and `calculateTokensPerSecond` can produce a rate for it
 * (positive token count, elapsed time at or above the default threshold).
 *
 * @param msg The message fields relevant to the rate.
 * @returns `true` when a rate can be computed and should be shown.
 */
export function isValidForTokensPerSecond(msg: {
  summary?: boolean
  finish?: string | null
  tokens: { output: number; reasoning: number }
  time: { completed?: number; firstToken?: number }
}): boolean {
  if (msg.summary) return false
  if (!msg.finish || EXCLUDED_FINISH_REASONS.has(msg.finish)) return false
  const { completed, firstToken } = msg.time
  if (completed === undefined || firstToken === undefined) return false
  // Delegate the token and threshold checks so this predicate can never
  // disagree with the calculation it guards.
  return (
    calculateTokensPerSecond({
      totalTokens: totalGeneratedTokens(msg.tokens),
      elapsedMs: completed - firstToken,
    }) !== undefined
  )
}

/**
 * Computes a generation rate in tokens per second, rounded to the nearest integer.
 *
 * @param input.totalTokens Number of generated tokens.
 * @param input.elapsedMs Elapsed time in milliseconds.
 * @param input.minElapsedMs Minimum elapsed time required; defaults to
 *   `MIN_TOKENS_PER_SECOND_ELAPSED_MS`.
 * @returns The rounded rate, or `undefined` when there are no tokens, the
 *   elapsed time is non-positive or below the minimum, or the result is not finite.
 */
export function calculateTokensPerSecond(input: {
  totalTokens: number
  elapsedMs: number
  minElapsedMs?: number
}): number | undefined {
  if (input.totalTokens <= 0) return undefined
  const minElapsedMs = input.minElapsedMs ?? MIN_TOKENS_PER_SECOND_ELAPSED_MS
  // A non-positive window can never yield a meaningful rate, even if the caller
  // passes a zero or negative minimum.
  if (input.elapsedMs <= 0 || input.elapsedMs < minElapsedMs) return undefined
  const rate = input.totalTokens / (input.elapsedMs / 1000)
  // Guards against NaN / Infinity inputs.
  if (!Number.isFinite(rate)) return undefined
  return Math.round(rate)
}
|
||||
|
||||
|
|
@ -335,6 +335,7 @@ export namespace MessageV2 {
|
|||
time: z.object({
|
||||
created: z.number(),
|
||||
completed: z.number().optional(),
|
||||
firstToken: z.number().optional(),
|
||||
}),
|
||||
error: z
|
||||
.discriminatedUnion("name", [
|
||||
|
|
|
|||
|
|
@ -39,6 +39,17 @@ export namespace SessionProcessor {
|
|||
let snapshot: string | undefined
|
||||
let blocked = false
|
||||
let attempt = 0
|
||||
let firstOutputDeltaTimestamp: number | undefined
|
||||
let lastOutputDeltaTimestamp: number | undefined
|
||||
|
||||
// Records the timestamp of an output-producing delta. The first call also
// stamps `time.firstToken` on the assistant message; every call advances
// `lastOutputDeltaTimestamp`.
const markOutputDeltaTimestamp = (now: number) => {
  if (firstOutputDeltaTimestamp === undefined) {
    firstOutputDeltaTimestamp = now
    input.assistantMessage.time.firstToken = now
  }
  lastOutputDeltaTimestamp = now
}
|
||||
|
||||
const result = {
|
||||
get message() {
|
||||
|
|
@ -81,6 +92,8 @@ export namespace SessionProcessor {
|
|||
|
||||
case "reasoning-delta":
|
||||
if (value.id in reasoningMap) {
|
||||
const now = Date.now()
|
||||
markOutputDeltaTimestamp(now)
|
||||
const part = reasoningMap[value.id]
|
||||
part.text += value.text
|
||||
if (value.providerMetadata) part.metadata = value.providerMetadata
|
||||
|
|
@ -120,13 +133,17 @@ export namespace SessionProcessor {
|
|||
toolcalls[value.id] = part as MessageV2.ToolPart
|
||||
break
|
||||
|
||||
case "tool-input-delta":
|
||||
case "tool-input-delta": {
|
||||
const now = Date.now()
|
||||
markOutputDeltaTimestamp(now)
|
||||
break
|
||||
}
|
||||
|
||||
case "tool-input-end":
|
||||
break
|
||||
|
||||
case "tool-call": {
|
||||
markOutputDeltaTimestamp(Date.now())
|
||||
const match = toolcalls[value.toolCallId]
|
||||
if (match) {
|
||||
const part = await Session.updatePart({
|
||||
|
|
@ -256,7 +273,11 @@ export namespace SessionProcessor {
|
|||
})
|
||||
input.assistantMessage.finish = value.finishReason
|
||||
input.assistantMessage.cost += usage.cost
|
||||
input.assistantMessage.tokens = usage.tokens
|
||||
input.assistantMessage.tokens.input += usage.tokens.input
|
||||
input.assistantMessage.tokens.output += usage.tokens.output
|
||||
input.assistantMessage.tokens.reasoning += usage.tokens.reasoning
|
||||
input.assistantMessage.tokens.cache.read += usage.tokens.cache.read
|
||||
input.assistantMessage.tokens.cache.write += usage.tokens.cache.write
|
||||
await Session.updatePart({
|
||||
id: Identifier.ascending("part"),
|
||||
reason: value.finishReason,
|
||||
|
|
@ -304,6 +325,8 @@ export namespace SessionProcessor {
|
|||
|
||||
case "text-delta":
|
||||
if (currentText) {
|
||||
const now = Date.now()
|
||||
markOutputDeltaTimestamp(now)
|
||||
currentText.text += value.text
|
||||
if (value.providerMetadata) currentText.metadata = value.providerMetadata
|
||||
if (currentText.text)
|
||||
|
|
@ -389,7 +412,7 @@ export namespace SessionProcessor {
|
|||
})
|
||||
}
|
||||
}
|
||||
input.assistantMessage.time.completed = Date.now()
|
||||
input.assistantMessage.time.completed = lastOutputDeltaTimestamp ?? Date.now()
|
||||
await Session.updateMessage(input.assistantMessage)
|
||||
if (blocked) return "stop"
|
||||
if (input.assistantMessage.error) return "stop"
|
||||
|
|
|
|||
|
|
@ -141,6 +141,7 @@ export type AssistantMessage = {
|
|||
time: {
|
||||
created: number
|
||||
completed?: number
|
||||
firstToken?: number
|
||||
}
|
||||
error?: ProviderAuthError | UnknownError | MessageOutputLengthError | MessageAbortedError | ApiError
|
||||
parentID: string
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue