feat: display tokens per second for assistant messages

Track firstToken timestamp when streaming begins and display tok/s rate
next to response duration for completed text responses.

- Add firstToken timestamp to AssistantMessage.time schema
- Track first/last output delta timestamps during streaming
- Accumulate tokens instead of overwriting for multi-part responses
- New utility module with calculateTokensPerSecond validation
- Minimum 250ms elapsed time threshold to avoid noisy metrics
- Comprehensive test coverage for token utilities

Closes #5374
This commit is contained in:
edlsh 2025-12-13 19:14:17 -05:00
parent 974a24ba02
commit 613147cddf
6 changed files with 202 additions and 3 deletions

View file

@ -58,6 +58,7 @@ import { Sidebar } from "./sidebar"
import { LANGUAGE_EXTENSIONS } from "@/lsp/language"
import parsers from "../../../../../../parsers-config.ts"
import { Clipboard } from "../../util/clipboard"
import { calculateTokensPerSecond, isValidForTokensPerSecond, totalGeneratedTokens } from "../../util/tokens"
import { Toast, useToast } from "../../ui/toast"
import { useKV } from "../../context/kv.tsx"
import { Editor } from "../../util/editor"
@ -1096,6 +1097,15 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
return props.message.time.completed - user.time.created
})
// Generation rate for this message in tokens per second. Resolves to undefined
// when the message fails isValidForTokensPerSecond or no rate can be computed.
const tokensPerSecond = createMemo(() => {
  const message = props.message
  if (isValidForTokensPerSecond(message)) {
    // The validity check above has already rejected messages missing either
    // timestamp, which is what makes the non-null assertions safe here.
    const streamedForMs = message.time.completed! - message.time.firstToken!
    const generated = totalGeneratedTokens(message.tokens)
    return calculateTokensPerSecond({ totalTokens: generated, elapsedMs: streamedForMs })
  }
  return undefined
})
return (
<>
<For each={props.parts}>
@ -1137,6 +1147,9 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
<Show when={duration()}>
<span style={{ fg: theme.textMuted }}> · {Locale.duration(duration())}</span>
</Show>
<Show when={tokensPerSecond() !== undefined}>
<span style={{ fg: theme.textMuted }}> · {tokensPerSecond()?.toLocaleString()} tok/s</span>
</Show>
</text>
</box>
</Match>

View file

@ -0,0 +1,127 @@
import { describe, expect, test } from "bun:test"
import {
MIN_TOKENS_PER_SECOND_ELAPSED_MS,
totalGeneratedTokens,
isValidForTokensPerSecond,
calculateTokensPerSecond,
} from "./tokens"
// Verifies that the generated-token total is the sum of output and reasoning counts.
describe("totalGeneratedTokens", () => {
  test("sums output and reasoning tokens", () => {
    const total = totalGeneratedTokens({ output: 100, reasoning: 50 })
    expect(total).toBe(150)
  })

  test("handles zero tokens", () => {
    const total = totalGeneratedTokens({ output: 0, reasoning: 0 })
    expect(total).toBe(0)
  })
})
// Covers each rejection rule of isValidForTokensPerSecond plus the accepting cases.
describe("isValidForTokensPerSecond", () => {
  // Baseline message that passes every check; each test changes one aspect of it.
  const validMessage = {
    finish: "stop",
    tokens: { output: 100, reasoning: 50 },
    time: { firstToken: 1000, completed: 2000 },
  }
  const startedAt = validMessage.time.firstToken

  // Builders returning a copy of the baseline with a single field replaced.
  const withFinish = (finish: string | null | undefined) => ({ ...validMessage, finish })
  const withTime = (time: { firstToken?: number; completed?: number }) => ({ ...validMessage, time })

  test("returns true for valid message", () => {
    expect(isValidForTokensPerSecond(validMessage)).toBe(true)
  })

  test("returns false for summary messages", () => {
    const summaryMessage = { ...validMessage, summary: true }
    expect(isValidForTokensPerSecond(summaryMessage)).toBe(false)
  })

  test("returns false for tool-calls finish reason", () => {
    expect(isValidForTokensPerSecond(withFinish("tool-calls"))).toBe(false)
  })

  test("returns false for unknown finish reason", () => {
    expect(isValidForTokensPerSecond(withFinish("unknown"))).toBe(false)
  })

  test("returns false for null/undefined finish", () => {
    for (const finish of [null, undefined]) {
      expect(isValidForTokensPerSecond(withFinish(finish))).toBe(false)
    }
  })

  test("returns false for zero tokens", () => {
    const withoutTokens = { ...validMessage, tokens: { output: 0, reasoning: 0 } }
    expect(isValidForTokensPerSecond(withoutTokens)).toBe(false)
  })

  test("returns false for missing timestamps", () => {
    const incompleteTimes = [
      { firstToken: undefined, completed: 2000 },
      { firstToken: 1000, completed: undefined },
    ]
    for (const time of incompleteTimes) {
      expect(isValidForTokensPerSecond(withTime(time))).toBe(false)
    }
  })

  test("returns false for elapsed time below threshold", () => {
    // One millisecond short of the minimum elapsed time.
    const completed = startedAt + MIN_TOKENS_PER_SECOND_ELAPSED_MS - 1
    expect(isValidForTokensPerSecond(withTime({ firstToken: startedAt, completed }))).toBe(false)
  })

  test("returns true for elapsed time at threshold", () => {
    // Exactly the minimum elapsed time is accepted.
    const completed = startedAt + MIN_TOKENS_PER_SECOND_ELAPSED_MS
    expect(isValidForTokensPerSecond(withTime({ firstToken: startedAt, completed }))).toBe(true)
  })
})
// Covers rate arithmetic, rounding, and every condition that yields undefined.
describe("calculateTokensPerSecond", () => {
  test("calculates correct rate", () => {
    const samples = [
      { totalTokens: 100, elapsedMs: 1000, rate: 100 },
      { totalTokens: 50, elapsedMs: 500, rate: 100 },
      { totalTokens: 150, elapsedMs: 1000, rate: 150 },
    ]
    for (const { totalTokens, elapsedMs, rate } of samples) {
      expect(calculateTokensPerSecond({ totalTokens, elapsedMs })).toBe(rate)
    }
  })

  test("rounds to nearest integer", () => {
    // 100 tokens over 333ms is roughly 300.3 tok/s before rounding.
    const rate = calculateTokensPerSecond({ totalTokens: 100, elapsedMs: 333 })
    expect(rate).toBe(300)
  })

  test("returns undefined for zero tokens", () => {
    const rate = calculateTokensPerSecond({ totalTokens: 0, elapsedMs: 1000 })
    expect(rate).toBe(undefined)
  })

  test("returns undefined for elapsed time below default threshold", () => {
    const justTooShort = MIN_TOKENS_PER_SECOND_ELAPSED_MS - 1
    const rate = calculateTokensPerSecond({ totalTokens: 100, elapsedMs: justTooShort })
    expect(rate).toBe(undefined)
  })

  test("respects custom minElapsedMs", () => {
    const rate = calculateTokensPerSecond({ totalTokens: 100, elapsedMs: 100, minElapsedMs: 50 })
    expect(rate).toBe(1000)
  })

  test("returns undefined for non-finite results", () => {
    // A zero threshold lets a zero elapsed time through, so the division is not finite.
    const rate = calculateTokensPerSecond({ totalTokens: 100, elapsedMs: 0, minElapsedMs: 0 })
    expect(rate).toBe(undefined)
  })
})

View file

@ -0,0 +1,34 @@
/**
 * Minimum elapsed time, in milliseconds, required before a tokens-per-second
 * rate is reported. Shorter spans are rejected by `isValidForTokensPerSecond`,
 * and by `calculateTokensPerSecond` unless the caller supplies its own minimum.
 */
export const MIN_TOKENS_PER_SECOND_ELAPSED_MS = 250
/**
 * Adds the output and reasoning token counts of a response.
 *
 * @param tokens - Token usage carrying `output` and `reasoning` counts.
 * @returns The sum of the two counts.
 */
export function totalGeneratedTokens(tokens: { output: number; reasoning: number }) {
  const { output, reasoning } = tokens
  return output + reasoning
}
/**
 * Decides whether a message carries enough information for a meaningful
 * tokens-per-second figure.
 *
 * A message is rejected when any of the following holds:
 * - it is flagged as a summary;
 * - its finish reason is missing, empty, `"tool-calls"` or `"unknown"`;
 * - its generated-token total (output + reasoning) is not positive;
 * - either the `completed` or the `firstToken` timestamp is undefined;
 * - fewer than `MIN_TOKENS_PER_SECOND_ELAPSED_MS` milliseconds separate the two.
 *
 * @param msg - The message fields needed for the check.
 * @returns `true` only when none of the rejection rules apply.
 */
export function isValidForTokensPerSecond(msg: {
  summary?: boolean
  finish?: string | null
  tokens: { output: number; reasoning: number }
  time: { completed?: number; firstToken?: number }
}): boolean {
  if (msg.summary) return false

  const finish = msg.finish
  if (!finish) return false
  if (finish === "tool-calls" || finish === "unknown") return false

  if (totalGeneratedTokens(msg.tokens) <= 0) return false

  const { completed, firstToken } = msg.time
  if (completed === undefined || firstToken === undefined) return false

  return completed - firstToken >= MIN_TOKENS_PER_SECOND_ELAPSED_MS
}
/**
 * Turns a token count and an elapsed duration into a whole-number
 * tokens-per-second rate.
 *
 * @param input.totalTokens - Number of tokens produced; must be positive to yield a rate.
 * @param input.elapsedMs - Time taken to produce them, in milliseconds.
 * @param input.minElapsedMs - Smallest elapsed time accepted; falls back to
 *   `MIN_TOKENS_PER_SECOND_ELAPSED_MS` when omitted.
 * @returns The rate rounded to the nearest integer, or `undefined` when there
 *   are no tokens, the elapsed time is under the minimum, or the computed
 *   rate is not a finite number.
 */
export function calculateTokensPerSecond(input: {
  totalTokens: number
  elapsedMs: number
  minElapsedMs?: number
}): number | undefined {
  const { totalTokens, elapsedMs } = input
  const threshold = input.minElapsedMs ?? MIN_TOKENS_PER_SECOND_ELAPSED_MS
  if (totalTokens <= 0 || elapsedMs < threshold) return undefined

  // A zero elapsed time (possible with a zero threshold) divides to Infinity,
  // which is filtered out below rather than returned.
  const perSecond = totalTokens / (elapsedMs / 1000)
  return Number.isFinite(perSecond) ? Math.round(perSecond) : undefined
}

View file

@ -335,6 +335,7 @@ export namespace MessageV2 {
time: z.object({
created: z.number(),
completed: z.number().optional(),
firstToken: z.number().optional(),
}),
error: z
.discriminatedUnion("name", [

View file

@ -39,6 +39,17 @@ export namespace SessionProcessor {
let snapshot: string | undefined
let blocked = false
let attempt = 0
// Timestamps of the earliest and the most recent output-producing deltas seen
// so far; both stay undefined until the first such delta is recorded.
let firstOutputDeltaTimestamp: number | undefined
let lastOutputDeltaTimestamp: number | undefined

// Records that an output-producing delta arrived at `now`. Every call updates
// the "last delta" timestamp; only the first call also sets the "first delta"
// timestamp and copies it onto the assistant message as `time.firstToken`.
const markOutputDeltaTimestamp = (now: number) => {
  lastOutputDeltaTimestamp = now
  if (firstOutputDeltaTimestamp !== undefined) return
  firstOutputDeltaTimestamp = now
  input.assistantMessage.time.firstToken = now
}
const result = {
get message() {
@ -81,6 +92,8 @@ export namespace SessionProcessor {
case "reasoning-delta":
if (value.id in reasoningMap) {
const now = Date.now()
markOutputDeltaTimestamp(now)
const part = reasoningMap[value.id]
part.text += value.text
if (value.providerMetadata) part.metadata = value.providerMetadata
@ -120,13 +133,17 @@ export namespace SessionProcessor {
toolcalls[value.id] = part as MessageV2.ToolPart
break
case "tool-input-delta":
case "tool-input-delta": {
const now = Date.now()
markOutputDeltaTimestamp(now)
break
}
case "tool-input-end":
break
case "tool-call": {
markOutputDeltaTimestamp(Date.now())
const match = toolcalls[value.toolCallId]
if (match) {
const part = await Session.updatePart({
@ -256,7 +273,11 @@ export namespace SessionProcessor {
})
input.assistantMessage.finish = value.finishReason
input.assistantMessage.cost += usage.cost
input.assistantMessage.tokens = usage.tokens
input.assistantMessage.tokens.input += usage.tokens.input
input.assistantMessage.tokens.output += usage.tokens.output
input.assistantMessage.tokens.reasoning += usage.tokens.reasoning
input.assistantMessage.tokens.cache.read += usage.tokens.cache.read
input.assistantMessage.tokens.cache.write += usage.tokens.cache.write
await Session.updatePart({
id: Identifier.ascending("part"),
reason: value.finishReason,
@ -304,6 +325,8 @@ export namespace SessionProcessor {
case "text-delta":
if (currentText) {
const now = Date.now()
markOutputDeltaTimestamp(now)
currentText.text += value.text
if (value.providerMetadata) currentText.metadata = value.providerMetadata
if (currentText.text)
@ -389,7 +412,7 @@ export namespace SessionProcessor {
})
}
}
input.assistantMessage.time.completed = Date.now()
input.assistantMessage.time.completed = lastOutputDeltaTimestamp ?? Date.now()
await Session.updateMessage(input.assistantMessage)
if (blocked) return "stop"
if (input.assistantMessage.error) return "stop"

View file

@ -141,6 +141,7 @@ export type AssistantMessage = {
time: {
created: number
completed?: number
firstToken?: number
}
error?: ProviderAuthError | UnknownError | MessageOutputLengthError | MessageAbortedError | ApiError
parentID: string