feat: display tokens per second for assistant messages

Track the firstToken timestamp when streaming begins and display the tok/s rate next to the response duration for completed text responses.

- Add firstToken timestamp to AssistantMessage.time schema
- Track first/last output delta timestamps during streaming
- Accumulate tokens instead of overwriting for multi-part responses
- New utility module with calculateTokensPerSecond validation
- Minimum 250ms elapsed time threshold to avoid noisy metrics
- Comprehensive test coverage for token utilities

Closes #5374
2025-12-23 10:11:41 +00:00 · 2025-12-13 19:14:17 -05:00 · 2025-12-13 19:14:17 -05:00 · 613147cddf
commit 613147cddf
parent 974a24ba02
6 changed files with 202 additions and 3 deletions
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@ -58,6 +58,7 @@ import { Sidebar } from "./sidebar"
 import { LANGUAGE_EXTENSIONS } from "@/lsp/language"
 import parsers from "../../../../../../parsers-config.ts"
 import { Clipboard } from "../../util/clipboard"
+import { calculateTokensPerSecond, isValidForTokensPerSecond, totalGeneratedTokens } from "../../util/tokens"
 import { Toast, useToast } from "../../ui/toast"
 import { useKV } from "../../context/kv.tsx"
 import { Editor } from "../../util/editor"
@ -1096,6 +1097,15 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
    return props.message.time.completed - user.time.created
  })

+  const tokensPerSecond = createMemo(() => { // tok/s for this message, or undefined when no rate should be shown
+    if (!isValidForTokensPerSecond(props.message)) return undefined
+    const elapsedMs = props.message.time.completed! - props.message.time.firstToken! // both timestamps were verified as defined by isValidForTokensPerSecond above
+    return calculateTokensPerSecond({
+      totalTokens: totalGeneratedTokens(props.message.tokens), // output + reasoning tokens
+      elapsedMs,
+    })
+  })
+
  return (
    <>
      <For each={props.parts}>
@ -1137,6 +1147,9 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
              <Show when={duration()}>
                <span style={{ fg: theme.textMuted }}> · {Locale.duration(duration())}</span>
              </Show>
+              <Show when={tokensPerSecond() !== undefined}>
+                <span style={{ fg: theme.textMuted }}> · {tokensPerSecond()?.toLocaleString()} tok/s</span>
+              </Show>
            </text>
          </box>
        </Match>
--- a/packages/opencode/src/cli/cmd/tui/util/tokens.test.ts
+++ b/packages/opencode/src/cli/cmd/tui/util/tokens.test.ts
@ -0,0 +1,127 @@
+import { describe, expect, test } from "bun:test"
+import {
+  MIN_TOKENS_PER_SECOND_ELAPSED_MS,
+  totalGeneratedTokens,
+  isValidForTokensPerSecond,
+  calculateTokensPerSecond,
+} from "./tokens"
+
+describe("totalGeneratedTokens", () => { // the total is the sum of the output and reasoning counts
+  test("sums output and reasoning tokens", () => {
+    expect(totalGeneratedTokens({ output: 100, reasoning: 50 })).toBe(150)
+  })
+
+  test("handles zero tokens", () => {
+    expect(totalGeneratedTokens({ output: 0, reasoning: 0 })).toBe(0)
+  })
+})
+
+describe("isValidForTokensPerSecond", () => { // each case below changes one field of validMessage and expects rejection, except the threshold boundary case
+  const validMessage = { // baseline that passes every check: 150 generated tokens over 1000 ms, finish "stop"
+    finish: "stop",
+    tokens: { output: 100, reasoning: 50 },
+    time: { firstToken: 1000, completed: 2000 },
+  }
+
+  test("returns true for valid message", () => {
+    expect(isValidForTokensPerSecond(validMessage)).toBe(true)
+  })
+
+  test("returns false for summary messages", () => {
+    expect(isValidForTokensPerSecond({ ...validMessage, summary: true })).toBe(false)
+  })
+
+  test("returns false for tool-calls finish reason", () => {
+    expect(isValidForTokensPerSecond({ ...validMessage, finish: "tool-calls" })).toBe(false)
+  })
+
+  test("returns false for unknown finish reason", () => {
+    expect(isValidForTokensPerSecond({ ...validMessage, finish: "unknown" })).toBe(false)
+  })
+
+  test("returns false for null/undefined finish", () => {
+    expect(isValidForTokensPerSecond({ ...validMessage, finish: null })).toBe(false)
+    expect(isValidForTokensPerSecond({ ...validMessage, finish: undefined })).toBe(false)
+  })
+
+  test("returns false for zero tokens", () => {
+    expect(
+      isValidForTokensPerSecond({
+        ...validMessage,
+        tokens: { output: 0, reasoning: 0 },
+      }),
+    ).toBe(false)
+  })
+
+  test("returns false for missing timestamps", () => { // either timestamp missing is enough to reject
+    expect(
+      isValidForTokensPerSecond({
+        ...validMessage,
+        time: { firstToken: undefined, completed: 2000 },
+      }),
+    ).toBe(false)
+    expect(
+      isValidForTokensPerSecond({
+        ...validMessage,
+        time: { firstToken: 1000, completed: undefined },
+      }),
+    ).toBe(false)
+  })
+
+  test("returns false for elapsed time below threshold", () => {
+    expect(
+      isValidForTokensPerSecond({
+        ...validMessage,
+        time: { firstToken: 1000, completed: 1000 + MIN_TOKENS_PER_SECOND_ELAPSED_MS - 1 }, // one millisecond short of the threshold
+      }),
+    ).toBe(false)
+  })
+
+  test("returns true for elapsed time at threshold", () => {
+    expect(
+      isValidForTokensPerSecond({
+        ...validMessage,
+        time: { firstToken: 1000, completed: 1000 + MIN_TOKENS_PER_SECOND_ELAPSED_MS }, // exactly at the threshold, which is inclusive
+      }),
+    ).toBe(true)
+  })
+})
+
+describe("calculateTokensPerSecond", () => { // covers the rate arithmetic, rounding, and every path that yields undefined
+  test("calculates correct rate", () => {
+    expect(calculateTokensPerSecond({ totalTokens: 100, elapsedMs: 1000 })).toBe(100)
+    expect(calculateTokensPerSecond({ totalTokens: 50, elapsedMs: 500 })).toBe(100)
+    expect(calculateTokensPerSecond({ totalTokens: 150, elapsedMs: 1000 })).toBe(150)
+  })
+
+  test("rounds to nearest integer", () => {
+    expect(calculateTokensPerSecond({ totalTokens: 100, elapsedMs: 333 })).toBe(300) // 100 / 0.333 s is about 300.3, which rounds to 300
+  })
+
+  test("returns undefined for zero tokens", () => {
+    expect(calculateTokensPerSecond({ totalTokens: 0, elapsedMs: 1000 })).toBe(undefined)
+  })
+
+  test("returns undefined for elapsed time below default threshold", () => {
+    expect(
+      calculateTokensPerSecond({
+        totalTokens: 100,
+        elapsedMs: MIN_TOKENS_PER_SECOND_ELAPSED_MS - 1,
+      }),
+    ).toBe(undefined)
+  })
+
+  test("respects custom minElapsedMs", () => {
+    expect(
+      calculateTokensPerSecond({
+        totalTokens: 100,
+        elapsedMs: 100, // below the default threshold, but above the custom one on the next line
+        minElapsedMs: 50,
+      }),
+    ).toBe(1000)
+  })
+
+  test("returns undefined for non-finite results", () => {
+    expect(calculateTokensPerSecond({ totalTokens: 100, elapsedMs: 0, minElapsedMs: 0 })).toBe(undefined) // a zero threshold lets elapsedMs 0 through, so the division yields Infinity
+  })
+})
--- a/packages/opencode/src/cli/cmd/tui/util/tokens.ts
+++ b/packages/opencode/src/cli/cmd/tui/util/tokens.ts
@ -0,0 +1,34 @@
+export const MIN_TOKENS_PER_SECOND_ELAPSED_MS = 250 // minimum elapsed time in ms before a tok/s rate is reported; the cutoff in isValidForTokensPerSecond and the default floor in calculateTokensPerSecond
+
+export function totalGeneratedTokens(tokens: { output: number; reasoning: number }) { // returns the combined count of output and reasoning tokens
+  return tokens.output + tokens.reasoning
+}
+
+export function isValidForTokensPerSecond(msg: { // returns true only when every check below passes, i.e. a tok/s rate can be computed for msg
+  summary?: boolean
+  finish?: string | null
+  tokens: { output: number; reasoning: number }
+  time: { completed?: number; firstToken?: number }
+}): boolean {
+  if (msg.summary) return false // summary messages are excluded
+  if (!msg.finish || ["tool-calls", "unknown"].includes(msg.finish)) return false // no finish reason, or one of the two excluded reasons
+  const totalTokens = totalGeneratedTokens(msg.tokens)
+  if (totalTokens <= 0) return false // nothing was generated
+  if (msg.time.completed === undefined || msg.time.firstToken === undefined) return false // both timestamps are needed to measure elapsed time
+  const elapsedMs = msg.time.completed - msg.time.firstToken
+  return elapsedMs >= MIN_TOKENS_PER_SECOND_ELAPSED_MS // inclusive: exactly the threshold is accepted
+}
+
+export function calculateTokensPerSecond(input: { // returns tokens per second rounded to an integer, or undefined when no rate can be reported
+  totalTokens: number
+  elapsedMs: number
+  minElapsedMs?: number // optional override; falls back to MIN_TOKENS_PER_SECOND_ELAPSED_MS when omitted
+}): number | undefined {
+  if (input.totalTokens <= 0) return undefined // no tokens, no rate
+  const minElapsedMs = input.minElapsedMs ?? MIN_TOKENS_PER_SECOND_ELAPSED_MS
+  if (input.elapsedMs < minElapsedMs) return undefined // elapsed time is below the minimum threshold
+  const rate = input.totalTokens / (input.elapsedMs / 1000) // convert milliseconds to seconds before dividing
+  if (!Number.isFinite(rate)) return undefined // e.g. elapsedMs of 0 with a zero threshold gives Infinity
+  return Math.round(rate)
+}
+
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@ -335,6 +335,7 @@ export namespace MessageV2 {
    time: z.object({
      created: z.number(),
      completed: z.number().optional(),
+      firstToken: z.number().optional(),
    }),
    error: z
      .discriminatedUnion("name", [
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@ -39,6 +39,17 @@ export namespace SessionProcessor {
    let snapshot: string | undefined
    let blocked = false
    let attempt = 0
+    let firstOutputDeltaTimestamp: number | undefined
+    let lastOutputDeltaTimestamp: number | undefined
+
+    // Helper to track timestamps for all output-producing deltas
+    const markOutputDeltaTimestamp = (now: number) => { // now: timestamp supplied by the caller (the visible call sites pass Date.now())
+      if (firstOutputDeltaTimestamp === undefined) { // only the first call records the first-delta time
+        firstOutputDeltaTimestamp = now
+        input.assistantMessage.time.firstToken = now // the same value is stored on the assistant message
+      }
+      lastOutputDeltaTimestamp = now // every call overwrites the last-delta time
+    }

    const result = {
      get message() {
@ -81,6 +92,8 @@ export namespace SessionProcessor {

                case "reasoning-delta":
                  if (value.id in reasoningMap) {
+                    const now = Date.now()
+                    markOutputDeltaTimestamp(now)
                    const part = reasoningMap[value.id]
                    part.text += value.text
                    if (value.providerMetadata) part.metadata = value.providerMetadata
@ -120,13 +133,17 @@ export namespace SessionProcessor {
                  toolcalls[value.id] = part as MessageV2.ToolPart
                  break

-                case "tool-input-delta":
+                case "tool-input-delta": {
+                  const now = Date.now()
+                  markOutputDeltaTimestamp(now)
                  break
+                }

                case "tool-input-end":
                  break

                case "tool-call": {
+                  markOutputDeltaTimestamp(Date.now())
                  const match = toolcalls[value.toolCallId]
                  if (match) {
                    const part = await Session.updatePart({
@ -256,7 +273,11 @@ export namespace SessionProcessor {
                  })
                  input.assistantMessage.finish = value.finishReason
                  input.assistantMessage.cost += usage.cost
-                  input.assistantMessage.tokens = usage.tokens
+                  input.assistantMessage.tokens.input += usage.tokens.input
+                  input.assistantMessage.tokens.output += usage.tokens.output
+                  input.assistantMessage.tokens.reasoning += usage.tokens.reasoning
+                  input.assistantMessage.tokens.cache.read += usage.tokens.cache.read
+                  input.assistantMessage.tokens.cache.write += usage.tokens.cache.write
                  await Session.updatePart({
                    id: Identifier.ascending("part"),
                    reason: value.finishReason,
@ -304,6 +325,8 @@ export namespace SessionProcessor {

                case "text-delta":
                  if (currentText) {
+                    const now = Date.now()
+                    markOutputDeltaTimestamp(now)
                    currentText.text += value.text
                    if (value.providerMetadata) currentText.metadata = value.providerMetadata
                    if (currentText.text)
@ -389,7 +412,7 @@ export namespace SessionProcessor {
              })
            }
          }
-          input.assistantMessage.time.completed = Date.now()
+          input.assistantMessage.time.completed = lastOutputDeltaTimestamp ?? Date.now()
          await Session.updateMessage(input.assistantMessage)
          if (blocked) return "stop"
          if (input.assistantMessage.error) return "stop"
--- a/packages/sdk/js/src/v2/gen/types.gen.ts
+++ b/packages/sdk/js/src/v2/gen/types.gen.ts
@ -141,6 +141,7 @@ export type AssistantMessage = {
  time: {
    created: number
    completed?: number
+    firstToken?: number
  }
  error?: ProviderAuthError | UnknownError | MessageOutputLengthError | MessageAbortedError | ApiError
  parentID: string