fix: accumulate token usage across retry attempts

Previously only the final attempt's tokenUsage was returned, so actual model consumption was under-reported whenever retries occurred. Now the token counts of every attempt that returns a result are summed into the returned tokenUsage; an attempt that throws reports no usage and therefore adds nothing to the total. Addresses Codex review P2 (token usage) on #37.
2026-04-03 14:07:18 +08:00 · 2026-04-03 14:07:18 +08:00 · 63e1f7068a
parent 08cf01c6b4
commit 63e1f7068a
2 changed files with 64 additions and 4 deletions
--- a/src/orchestrator/orchestrator.ts
+++ b/src/orchestrator/orchestrator.ts
@ -133,12 +133,20 @@ export async function executeWithRetry(
  const backoff = Math.max(1, task.retryBackoff ?? 2)
  let lastError: string = ''
  // Accumulate token usage across all attempts so billing/observability
  // reflects the true cost of retries.
  let totalUsage: TokenUsage = { input_tokens: 0, output_tokens: 0 }
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const result = await run()
      totalUsage = {
        input_tokens: totalUsage.input_tokens + result.tokenUsage.input_tokens,
        output_tokens: totalUsage.output_tokens + result.tokenUsage.output_tokens,
      }
      if (result.success) {
-        return result
+        return { ...result, tokenUsage: totalUsage }
      }
      lastError = result.output
@ -150,7 +158,7 @@ export async function executeWithRetry(
        continue
      }
-      return result
+      return { ...result, tokenUsage: totalUsage }
    } catch (err) {
      lastError = err instanceof Error ? err.message : String(err)
@ -166,7 +174,7 @@ export async function executeWithRetry(
        success: false,
        output: lastError,
        messages: [],
-        tokenUsage: { input_tokens: 0, output_tokens: 0 },
+        tokenUsage: totalUsage,
        toolCalls: [],
      }
    }
@ -177,7 +185,7 @@ export async function executeWithRetry(
    success: false,
    output: lastError,
    messages: [],
-    tokenUsage: { input_tokens: 0, output_tokens: 0 },
+    tokenUsage: totalUsage,
    toolCalls: [],
  }
 }
--- a/tests/task-retry.test.ts
+++ b/tests/task-retry.test.ts
@ -276,6 +276,58 @@ describe('executeWithRetry', () => {
    expect(mockDelay).toHaveBeenCalledWith(30_000)  // capped
  })
  it('accumulates token usage across retry attempts', async () => {
    // Scenario: two failed attempts followed by a successful one, each
    // reporting its own token usage.
    const failedAttempt: AgentRunResult = {
      ...FAILURE_RESULT,
      tokenUsage: { input_tokens: 100, output_tokens: 50 },
    }
    const finalAttempt: AgentRunResult = {
      ...SUCCESS_RESULT,
      tokenUsage: { input_tokens: 200, output_tokens: 80 },
    }

    const run = vi.fn()
    run.mockResolvedValueOnce(failedAttempt)
    run.mockResolvedValueOnce(failedAttempt)
    run.mockResolvedValueOnce(finalAttempt)

    const task = createTask({
      title: 'Token test',
      description: 'test',
      maxRetries: 2,
      retryDelayMs: 10,
    })

    const { success, tokenUsage } = await executeWithRetry(run, task, undefined, noDelay)

    expect(success).toBe(true)
    // Input: 100 + 100 + 200. Output: 50 + 50 + 80.
    expect(tokenUsage.input_tokens).toBe(400)
    expect(tokenUsage.output_tokens).toBe(180)
  })
  it('accumulates token usage even when all retries fail', async () => {
    // Scenario: every attempt fails and reports the same token usage.
    const perAttempt: AgentRunResult = {
      ...FAILURE_RESULT,
      tokenUsage: { input_tokens: 50, output_tokens: 30 },
    }

    const run = vi.fn()
    run.mockResolvedValue(perAttempt)

    const task = createTask({
      title: 'Token fail test',
      description: 'test',
      maxRetries: 1,
    })

    const { success, tokenUsage } = await executeWithRetry(run, task, undefined, noDelay)

    expect(success).toBe(false)
    // Two attempts in total: 50 + 50 input, 30 + 30 output.
    expect(tokenUsage.input_tokens).toBe(100)
    expect(tokenUsage.output_tokens).toBe(60)
  })
  it('clamps negative maxRetries to 0 (single attempt)', async () => {
    const run = vi.fn().mockRejectedValue(new Error('fail'))