test: add thinking budget token test (#11305)

* refactor: add thinking budget token test

* fix comment
SuYao 2025-11-22 21:43:57 +08:00 committed by GitHub
parent 0a72c613af
commit c1f1d7996d
6 changed files with 151 additions and 13 deletions

View File

@@ -123,7 +123,11 @@ export async function buildStreamTextParams(
isSupportedThinkingTokenClaudeModel(model) &&
(provider.type === 'anthropic' || provider.type === 'aws-bedrock')
) {
maxTokens -= getAnthropicThinkingBudget(assistant, model)
const { reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
const budget = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
if (budget) {
maxTokens -= budget
}
}
let webSearchPluginConfig: WebSearchPluginConfig | undefined = undefined

View File

@@ -133,7 +133,7 @@ export class AiSdkSpanAdapter {
// Log the conversion process in detail
const operationId = attributes['ai.operationId']
logger.info('Converting AI SDK span to SpanEntity', {
logger.debug('Converting AI SDK span to SpanEntity', {
spanName: spanName,
operationId,
spanTag,
@@ -149,7 +149,7 @@
})
if (tokenUsage) {
logger.info('Token usage data found', {
logger.debug('Token usage data found', {
spanName: spanName,
operationId,
usage: tokenUsage,
@@ -158,7 +158,7 @@
}
if (inputs || outputs) {
logger.info('Input/Output data extracted', {
logger.debug('Input/Output data extracted', {
spanName: spanName,
operationId,
hasInputs: !!inputs,
@@ -170,7 +170,7 @@
}
if (Object.keys(typeSpecificData).length > 0) {
logger.info('Type-specific data extracted', {
logger.debug('Type-specific data extracted', {
spanName: spanName,
operationId,
typeSpecificKeys: Object.keys(typeSpecificData),
@@ -204,7 +204,7 @@
modelName: modelName || this.extractModelFromAttributes(attributes)
}
logger.info('AI SDK span successfully converted to SpanEntity', {
logger.debug('AI SDK span successfully converted to SpanEntity', {
spanName: spanName,
operationId,
spanId: spanContext.spanId,

View File

@@ -0,0 +1,87 @@
import * as models from '@renderer/config/models'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { getAnthropicThinkingBudget } from '../reasoning'
vi.mock('@renderer/store', () => ({
default: {
getState: () => ({
llm: {
providers: []
},
settings: {}
})
},
useAppDispatch: () => vi.fn(),
useAppSelector: () => vi.fn()
}))
vi.mock('@renderer/hooks/useSettings', () => ({
getStoreSetting: () => undefined,
useSettings: () => ({})
}))
vi.mock('@renderer/services/AssistantService', () => ({
getAssistantSettings: () => ({ maxTokens: undefined }),
getProviderByModel: () => ({ id: '' })
}))
describe('reasoning utils', () => {
describe('getAnthropicThinkingBudget', () => {
const findTokenLimitSpy = vi.spyOn(models, 'findTokenLimit')
const applyTokenLimit = (limit?: { min: number; max: number }) => findTokenLimitSpy.mockReturnValueOnce(limit)
beforeEach(() => {
findTokenLimitSpy.mockReset()
})
it('returns undefined when reasoningEffort is undefined', () => {
const result = getAnthropicThinkingBudget(8000, undefined, 'claude-model')
expect(result).toBe(undefined)
expect(findTokenLimitSpy).not.toHaveBeenCalled()
})
it('returns undefined when tokenLimit is not found', () => {
const unknownId = 'unknown-model'
applyTokenLimit(undefined)
const result = getAnthropicThinkingBudget(8000, 'medium', unknownId)
expect(result).toBe(undefined)
expect(findTokenLimitSpy).toHaveBeenCalledWith(unknownId)
})
it('uses DEFAULT_MAX_TOKENS when maxTokens is undefined', () => {
applyTokenLimit({ min: 1000, max: 10_000 })
const result = getAnthropicThinkingBudget(undefined, 'medium', 'claude-model')
expect(result).toBe(2048)
expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
})
it('respects maxTokens limit when lower than token limit', () => {
applyTokenLimit({ min: 1000, max: 10_000 })
const result = getAnthropicThinkingBudget(8000, 'medium', 'claude-model')
expect(result).toBe(4000)
expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
})
it('caps to token limit when lower than maxTokens budget', () => {
applyTokenLimit({ min: 1000, max: 5000 })
const result = getAnthropicThinkingBudget(100_000, 'high', 'claude-model')
expect(result).toBe(4200)
expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
})
it('enforces minimum budget of 1024', () => {
applyTokenLimit({ min: 0, max: 500 })
const result = getAnthropicThinkingBudget(200, 'low', 'claude-model')
expect(result).toBe(1024)
expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
})
it('respects large token limits when maxTokens is high', () => {
applyTokenLimit({ min: 1024, max: 64_000 })
const result = getAnthropicThinkingBudget(64_000, 'high', 'claude-model')
expect(result).toBe(51_200)
expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
})
})
})
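
For reference, the expected values in these tests follow from the budget formula in reasoning.ts (shown in the next file). A minimal standalone sketch of that arithmetic — the effort ratios (low 0.05, medium 0.5, high 0.8) and a DEFAULT_MAX_TOKENS of 4096 are assumptions inferred from the expected results, not taken from this diff:

// Standalone sketch of the budget arithmetic the cases above exercise.
// EFFORT_RATIO and DEFAULT_MAX_TOKENS are assumed values consistent with the expectations.
const EFFORT_RATIO: Record<string, number> = { low: 0.05, medium: 0.5, high: 0.8 }
const DEFAULT_MAX_TOKENS = 4096

function sketchBudget(maxTokens: number | undefined, effort: string, limit: { min: number; max: number }): number {
  const ratio = EFFORT_RATIO[effort]
  return Math.max(
    1024,
    Math.floor(Math.min((limit.max - limit.min) * ratio + limit.min, (maxTokens ?? DEFAULT_MAX_TOKENS) * ratio))
  )
}

// 'respects large token limits': min((64_000 - 1_024) * 0.8 + 1_024, 64_000 * 0.8) = 51_200
console.log(sketchBudget(64_000, 'high', { min: 1024, max: 64_000 })) // 51200
// 'uses DEFAULT_MAX_TOKENS': min((10_000 - 1_000) * 0.5 + 1_000, 4_096 * 0.5) = 2_048
console.log(sketchBudget(undefined, 'medium', { min: 1000, max: 10_000 })) // 2048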

View File

@@ -393,19 +393,26 @@ export function getOpenAIReasoningParams(
return {}
}
export function getAnthropicThinkingBudget(assistant: Assistant, model: Model): number {
const { maxTokens, reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
export function getAnthropicThinkingBudget(
maxTokens: number | undefined,
reasoningEffort: string | undefined,
modelId: string
): number | undefined {
if (reasoningEffort === undefined || reasoningEffort === 'none') {
return 0
return undefined
}
const effortRatio = EFFORT_RATIO[reasoningEffort]
const tokenLimit = findTokenLimit(modelId)
if (!tokenLimit) {
return undefined
}
const budgetTokens = Math.max(
1024,
Math.floor(
Math.min(
(findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio +
findTokenLimit(model.id)?.min!,
(tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min,
(maxTokens || DEFAULT_MAX_TOKENS) * effortRatio
)
)
@@ -437,7 +444,8 @@ export function getAnthropicReasoningParams(
// Claude reasoning parameters
if (isSupportedThinkingTokenClaudeModel(model)) {
const budgetTokens = getAnthropicThinkingBudget(assistant, model)
const { maxTokens } = getAssistantSettings(assistant)
const budgetTokens = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
return {
thinking: {
@@ -560,7 +568,8 @@ export function getBedrockReasoningParams(
return {}
}
const budgetTokens = getAnthropicThinkingBudget(assistant, model)
const { maxTokens } = getAssistantSettings(assistant)
const budgetTokens = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
return {
reasoningConfig: {
type: 'enabled',
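
As a usage note, returning undefined (rather than 0) lets callers leave maxTokens untouched when no budget applies, as in the buildStreamTextParams hunk above. A minimal self-contained sketch of that guard — clampMaxTokens is a hypothetical helper used only for illustration, not part of this change:

// Only reserve thinking tokens when a budget was actually resolved.
// clampMaxTokens is illustrative; the real call site subtracts in place inside buildStreamTextParams.
function clampMaxTokens(maxTokens: number, budget: number | undefined): number {
  if (budget) {
    return maxTokens - budget // reserve the thinking budget out of the completion budget
  }
  return maxTokens // undefined budget: unknown token limit or reasoning disabled, leave untouched
}

console.log(clampMaxTokens(8_000, 4_000)) // 4000
console.log(clampMaxTokens(8_000, undefined)) // 8000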

View File

@@ -1,6 +1,7 @@
import { describe, expect, it, vi } from 'vitest'
import {
findTokenLimit,
isDoubaoSeedAfter251015,
isDoubaoThinkingAutoModel,
isGeminiReasoningModel,
@@ -518,3 +519,35 @@ describe('Gemini Models', () => {
})
})
})
describe('findTokenLimit', () => {
const cases: Array<{ modelId: string; expected: { min: number; max: number } }> = [
{ modelId: 'gemini-2.5-flash-lite-exp', expected: { min: 512, max: 24_576 } },
{ modelId: 'gemini-1.5-flash', expected: { min: 0, max: 24_576 } },
{ modelId: 'gemini-1.5-pro-001', expected: { min: 128, max: 32_768 } },
{ modelId: 'qwen3-235b-a22b-thinking-2507', expected: { min: 0, max: 81_920 } },
{ modelId: 'qwen3-30b-a3b-thinking-2507', expected: { min: 0, max: 81_920 } },
{ modelId: 'qwen3-vl-235b-a22b-thinking', expected: { min: 0, max: 81_920 } },
{ modelId: 'qwen3-vl-30b-a3b-thinking', expected: { min: 0, max: 81_920 } },
{ modelId: 'qwen-plus-2025-07-14', expected: { min: 0, max: 38_912 } },
{ modelId: 'qwen-plus-2025-04-28', expected: { min: 0, max: 38_912 } },
{ modelId: 'qwen3-1.7b', expected: { min: 0, max: 30_720 } },
{ modelId: 'qwen3-0.6b', expected: { min: 0, max: 30_720 } },
{ modelId: 'qwen-plus-ultra', expected: { min: 0, max: 81_920 } },
{ modelId: 'qwen-turbo-pro', expected: { min: 0, max: 38_912 } },
{ modelId: 'qwen-flash-lite', expected: { min: 0, max: 81_920 } },
{ modelId: 'qwen3-7b', expected: { min: 1_024, max: 38_912 } },
{ modelId: 'claude-3.7-sonnet-extended', expected: { min: 1_024, max: 64_000 } },
{ modelId: 'claude-sonnet-4.1', expected: { min: 1_024, max: 64_000 } },
{ modelId: 'claude-sonnet-4-5-20250929', expected: { min: 1_024, max: 64_000 } },
{ modelId: 'claude-opus-4-1-extended', expected: { min: 1_024, max: 32_000 } }
]
it.each(cases)('returns correct limits for $modelId', ({ modelId, expected }) => {
expect(findTokenLimit(modelId)).toEqual(expected)
})
it('returns undefined for unknown models', () => {
expect(findTokenLimit('unknown-model')).toBeUndefined()
})
})

View File

@@ -14,6 +14,11 @@ vi.mock('@logger', async () => {
}
})
// Mock uuid globally for renderer tests
vi.mock('uuid', () => ({
v4: () => 'test-uuid-' + Date.now()
}))
vi.mock('axios', () => {
const defaultAxiosMock = {
get: vi.fn().mockResolvedValue({ data: {} }), // Mocking axios GET request