test: add thinking budget token test (#11305)

* refactor: add thinking budget token test
* fix comment
2026-01-10 23:59:45 +08:00 · 2025-11-22 21:43:57 +08:00 · 2025-11-22 21:43:57 +08:00 · c1f1d7996d
commit c1f1d7996d
parent 0a72c613af
6 changed files with 151 additions and 13 deletions
--- a/src/renderer/src/aiCore/prepareParams/parameterBuilder.ts
+++ b/src/renderer/src/aiCore/prepareParams/parameterBuilder.ts
@ -123,7 +123,11 @@ export async function buildStreamTextParams(
    isSupportedThinkingTokenClaudeModel(model) &&
    (provider.type === 'anthropic' || provider.type === 'aws-bedrock')
  ) {
-    maxTokens -= getAnthropicThinkingBudget(assistant, model)
+    const { reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
    const budget = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
    if (budget) {
      maxTokens -= budget
    }
  }
  let webSearchPluginConfig: WebSearchPluginConfig | undefined = undefined
--- a/src/renderer/src/aiCore/trace/AiSdkSpanAdapter.ts
+++ b/src/renderer/src/aiCore/trace/AiSdkSpanAdapter.ts
@ -133,7 +133,7 @@ export class AiSdkSpanAdapter {
    // 详细记录转换过程
    const operationId = attributes['ai.operationId']
-    logger.info('Converting AI SDK span to SpanEntity', {
+    logger.debug('Converting AI SDK span to SpanEntity', {
      spanName: spanName,
      operationId,
      spanTag,
@ -149,7 +149,7 @@ export class AiSdkSpanAdapter {
    })
    if (tokenUsage) {
-      logger.info('Token usage data found', {
+      logger.debug('Token usage data found', {
        spanName: spanName,
        operationId,
        usage: tokenUsage,
@ -158,7 +158,7 @@ export class AiSdkSpanAdapter {
    }
    if (inputs || outputs) {
-      logger.info('Input/Output data extracted', {
+      logger.debug('Input/Output data extracted', {
        spanName: spanName,
        operationId,
        hasInputs: !!inputs,
@ -170,7 +170,7 @@ export class AiSdkSpanAdapter {
    }
    if (Object.keys(typeSpecificData).length > 0) {
-      logger.info('Type-specific data extracted', {
+      logger.debug('Type-specific data extracted', {
        spanName: spanName,
        operationId,
        typeSpecificKeys: Object.keys(typeSpecificData),
@ -204,7 +204,7 @@ export class AiSdkSpanAdapter {
      modelName: modelName || this.extractModelFromAttributes(attributes)
    }
-    logger.info('AI SDK span successfully converted to SpanEntity', {
+    logger.debug('AI SDK span successfully converted to SpanEntity', {
      spanName: spanName,
      operationId,
      spanId: spanContext.spanId,
--- a/src/renderer/src/aiCore/utils/tests/reasoning.test.ts
+++ b/src/renderer/src/aiCore/utils/tests/reasoning.test.ts
@ -0,0 +1,87 @@
 import * as models from '@renderer/config/models'
 import { beforeEach, describe, expect, it, vi } from 'vitest'
 import { getAnthropicThinkingBudget } from '../reasoning'
 // Module mocks: replace three renderer modules with minimal stubs for this test file.
 // Store stub: getState() yields an empty provider list and an empty settings object;
 // the dispatch/selector hooks each return a fresh vi.fn().
 vi.mock('@renderer/store', () => ({
  default: {
    getState: () => ({
      llm: {
        providers: []
      },
      settings: {}
    })
  },
  useAppDispatch: () => vi.fn(),
  useAppSelector: () => vi.fn()
 }))
 // Settings stub: every stored setting reads as undefined and useSettings() is empty.
 vi.mock('@renderer/hooks/useSettings', () => ({
  getStoreSetting: () => undefined,
  useSettings: () => ({})
 }))
 // AssistantService stub: assistant settings carry no maxTokens and the provider
 // lookup returns an object with an empty id.
 vi.mock('@renderer/services/AssistantService', () => ({
  getAssistantSettings: () => ({ maxTokens: undefined }),
  getProviderByModel: () => ({ id: '' })
 }))
 // Unit tests for getAnthropicThinkingBudget(maxTokens, reasoningEffort, modelId).
 // Each case controls the model's token limit by stubbing findTokenLimit and then
 // checks the returned thinking budget.
 describe('reasoning utils', () => {
  describe('getAnthropicThinkingBudget', () => {
    // Spy on findTokenLimit so every test decides what limit the function sees.
    const findTokenLimitSpy = vi.spyOn(models, 'findTokenLimit')
    // Queue a single return value for the next findTokenLimit call.
    const applyTokenLimit = (limit?: { min: number; max: number }) => findTokenLimitSpy.mockReturnValueOnce(limit)
    // Reset the spy before each test so recorded calls and queued values do not leak.
    beforeEach(() => {
      findTokenLimitSpy.mockReset()
    })
    // No effort setting: the result is undefined and the token limit is never looked up.
    it('returns undefined when reasoningEffort is undefined', () => {
      const result = getAnthropicThinkingBudget(8000, undefined, 'claude-model')
      expect(result).toBe(undefined)
      expect(findTokenLimitSpy).not.toHaveBeenCalled()
    })
    // Effort is set but the model has no known limit: the lookup happens and yields undefined.
    it('returns undefined when tokenLimit is not found', () => {
      const unknownId = 'unknown-model'
      applyTokenLimit(undefined)
      const result = getAnthropicThinkingBudget(8000, 'medium', unknownId)
      expect(result).toBe(undefined)
      expect(findTokenLimitSpy).toHaveBeenCalledWith(unknownId)
    })
    // maxTokens omitted: the function falls back to DEFAULT_MAX_TOKENS.
    // NOTE(review): 2048 presumably equals DEFAULT_MAX_TOKENS times the 'medium' ratio;
    // both constants are defined outside this view, so confirm.
    it('uses DEFAULT_MAX_TOKENS when maxTokens is undefined', () => {
      applyTokenLimit({ min: 1000, max: 10_000 })
      const result = getAnthropicThinkingBudget(undefined, 'medium', 'claude-model')
      expect(result).toBe(2048)
      expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
    })
    // The maxTokens-derived candidate is the smaller one here and therefore wins.
    // NOTE(review): 4000 from 8000 is consistent with a 'medium' ratio of 0.5; confirm.
    it('respects maxTokens limit when lower than token limit', () => {
      applyTokenLimit({ min: 1000, max: 10_000 })
      const result = getAnthropicThinkingBudget(8000, 'medium', 'claude-model')
      expect(result).toBe(4000)
      expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
    })
    // The limit-derived candidate, (max - min) * ratio + min, is the smaller one here.
    // NOTE(review): 4200 is consistent with a 'high' ratio of 0.8; confirm.
    it('caps to token limit when lower than maxTokens budget', () => {
      applyTokenLimit({ min: 1000, max: 5000 })
      const result = getAnthropicThinkingBudget(100_000, 'high', 'claude-model')
      expect(result).toBe(4200)
      expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
    })
    // Both candidates fall below the floor, so the result is clamped up to 1024.
    it('enforces minimum budget of 1024', () => {
      applyTokenLimit({ min: 0, max: 500 })
      const result = getAnthropicThinkingBudget(200, 'low', 'claude-model')
      expect(result).toBe(1024)
      expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
    })
    // Large limit and large maxTokens: the budget scales with maxTokens rather than
    // being capped at a small value.
    it('respects large token limits when maxTokens is high', () => {
      applyTokenLimit({ min: 1024, max: 64_000 })
      const result = getAnthropicThinkingBudget(64_000, 'high', 'claude-model')
      expect(result).toBe(51_200)
      expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
    })
  })
 })
--- a/src/renderer/src/aiCore/utils/reasoning.ts
+++ b/src/renderer/src/aiCore/utils/reasoning.ts
@ -393,19 +393,26 @@ export function getOpenAIReasoningParams(
  return {}
 }
-export function getAnthropicThinkingBudget(assistant: Assistant, model: Model): number {
+export function getAnthropicThinkingBudget(
-  const { maxTokens, reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
+  maxTokens: number | undefined,
  reasoningEffort: string | undefined,
  modelId: string
 ): number | undefined {
  if (reasoningEffort === undefined || reasoningEffort === 'none') {
-    return 0
+    return undefined
  }
  const effortRatio = EFFORT_RATIO[reasoningEffort]
  const tokenLimit = findTokenLimit(modelId)
  if (!tokenLimit) {
    return undefined
  }
  const budgetTokens = Math.max(
    1024,
    Math.floor(
      Math.min(
-        (findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio +
+        (tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min,
          findTokenLimit(model.id)?.min!,
        (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio
      )
    )
@ -437,7 +444,8 @@ export function getAnthropicReasoningParams(
  // Claude 推理参数
  if (isSupportedThinkingTokenClaudeModel(model)) {
-    const budgetTokens = getAnthropicThinkingBudget(assistant, model)
+    const { maxTokens } = getAssistantSettings(assistant)
    const budgetTokens = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
    return {
      thinking: {
@ -560,7 +568,8 @@ export function getBedrockReasoningParams(
    return {}
  }
-  const budgetTokens = getAnthropicThinkingBudget(assistant, model)
+  const { maxTokens } = getAssistantSettings(assistant)
  const budgetTokens = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
  return {
    reasoningConfig: {
      type: 'enabled',
--- a/src/renderer/src/config/test/reasoning.test.ts
+++ b/src/renderer/src/config/test/reasoning.test.ts
@ -1,6 +1,7 @@
 import { describe, expect, it, vi } from 'vitest'
 import {
  findTokenLimit,
  isDoubaoSeedAfter251015,
  isDoubaoThinkingAutoModel,
  isGeminiReasoningModel,
@ -518,3 +519,35 @@ describe('Gemini Models', () => {
    })
  })
 })
 // Table-driven tests for findTokenLimit: each case pairs a model id with the
 // { min, max } limit the lookup is expected to return for it.
 describe('findTokenLimit', () => {
  const cases: Array<{ modelId: string; expected: { min: number; max: number } }> = [
    { modelId: 'gemini-2.5-flash-lite-exp', expected: { min: 512, max: 24_576 } },
    { modelId: 'gemini-1.5-flash', expected: { min: 0, max: 24_576 } },
    { modelId: 'gemini-1.5-pro-001', expected: { min: 128, max: 32_768 } },
    { modelId: 'qwen3-235b-a22b-thinking-2507', expected: { min: 0, max: 81_920 } },
    { modelId: 'qwen3-30b-a3b-thinking-2507', expected: { min: 0, max: 81_920 } },
    { modelId: 'qwen3-vl-235b-a22b-thinking', expected: { min: 0, max: 81_920 } },
    { modelId: 'qwen3-vl-30b-a3b-thinking', expected: { min: 0, max: 81_920 } },
    { modelId: 'qwen-plus-2025-07-14', expected: { min: 0, max: 38_912 } },
    { modelId: 'qwen-plus-2025-04-28', expected: { min: 0, max: 38_912 } },
    { modelId: 'qwen3-1.7b', expected: { min: 0, max: 30_720 } },
    { modelId: 'qwen3-0.6b', expected: { min: 0, max: 30_720 } },
    { modelId: 'qwen-plus-ultra', expected: { min: 0, max: 81_920 } },
    { modelId: 'qwen-turbo-pro', expected: { min: 0, max: 38_912 } },
    { modelId: 'qwen-flash-lite', expected: { min: 0, max: 81_920 } },
    { modelId: 'qwen3-7b', expected: { min: 1_024, max: 38_912 } },
    { modelId: 'claude-3.7-sonnet-extended', expected: { min: 1_024, max: 64_000 } },
    { modelId: 'claude-sonnet-4.1', expected: { min: 1_024, max: 64_000 } },
    { modelId: 'claude-sonnet-4-5-20250929', expected: { min: 1_024, max: 64_000 } },
    { modelId: 'claude-opus-4-1-extended', expected: { min: 1_024, max: 32_000 } }
  ]
  // One generated test per table row; toEqual compares the limit objects structurally.
  it.each(cases)('returns correct limits for $modelId', ({ modelId, expected }) => {
    expect(findTokenLimit(modelId)).toEqual(expected)
  })
  // An id that is not in the table above is expected to yield undefined.
  it('returns undefined for unknown models', () => {
    expect(findTokenLimit('unknown-model')).toBeUndefined()
  })
 })
--- a/tests/renderer.setup.ts
+++ b/tests/renderer.setup.ts
@ -14,6 +14,11 @@ vi.mock('@logger', async () => {
  }
 })
 // Mock uuid globally for renderer tests.
 // v4() returns 'test-uuid-' followed by the current millisecond timestamp.
 // NOTE(review): Date.now() has millisecond resolution, so two calls within the same
 // millisecond return the same id — confirm that no test relies on uniqueness.
 vi.mock('uuid', () => ({
  v4: () => 'test-uuid-' + Date.now()
 }))
 vi.mock('axios', () => {
  const defaultAxiosMock = {
    get: vi.fn().mockResolvedValue({ data: {} }), // Mocking axios GET request