test: add thinking budget token test (#11305)

* refactor: add thinking budget token test

* fix comment
SuYao 2025-11-22 21:43:57 +08:00 committed by GitHub
parent 0a72c613af
commit c1f1d7996d
6 changed files with 151 additions and 13 deletions

View File

@@ -123,7 +123,11 @@ export async function buildStreamTextParams(
isSupportedThinkingTokenClaudeModel(model) &&
(provider.type === 'anthropic' || provider.type === 'aws-bedrock')
) {
maxTokens -= getAnthropicThinkingBudget(assistant, model)
const { reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
const budget = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
if (budget) {
maxTokens -= budget
}
}
let webSearchPluginConfig: WebSearchPluginConfig | undefined = undefined

View File

@@ -133,7 +133,7 @@ export class AiSdkSpanAdapter {
// Log the conversion process in detail
const operationId = attributes['ai.operationId']
logger.info('Converting AI SDK span to SpanEntity', {
logger.debug('Converting AI SDK span to SpanEntity', {
spanName: spanName,
operationId,
spanTag,
@@ -149,7 +149,7 @@
})
if (tokenUsage) {
logger.info('Token usage data found', {
logger.debug('Token usage data found', {
spanName: spanName,
operationId,
usage: tokenUsage,
@@ -158,7 +158,7 @@
}
if (inputs || outputs) {
logger.info('Input/Output data extracted', {
logger.debug('Input/Output data extracted', {
spanName: spanName,
operationId,
hasInputs: !!inputs,
@@ -170,7 +170,7 @@
}
if (Object.keys(typeSpecificData).length > 0) {
logger.info('Type-specific data extracted', {
logger.debug('Type-specific data extracted', {
spanName: spanName,
operationId,
typeSpecificKeys: Object.keys(typeSpecificData),
@@ -204,7 +204,7 @@
modelName: modelName || this.extractModelFromAttributes(attributes)
}
logger.info('AI SDK span successfully converted to SpanEntity', {
logger.debug('AI SDK span successfully converted to SpanEntity', {
spanName: spanName,
operationId,
spanId: spanContext.spanId,

View File

@@ -0,0 +1,87 @@
import * as models from '@renderer/config/models'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { getAnthropicThinkingBudget } from '../reasoning'
vi.mock('@renderer/store', () => ({
default: {
getState: () => ({
llm: {
providers: []
},
settings: {}
})
},
useAppDispatch: () => vi.fn(),
useAppSelector: () => vi.fn()
}))
vi.mock('@renderer/hooks/useSettings', () => ({
getStoreSetting: () => undefined,
useSettings: () => ({})
}))
vi.mock('@renderer/services/AssistantService', () => ({
getAssistantSettings: () => ({ maxTokens: undefined }),
getProviderByModel: () => ({ id: '' })
}))
describe('reasoning utils', () => {
describe('getAnthropicThinkingBudget', () => {
const findTokenLimitSpy = vi.spyOn(models, 'findTokenLimit')
const applyTokenLimit = (limit?: { min: number; max: number }) => findTokenLimitSpy.mockReturnValueOnce(limit)
beforeEach(() => {
findTokenLimitSpy.mockReset()
})
it('returns undefined when reasoningEffort is undefined', () => {
const result = getAnthropicThinkingBudget(8000, undefined, 'claude-model')
expect(result).toBe(undefined)
expect(findTokenLimitSpy).not.toHaveBeenCalled()
})
it('returns undefined when tokenLimit is not found', () => {
const unknownId = 'unknown-model'
applyTokenLimit(undefined)
const result = getAnthropicThinkingBudget(8000, 'medium', unknownId)
expect(result).toBe(undefined)
expect(findTokenLimitSpy).toHaveBeenCalledWith(unknownId)
})
it('uses DEFAULT_MAX_TOKENS when maxTokens is undefined', () => {
applyTokenLimit({ min: 1000, max: 10_000 })
const result = getAnthropicThinkingBudget(undefined, 'medium', 'claude-model')
expect(result).toBe(2048)
expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
})
it('respects maxTokens limit when lower than token limit', () => {
applyTokenLimit({ min: 1000, max: 10_000 })
const result = getAnthropicThinkingBudget(8000, 'medium', 'claude-model')
expect(result).toBe(4000)
expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
})
it('caps to token limit when lower than maxTokens budget', () => {
applyTokenLimit({ min: 1000, max: 5000 })
const result = getAnthropicThinkingBudget(100_000, 'high', 'claude-model')
expect(result).toBe(4200)
expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
})
it('enforces minimum budget of 1024', () => {
applyTokenLimit({ min: 0, max: 500 })
const result = getAnthropicThinkingBudget(200, 'low', 'claude-model')
expect(result).toBe(1024)
expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
})
it('respects large token limits when maxTokens is high', () => {
applyTokenLimit({ min: 1024, max: 64_000 })
const result = getAnthropicThinkingBudget(64_000, 'high', 'claude-model')
expect(result).toBe(51_200)
expect(findTokenLimitSpy).toHaveBeenCalledWith('claude-model')
})
})
})
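
For reference, the expected values in these tests follow from the budget formula in reasoning.ts (shown in the next file). A minimal standalone sketch of that arithmetic — the effort ratios (low 0.05, medium 0.5, high 0.8) and a DEFAULT_MAX_TOKENS of 4096 are assumptions inferred from the expected results, not taken from this diff:

// Standalone sketch of the budget arithmetic the cases above exercise.
// EFFORT_RATIO and DEFAULT_MAX_TOKENS are assumed values consistent with the expectations.
const EFFORT_RATIO: Record<string, number> = { low: 0.05, medium: 0.5, high: 0.8 }
const DEFAULT_MAX_TOKENS = 4096

function sketchBudget(maxTokens: number | undefined, effort: string, limit: { min: number; max: number }): number {
  const ratio = EFFORT_RATIO[effort]
  return Math.max(
    1024,
    Math.floor(Math.min((limit.max - limit.min) * ratio + limit.min, (maxTokens ?? DEFAULT_MAX_TOKENS) * ratio))
  )
}

// 'respects large token limits': min((64_000 - 1_024) * 0.8 + 1_024, 64_000 * 0.8) = 51_200
console.log(sketchBudget(64_000, 'high', { min: 1024, max: 64_000 })) // 51200
// 'uses DEFAULT_MAX_TOKENS': min((10_000 - 1_000) * 0.5 + 1_000, 4_096 * 0.5) = 2_048
console.log(sketchBudget(undefined, 'medium', { min: 1000, max: 10_000 })) // 2048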

View File

@@ -393,19 +393,26 @@ export function getOpenAIReasoningParams(
return {}
}
export function getAnthropicThinkingBudget(assistant: Assistant, model: Model): number {
const { maxTokens, reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
export function getAnthropicThinkingBudget(
maxTokens: number | undefined,
reasoningEffort: string | undefined,
modelId: string
): number | undefined {
if (reasoningEffort === undefined || reasoningEffort === 'none') {
return 0
return undefined
}
const effortRatio = EFFORT_RATIO[reasoningEffort]
const tokenLimit = findTokenLimit(modelId)
if (!tokenLimit) {
return undefined
}
const budgetTokens = Math.max(
1024,
Math.floor(
Math.min(
(findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio +
findTokenLimit(model.id)?.min!,
(tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min,
(maxTokens || DEFAULT_MAX_TOKENS) * effortRatio
)
)
@@ -437,7 +444,8 @@ export function getAnthropicReasoningParams(
// Claude reasoning parameters
if (isSupportedThinkingTokenClaudeModel(model)) {
const budgetTokens = getAnthropicThinkingBudget(assistant, model)
const { maxTokens } = getAssistantSettings(assistant)
const budgetTokens = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
return {
thinking: {
@@ -560,7 +568,8 @@ export function getBedrockReasoningParams(
return {}
}
const budgetTokens = getAnthropicThinkingBudget(assistant, model)
const { maxTokens } = getAssistantSettings(assistant)
const budgetTokens = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
return {
reasoningConfig: {
type: 'enabled',
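
As a usage note, returning undefined (rather than 0) lets callers leave maxTokens untouched when no budget applies, as in the buildStreamTextParams hunk above. A minimal self-contained sketch of that guard — clampMaxTokens is a hypothetical helper used only for illustration, not part of this change:

// Only reserve thinking tokens when a budget was actually resolved.
// clampMaxTokens is illustrative; the real call site subtracts in place inside buildStreamTextParams.
function clampMaxTokens(maxTokens: number, budget: number | undefined): number {
  if (budget) {
    return maxTokens - budget // reserve the thinking budget out of the completion budget
  }
  return maxTokens // undefined budget: unknown token limit or reasoning disabled, leave untouched
}

console.log(clampMaxTokens(8_000, 4_000)) // 4000
console.log(clampMaxTokens(8_000, undefined)) // 8000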

View File

@@ -1,6 +1,7 @@
import { describe, expect, it, vi } from 'vitest'
import {
findTokenLimit,
isDoubaoSeedAfter251015,
isDoubaoThinkingAutoModel,
isGeminiReasoningModel,
@@ -518,3 +519,35 @@ describe('Gemini Models', () => {
})
})
})
describe('findTokenLimit', () => {
const cases: Array<{ modelId: string; expected: { min: number; max: number } }> = [
{ modelId: 'gemini-2.5-flash-lite-exp', expected: { min: 512, max: 24_576 } },
{ modelId: 'gemini-1.5-flash', expected: { min: 0, max: 24_576 } },
{ modelId: 'gemini-1.5-pro-001', expected: { min: 128, max: 32_768 } },
{ modelId: 'qwen3-235b-a22b-thinking-2507', expected: { min: 0, max: 81_920 } },
{ modelId: 'qwen3-30b-a3b-thinking-2507', expected: { min: 0, max: 81_920 } },
{ modelId: 'qwen3-vl-235b-a22b-thinking', expected: { min: 0, max: 81_920 } },
{ modelId: 'qwen3-vl-30b-a3b-thinking', expected: { min: 0, max: 81_920 } },
{ modelId: 'qwen-plus-2025-07-14', expected: { min: 0, max: 38_912 } },
{ modelId: 'qwen-plus-2025-04-28', expected: { min: 0, max: 38_912 } },
{ modelId: 'qwen3-1.7b', expected: { min: 0, max: 30_720 } },
{ modelId: 'qwen3-0.6b', expected: { min: 0, max: 30_720 } },
{ modelId: 'qwen-plus-ultra', expected: { min: 0, max: 81_920 } },
{ modelId: 'qwen-turbo-pro', expected: { min: 0, max: 38_912 } },
{ modelId: 'qwen-flash-lite', expected: { min: 0, max: 81_920 } },
{ modelId: 'qwen3-7b', expected: { min: 1_024, max: 38_912 } },
{ modelId: 'claude-3.7-sonnet-extended', expected: { min: 1_024, max: 64_000 } },
{ modelId: 'claude-sonnet-4.1', expected: { min: 1_024, max: 64_000 } },
{ modelId: 'claude-sonnet-4-5-20250929', expected: { min: 1_024, max: 64_000 } },
{ modelId: 'claude-opus-4-1-extended', expected: { min: 1_024, max: 32_000 } }
]
it.each(cases)('returns correct limits for $modelId', ({ modelId, expected }) => {
expect(findTokenLimit(modelId)).toEqual(expected)
})
it('returns undefined for unknown models', () => {
expect(findTokenLimit('unknown-model')).toBeUndefined()
})
})

View File

@@ -14,6 +14,11 @@ vi.mock('@logger', async () => {
}
})
// Mock uuid globally for renderer tests
vi.mock('uuid', () => ({
v4: () => 'test-uuid-' + Date.now()
}))
vi.mock('axios', () => {
const defaultAxiosMock = {
get: vi.fn().mockResolvedValue({ data: {} }), // Mocking axios GET request