mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-10 23:59:45 +08:00
test: add thinking budget token test (#11305)
* refactor: add thinking budget token test
* fix comment
This commit is contained in:
parent
0a72c613af
commit
c1f1d7996d
@ -123,7 +123,11 @@ export async function buildStreamTextParams(
|
|||||||
isSupportedThinkingTokenClaudeModel(model) &&
|
isSupportedThinkingTokenClaudeModel(model) &&
|
||||||
(provider.type === 'anthropic' || provider.type === 'aws-bedrock')
|
(provider.type === 'anthropic' || provider.type === 'aws-bedrock')
|
||||||
) {
|
) {
|
||||||
maxTokens -= getAnthropicThinkingBudget(assistant, model)
|
const { reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
|
||||||
|
const budget = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
|
||||||
|
if (budget) {
|
||||||
|
maxTokens -= budget
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let webSearchPluginConfig: WebSearchPluginConfig | undefined = undefined
|
let webSearchPluginConfig: WebSearchPluginConfig | undefined = undefined
|
||||||
|
|||||||
@ -133,7 +133,7 @@ export class AiSdkSpanAdapter {
|
|||||||
|
|
||||||
// 详细记录转换过程
|
// 详细记录转换过程
|
||||||
const operationId = attributes['ai.operationId']
|
const operationId = attributes['ai.operationId']
|
||||||
logger.info('Converting AI SDK span to SpanEntity', {
|
logger.debug('Converting AI SDK span to SpanEntity', {
|
||||||
spanName: spanName,
|
spanName: spanName,
|
||||||
operationId,
|
operationId,
|
||||||
spanTag,
|
spanTag,
|
||||||
@ -149,7 +149,7 @@ export class AiSdkSpanAdapter {
|
|||||||
})
|
})
|
||||||
|
|
||||||
if (tokenUsage) {
|
if (tokenUsage) {
|
||||||
logger.info('Token usage data found', {
|
logger.debug('Token usage data found', {
|
||||||
spanName: spanName,
|
spanName: spanName,
|
||||||
operationId,
|
operationId,
|
||||||
usage: tokenUsage,
|
usage: tokenUsage,
|
||||||
@ -158,7 +158,7 @@ export class AiSdkSpanAdapter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (inputs || outputs) {
|
if (inputs || outputs) {
|
||||||
logger.info('Input/Output data extracted', {
|
logger.debug('Input/Output data extracted', {
|
||||||
spanName: spanName,
|
spanName: spanName,
|
||||||
operationId,
|
operationId,
|
||||||
hasInputs: !!inputs,
|
hasInputs: !!inputs,
|
||||||
@ -170,7 +170,7 @@ export class AiSdkSpanAdapter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (Object.keys(typeSpecificData).length > 0) {
|
if (Object.keys(typeSpecificData).length > 0) {
|
||||||
logger.info('Type-specific data extracted', {
|
logger.debug('Type-specific data extracted', {
|
||||||
spanName: spanName,
|
spanName: spanName,
|
||||||
operationId,
|
operationId,
|
||||||
typeSpecificKeys: Object.keys(typeSpecificData),
|
typeSpecificKeys: Object.keys(typeSpecificData),
|
||||||
@ -204,7 +204,7 @@ export class AiSdkSpanAdapter {
|
|||||||
modelName: modelName || this.extractModelFromAttributes(attributes)
|
modelName: modelName || this.extractModelFromAttributes(attributes)
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info('AI SDK span successfully converted to SpanEntity', {
|
logger.debug('AI SDK span successfully converted to SpanEntity', {
|
||||||
spanName: spanName,
|
spanName: spanName,
|
||||||
operationId,
|
operationId,
|
||||||
spanId: spanContext.spanId,
|
spanId: spanContext.spanId,
|
||||||
|
|||||||
87
src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts
Normal file
87
src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
import * as models from '@renderer/config/models'
|
||||||
|
import { beforeEach, describe, expect, it, vi } from 'vitest'
|
||||||
|
|
||||||
|
import { getAnthropicThinkingBudget } from '../reasoning'
|
||||||
|
|
||||||
|
// Stub the renderer store: `getState` yields a fresh object with an empty
// provider list and empty settings on every call, and the hook factories hand
// back new mock functions.
// NOTE(review): presumably this keeps importing '../reasoning' from touching the
// real store — confirm against that module's imports.
vi.mock('@renderer/store', () => {
  const buildState = () => ({
    llm: {
      providers: []
    },
    settings: {}
  })

  return {
    default: {
      getState: buildState
    },
    useAppDispatch: () => vi.fn(),
    useAppSelector: () => vi.fn()
  }
})

// Settings helpers: no stored setting is ever found, and the hook returns an
// empty settings object.
vi.mock('@renderer/hooks/useSettings', () => {
  return {
    getStoreSetting: () => undefined,
    useSettings: () => ({})
  }
})

// Assistant service: settings carry no maxTokens, and every model resolves to
// a provider with an empty id.
vi.mock('@renderer/services/AssistantService', () => {
  return {
    getAssistantSettings: () => ({ maxTokens: undefined }),
    getProviderByModel: () => ({ id: '' })
  }
})
|
||||||
|
|
||||||
|
describe('reasoning utils', () => {
  describe('getAnthropicThinkingBudget', () => {
    const MODEL_ID = 'claude-model'

    // Spy on the token-limit lookup so each test controls the { min, max } range
    // that getAnthropicThinkingBudget sees for the model id.
    const tokenLimitLookup = vi.spyOn(models, 'findTokenLimit')

    // Queues the value returned by the next findTokenLimit call only.
    function applyTokenLimit(limit?: { min: number; max: number }) {
      return tokenLimitLookup.mockReturnValueOnce(limit)
    }

    // Drop queued return values and recorded calls so tests stay independent.
    beforeEach(() => {
      tokenLimitLookup.mockReset()
    })

    // NOTE(review): the expected budgets below imply effort ratios of 0.5 for
    // 'medium' and 0.8 for 'high' — confirm against EFFORT_RATIO.

    it('returns undefined when reasoningEffort is undefined', () => {
      expect(getAnthropicThinkingBudget(8000, undefined, MODEL_ID)).toBeUndefined()
      // The early return must happen before any token-limit lookup.
      expect(tokenLimitLookup).not.toHaveBeenCalled()
    })

    it('returns undefined when tokenLimit is not found', () => {
      const unknownId = 'unknown-model'
      applyTokenLimit(undefined)

      expect(getAnthropicThinkingBudget(8000, 'medium', unknownId)).toBeUndefined()
      expect(tokenLimitLookup).toHaveBeenCalledWith(unknownId)
    })

    it('uses DEFAULT_MAX_TOKENS when maxTokens is undefined', () => {
      applyTokenLimit({ min: 1000, max: 10_000 })

      const budget = getAnthropicThinkingBudget(undefined, 'medium', MODEL_ID)

      expect(budget).toBe(2048)
      expect(tokenLimitLookup).toHaveBeenCalledWith(MODEL_ID)
    })

    it('respects maxTokens limit when lower than token limit', () => {
      applyTokenLimit({ min: 1000, max: 10_000 })

      const budget = getAnthropicThinkingBudget(8000, 'medium', MODEL_ID)

      expect(budget).toBe(4000)
      expect(tokenLimitLookup).toHaveBeenCalledWith(MODEL_ID)
    })

    it('caps to token limit when lower than maxTokens budget', () => {
      applyTokenLimit({ min: 1000, max: 5000 })

      const budget = getAnthropicThinkingBudget(100_000, 'high', MODEL_ID)

      expect(budget).toBe(4200)
      expect(tokenLimitLookup).toHaveBeenCalledWith(MODEL_ID)
    })

    it('enforces minimum budget of 1024', () => {
      // Both candidate budgets fall far below the floor here.
      applyTokenLimit({ min: 0, max: 500 })

      const budget = getAnthropicThinkingBudget(200, 'low', MODEL_ID)

      expect(budget).toBe(1024)
      expect(tokenLimitLookup).toHaveBeenCalledWith(MODEL_ID)
    })

    it('respects large token limits when maxTokens is high', () => {
      applyTokenLimit({ min: 1024, max: 64_000 })

      const budget = getAnthropicThinkingBudget(64_000, 'high', MODEL_ID)

      expect(budget).toBe(51_200)
      expect(tokenLimitLookup).toHaveBeenCalledWith(MODEL_ID)
    })
  })
})
|
||||||
@ -393,19 +393,26 @@ export function getOpenAIReasoningParams(
|
|||||||
return {}
|
return {}
|
||||||
}
|
}
|
||||||
|
|
||||||
export function getAnthropicThinkingBudget(assistant: Assistant, model: Model): number {
|
export function getAnthropicThinkingBudget(
|
||||||
const { maxTokens, reasoning_effort: reasoningEffort } = getAssistantSettings(assistant)
|
maxTokens: number | undefined,
|
||||||
|
reasoningEffort: string | undefined,
|
||||||
|
modelId: string
|
||||||
|
): number | undefined {
|
||||||
if (reasoningEffort === undefined || reasoningEffort === 'none') {
|
if (reasoningEffort === undefined || reasoningEffort === 'none') {
|
||||||
return 0
|
return undefined
|
||||||
}
|
}
|
||||||
const effortRatio = EFFORT_RATIO[reasoningEffort]
|
const effortRatio = EFFORT_RATIO[reasoningEffort]
|
||||||
|
|
||||||
|
const tokenLimit = findTokenLimit(modelId)
|
||||||
|
if (!tokenLimit) {
|
||||||
|
return undefined
|
||||||
|
}
|
||||||
|
|
||||||
const budgetTokens = Math.max(
|
const budgetTokens = Math.max(
|
||||||
1024,
|
1024,
|
||||||
Math.floor(
|
Math.floor(
|
||||||
Math.min(
|
Math.min(
|
||||||
(findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio +
|
(tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min,
|
||||||
findTokenLimit(model.id)?.min!,
|
|
||||||
(maxTokens || DEFAULT_MAX_TOKENS) * effortRatio
|
(maxTokens || DEFAULT_MAX_TOKENS) * effortRatio
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -437,7 +444,8 @@ export function getAnthropicReasoningParams(
|
|||||||
|
|
||||||
// Claude 推理参数
|
// Claude 推理参数
|
||||||
if (isSupportedThinkingTokenClaudeModel(model)) {
|
if (isSupportedThinkingTokenClaudeModel(model)) {
|
||||||
const budgetTokens = getAnthropicThinkingBudget(assistant, model)
|
const { maxTokens } = getAssistantSettings(assistant)
|
||||||
|
const budgetTokens = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
thinking: {
|
thinking: {
|
||||||
@ -560,7 +568,8 @@ export function getBedrockReasoningParams(
|
|||||||
return {}
|
return {}
|
||||||
}
|
}
|
||||||
|
|
||||||
const budgetTokens = getAnthropicThinkingBudget(assistant, model)
|
const { maxTokens } = getAssistantSettings(assistant)
|
||||||
|
const budgetTokens = getAnthropicThinkingBudget(maxTokens, reasoningEffort, model.id)
|
||||||
return {
|
return {
|
||||||
reasoningConfig: {
|
reasoningConfig: {
|
||||||
type: 'enabled',
|
type: 'enabled',
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
import { describe, expect, it, vi } from 'vitest'
|
import { describe, expect, it, vi } from 'vitest'
|
||||||
|
|
||||||
import {
|
import {
|
||||||
|
findTokenLimit,
|
||||||
isDoubaoSeedAfter251015,
|
isDoubaoSeedAfter251015,
|
||||||
isDoubaoThinkingAutoModel,
|
isDoubaoThinkingAutoModel,
|
||||||
isGeminiReasoningModel,
|
isGeminiReasoningModel,
|
||||||
@ -518,3 +519,35 @@ describe('Gemini Models', () => {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe('findTokenLimit', () => {
  type TokenRange = { min: number; max: number }

  // Builds one table row: a model id paired with the { min, max } range that
  // findTokenLimit is expected to return for it.
  const row = (modelId: string, min: number, max: number): { modelId: string; expected: TokenRange } => ({
    modelId,
    expected: { min, max }
  })

  const cases = [
    row('gemini-2.5-flash-lite-exp', 512, 24_576),
    row('gemini-1.5-flash', 0, 24_576),
    row('gemini-1.5-pro-001', 128, 32_768),
    row('qwen3-235b-a22b-thinking-2507', 0, 81_920),
    row('qwen3-30b-a3b-thinking-2507', 0, 81_920),
    row('qwen3-vl-235b-a22b-thinking', 0, 81_920),
    row('qwen3-vl-30b-a3b-thinking', 0, 81_920),
    row('qwen-plus-2025-07-14', 0, 38_912),
    row('qwen-plus-2025-04-28', 0, 38_912),
    row('qwen3-1.7b', 0, 30_720),
    row('qwen3-0.6b', 0, 30_720),
    row('qwen-plus-ultra', 0, 81_920),
    row('qwen-turbo-pro', 0, 38_912),
    row('qwen-flash-lite', 0, 81_920),
    row('qwen3-7b', 1_024, 38_912),
    row('claude-3.7-sonnet-extended', 1_024, 64_000),
    row('claude-sonnet-4.1', 1_024, 64_000),
    row('claude-sonnet-4-5-20250929', 1_024, 64_000),
    row('claude-opus-4-1-extended', 1_024, 32_000)
  ]

  // One generated test per row; `$modelId` is filled in from the row object.
  it.each(cases)('returns correct limits for $modelId', ({ modelId, expected }) => {
    const limit = findTokenLimit(modelId)
    expect(limit).toEqual(expected)
  })

  it('returns undefined for unknown models', () => {
    const limit = findTokenLimit('unknown-model')
    expect(limit).toBeUndefined()
  })
})
|
||||||
|
|||||||
@ -14,6 +14,11 @@ vi.mock('@logger', async () => {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Mock uuid globally for renderer tests
|
||||||
|
vi.mock('uuid', () => {
  // Date.now() alone repeats for every call made within the same millisecond,
  // so consecutive v4() calls could return identical "unique" ids. A counter
  // suffix guarantees distinct values while keeping the original prefix and
  // timestamp. The counter lives inside the factory because vi.mock calls are
  // hoisted above module-level declarations.
  let sequence = 0

  return {
    v4: () => `test-uuid-${Date.now()}-${sequence++}`
  }
})
|
||||||
|
|
||||||
vi.mock('axios', () => {
|
vi.mock('axios', () => {
|
||||||
const defaultAxiosMock = {
|
const defaultAxiosMock = {
|
||||||
get: vi.fn().mockResolvedValue({ data: {} }), // Mocking axios GET request
|
get: vi.fn().mockResolvedValue({ data: {} }), // Mocking axios GET request
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user