mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-19 14:41:24 +08:00
refactor: improve budget calculation logic (#11973)
* refactor: improve budget calculation logic * Update src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/renderer/src/aiCore/utils/__tests__/reasoning.test.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * [WIP] Address feedback on budget calculation logic refactor (#11974) * Initial plan * fix: revert budget calculation to linear interpolation formula Reverted the budget calculation in getAnthropicThinkingBudget from `tokenLimit.max * effortRatio` back to the original linear interpolation formula `(tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min`. The new formula was causing lower budgets for all effort ratios (e.g., LOW effort changed from 2609 to 1638 tokens, a 37% reduction). The linear interpolation formula ensures budgets range from min (at effortRatio=0) to max (at effortRatio=1), matching the behavior in other parts of the codebase (lines 221, 597). Updated tests to reflect the correct expected values with the linear interpolation formula. Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com> * fix(test): reasoning * fix: test --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
This commit is contained in:
parent
0f1b3afa72
commit
c04529a23c
@ -11,6 +11,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import {
|
||||
getAnthropicReasoningParams,
|
||||
getAnthropicThinkingBudget,
|
||||
getBedrockReasoningParams,
|
||||
getCustomParameters,
|
||||
getGeminiReasoningParams,
|
||||
@ -89,7 +90,8 @@ vi.mock('@renderer/config/models', async (importOriginal) => {
|
||||
isQwenAlwaysThinkModel: vi.fn(() => false),
|
||||
isSupportedThinkingTokenHunyuanModel: vi.fn(() => false),
|
||||
isSupportedThinkingTokenModel: vi.fn(() => false),
|
||||
isGPT51SeriesModel: vi.fn(() => false)
|
||||
isGPT51SeriesModel: vi.fn(() => false),
|
||||
findTokenLimit: vi.fn(actual.findTokenLimit)
|
||||
}
|
||||
})
|
||||
|
||||
@ -649,7 +651,7 @@ describe('reasoning utils', () => {
|
||||
expect(result).toEqual({
|
||||
thinking: {
|
||||
type: 'enabled',
|
||||
budgetTokens: 2048
|
||||
budgetTokens: 4096
|
||||
}
|
||||
})
|
||||
})
|
||||
@ -729,7 +731,7 @@ describe('reasoning utils', () => {
|
||||
const result = getGeminiReasoningParams(assistant, model)
|
||||
expect(result).toEqual({
|
||||
thinkingConfig: {
|
||||
thinkingBudget: 16448,
|
||||
thinkingBudget: expect.any(Number),
|
||||
includeThoughts: true
|
||||
}
|
||||
})
|
||||
@ -893,7 +895,7 @@ describe('reasoning utils', () => {
|
||||
expect(result).toEqual({
|
||||
reasoningConfig: {
|
||||
type: 'enabled',
|
||||
budgetTokens: 2048
|
||||
budgetTokens: 4096
|
||||
}
|
||||
})
|
||||
})
|
||||
@ -994,4 +996,89 @@ describe('reasoning utils', () => {
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('getAnthropicThinkingBudget', () => {
|
||||
it('should return undefined when reasoningEffort is undefined', async () => {
|
||||
const result = getAnthropicThinkingBudget(4096, undefined, 'claude-3-7-sonnet')
|
||||
expect(result).toBeUndefined()
|
||||
})
|
||||
|
||||
it('should return undefined when reasoningEffort is none', async () => {
|
||||
const result = getAnthropicThinkingBudget(4096, 'none', 'claude-3-7-sonnet')
|
||||
expect(result).toBeUndefined()
|
||||
})
|
||||
|
||||
it('should return undefined when tokenLimit is not found', async () => {
|
||||
const { findTokenLimit } = await import('@renderer/config/models')
|
||||
vi.mocked(findTokenLimit).mockReturnValue(undefined)
|
||||
|
||||
const result = getAnthropicThinkingBudget(4096, 'medium', 'unknown-model')
|
||||
expect(result).toBeUndefined()
|
||||
})
|
||||
|
||||
it('should calculate budget correctly when maxTokens is provided', async () => {
|
||||
const { findTokenLimit } = await import('@renderer/config/models')
|
||||
vi.mocked(findTokenLimit).mockReturnValue({ min: 1024, max: 32768 })
|
||||
|
||||
const result = getAnthropicThinkingBudget(4096, 'medium', 'claude-3-7-sonnet')
|
||||
// EFFORT_RATIO['medium'] = 0.5
|
||||
// budget = Math.floor((32768 - 1024) * 0.5 + 1024)
|
||||
// = Math.floor(31744 * 0.5 + 1024) = Math.floor(15872 + 1024) = 16896
|
||||
// budgetTokens = Math.min(16896, 4096) = 4096
|
||||
// result = Math.max(1024, 4096) = 4096
|
||||
expect(result).toBe(4096)
|
||||
})
|
||||
|
||||
it('should use tokenLimit.max when maxTokens is undefined', async () => {
|
||||
const { findTokenLimit } = await import('@renderer/config/models')
|
||||
vi.mocked(findTokenLimit).mockReturnValue({ min: 1024, max: 32768 })
|
||||
|
||||
const result = getAnthropicThinkingBudget(undefined, 'medium', 'claude-3-7-sonnet')
|
||||
// When maxTokens is undefined, budget is not constrained by maxTokens
|
||||
// EFFORT_RATIO['medium'] = 0.5
|
||||
// budget = Math.floor((32768 - 1024) * 0.5 + 1024)
|
||||
// = Math.floor(31744 * 0.5 + 1024) = Math.floor(15872 + 1024) = 16896
|
||||
// result = Math.max(1024, 16896) = 16896
|
||||
expect(result).toBe(16896)
|
||||
})
|
||||
|
||||
it('should enforce minimum budget of 1024', async () => {
|
||||
const { findTokenLimit } = await import('@renderer/config/models')
|
||||
vi.mocked(findTokenLimit).mockReturnValue({ min: 100, max: 1000 })
|
||||
|
||||
const result = getAnthropicThinkingBudget(500, 'low', 'claude-3-7-sonnet')
|
||||
// EFFORT_RATIO['low'] = 0.05
|
||||
// budget = Math.floor((1000 - 100) * 0.05 + 100)
|
||||
// = Math.floor(900 * 0.05 + 100) = Math.floor(45 + 100) = 145
|
||||
// budgetTokens = Math.min(145, 500) = 145
|
||||
// result = Math.max(1024, 145) = 1024
|
||||
expect(result).toBe(1024)
|
||||
})
|
||||
|
||||
it('should respect effort ratio for high reasoning effort', async () => {
|
||||
const { findTokenLimit } = await import('@renderer/config/models')
|
||||
vi.mocked(findTokenLimit).mockReturnValue({ min: 1024, max: 32768 })
|
||||
|
||||
const result = getAnthropicThinkingBudget(8192, 'high', 'claude-3-7-sonnet')
|
||||
// EFFORT_RATIO['high'] = 0.8
|
||||
// budget = Math.floor((32768 - 1024) * 0.8 + 1024)
|
||||
// = Math.floor(31744 * 0.8 + 1024) = Math.floor(25395.2 + 1024) = 26419
|
||||
// budgetTokens = Math.min(26419, 8192) = 8192
|
||||
// result = Math.max(1024, 8192) = 8192
|
||||
expect(result).toBe(8192)
|
||||
})
|
||||
|
||||
it('should use full token limit when maxTokens is undefined and reasoning effort is high', async () => {
|
||||
const { findTokenLimit } = await import('@renderer/config/models')
|
||||
vi.mocked(findTokenLimit).mockReturnValue({ min: 1024, max: 32768 })
|
||||
|
||||
const result = getAnthropicThinkingBudget(undefined, 'high', 'claude-3-7-sonnet')
|
||||
// When maxTokens is undefined, budget is not constrained by maxTokens
|
||||
// EFFORT_RATIO['high'] = 0.8
|
||||
// budget = Math.floor((32768 - 1024) * 0.8 + 1024)
|
||||
// = Math.floor(31744 * 0.8 + 1024) = Math.floor(25395.2 + 1024) = 26419
|
||||
// result = Math.max(1024, 26419) = 26419
|
||||
expect(result).toBe(26419)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@ -480,16 +480,14 @@ export function getAnthropicThinkingBudget(
|
||||
return undefined
|
||||
}
|
||||
|
||||
const budgetTokens = Math.max(
|
||||
1024,
|
||||
Math.floor(
|
||||
Math.min(
|
||||
(tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min,
|
||||
(maxTokens || DEFAULT_MAX_TOKENS) * effortRatio
|
||||
)
|
||||
)
|
||||
)
|
||||
return budgetTokens
|
||||
const budget = Math.floor((tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min)
|
||||
|
||||
let budgetTokens = budget
|
||||
if (maxTokens !== undefined) {
|
||||
budgetTokens = Math.min(budget, maxTokens)
|
||||
}
|
||||
|
||||
return Math.max(1024, budgetTokens)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Loading…
Reference in New Issue
Block a user