mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-10 07:19:02 +08:00
feat(reasoning): 增强模型推理控制逻辑,支持更多提供商和模型类型
添加对Hunyuan、DeepSeek混合推理等模型的支持;优化OpenRouter、Qwen等提供商的推理控制逻辑;统一不同提供商的思考控制方式;添加日志记录和错误处理。
This commit is contained in:
parent
bdbb2c2c75
commit
a7b8b40301
@ -1,8 +1,15 @@
|
|||||||
|
import { loggerService } from '@logger'
|
||||||
import { DEFAULT_MAX_TOKENS } from '@renderer/config/constant'
|
import { DEFAULT_MAX_TOKENS } from '@renderer/config/constant'
|
||||||
import {
|
import {
|
||||||
findTokenLimit,
|
findTokenLimit,
|
||||||
GEMINI_FLASH_MODEL_REGEX,
|
GEMINI_FLASH_MODEL_REGEX,
|
||||||
|
getThinkModelType,
|
||||||
|
isDeepSeekHybridInferenceModel,
|
||||||
isDoubaoThinkingAutoModel,
|
isDoubaoThinkingAutoModel,
|
||||||
|
isGrokReasoningModel,
|
||||||
|
isOpenAIReasoningModel,
|
||||||
|
isQwenAlwaysThinkModel,
|
||||||
|
isQwenReasoningModel,
|
||||||
isReasoningModel,
|
isReasoningModel,
|
||||||
isSupportedReasoningEffortGrokModel,
|
isSupportedReasoningEffortGrokModel,
|
||||||
isSupportedReasoningEffortModel,
|
isSupportedReasoningEffortModel,
|
||||||
@ -10,15 +17,21 @@ import {
|
|||||||
isSupportedThinkingTokenClaudeModel,
|
isSupportedThinkingTokenClaudeModel,
|
||||||
isSupportedThinkingTokenDoubaoModel,
|
isSupportedThinkingTokenDoubaoModel,
|
||||||
isSupportedThinkingTokenGeminiModel,
|
isSupportedThinkingTokenGeminiModel,
|
||||||
|
isSupportedThinkingTokenHunyuanModel,
|
||||||
isSupportedThinkingTokenModel,
|
isSupportedThinkingTokenModel,
|
||||||
isSupportedThinkingTokenQwenModel
|
isSupportedThinkingTokenQwenModel,
|
||||||
|
MODEL_SUPPORTED_REASONING_EFFORT
|
||||||
} from '@renderer/config/models'
|
} from '@renderer/config/models'
|
||||||
|
import { isSupportEnableThinkingProvider } from '@renderer/config/providers'
|
||||||
import { getStoreSetting } from '@renderer/hooks/useSettings'
|
import { getStoreSetting } from '@renderer/hooks/useSettings'
|
||||||
import { getAssistantSettings, getProviderByModel } from '@renderer/services/AssistantService'
|
import { getAssistantSettings, getProviderByModel } from '@renderer/services/AssistantService'
|
||||||
import { SettingsState } from '@renderer/store/settings'
|
import { SettingsState } from '@renderer/store/settings'
|
||||||
import { Assistant, EFFORT_RATIO, Model } from '@renderer/types'
|
import { Assistant, EFFORT_RATIO, isSystemProvider, Model, SystemProviderIds } from '@renderer/types'
|
||||||
import { ReasoningEffortOptionalParams } from '@renderer/types/sdk'
|
import { ReasoningEffortOptionalParams } from '@renderer/types/sdk'
|
||||||
|
|
||||||
|
const logger = loggerService.withContext('reasoning')
|
||||||
|
|
||||||
|
// The function is only for generic provider. May extract some logics to independent provider
|
||||||
export function getReasoningEffort(assistant: Assistant, model: Model): ReasoningEffortOptionalParams {
|
export function getReasoningEffort(assistant: Assistant, model: Model): ReasoningEffortOptionalParams {
|
||||||
const provider = getProviderByModel(model)
|
const provider = getProviderByModel(model)
|
||||||
if (provider.id === 'groq') {
|
if (provider.id === 'groq') {
|
||||||
@ -31,62 +44,35 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
|
|||||||
const reasoningEffort = assistant?.settings?.reasoning_effort
|
const reasoningEffort = assistant?.settings?.reasoning_effort
|
||||||
|
|
||||||
if (!reasoningEffort) {
|
if (!reasoningEffort) {
|
||||||
if (model.provider === 'openrouter') {
|
// openrouter: use reasoning
|
||||||
return { reasoning: { enabled: false } }
|
if (model.provider === SystemProviderIds.openrouter) {
|
||||||
}
|
// Don't disable reasoning for Gemini models that support thinking tokens
|
||||||
if (isSupportedThinkingTokenQwenModel(model)) {
|
|
||||||
return { enable_thinking: false }
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isSupportedThinkingTokenClaudeModel(model)) {
|
|
||||||
return {}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isSupportedThinkingTokenGeminiModel(model)) {
|
|
||||||
if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
|
|
||||||
return { reasoning_effort: 'none' }
|
|
||||||
}
|
|
||||||
return {}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isSupportedThinkingTokenDoubaoModel(model)) {
|
|
||||||
return { thinking: { type: 'disabled' } }
|
|
||||||
}
|
|
||||||
|
|
||||||
return {}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Doubao 思考模式支持
|
|
||||||
if (isSupportedThinkingTokenDoubaoModel(model)) {
|
|
||||||
// reasoningEffort 为空,默认开启 enabled
|
|
||||||
if (!reasoningEffort) {
|
|
||||||
return { thinking: { type: 'disabled' } }
|
|
||||||
}
|
|
||||||
if (reasoningEffort === 'high') {
|
|
||||||
return { thinking: { type: 'enabled' } }
|
|
||||||
}
|
|
||||||
if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
|
|
||||||
return { thinking: { type: 'auto' } }
|
|
||||||
}
|
|
||||||
// 其他情况不带 thinking 字段
|
|
||||||
return {}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!reasoningEffort) {
|
|
||||||
if (model.provider === 'openrouter') {
|
|
||||||
if (isSupportedThinkingTokenGeminiModel(model) && !GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
|
if (isSupportedThinkingTokenGeminiModel(model) && !GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
|
||||||
return {}
|
return {}
|
||||||
}
|
}
|
||||||
|
// Don't disable reasoning for models that require it
|
||||||
|
if (isGrokReasoningModel(model) || isOpenAIReasoningModel(model)) {
|
||||||
|
return {}
|
||||||
|
}
|
||||||
return { reasoning: { enabled: false, exclude: true } }
|
return { reasoning: { enabled: false, exclude: true } }
|
||||||
}
|
}
|
||||||
if (isSupportedThinkingTokenQwenModel(model)) {
|
|
||||||
|
// providers that use enable_thinking
|
||||||
|
if (
|
||||||
|
isSupportEnableThinkingProvider(provider) &&
|
||||||
|
(isSupportedThinkingTokenQwenModel(model) ||
|
||||||
|
isSupportedThinkingTokenHunyuanModel(model) ||
|
||||||
|
(provider.id === SystemProviderIds.dashscope && isDeepSeekHybridInferenceModel(model)))
|
||||||
|
) {
|
||||||
return { enable_thinking: false }
|
return { enable_thinking: false }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// claude
|
||||||
if (isSupportedThinkingTokenClaudeModel(model)) {
|
if (isSupportedThinkingTokenClaudeModel(model)) {
|
||||||
return {}
|
return {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// gemini
|
||||||
if (isSupportedThinkingTokenGeminiModel(model)) {
|
if (isSupportedThinkingTokenGeminiModel(model)) {
|
||||||
if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
|
if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
|
||||||
return {
|
return {
|
||||||
@ -108,13 +94,50 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
|
|||||||
|
|
||||||
return {}
|
return {}
|
||||||
}
|
}
|
||||||
const effortRatio = EFFORT_RATIO[reasoningEffort]
|
|
||||||
const budgetTokens = Math.floor(
|
// reasoningEffort有效的情况
|
||||||
(findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio + findTokenLimit(model.id)?.min!
|
// DeepSeek hybrid inference models, v3.1 and maybe more in the future
|
||||||
)
|
// 不同的 provider 有不同的思考控制方式,在这里统一解决
|
||||||
|
if (isDeepSeekHybridInferenceModel(model)) {
|
||||||
|
if (isSystemProvider(provider)) {
|
||||||
|
switch (provider.id) {
|
||||||
|
case SystemProviderIds.dashscope:
|
||||||
|
return {
|
||||||
|
enable_thinking: true,
|
||||||
|
incremental_output: true
|
||||||
|
}
|
||||||
|
case SystemProviderIds.silicon:
|
||||||
|
return {
|
||||||
|
enable_thinking: true
|
||||||
|
}
|
||||||
|
case SystemProviderIds.doubao:
|
||||||
|
return {
|
||||||
|
thinking: {
|
||||||
|
type: 'enabled' // auto is invalid
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case SystemProviderIds.openrouter:
|
||||||
|
return {
|
||||||
|
reasoning: {
|
||||||
|
enabled: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case 'nvidia':
|
||||||
|
return {
|
||||||
|
chat_template_kwargs: {
|
||||||
|
thinking: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
logger.warn(
|
||||||
|
`Skipping thinking options for provider ${provider.name} as DeepSeek v3.1 thinking control method is unknown`
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// OpenRouter models
|
// OpenRouter models
|
||||||
if (model.provider === 'openrouter') {
|
if (model.provider === SystemProviderIds.openrouter) {
|
||||||
if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
|
if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
|
||||||
return {
|
return {
|
||||||
reasoning: {
|
reasoning: {
|
||||||
@ -124,28 +147,75 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Qwen models
|
// Doubao 思考模式支持
|
||||||
if (isSupportedThinkingTokenQwenModel(model)) {
|
if (isSupportedThinkingTokenDoubaoModel(model)) {
|
||||||
return {
|
// reasoningEffort 为空,默认开启 enabled
|
||||||
enable_thinking: true,
|
if (reasoningEffort === 'high') {
|
||||||
|
return { thinking: { type: 'enabled' } }
|
||||||
|
}
|
||||||
|
if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
|
||||||
|
return { thinking: { type: 'auto' } }
|
||||||
|
}
|
||||||
|
// 其他情况不带 thinking 字段
|
||||||
|
return {}
|
||||||
|
}
|
||||||
|
|
||||||
|
const effortRatio = EFFORT_RATIO[reasoningEffort]
|
||||||
|
const budgetTokens = Math.floor(
|
||||||
|
(findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio + findTokenLimit(model.id)?.min!
|
||||||
|
)
|
||||||
|
|
||||||
|
// OpenRouter models, use thinking
|
||||||
|
if (model.provider === SystemProviderIds.openrouter) {
|
||||||
|
if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
|
||||||
|
return {
|
||||||
|
reasoning: {
|
||||||
|
effort: reasoningEffort === 'auto' ? 'medium' : reasoningEffort
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Qwen models, use enable_thinking
|
||||||
|
if (isQwenReasoningModel(model)) {
|
||||||
|
const thinkConfig = {
|
||||||
|
enable_thinking: isQwenAlwaysThinkModel(model) || !isSupportEnableThinkingProvider(provider) ? undefined : true,
|
||||||
thinking_budget: budgetTokens
|
thinking_budget: budgetTokens
|
||||||
}
|
}
|
||||||
|
if (provider.id === SystemProviderIds.dashscope) {
|
||||||
|
return {
|
||||||
|
...thinkConfig,
|
||||||
|
incremental_output: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return thinkConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
// Grok models
|
// Hunyuan models, use enable_thinking
|
||||||
if (isSupportedReasoningEffortGrokModel(model)) {
|
if (isSupportedThinkingTokenHunyuanModel(model) && isSupportEnableThinkingProvider(provider)) {
|
||||||
return {
|
return {
|
||||||
reasoningEffort: reasoningEffort
|
enable_thinking: true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// OpenAI models
|
// Grok models/Perplexity models/OpenAI models, use reasoning_effort
|
||||||
if (isSupportedReasoningEffortOpenAIModel(model)) {
|
if (isSupportedReasoningEffortModel(model)) {
|
||||||
return {
|
// 检查模型是否支持所选选项
|
||||||
reasoningEffort: reasoningEffort
|
const modelType = getThinkModelType(model)
|
||||||
|
const supportedOptions = MODEL_SUPPORTED_REASONING_EFFORT[modelType]
|
||||||
|
if (supportedOptions.includes(reasoningEffort)) {
|
||||||
|
return {
|
||||||
|
reasoning_effort: reasoningEffort
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// 如果不支持,fallback到第一个支持的值
|
||||||
|
return {
|
||||||
|
reasoning_effort: supportedOptions[0]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// gemini series, openai compatible api
|
||||||
if (isSupportedThinkingTokenGeminiModel(model)) {
|
if (isSupportedThinkingTokenGeminiModel(model)) {
|
||||||
if (reasoningEffort === 'auto') {
|
if (reasoningEffort === 'auto') {
|
||||||
return {
|
return {
|
||||||
@ -171,7 +241,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Claude models
|
// Claude models, openai compatible api
|
||||||
if (isSupportedThinkingTokenClaudeModel(model)) {
|
if (isSupportedThinkingTokenClaudeModel(model)) {
|
||||||
const maxTokens = assistant.settings?.maxTokens
|
const maxTokens = assistant.settings?.maxTokens
|
||||||
return {
|
return {
|
||||||
@ -184,7 +254,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Doubao models
|
// Doubao models, use thinking
|
||||||
if (isSupportedThinkingTokenDoubaoModel(model)) {
|
if (isSupportedThinkingTokenDoubaoModel(model)) {
|
||||||
if (assistant.settings?.reasoning_effort === 'high') {
|
if (assistant.settings?.reasoning_effort === 'high') {
|
||||||
return {
|
return {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user