Mirror of https://github.com/CherryHQ/cherry-studio.git (synced 2025-12-28 05:11:24 +08:00)
feat(reasoning): enhance model reasoning control logic with support for more providers and model types
Add support for Hunyuan, DeepSeek hybrid inference, and other models. Improve the reasoning control logic for providers such as OpenRouter and Qwen. Unify how thinking is controlled across different providers. Add logging and error handling.
This commit is contained in:
parent bdbb2c2c75
commit a7b8b40301
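For orientation, the provider-specific "thinking" toggles unified by this commit can be summarized with a short sketch. This is an illustration only, not the project's code: ThinkingToggle and enableThinkingFor are hypothetical names, while the provider ids and field names are taken from the diff below.

// Illustration only: the real logic is getReasoningEffort in the diff below.
type ThinkingToggle =
  | { enable_thinking: boolean; incremental_output?: boolean } // Qwen / Hunyuan / DashScope-hosted DeepSeek
  | { thinking: { type: 'enabled' | 'disabled' | 'auto' } } // Doubao
  | { reasoning: { enabled?: boolean; effort?: string } } // OpenRouter
  | { reasoning_effort: string } // OpenAI / Grok / Perplexity style
  | { chat_template_kwargs: { thinking: boolean } } // NVIDIA
  | Record<string, never> // no extra fields

// Hypothetical helper mapping a provider id to its "enable thinking" shape.
function enableThinkingFor(providerId: string): ThinkingToggle {
  switch (providerId) {
    case 'dashscope':
      return { enable_thinking: true, incremental_output: true }
    case 'doubao':
      return { thinking: { type: 'enabled' } }
    case 'openrouter':
      return { reasoning: { enabled: true } }
    case 'nvidia':
      return { chat_template_kwargs: { thinking: true } }
    default:
      return {}
  }
}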
@@ -1,8 +1,15 @@
import { loggerService } from '@logger'
import { DEFAULT_MAX_TOKENS } from '@renderer/config/constant'
import {
findTokenLimit,
GEMINI_FLASH_MODEL_REGEX,
getThinkModelType,
isDeepSeekHybridInferenceModel,
isDoubaoThinkingAutoModel,
isGrokReasoningModel,
isOpenAIReasoningModel,
isQwenAlwaysThinkModel,
isQwenReasoningModel,
isReasoningModel,
isSupportedReasoningEffortGrokModel,
isSupportedReasoningEffortModel,
@@ -10,15 +17,21 @@ import {
isSupportedThinkingTokenClaudeModel,
isSupportedThinkingTokenDoubaoModel,
isSupportedThinkingTokenGeminiModel,
isSupportedThinkingTokenHunyuanModel,
isSupportedThinkingTokenModel,
isSupportedThinkingTokenQwenModel
isSupportedThinkingTokenQwenModel,
MODEL_SUPPORTED_REASONING_EFFORT
} from '@renderer/config/models'
import { isSupportEnableThinkingProvider } from '@renderer/config/providers'
import { getStoreSetting } from '@renderer/hooks/useSettings'
import { getAssistantSettings, getProviderByModel } from '@renderer/services/AssistantService'
import { SettingsState } from '@renderer/store/settings'
import { Assistant, EFFORT_RATIO, Model } from '@renderer/types'
import { Assistant, EFFORT_RATIO, isSystemProvider, Model, SystemProviderIds } from '@renderer/types'
import { ReasoningEffortOptionalParams } from '@renderer/types/sdk'

const logger = loggerService.withContext('reasoning')

// The function is only for generic provider. May extract some logics to independent provider
export function getReasoningEffort(assistant: Assistant, model: Model): ReasoningEffortOptionalParams {
const provider = getProviderByModel(model)
if (provider.id === 'groq') {
@@ -31,62 +44,35 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
const reasoningEffort = assistant?.settings?.reasoning_effort

if (!reasoningEffort) {
if (model.provider === 'openrouter') {
return { reasoning: { enabled: false } }
}
if (isSupportedThinkingTokenQwenModel(model)) {
return { enable_thinking: false }
}

if (isSupportedThinkingTokenClaudeModel(model)) {
return {}
}

if (isSupportedThinkingTokenGeminiModel(model)) {
if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
return { reasoning_effort: 'none' }
}
return {}
}

if (isSupportedThinkingTokenDoubaoModel(model)) {
return { thinking: { type: 'disabled' } }
}

return {}
}

// Doubao thinking mode support
if (isSupportedThinkingTokenDoubaoModel(model)) {
// reasoningEffort is empty: enabled by default
if (!reasoningEffort) {
return { thinking: { type: 'disabled' } }
}
if (reasoningEffort === 'high') {
return { thinking: { type: 'enabled' } }
}
if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
return { thinking: { type: 'auto' } }
}
// Other cases do not include the thinking field
return {}
}

if (!reasoningEffort) {
if (model.provider === 'openrouter') {
// openrouter: use reasoning
if (model.provider === SystemProviderIds.openrouter) {
// Don't disable reasoning for Gemini models that support thinking tokens
if (isSupportedThinkingTokenGeminiModel(model) && !GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
return {}
}
// Don't disable reasoning for models that require it
if (isGrokReasoningModel(model) || isOpenAIReasoningModel(model)) {
return {}
}
return { reasoning: { enabled: false, exclude: true } }
}
if (isSupportedThinkingTokenQwenModel(model)) {

// providers that use enable_thinking
if (
isSupportEnableThinkingProvider(provider) &&
(isSupportedThinkingTokenQwenModel(model) ||
isSupportedThinkingTokenHunyuanModel(model) ||
(provider.id === SystemProviderIds.dashscope && isDeepSeekHybridInferenceModel(model)))
) {
return { enable_thinking: false }
}

// claude
if (isSupportedThinkingTokenClaudeModel(model)) {
return {}
}

// gemini
if (isSupportedThinkingTokenGeminiModel(model)) {
if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
return {
@@ -108,13 +94,50 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin

return {}
}
const effortRatio = EFFORT_RATIO[reasoningEffort]
const budgetTokens = Math.floor(
(findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio + findTokenLimit(model.id)?.min!
)

// Cases where reasoningEffort is set
// DeepSeek hybrid inference models, v3.1 and maybe more in the future
// Different providers have different ways to control thinking; they are handled uniformly here
if (isDeepSeekHybridInferenceModel(model)) {
if (isSystemProvider(provider)) {
switch (provider.id) {
case SystemProviderIds.dashscope:
return {
enable_thinking: true,
incremental_output: true
}
case SystemProviderIds.silicon:
return {
enable_thinking: true
}
case SystemProviderIds.doubao:
return {
thinking: {
type: 'enabled' // auto is invalid
}
}
case SystemProviderIds.openrouter:
return {
reasoning: {
enabled: true
}
}
case 'nvidia':
return {
chat_template_kwargs: {
thinking: true
}
}
default:
logger.warn(
`Skipping thinking options for provider ${provider.name} as DeepSeek v3.1 thinking control method is unknown`
)
}
}
}

// OpenRouter models
if (model.provider === 'openrouter') {
if (model.provider === SystemProviderIds.openrouter) {
if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
return {
reasoning: {
@@ -124,28 +147,75 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
}
}

// Qwen models
if (isSupportedThinkingTokenQwenModel(model)) {
return {
enable_thinking: true,
// Doubao thinking mode support
if (isSupportedThinkingTokenDoubaoModel(model)) {
// reasoningEffort is empty: enabled by default
if (reasoningEffort === 'high') {
return { thinking: { type: 'enabled' } }
}
if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
return { thinking: { type: 'auto' } }
}
// Other cases do not include the thinking field
return {}
}

const effortRatio = EFFORT_RATIO[reasoningEffort]
const budgetTokens = Math.floor(
(findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio + findTokenLimit(model.id)?.min!
)

// OpenRouter models, use thinking
if (model.provider === SystemProviderIds.openrouter) {
if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
return {
reasoning: {
effort: reasoningEffort === 'auto' ? 'medium' : reasoningEffort
}
}
}
}

// Qwen models, use enable_thinking
if (isQwenReasoningModel(model)) {
const thinkConfig = {
enable_thinking: isQwenAlwaysThinkModel(model) || !isSupportEnableThinkingProvider(provider) ? undefined : true,
thinking_budget: budgetTokens
}
if (provider.id === SystemProviderIds.dashscope) {
return {
...thinkConfig,
incremental_output: true
}
}
return thinkConfig
}

// Grok models
if (isSupportedReasoningEffortGrokModel(model)) {
// Hunyuan models, use enable_thinking
if (isSupportedThinkingTokenHunyuanModel(model) && isSupportEnableThinkingProvider(provider)) {
return {
reasoningEffort: reasoningEffort
enable_thinking: true
}
}

// OpenAI models
if (isSupportedReasoningEffortOpenAIModel(model)) {
return {
reasoningEffort: reasoningEffort
// Grok models/Perplexity models/OpenAI models, use reasoning_effort
if (isSupportedReasoningEffortModel(model)) {
// Check whether the model supports the selected option
const modelType = getThinkModelType(model)
const supportedOptions = MODEL_SUPPORTED_REASONING_EFFORT[modelType]
if (supportedOptions.includes(reasoningEffort)) {
return {
reasoning_effort: reasoningEffort
}
} else {
// If not supported, fall back to the first supported value
return {
reasoning_effort: supportedOptions[0]
}
}
}

// gemini series, openai compatible api
if (isSupportedThinkingTokenGeminiModel(model)) {
if (reasoningEffort === 'auto') {
return {
@@ -171,7 +241,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
}
}

// Claude models
// Claude models, openai compatible api
if (isSupportedThinkingTokenClaudeModel(model)) {
const maxTokens = assistant.settings?.maxTokens
return {
@@ -184,7 +254,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
}
}

// Doubao models
// Doubao models, use thinking
if (isSupportedThinkingTokenDoubaoModel(model)) {
if (assistant.settings?.reasoning_effort === 'high') {
return {
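For context, the object returned by getReasoningEffort is meant to be merged into the provider request body. A hypothetical caller sketch (not part of this diff; the import path and buildRequestBody are illustrative):

// Hypothetical caller sketch; only getReasoningEffort(assistant, model) comes from the diff above.
import type { Assistant, Model } from '@renderer/types'
import { getReasoningEffort } from './reasoning' // illustrative path

function buildRequestBody(assistant: Assistant, model: Model, messages: unknown[]) {
  return {
    model: model.id,
    messages,
    // e.g. { enable_thinking: true }, { thinking: { type: 'enabled' } } or { reasoning_effort: 'high' }
    ...getReasoningEffort(assistant, model)
  }
}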