From bd4ba47e61e90183c03f5d89df32de7e2920de1c Mon Sep 17 00:00:00 2001 From: Phantom <59059173+EurFelux@users.noreply.github.com> Date: Tue, 26 Aug 2025 17:43:29 +0800 Subject: [PATCH] feat(models): support qwen-flash & deepseek v3.1 (limited) (#9539) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(models): 添加 qwen-flash 模型支持并更新相关配置 * feat(ai): 添加对deepseek-v3.1模型的支持(dashscope) 添加deepseek_hybrid类型到ThinkModelTypes,并实现相关推理逻辑 * refactor(openai): 移除调试日志语句以清理代码 * feat(推理模型): 增强DeepSeek混合推理模型支持并优化逻辑顺序 重构DeepSeek混合推理模型的判断逻辑,支持更多版本格式 将Doubao思考模式处理逻辑移至更合理的位置 添加对DeepSeek模型在不同provider下的思考控制处理 * fix(deepseek): 支持openrouter的deepseek-chat-v3.1模型推理控制 统一处理不同provider的DeepSeek混合推理模型控制方式,添加对openrouter的deepseek-chat-v3.1模型支持 * fix(模型): 修正函数调用模型的判断逻辑 更新函数调用模型的判断条件,明确不支持v3.1函数调用的提供商并处理openrouter的特殊情况 * feat(openai): 为silicon模型添加enable_thinking配置 * fix(模型配置): 修正深度求索混合推理模型的函数调用支持逻辑 更新深度求索混合推理模型的函数调用判断逻辑,默认支持函数调用 * feat(模型支持): 为DeepSeek V3.1添加白名单支持 添加对DeepSeek V3.1混合推理模型的白名单支持,目前仅允许openrouter、dashscope和doubao作为提供商 * feat(config): 添加silicon到DeepSeek V3.1支持的白名单中 * feat(sdk): 添加对NVIDIA推理参数的支持 在ReasoningEffortOptionalParams类型中添加chat_template_kwargs参数,用于支持NVIDIA的推理配置。同时在模型支持列表中新增nvidia提供商,并在OpenAIApiClient中实现对应的参数处理逻辑。 * refactor(openai): 使用SystemProviderIds替换硬编码的provider id字符串 --- .../aiCore/clients/openai/OpenAIApiClient.ts | 92 ++++++++++++++----- src/renderer/src/config/models.ts | 51 ++++++++-- src/renderer/src/types/index.ts | 3 +- src/renderer/src/types/sdk.ts | 4 + 4 files changed, 121 insertions(+), 29 deletions(-) diff --git a/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts b/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts index f2bfeed026..089d1af799 100644 --- a/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts +++ b/src/renderer/src/aiCore/clients/openai/OpenAIApiClient.ts @@ -6,6 +6,7 @@ import { getOpenAIWebSearchParams, getThinkModelType, isClaudeReasoningModel, + isDeepSeekHybridInferenceModel, isDoubaoThinkingAutoModel, isGeminiReasoningModel, isGPT5SeriesModel, @@ -44,6 +45,7 @@ import { Assistant, EFFORT_RATIO, FileTypes, + isSystemProvider, MCPCallToolResponse, MCPTool, MCPToolResponse, @@ -113,7 +115,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient< */ // Method for reasoning effort, moved from OpenAIProvider override getReasoningEffort(assistant: Assistant, model: Model): ReasoningEffortOptionalParams { - if (this.provider.id === 'groq') { + if (this.provider.id === SystemProviderIds.groq) { return {} } @@ -122,22 +124,6 @@ export class OpenAIAPIClient extends OpenAIBaseClient< } const reasoningEffort = assistant?.settings?.reasoning_effort - // Doubao 思考模式支持 - if (isSupportedThinkingTokenDoubaoModel(model)) { - // reasoningEffort 为空,默认开启 enabled - if (!reasoningEffort) { - return { thinking: { type: 'disabled' } } - } - if (reasoningEffort === 'high') { - return { thinking: { type: 'enabled' } } - } - if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) { - return { thinking: { type: 'auto' } } - } - // 其他情况不带 thinking 字段 - return {} - } - if (isSupportedThinkingTokenZhipuModel(model)) { if (!reasoningEffort) { return { thinking: { type: 'disabled' } } @@ -146,7 +132,14 @@ export class OpenAIAPIClient extends OpenAIBaseClient< } if (!reasoningEffort) { - if (model.provider === 'openrouter') { + // DeepSeek hybrid inference models, v3.1 and maybe more in the future + // 不同的 provider 有不同的思考控制方式,在这里统一解决 + // if (isDeepSeekHybridInferenceModel(model)) { + // // do nothing for now. default to non-think. + // } + + // openrouter: use reasoning + if (model.provider === SystemProviderIds.openrouter) { // Don't disable reasoning for Gemini models that support thinking tokens if (isSupportedThinkingTokenGeminiModel(model) && !GEMINI_FLASH_MODEL_REGEX.test(model.id)) { return {} @@ -158,17 +151,22 @@ export class OpenAIAPIClient extends OpenAIBaseClient< return { reasoning: { enabled: false, exclude: true } } } + // providers that use enable_thinking if ( isSupportEnableThinkingProvider(this.provider) && - (isSupportedThinkingTokenQwenModel(model) || isSupportedThinkingTokenHunyuanModel(model)) + (isSupportedThinkingTokenQwenModel(model) || + isSupportedThinkingTokenHunyuanModel(model) || + (this.provider.id === SystemProviderIds.dashscope && isDeepSeekHybridInferenceModel(model))) ) { return { enable_thinking: false } } + // claude if (isSupportedThinkingTokenClaudeModel(model)) { return {} } + // gemini if (isSupportedThinkingTokenGeminiModel(model)) { if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) { return { @@ -197,8 +195,48 @@ export class OpenAIAPIClient extends OpenAIBaseClient< (findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio + findTokenLimit(model.id)?.min! ) + // DeepSeek hybrid inference models, v3.1 and maybe more in the future + // 不同的 provider 有不同的思考控制方式,在这里统一解决 + if (isDeepSeekHybridInferenceModel(model)) { + if (isSystemProvider(this.provider)) { + switch (this.provider.id) { + case SystemProviderIds.dashscope: + return { + enable_thinking: true, + incremental_output: true + } + case SystemProviderIds.silicon: + return { + enable_thinking: true + } + case SystemProviderIds.doubao: + return { + thinking: { + type: 'enabled' // auto is invalid + } + } + case SystemProviderIds.openrouter: + return { + reasoning: { + enabled: true + } + } + case 'nvidia': + return { + chat_template_kwargs: { + thinking: true + } + } + default: + logger.warn( + `Skipping thinking options for provider ${this.provider.name} as DeepSeek v3.1 thinking control method is unknown` + ) + } + } + } + // OpenRouter models - if (model.provider === 'openrouter') { + if (model.provider === SystemProviderIds.openrouter) { if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) { return { reasoning: { @@ -208,6 +246,18 @@ export class OpenAIAPIClient extends OpenAIBaseClient< } } + // Doubao 思考模式支持 + if (isSupportedThinkingTokenDoubaoModel(model)) { + if (reasoningEffort === 'high') { + return { thinking: { type: 'enabled' } } + } + if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) { + return { thinking: { type: 'auto' } } + } + // 其他情况不带 thinking 字段 + return {} + } + // Qwen models if (isQwenReasoningModel(model)) { const thinkConfig = { @@ -215,7 +265,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient< isQwenAlwaysThinkModel(model) || !isSupportEnableThinkingProvider(this.provider) ? undefined : true, thinking_budget: budgetTokens } - if (this.provider.id === 'dashscope') { + if (this.provider.id === SystemProviderIds.dashscope) { return { ...thinkConfig, incremental_output: true diff --git a/src/renderer/src/config/models.ts b/src/renderer/src/config/models.ts index 5425c918db..fb2ee8dd67 100644 --- a/src/renderer/src/config/models.ts +++ b/src/renderer/src/config/models.ts @@ -150,6 +150,7 @@ import YoudaoLogo from '@renderer/assets/images/providers/netease-youdao.svg' import NomicLogo from '@renderer/assets/images/providers/nomic.png' import { getProviderByModel } from '@renderer/services/AssistantService' import { + isSystemProviderId, Model, ReasoningEffortConfig, SystemProviderId, @@ -290,6 +291,7 @@ export const CLAUDE_SUPPORTED_WEBSEARCH_REGEX = new RegExp( ) // 模型类型到支持的reasoning_effort的映射表 +// TODO: refactor this. too many identical options export const MODEL_SUPPORTED_REASONING_EFFORT: ReasoningEffortConfig = { default: ['low', 'medium', 'high'] as const, o: ['low', 'medium', 'high'] as const, @@ -303,7 +305,8 @@ export const MODEL_SUPPORTED_REASONING_EFFORT: ReasoningEffortConfig = { doubao_no_auto: ['high'] as const, hunyuan: ['auto'] as const, zhipu: ['auto'] as const, - perplexity: ['low', 'medium', 'high'] as const + perplexity: ['low', 'medium', 'high'] as const, + deepseek_hybrid: ['auto'] as const } as const // 模型类型到支持选项的映射表 @@ -320,7 +323,8 @@ export const MODEL_SUPPORTED_OPTIONS: ThinkingOptionConfig = { doubao_no_auto: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.doubao_no_auto] as const, hunyuan: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.hunyuan] as const, zhipu: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.zhipu] as const, - perplexity: MODEL_SUPPORTED_REASONING_EFFORT.perplexity + perplexity: MODEL_SUPPORTED_REASONING_EFFORT.perplexity, + deepseek_hybrid: ['off', ...MODEL_SUPPORTED_REASONING_EFFORT.deepseek_hybrid] as const } as const export const getThinkModelType = (model: Model): ThinkingModelType => { @@ -350,6 +354,7 @@ export const getThinkModelType = (model: Model): ThinkingModelType => { } else if (isSupportedThinkingTokenHunyuanModel(model)) thinkingModelType = 'hunyuan' else if (isSupportedReasoningEffortPerplexityModel(model)) thinkingModelType = 'perplexity' else if (isSupportedThinkingTokenZhipuModel(model)) thinkingModelType = 'zhipu' + else if (isDeepSeekHybridInferenceModel(model)) thinkingModelType = 'deepseek_hybrid' return thinkingModelType } @@ -372,11 +377,21 @@ export function isFunctionCallingModel(model?: Model): boolean { return FUNCTION_CALLING_REGEX.test(modelId) || FUNCTION_CALLING_REGEX.test(model.name) } - if (['deepseek', 'anthropic'].includes(model.provider)) { + if (['deepseek', 'anthropic', 'kimi', 'moonshot'].includes(model.provider)) { return true } - if (['kimi', 'moonshot'].includes(model.provider)) { + // 2025/08/26 百炼与火山引擎均不支持 v3.1 函数调用 + // 先默认支持 + if (isDeepSeekHybridInferenceModel(model)) { + if (isSystemProviderId(model.provider)) { + switch (model.provider) { + case 'dashscope': + case 'doubao': + // case 'nvidia': // nvidia api 太烂了 测不了能不能用 先假设能用 + return false + } + } return true } @@ -1401,7 +1416,7 @@ export const SYSTEM_MODELS: Record = dashscope: [ { id: 'qwen-vl-plus', name: 'qwen-vl-plus', provider: 'dashscope', group: 'qwen-vl', owned_by: 'system' }, { id: 'qwen-coder-plus', name: 'qwen-coder-plus', provider: 'dashscope', group: 'qwen-coder', owned_by: 'system' }, - { id: 'qwen-turbo', name: 'qwen-turbo', provider: 'dashscope', group: 'qwen-turbo', owned_by: 'system' }, + { id: 'qwen-flash', name: 'qwen-flash', provider: 'dashscope', group: 'qwen-flash', owned_by: 'system' }, { id: 'qwen-plus', name: 'qwen-plus', provider: 'dashscope', group: 'qwen-plus', owned_by: 'system' }, { id: 'qwen-max', name: 'qwen-max', provider: 'dashscope', group: 'qwen-max', owned_by: 'system' } ], @@ -2627,6 +2642,13 @@ export function isSupportedThinkingTokenModel(model?: Model): boolean { return false } + // Specifically for DeepSeek V3.1. White list for now + if (isDeepSeekHybridInferenceModel(model)) { + return (['openrouter', 'dashscope', 'doubao', 'silicon', 'nvidia'] satisfies SystemProviderId[]).some( + (id) => id === model.provider + ) + } + return ( isSupportedThinkingTokenGeminiModel(model) || isSupportedThinkingTokenQwenModel(model) || @@ -2764,7 +2786,9 @@ export function isSupportedThinkingTokenQwenModel(model?: Model): boolean { 'qwen-turbo-0428', 'qwen-turbo-2025-04-28', 'qwen-turbo-0715', - 'qwen-turbo-2025-07-15' + 'qwen-turbo-2025-07-15', + 'qwen-flash', + 'qwen-flash-2025-07-28' ].includes(modelId) } @@ -2838,6 +2862,15 @@ export const isSupportedThinkingTokenZhipuModel = (model: Model): boolean => { return modelId.includes('glm-4.5') } +export const isDeepSeekHybridInferenceModel = (model: Model) => { + const modelId = getLowerBaseModelName(model.id) + // deepseek官方使用chat和reasoner做推理控制,其他provider需要单独判断,id可能会有所差别 + // openrouter: deepseek/deepseek-chat-v3.1 不知道会不会有其他provider仿照ds官方分出一个同id的作为非思考模式的模型,这里有风险 + return /deepseek-v3(?:\.1|-1-\d+)?/.test(modelId) || modelId === 'deepseek-chat-v3.1' +} + +export const isSupportedThinkingTokenDeepSeekModel = isDeepSeekHybridInferenceModel + export const isZhipuReasoningModel = (model?: Model): boolean => { if (!model) { return false @@ -2870,6 +2903,8 @@ export function isReasoningModel(model?: Model): boolean { REASONING_REGEX.test(modelId) || REASONING_REGEX.test(model.name) || isSupportedThinkingTokenDoubaoModel(model) || + isDeepSeekHybridInferenceModel(model) || + isDeepSeekHybridInferenceModel({ ...model, id: model.name }) || false ) } @@ -2884,6 +2919,7 @@ export function isReasoningModel(model?: Model): boolean { isPerplexityReasoningModel(model) || isZhipuReasoningModel(model) || isStepReasoningModel(model) || + isDeepSeekHybridInferenceModel(model) || modelId.includes('magistral') || modelId.includes('minimax-m1') || modelId.includes('pangu-pro-moe') @@ -2992,7 +3028,7 @@ export function isWebSearchModel(model: Model): boolean { } if (provider.id === 'dashscope') { - const models = ['qwen-turbo', 'qwen-max', 'qwen-plus', 'qwq'] + const models = ['qwen-turbo', 'qwen-max', 'qwen-plus', 'qwq', 'qwen-flash'] // matches id like qwen-max-0919, qwen-max-latest return models.some((i) => modelId.startsWith(i)) } @@ -3196,6 +3232,7 @@ export const THINKING_TOKEN_MAP: Record = 'qwen3-0\\.6b$': { min: 0, max: 30_720 }, 'qwen-plus.*$': { min: 0, max: 38_912 }, 'qwen-turbo.*$': { min: 0, max: 38_912 }, + 'qwen-flash.*$': { min: 0, max: 81_920 }, 'qwen3-.*$': { min: 1024, max: 38_912 }, // Claude models diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts index ee35d7202f..76fbf9a687 100644 --- a/src/renderer/src/types/index.ts +++ b/src/renderer/src/types/index.ts @@ -67,7 +67,8 @@ const ThinkModelTypes = [ 'doubao_no_auto', 'hunyuan', 'zhipu', - 'perplexity' + 'perplexity', + 'deepseek_hybrid' ] as const export type ReasoningEffortOption = NonNullable | 'auto' diff --git a/src/renderer/src/types/sdk.ts b/src/renderer/src/types/sdk.ts index e897098963..4c7fa299d4 100644 --- a/src/renderer/src/types/sdk.ts +++ b/src/renderer/src/types/sdk.ts @@ -81,6 +81,10 @@ export type ReasoningEffortOptionalParams = { thinking_budget?: number incremental_output?: boolean enable_reasoning?: boolean + // nvidia + chat_template_kwargs?: { + thinking: boolean + } extra_body?: { google?: { thinking_config: {