From 29c11733654da645d3cfc0afea9279a76b3b23d9 Mon Sep 17 00:00:00 2001
From: Copilot <198982749+Copilot@users.noreply.github.com>
Date: Tue, 28 Oct 2025 14:26:54 +0800
Subject: [PATCH] Fix Qwen3 thinking mode control for Ollama using aiCore
 middleware (#10947)

* Initial plan

* Fix Qwen3 thinking mode control for Ollama by using reasoning_effort
  instead of qwenThinkMode

Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>

* Refactor: Move Qwen thinking mode control from the legacy client to aiCore middleware

- Revert the changes to the legacy OpenAIApiClient.ts
- Create a new qwenThinkingMiddleware that handles the /think and /no_think suffixes
- Update AiSdkMiddlewareBuilder to add an assistant field and apply the Qwen middleware
- The middleware automatically handles Qwen models on the Ollama, LM Studio, and NVIDIA providers
- This follows the modern aiCore architecture instead of the deprecated legacy approach

Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>

* Fix: Explicitly pass the assistant field to buildAiSdkMiddlewares

Ensure that the assistant field from ModernAiProviderConfig is explicitly
passed to buildAiSdkMiddlewares so that the Qwen thinking middleware can
access assistant.settings.reasoning_effort correctly.

Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>

* Fix: Remove unnecessary whitespace in the processPostsuffixQwen3Model tests

* Refactor: Simplify user message suffix handling in qwenThinkingMiddleware

* Refactor: Remove the processPostsuffixQwen3Model tests to streamline the ModelMessageService tests

* refactor: remove logger and debug statement from qwenThinkingMiddleware

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>
Co-authored-by: suyao
---
 src/renderer/src/aiCore/index_new.ts          |  3 +-
 .../middleware/AiSdkMiddlewareBuilder.ts      | 21 ++++++++++++++++++++-
 .../middleware/qwenThinkingMiddleware.ts      | 39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 2 deletions(-)
 create mode 100644 src/renderer/src/aiCore/middleware/qwenThinkingMiddleware.ts

diff --git a/src/renderer/src/aiCore/index_new.ts b/src/renderer/src/aiCore/index_new.ts
index b748e3c832..6211b70314 100644
--- a/src/renderer/src/aiCore/index_new.ts
+++ b/src/renderer/src/aiCore/index_new.ts
@@ -97,7 +97,8 @@ export default class ModernAiProvider {
     // Build the middlewares ahead of time
     const middlewares = buildAiSdkMiddlewares({
       ...config,
-      provider: this.actualProvider
+      provider: this.actualProvider,
+      assistant: config.assistant
     })
     logger.debug('Built middlewares in completions', {
       middlewareCount: middlewares.length,
diff --git a/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts b/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts
index 924cc5f47e..54116949f1 100644
--- a/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts
+++ b/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts
@@ -1,12 +1,15 @@
 import { WebSearchPluginConfig } from '@cherrystudio/ai-core/built-in/plugins'
 import { loggerService } from '@logger'
-import { type MCPTool, type Message, type Model, type Provider } from '@renderer/types'
+import { isSupportedThinkingTokenQwenModel } from '@renderer/config/models'
+import { isSupportEnableThinkingProvider } from '@renderer/config/providers'
+import { type Assistant, type MCPTool, type Message, type Model, type Provider } from '@renderer/types'
 import type { Chunk } from '@renderer/types/chunk'
 import { extractReasoningMiddleware, LanguageModelMiddleware, simulateStreamingMiddleware } from 'ai'
 import { isOpenRouterGeminiGenerateImageModel } from '../utils/image'
 import { noThinkMiddleware } from './noThinkMiddleware'
 import { openrouterGenerateImageMiddleware } from './openrouterGenerateImageMiddleware'
+import { qwenThinkingMiddleware } from './qwenThinkingMiddleware'
 import { toolChoiceMiddleware } from './toolChoiceMiddleware'

 const logger = loggerService.withContext('AiSdkMiddlewareBuilder')

@@ -19,6 +22,7 @@ export interface AiSdkMiddlewareConfig {
   onChunk?: (chunk: Chunk) => void
   model?: Model
   provider?: Provider
+  assistant?: Assistant
   enableReasoning: boolean
   // Whether to enable prompt-based tool calling
   isPromptToolUse: boolean
@@ -218,6 +222,21 @@ function addProviderSpecificMiddlewares(builder: AiSdkMiddlewareBuilder, config:
 function addModelSpecificMiddlewares(builder: AiSdkMiddlewareBuilder, config: AiSdkMiddlewareConfig): void {
   if (!config.model || !config.provider) return

+  // Qwen models on providers that don't support the enable_thinking parameter (like Ollama, LM Studio, NVIDIA)
+  // use a /think or /no_think suffix to control thinking mode instead
+  if (
+    config.provider &&
+    isSupportedThinkingTokenQwenModel(config.model) &&
+    !isSupportEnableThinkingProvider(config.provider)
+  ) {
+    const enableThinking = config.assistant?.settings?.reasoning_effort !== undefined
+    builder.add({
+      name: 'qwen-thinking-control',
+      middleware: qwenThinkingMiddleware(enableThinking)
+    })
+    logger.debug(`Added Qwen thinking middleware with thinking ${enableThinking ? 'enabled' : 'disabled'}`)
+  }
+
   // Model-specific middlewares can be added based on model ID or capabilities,
   // e.g. image-generation models, multimodal models, etc.
   if (isOpenRouterGeminiGenerateImageModel(config.model, config.provider)) {
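The enableThinking flag above reduces the assistant's reasoning_effort setting
to a single suffix choice. A minimal restatement of that mapping in plain
TypeScript, for reference; the ReasoningEffort alias is hypothetical and not a
type from the patch:

    // Any defined effort level enables thinking; an unset value disables it.
    // The alias is illustrative only.
    type ReasoningEffort = 'low' | 'medium' | 'high' | undefined

    function qwenSuffixFor(reasoningEffort: ReasoningEffort): ' /think' | ' /no_think' {
      return reasoningEffort !== undefined ? ' /think' : ' /no_think'
    }

    // qwenSuffixFor('high')    === ' /think'    -> the model reasons before answering
    // qwenSuffixFor(undefined) === ' /no_think' -> the model answers directly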
diff --git a/src/renderer/src/aiCore/middleware/qwenThinkingMiddleware.ts b/src/renderer/src/aiCore/middleware/qwenThinkingMiddleware.ts
new file mode 100644
index 0000000000..34515a42c9
--- /dev/null
+++ b/src/renderer/src/aiCore/middleware/qwenThinkingMiddleware.ts
@@ -0,0 +1,39 @@
+import { LanguageModelMiddleware } from 'ai'
+
+/**
+ * Qwen Thinking Middleware
+ * Controls thinking mode for Qwen models on providers that don't support the enable_thinking parameter (like Ollama).
+ * Appends a '/think' or '/no_think' suffix to user messages based on the reasoning_effort setting.
+ * @param enableThinking - Whether thinking mode is enabled (based on reasoning_effort !== undefined)
+ * @returns LanguageModelMiddleware
+ */
+export function qwenThinkingMiddleware(enableThinking: boolean): LanguageModelMiddleware {
+  const suffix = enableThinking ? ' /think' : ' /no_think'
+
+  return {
+    middlewareVersion: 'v2',
+
+    transformParams: async ({ params }) => {
+      const transformedParams = { ...params }
+      // Process the messages in the prompt
+      if (transformedParams.prompt && Array.isArray(transformedParams.prompt)) {
+        transformedParams.prompt = transformedParams.prompt.map((message) => {
+          // Only user messages carry the thinking-control suffix
+          if (message.role === 'user') {
+            // Append the suffix to each text part that doesn't already end with one
+            if (Array.isArray(message.content)) {
+              for (const part of message.content) {
+                if (part.type === 'text' && !part.text.endsWith('/think') && !part.text.endsWith('/no_think')) {
+                  part.text += suffix
+                }
+              }
+            }
+          }
+          return message
+        })
+      }
+
+      return transformedParams
+    }
+  }
+}
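A minimal usage sketch of the new middleware, assuming the AI SDK v2
middleware call shape; the params stub and the casts to any are illustrative,
since a real invocation passes full LanguageModelV2CallOptions together with
the wrapped model:

    import { qwenThinkingMiddleware } from './qwenThinkingMiddleware'

    async function demo() {
      const middleware = qwenThinkingMiddleware(false) // thinking disabled -> ' /no_think'

      // Stub prompt in the AI SDK message shape; only user text parts are rewritten
      const params = {
        prompt: [
          { role: 'system', content: 'You are a helpful assistant.' },
          { role: 'user', content: [{ type: 'text', text: 'What is 17 * 24?' }] }
        ]
      }

      const transformed = await middleware.transformParams!({
        type: 'generate',
        params,
        model: undefined
      } as any)

      console.log(transformed.prompt)
      // The user text becomes 'What is 17 * 24? /no_think'; the system message
      // is untouched, and text that already ends in '/think' or '/no_think'
      // passes through unchanged.
    }

Note that transformParams copies params only shallowly, so the suffix is
written into the original message parts as well; callers that reuse the same
prompt objects across requests should be aware of that side effect.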