Fix Qwen3 thinking mode control for Ollama using aiCore middleware (#10947)

* Initial plan

* Fix Qwen3 thinking mode control for Ollama by using reasoning_effort instead of qwenThinkMode

Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>

* Refactor: Move Qwen thinking mode control from legacy to aiCore middleware

- Revert changes to legacy OpenAIApiClient.ts
- Create a new qwenThinkingMiddleware that handles the /think and /no_think suffixes (sketched after this list)
- Update AiSdkMiddlewareBuilder to add an assistant field and apply the Qwen middleware
- The middleware automatically handles Qwen models on the Ollama, LM Studio, and NVIDIA providers
- This follows the modern aiCore architecture instead of deprecated legacy approach
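
A minimal sketch of the transformation the middleware performs, using simplified message shapes rather than the exact ai-sdk types:

type TextPart = { type: 'text'; text: string }
type UserMessage = { role: 'user'; content: TextPart[] }

// Append the Qwen control suffix to every text part of a user message,
// skipping parts that already end with an explicit /think or /no_think marker.
function appendThinkSuffix(message: UserMessage, enableThinking: boolean): UserMessage {
  const suffix = enableThinking ? ' /think' : ' /no_think'
  return {
    ...message,
    content: message.content.map((part) =>
      part.type === 'text' && !part.text.endsWith('/think') && !part.text.endsWith('/no_think')
        ? { ...part, text: part.text + suffix }
        : part
    )
  }
}

// appendThinkSuffix({ role: 'user', content: [{ type: 'text', text: 'Hi' }] }, false)
// => { role: 'user', content: [{ type: 'text', text: 'Hi /no_think' }] }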

Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>

* Fix: Explicitly pass assistant field to buildAiSdkMiddlewares

Ensure that the assistant field from ModernAiProviderConfig is explicitly passed
to buildAiSdkMiddlewares so that the Qwen thinking middleware can access
assistant.settings.reasoning_effort correctly (see the sketch below).
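
For illustration, the flag derivation this enables (a sketch; any set reasoning_effort is treated as "thinking on"):

// Sketch: a defined reasoning_effort turns thinking on; undefined turns it off.
const enableThinking = config.assistant?.settings?.reasoning_effort !== undefined
// enableThinking === true  -> user messages get a ' /think' suffix
// enableThinking === false -> user messages get a ' /no_think' suffix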

Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>

* Fix: Remove unnecessary whitespace in processPostsuffixQwen3Model tests

* Refactor: Simplify user message suffix handling in qwenThinkingMiddleware

* Refactor: Remove processPostsuffixQwen3Model tests to streamline ModelMessageService tests

* refactor: remove logger and debug statement from qwenThinkingMiddleware

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>
Co-authored-by: suyao <sy20010504@gmail.com>
3 changed files with 61 additions and 2 deletions

ModernAiProvider.ts

@@ -97,7 +97,8 @@ export default class ModernAiProvider
     // Build the middlewares ahead of time
     const middlewares = buildAiSdkMiddlewares({
       ...config,
-      provider: this.actualProvider
+      provider: this.actualProvider,
+      assistant: config.assistant
     })
     logger.debug('Built middlewares in completions', {
       middlewareCount: middlewares.length,

AiSdkMiddlewareBuilder.ts

@@ -1,12 +1,15 @@
 import { WebSearchPluginConfig } from '@cherrystudio/ai-core/built-in/plugins'
 import { loggerService } from '@logger'
-import { type MCPTool, type Message, type Model, type Provider } from '@renderer/types'
+import { isSupportedThinkingTokenQwenModel } from '@renderer/config/models'
+import { isSupportEnableThinkingProvider } from '@renderer/config/providers'
+import { type Assistant, type MCPTool, type Message, type Model, type Provider } from '@renderer/types'
 import type { Chunk } from '@renderer/types/chunk'
 import { extractReasoningMiddleware, LanguageModelMiddleware, simulateStreamingMiddleware } from 'ai'

 import { isOpenRouterGeminiGenerateImageModel } from '../utils/image'
 import { noThinkMiddleware } from './noThinkMiddleware'
 import { openrouterGenerateImageMiddleware } from './openrouterGenerateImageMiddleware'
+import { qwenThinkingMiddleware } from './qwenThinkingMiddleware'
 import { toolChoiceMiddleware } from './toolChoiceMiddleware'

 const logger = loggerService.withContext('AiSdkMiddlewareBuilder')
@@ -19,6 +22,7 @@ export interface AiSdkMiddlewareConfig
   onChunk?: (chunk: Chunk) => void
   model?: Model
   provider?: Provider
+  assistant?: Assistant
   enableReasoning: boolean
   // Whether to enable prompt-based tool calling
   isPromptToolUse: boolean
@@ -218,6 +222,21 @@ function addProviderSpecificMiddlewares(builder: AiSdkMiddlewareBuilder, config:
 function addModelSpecificMiddlewares(builder: AiSdkMiddlewareBuilder, config: AiSdkMiddlewareConfig): void {
   if (!config.model || !config.provider) return

+  // Qwen models on providers that don't support the enable_thinking parameter (like Ollama, LM Studio, NVIDIA)
+  // use a /think or /no_think suffix to control thinking mode
+  if (
+    config.provider &&
+    isSupportedThinkingTokenQwenModel(config.model) &&
+    !isSupportEnableThinkingProvider(config.provider)
+  ) {
+    const enableThinking = config.assistant?.settings?.reasoning_effort !== undefined
+    builder.add({
+      name: 'qwen-thinking-control',
+      middleware: qwenThinkingMiddleware(enableThinking)
+    })
+    logger.debug(`Added Qwen thinking middleware with thinking ${enableThinking ? 'enabled' : 'disabled'}`)
+  }
+
   // Specific middlewares can be added based on model ID or capabilities
   // e.g. image generation models, multimodal models, etc.
   if (isOpenRouterGeminiGenerateImageModel(config.model, config.provider)) {
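
The gate above can be read as a standalone predicate; a hedged sketch for clarity (not part of the commit, helper names as in the diff):

// Sketch: register the suffix middleware only for thinking-capable Qwen models
// on providers that cannot accept enable_thinking directly.
function needsQwenSuffixControl(model?: Model, provider?: Provider): boolean {
  if (!model || !provider) return false
  return isSupportedThinkingTokenQwenModel(model) && !isSupportEnableThinkingProvider(provider)
}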

qwenThinkingMiddleware.ts (new file)

@@ -0,0 +1,39 @@
+import { LanguageModelMiddleware } from 'ai'
+
+/**
+ * Qwen Thinking Middleware
+ * Controls thinking mode for Qwen models on providers that don't support the enable_thinking parameter (like Ollama)
+ * Appends a '/think' or '/no_think' suffix to user messages based on the reasoning_effort setting
+ * @param enableThinking - Whether thinking mode is enabled (based on reasoning_effort !== undefined)
+ * @returns LanguageModelMiddleware
+ */
+export function qwenThinkingMiddleware(enableThinking: boolean): LanguageModelMiddleware {
+  const suffix = enableThinking ? ' /think' : ' /no_think'
+
+  return {
+    middlewareVersion: 'v2',
+    transformParams: async ({ params }) => {
+      const transformedParams = { ...params }
+
+      // Process messages in the prompt
+      if (transformedParams.prompt && Array.isArray(transformedParams.prompt)) {
+        transformedParams.prompt = transformedParams.prompt.map((message) => {
+          // Only process user messages
+          if (message.role === 'user') {
+            // Process the content array
+            if (Array.isArray(message.content)) {
+              for (const part of message.content) {
+                if (part.type === 'text' && !part.text.endsWith('/think') && !part.text.endsWith('/no_think')) {
+                  part.text += suffix
+                }
+              }
+            }
+          }
+          return message
+        })
+      }
+
+      return transformedParams
+    }
+  }
+}
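
A quick way to exercise the middleware in isolation (a sketch; the params object is simplified relative to the real language-model call options, hence the cast):

const middleware = qwenThinkingMiddleware(false)
const result = await middleware.transformParams!({
  params: { prompt: [{ role: 'user', content: [{ type: 'text', text: 'What is 2 + 2?' }] }] }
} as any) // simplified; the real call options also carry type and model
// result.prompt[0].content[0].text === 'What is 2 + 2? /no_think'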