Mirror of https://github.com/CherryHQ/cherry-studio.git (synced 2026-01-04 03:40:33 +08:00)
Fix Qwen3 thinking mode control for Ollama using aiCore middleware (#10947)
* Initial plan

* Fix Qwen3 thinking mode control for Ollama by using reasoning_effort instead of qwenThinkMode

* Refactor: Move Qwen thinking mode control from legacy code to aiCore middleware

  - Revert changes to the legacy OpenAIApiClient.ts
  - Create new qwenThinkingMiddleware for handling /think and /no_think suffixes
  - Update AiSdkMiddlewareBuilder to add an assistant field and apply the Qwen middleware
  - The middleware automatically handles Qwen models on the Ollama, LM Studio, and NVIDIA providers
  - This follows the modern aiCore architecture instead of the deprecated legacy approach

* Fix: Explicitly pass the assistant field to buildAiSdkMiddlewares

  Ensure that the assistant field from ModernAiProviderConfig is explicitly passed to buildAiSdkMiddlewares so that the Qwen thinking middleware can access assistant.settings.reasoning_effort correctly.

* Fix: Remove unnecessary whitespace in the processPostsuffixQwen3Model tests

* Refactor: Simplify user message suffix handling in qwenThinkingMiddleware

* Refactor: Remove the processPostsuffixQwen3Model tests to streamline the ModelMessageService tests

* refactor: remove logger and debug statement from qwenThinkingMiddleware

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: DeJeune <67425183+DeJeune@users.noreply.github.com>
Co-authored-by: suyao <sy20010504@gmail.com>
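For context, Qwen3 models expose a per-message "soft switch": appending /think or /no_think to a user message toggles thinking mode on backends such as Ollama that do not accept an enable_thinking request parameter. Below is a minimal TypeScript sketch of the gating rule the commit message describes; qwenSuffixFor is a hypothetical name used for illustration and is not part of this commit:

// Hypothetical helper, for illustration only: any defined reasoning_effort
// counts as "thinking enabled" and selects Qwen3's '/think' soft switch;
// an undefined value selects '/no_think'.
function qwenSuffixFor(reasoningEffort?: string): ' /think' | ' /no_think' {
  return reasoningEffort !== undefined ? ' /think' : ' /no_think'
}

qwenSuffixFor('high') // ' /think'
qwenSuffixFor() // ' /no_think'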
This commit is contained in:
parent c7ceb3035d
commit 29c1173365
@@ -97,7 +97,8 @@ export default class ModernAiProvider {
     // Build the middlewares ahead of time
     const middlewares = buildAiSdkMiddlewares({
       ...config,
-      provider: this.actualProvider
+      provider: this.actualProvider,
+      assistant: config.assistant
     })
     logger.debug('Built middlewares in completions', {
       middlewareCount: middlewares.length,
@@ -1,12 +1,15 @@
 import { WebSearchPluginConfig } from '@cherrystudio/ai-core/built-in/plugins'
 import { loggerService } from '@logger'
-import { type MCPTool, type Message, type Model, type Provider } from '@renderer/types'
+import { isSupportedThinkingTokenQwenModel } from '@renderer/config/models'
+import { isSupportEnableThinkingProvider } from '@renderer/config/providers'
+import { type Assistant, type MCPTool, type Message, type Model, type Provider } from '@renderer/types'
 import type { Chunk } from '@renderer/types/chunk'
 import { extractReasoningMiddleware, LanguageModelMiddleware, simulateStreamingMiddleware } from 'ai'

 import { isOpenRouterGeminiGenerateImageModel } from '../utils/image'
 import { noThinkMiddleware } from './noThinkMiddleware'
 import { openrouterGenerateImageMiddleware } from './openrouterGenerateImageMiddleware'
+import { qwenThinkingMiddleware } from './qwenThinkingMiddleware'
 import { toolChoiceMiddleware } from './toolChoiceMiddleware'

 const logger = loggerService.withContext('AiSdkMiddlewareBuilder')
@@ -19,6 +22,7 @@ export interface AiSdkMiddlewareConfig {
   onChunk?: (chunk: Chunk) => void
   model?: Model
   provider?: Provider
+  assistant?: Assistant
   enableReasoning: boolean
   // Whether to enable prompt-based tool calling
   isPromptToolUse: boolean
@@ -218,6 +222,21 @@ function addProviderSpecificMiddlewares(builder: AiSdkMiddlewareBuilder, config:
 function addModelSpecificMiddlewares(builder: AiSdkMiddlewareBuilder, config: AiSdkMiddlewareConfig): void {
   if (!config.model || !config.provider) return

+  // Qwen models on providers that don't support the enable_thinking parameter (like Ollama, LM Studio, NVIDIA)
+  // use the /think or /no_think suffix to control thinking mode
+  if (
+    config.provider &&
+    isSupportedThinkingTokenQwenModel(config.model) &&
+    !isSupportEnableThinkingProvider(config.provider)
+  ) {
+    const enableThinking = config.assistant?.settings?.reasoning_effort !== undefined
+    builder.add({
+      name: 'qwen-thinking-control',
+      middleware: qwenThinkingMiddleware(enableThinking)
+    })
+    logger.debug(`Added Qwen thinking middleware with thinking ${enableThinking ? 'enabled' : 'disabled'}`)
+  }
+
   // Specific middlewares can be added based on model ID or features,
   // e.g. image generation models, multimodal models, etc.
   if (isOpenRouterGeminiGenerateImageModel(config.model, config.provider)) {
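The hunk above registers the new middleware through AiSdkMiddlewareBuilder. For readers unfamiliar with that class, here is a minimal sketch of the named-entry builder pattern implied by the builder.add({ name, middleware }) call shape in the diff; the names NamedMiddleware and MiddlewareBuilderSketch are illustrative, and the real implementation in this file may differ:

import type { LanguageModelMiddleware } from 'ai'

// Sketch only: collect named middlewares and hand them back in
// registration order, matching the add() call shape seen above.
interface NamedMiddleware {
  name: string
  middleware: LanguageModelMiddleware
}

class MiddlewareBuilderSketch {
  private readonly entries: NamedMiddleware[] = []

  add(entry: NamedMiddleware): this {
    this.entries.push(entry)
    return this
  }

  build(): LanguageModelMiddleware[] {
    return this.entries.map((entry) => entry.middleware)
  }
}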
src/renderer/src/aiCore/middleware/qwenThinkingMiddleware.ts (new file, 39 lines)
@@ -0,0 +1,39 @@
+import { LanguageModelMiddleware } from 'ai'
+
+/**
+ * Qwen Thinking Middleware
+ * Controls thinking mode for Qwen models on providers that don't support the enable_thinking parameter (like Ollama).
+ * Appends a '/think' or '/no_think' suffix to user messages based on the reasoning_effort setting.
+ * @param enableThinking - Whether thinking mode is enabled (based on reasoning_effort !== undefined)
+ * @returns LanguageModelMiddleware
+ */
+export function qwenThinkingMiddleware(enableThinking: boolean): LanguageModelMiddleware {
+  const suffix = enableThinking ? ' /think' : ' /no_think'
+
+  return {
+    middlewareVersion: 'v2',
+
+    transformParams: async ({ params }) => {
+      const transformedParams = { ...params }
+      // Process the messages in the prompt
+      if (transformedParams.prompt && Array.isArray(transformedParams.prompt)) {
+        transformedParams.prompt = transformedParams.prompt.map((message) => {
+          // Only process user messages
+          if (message.role === 'user') {
+            // Append the suffix to each text part unless one is already present
+            if (Array.isArray(message.content)) {
+              for (const part of message.content) {
+                if (part.type === 'text' && !part.text.endsWith('/think') && !part.text.endsWith('/no_think')) {
+                  part.text += suffix
+                }
+              }
+            }
+          }
+          return message
+        })
+      }
+
+      return transformedParams
+    }
+  }
+}
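A hedged usage sketch of the new middleware outside the builder, assuming AI SDK v5's wrapLanguageModel and a community Ollama provider; the ollama-ai-provider-v2 package and the qwen3:8b model id are assumptions for illustration and are not part of this commit:

import { generateText, wrapLanguageModel } from 'ai'
// Assumption: any LanguageModelV2-compatible Ollama provider works here.
import { ollama } from 'ollama-ai-provider-v2'

import { qwenThinkingMiddleware } from './qwenThinkingMiddleware'

// Wrap a Qwen3 model served by Ollama so every outgoing user text part
// ends with the ' /no_think' soft switch, disabling thinking mode.
const model = wrapLanguageModel({
  model: ollama('qwen3:8b'), // hypothetical model id
  middleware: qwenThinkingMiddleware(false)
})

// The model now receives e.g. 'Why is the sky blue? /no_think'.
const { text } = await generateText({ model, prompt: 'Why is the sky blue?' })
console.log(text)

Inside Cherry Studio this wiring happens automatically via addModelSpecificMiddlewares shown above; the sketch only illustrates the middleware's contract.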