Mirror of https://github.com/CherryHQ/cherry-studio.git (synced 2025-12-27 04:31:27 +08:00)
fix: silicon reasoning (#10932)
* refactor(aiCore): reorganize reasoning effort logic for different providers
Restructure the reasoning effort calculation logic to handle different model providers more clearly. Move the OpenRouter- and SiliconFlow-specific logic into dedicated sections and remove duplicate checks. Improve maintainability by grouping related provider logic together.
* refactor(sdk): update thinking config type and property names
- Replace inline thinking config type with imported ThinkingConfig type
- Update property names from snake_case to camelCase for consistency
- Add null checks for token limit calculations
- Clarify hard-coded maximum for silicon provider in comments
* refactor(openai): standardize property names to camelCase in thinking_config
Update property names in the thinking_config object from snake_case to camelCase for consistency with codebase conventions
(cherry picked from commit 4dfb73c982)
Parent: 7b10ff5010
Commit: f71ce7fe3d
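In practice, the thinking-config change means the Gemini-specific reasoning payload is now typed with the ThinkingConfig interface exported by @google/genai, whose keys are camelCase. A minimal sketch of the resulting shape, with values taken from the diff below (the variable name is illustrative):

import { ThinkingConfig } from '@google/genai'

// Sketch of the Gemini reasoning params after this commit. ThinkingConfig uses
// camelCase keys (thinkingBudget, includeThoughts), replacing the previous
// inline snake_case object ({ thinking_budget, include_thoughts }).
const geminiDynamicThinking: { extra_body: { google: { thinking_config: ThinkingConfig } } } = {
  extra_body: {
    google: {
      thinking_config: {
        thinkingBudget: -1, // -1 = dynamic thinking, the model picks its own budget
        includeThoughts: true
      }
    }
  }
}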
@@ -184,7 +184,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
         extra_body: {
           google: {
             thinking_config: {
-              thinking_budget: 0
+              thinkingBudget: 0
             }
           }
         }
@@ -319,8 +319,8 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
         extra_body: {
           google: {
             thinking_config: {
-              thinking_budget: -1,
-              include_thoughts: true
+              thinkingBudget: -1,
+              includeThoughts: true
             }
           }
         }
@@ -330,8 +330,8 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
         extra_body: {
           google: {
             thinking_config: {
-              thinking_budget: budgetTokens,
-              include_thoughts: true
+              thinkingBudget: budgetTokens,
+              includeThoughts: true
             }
           }
         }
@@ -662,7 +662,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
     } else if (isClaudeReasoningModel(model) && reasoningEffort.thinking?.budget_tokens) {
       suffix = ` --thinking_budget ${reasoningEffort.thinking.budget_tokens}`
     } else if (isGeminiReasoningModel(model) && reasoningEffort.extra_body?.google?.thinking_config) {
-      suffix = ` --thinking_budget ${reasoningEffort.extra_body.google.thinking_config.thinking_budget}`
+      suffix = ` --thinking_budget ${reasoningEffort.extra_body.google.thinking_config.thinkingBudget}`
     }
     // FIXME: Poe does not support multiple text parts; uploaded text files use a text part instead of a file part, which causes problems
     // Temporary workaround: force Poe to use string content, although Poe actually has partial support for arrays
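The renamed property is also what the prompt-suffix branch above now reads; a small self-contained sketch of what that line produces (the 8192 budget is illustrative, not a value from the code):

// Illustrative only: shows the suffix built from a camelCase thinkingBudget.
const reasoningEffort = {
  extra_body: { google: { thinking_config: { thinkingBudget: 8192, includeThoughts: true } } }
}
const suffix = ` --thinking_budget ${reasoningEffort.extra_body.google.thinking_config.thinkingBudget}`
// suffix === ' --thinking_budget 8192'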
@@ -32,6 +32,7 @@ import { getAssistantSettings, getProviderByModel } from '@renderer/services/Ass
 import { SettingsState } from '@renderer/store/settings'
 import { Assistant, EFFORT_RATIO, isSystemProvider, Model, SystemProviderIds } from '@renderer/types'
 import { ReasoningEffortOptionalParams } from '@renderer/types/sdk'
+import { toInteger } from 'lodash'
 
 const logger = loggerService.withContext('reasoning')
 
@@ -94,7 +95,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
       extra_body: {
         google: {
           thinking_config: {
-            thinking_budget: 0
+            thinkingBudget: 0
           }
         }
       }
@@ -112,9 +113,54 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
   }
 
   // Case where reasoningEffort is valid
 
+  // OpenRouter models
+  if (model.provider === SystemProviderIds.openrouter) {
+    // Grok 4 Fast doesn't support effort levels, always use enabled: true
+    if (isGrok4FastReasoningModel(model)) {
+      return {
+        reasoning: {
+          enabled: true // Ignore effort level, just enable reasoning
+        }
+      }
+    }
+
+    // Other OpenRouter models that support effort levels
+    if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
+      return {
+        reasoning: {
+          effort: reasoningEffort === 'auto' ? 'medium' : reasoningEffort
+        }
+      }
+    }
+  }
+
+  const effortRatio = EFFORT_RATIO[reasoningEffort]
+  const tokenLimit = findTokenLimit(model.id)
+  let budgetTokens: number | undefined
+  if (tokenLimit) {
+    budgetTokens = Math.floor((tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min)
+  }
+
+  // See https://docs.siliconflow.cn/cn/api-reference/chat-completions/chat-completions
+  if (model.provider === SystemProviderIds.silicon) {
+    if (
+      isDeepSeekHybridInferenceModel(model) ||
+      isSupportedThinkingTokenZhipuModel(model) ||
+      isSupportedThinkingTokenQwenModel(model) ||
+      isSupportedThinkingTokenHunyuanModel(model)
+    ) {
+      return {
+        enable_thinking: true,
+        // Hard-coded maximum, only for silicon
+        thinking_budget: budgetTokens ? toInteger(Math.max(budgetTokens, 32768)) : undefined
+      }
+    }
+    return {}
+  }
+
   // DeepSeek hybrid inference models, v3.1 and maybe more in the future
   // Different providers control thinking in different ways; handle them all in one place here
 
   if (isDeepSeekHybridInferenceModel(model)) {
     if (isSystemProvider(provider)) {
       switch (provider.id) {
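The added block computes a budget by interpolating between the model's token limits according to the effort ratio, and the SiliconFlow branch then substitutes the hard-coded 32768 value via Math.max. A worked sketch of the arithmetic, assuming an effort ratio of 0.5 and a token limit of { min: 1024, max: 32768 } (both numbers are assumptions for illustration, not values from the codebase):

// Worked example of the new budget calculation (all numbers are assumed).
const effortRatio = 0.5                       // e.g. a mid-level effort ratio
const tokenLimit = { min: 1024, max: 32768 }  // stand-in for findTokenLimit(model.id)

// Same formula as the added code above:
const budgetTokens = Math.floor((tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min)
// (32768 - 1024) * 0.5 + 1024 = 16896

// SiliconFlow branch: Math.max with the hard-coded 32768, so the request carries
// { enable_thinking: true, thinking_budget: 32768 } for this example.
const siliconThinkingBudget = Math.max(budgetTokens, 32768) // 32768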
@@ -123,10 +169,6 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
             enable_thinking: true,
             incremental_output: true
           }
-        case SystemProviderIds.silicon:
-          return {
-            enable_thinking: true
-          }
         case SystemProviderIds.hunyuan:
         case SystemProviderIds['tencent-cloud-ti']:
         case SystemProviderIds.doubao:
@@ -151,53 +193,12 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
           logger.warn(
             `Skipping thinking options for provider ${provider.name} as DeepSeek v3.1 thinking control method is unknown`
           )
+        case SystemProviderIds.silicon:
+        // specially handled before
       }
     }
   }
 
-  // OpenRouter models
-  if (model.provider === SystemProviderIds.openrouter) {
-    // Grok 4 Fast doesn't support effort levels, always use enabled: true
-    if (isGrok4FastReasoningModel(model)) {
-      return {
-        reasoning: {
-          enabled: true // Ignore effort level, just enable reasoning
-        }
-      }
-    }
-
-    // Other OpenRouter models that support effort levels
-    if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
-      return {
-        reasoning: {
-          effort: reasoningEffort === 'auto' ? 'medium' : reasoningEffort
-        }
-      }
-    }
-  }
-
-  // Doubao thinking mode support
-  if (isSupportedThinkingTokenDoubaoModel(model)) {
-    if (isDoubaoSeedAfter251015(model)) {
-      return { reasoningEffort }
-    }
-    // Comment below this line seems weird. reasoning is high instead of null/undefined. Who wrote this?
-    // When reasoningEffort is empty, enabled is turned on by default
-    if (reasoningEffort === 'high') {
-      return { thinking: { type: 'enabled' } }
-    }
-    if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
-      return { thinking: { type: 'auto' } }
-    }
-    // Other cases omit the thinking field
-    return {}
-  }
-
-  const effortRatio = EFFORT_RATIO[reasoningEffort]
-  const budgetTokens = Math.floor(
-    (findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio + findTokenLimit(model.id)?.min!
-  )
-
   // OpenRouter models, use thinking
   if (model.provider === SystemProviderIds.openrouter) {
     if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
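The budget calculation deleted at the end of this hunk relied on non-null assertions, so a model with no registered token limit would propagate NaN into the budget; the guarded version added earlier returns undefined instead. A small sketch of the difference (findTokenLimit here is a hypothetical stub, not the real helper):

// Hypothetical stub standing in for a model with no registered token limit.
const findTokenLimit = (_id: string): { min: number; max: number } | undefined => undefined

// Old style: non-null assertions let undefined flow into the arithmetic, yielding NaN.
const oldBudget = Math.floor(
  (findTokenLimit('unknown')?.max! - findTokenLimit('unknown')?.min!) * 0.5 + findTokenLimit('unknown')?.min!
) // NaN

// New style: only compute when a limit exists, otherwise keep the budget undefined.
const limit = findTokenLimit('unknown')
const newBudget = limit ? Math.floor((limit.max - limit.min) * 0.5 + limit.min) : undefined // undefined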
@@ -255,8 +256,8 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
       extra_body: {
         google: {
           thinking_config: {
-            thinking_budget: -1,
-            include_thoughts: true
+            thinkingBudget: -1,
+            includeThoughts: true
           }
         }
       }
@@ -266,8 +267,8 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
       extra_body: {
         google: {
           thinking_config: {
-            thinking_budget: budgetTokens,
-            include_thoughts: true
+            thinkingBudget: budgetTokens,
+            includeThoughts: true
           }
         }
       }
@@ -280,22 +281,26 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
     return {
       thinking: {
         type: 'enabled',
-        budget_tokens: Math.floor(
-          Math.max(1024, Math.min(budgetTokens, (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio))
-        )
+        budget_tokens: budgetTokens
+          ? Math.floor(Math.max(1024, Math.min(budgetTokens, (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio)))
+          : undefined
       }
     }
   }
 
   // Use thinking, doubao, zhipu, etc.
   if (isSupportedThinkingTokenDoubaoModel(model)) {
-    if (assistant.settings?.reasoning_effort === 'high') {
-      return {
-        thinking: {
-          type: 'enabled'
-        }
-      }
-    }
+    if (isDoubaoSeedAfter251015(model)) {
+      return { reasoningEffort }
+    }
+    if (reasoningEffort === 'high') {
+      return { thinking: { type: 'enabled' } }
+    }
+    if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
+      return { thinking: { type: 'auto' } }
+    }
+    // Other cases omit the thinking field
+    return {}
   }
   if (isSupportedThinkingTokenZhipuModel(model)) {
     return { thinking: { type: 'enabled' } }
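Because budgetTokens can now be undefined, the Claude-style branch above only emits budget_tokens when a budget was computed, still clamping it between 1024 and an effort-scaled share of maxTokens. A worked sketch with assumed numbers (maxTokens, the effort ratio and budgetTokens are illustrative):

// Worked example of the clamp (all numbers are assumed).
const DEFAULT_MAX_TOKENS = 4096             // stand-in for the real constant
const maxTokens: number | undefined = 8192  // assumed assistant setting
const effortRatio = 0.5                     // assumed effort ratio
const budgetTokens: number | undefined = 16896

const budget_tokens = budgetTokens
  ? Math.floor(Math.max(1024, Math.min(budgetTokens, (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio)))
  : undefined
// min(16896, 8192 * 0.5) = 4096; max(1024, 4096) = 4096, so budget_tokens === 4096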
@@ -19,6 +19,7 @@ import {
   GoogleGenAI,
   Model as GeminiModel,
   SendMessageParameters,
+  ThinkingConfig,
   Tool
 } from '@google/genai'
 import OpenAI, { AzureOpenAI } from 'openai'
@@ -90,10 +91,7 @@ export type ReasoningEffortOptionalParams = {
   }
   extra_body?: {
     google?: {
-      thinking_config: {
-        thinking_budget: number
-        include_thoughts?: boolean
-      }
+      thinking_config: ThinkingConfig
     }
   }
   // Add any other potential reasoning-related keys here if they exist