fix: silicon reasoning (#10932)

* refactor(aiCore): reorganize reasoning effort logic for different providers

Restructure the reasoning effort calculation logic to handle different model providers more clearly. Move OpenRouter and SiliconFlow specific logic to dedicated sections and remove duplicate checks. Improve maintainability by grouping related provider logic together.

* refactor(sdk): update thinking config type and property names

- Replace inline thinking config type with imported ThinkingConfig type
- Update property names from snake_case to camelCase for consistency
- Add null checks for token limit calculations
- Clarify hard-coded maximum for silicon provider in comments

* refactor(openai): standardize property names to camelCase in thinking_config

Update property names in thinking_config object from snake_case to camelCase for consistency with codebase conventions

(cherry picked from commit 4dfb73c982)
This commit is contained in:
Phantom 2025-10-24 13:01:00 +08:00 committed by dev
parent 7b10ff5010
commit f71ce7fe3d
3 changed files with 75 additions and 72 deletions

View File

@ -184,7 +184,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
extra_body: {
google: {
thinking_config: {
thinking_budget: 0
thinkingBudget: 0
}
}
}
@ -319,8 +319,8 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
extra_body: {
google: {
thinking_config: {
thinking_budget: -1,
include_thoughts: true
thinkingBudget: -1,
includeThoughts: true
}
}
}
@ -330,8 +330,8 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
extra_body: {
google: {
thinking_config: {
thinking_budget: budgetTokens,
include_thoughts: true
thinkingBudget: budgetTokens,
includeThoughts: true
}
}
}
@ -662,7 +662,7 @@ export class OpenAIAPIClient extends OpenAIBaseClient<
} else if (isClaudeReasoningModel(model) && reasoningEffort.thinking?.budget_tokens) {
suffix = ` --thinking_budget ${reasoningEffort.thinking.budget_tokens}`
} else if (isGeminiReasoningModel(model) && reasoningEffort.extra_body?.google?.thinking_config) {
suffix = ` --thinking_budget ${reasoningEffort.extra_body.google.thinking_config.thinking_budget}`
suffix = ` --thinking_budget ${reasoningEffort.extra_body.google.thinking_config.thinkingBudget}`
}
// FIXME: poe 不支持多个text part上传文本文件的时候用的不是file part而是text part因此会出问题
// 临时解决方案是强制poe用string content但是其实poe部分支持array

View File

@ -32,6 +32,7 @@ import { getAssistantSettings, getProviderByModel } from '@renderer/services/Ass
import { SettingsState } from '@renderer/store/settings'
import { Assistant, EFFORT_RATIO, isSystemProvider, Model, SystemProviderIds } from '@renderer/types'
import { ReasoningEffortOptionalParams } from '@renderer/types/sdk'
import { toInteger } from 'lodash'
const logger = loggerService.withContext('reasoning')
@ -94,7 +95,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
extra_body: {
google: {
thinking_config: {
thinking_budget: 0
thinkingBudget: 0
}
}
}
@ -112,9 +113,54 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
}
// reasoningEffort有效的情况
// OpenRouter models
if (model.provider === SystemProviderIds.openrouter) {
// Grok 4 Fast doesn't support effort levels, always use enabled: true
if (isGrok4FastReasoningModel(model)) {
return {
reasoning: {
enabled: true // Ignore effort level, just enable reasoning
}
}
}
// Other OpenRouter models that support effort levels
if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
return {
reasoning: {
effort: reasoningEffort === 'auto' ? 'medium' : reasoningEffort
}
}
}
}
const effortRatio = EFFORT_RATIO[reasoningEffort]
const tokenLimit = findTokenLimit(model.id)
let budgetTokens: number | undefined
if (tokenLimit) {
budgetTokens = Math.floor((tokenLimit.max - tokenLimit.min) * effortRatio + tokenLimit.min)
}
// See https://docs.siliconflow.cn/cn/api-reference/chat-completions/chat-completions
if (model.provider === SystemProviderIds.silicon) {
if (
isDeepSeekHybridInferenceModel(model) ||
isSupportedThinkingTokenZhipuModel(model) ||
isSupportedThinkingTokenQwenModel(model) ||
isSupportedThinkingTokenHunyuanModel(model)
) {
return {
enable_thinking: true,
// Hard-encoded maximum, only for silicon
thinking_budget: budgetTokens ? toInteger(Math.max(budgetTokens, 32768)) : undefined
}
}
return {}
}
// DeepSeek hybrid inference models, v3.1 and maybe more in the future
// 不同的 provider 有不同的思考控制方式,在这里统一解决
if (isDeepSeekHybridInferenceModel(model)) {
if (isSystemProvider(provider)) {
switch (provider.id) {
@ -123,10 +169,6 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
enable_thinking: true,
incremental_output: true
}
case SystemProviderIds.silicon:
return {
enable_thinking: true
}
case SystemProviderIds.hunyuan:
case SystemProviderIds['tencent-cloud-ti']:
case SystemProviderIds.doubao:
@ -151,53 +193,12 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
logger.warn(
`Skipping thinking options for provider ${provider.name} as DeepSeek v3.1 thinking control method is unknown`
)
case SystemProviderIds.silicon:
// specially handled before
}
}
}
// OpenRouter models
if (model.provider === SystemProviderIds.openrouter) {
// Grok 4 Fast doesn't support effort levels, always use enabled: true
if (isGrok4FastReasoningModel(model)) {
return {
reasoning: {
enabled: true // Ignore effort level, just enable reasoning
}
}
}
// Other OpenRouter models that support effort levels
if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
return {
reasoning: {
effort: reasoningEffort === 'auto' ? 'medium' : reasoningEffort
}
}
}
}
// Doubao 思考模式支持
if (isSupportedThinkingTokenDoubaoModel(model)) {
if (isDoubaoSeedAfter251015(model)) {
return { reasoningEffort }
}
// Comment below this line seems weird. reasoning is high instead of null/undefined. Who wrote this?
// reasoningEffort 为空,默认开启 enabled
if (reasoningEffort === 'high') {
return { thinking: { type: 'enabled' } }
}
if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
return { thinking: { type: 'auto' } }
}
// 其他情况不带 thinking 字段
return {}
}
const effortRatio = EFFORT_RATIO[reasoningEffort]
const budgetTokens = Math.floor(
(findTokenLimit(model.id)?.max! - findTokenLimit(model.id)?.min!) * effortRatio + findTokenLimit(model.id)?.min!
)
// OpenRouter models, use thinking
if (model.provider === SystemProviderIds.openrouter) {
if (isSupportedReasoningEffortModel(model) || isSupportedThinkingTokenModel(model)) {
@ -255,8 +256,8 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
extra_body: {
google: {
thinking_config: {
thinking_budget: -1,
include_thoughts: true
thinkingBudget: -1,
includeThoughts: true
}
}
}
@ -266,8 +267,8 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
extra_body: {
google: {
thinking_config: {
thinking_budget: budgetTokens,
include_thoughts: true
thinkingBudget: budgetTokens,
includeThoughts: true
}
}
}
@ -280,22 +281,26 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
return {
thinking: {
type: 'enabled',
budget_tokens: Math.floor(
Math.max(1024, Math.min(budgetTokens, (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio))
)
budget_tokens: budgetTokens
? Math.floor(Math.max(1024, Math.min(budgetTokens, (maxTokens || DEFAULT_MAX_TOKENS) * effortRatio)))
: undefined
}
}
}
// Use thinking, doubao, zhipu, etc.
if (isSupportedThinkingTokenDoubaoModel(model)) {
if (assistant.settings?.reasoning_effort === 'high') {
return {
thinking: {
type: 'enabled'
}
}
if (isDoubaoSeedAfter251015(model)) {
return { reasoningEffort }
}
if (reasoningEffort === 'high') {
return { thinking: { type: 'enabled' } }
}
if (reasoningEffort === 'auto' && isDoubaoThinkingAutoModel(model)) {
return { thinking: { type: 'auto' } }
}
// 其他情况不带 thinking 字段
return {}
}
if (isSupportedThinkingTokenZhipuModel(model)) {
return { thinking: { type: 'enabled' } }

View File

@ -19,6 +19,7 @@ import {
GoogleGenAI,
Model as GeminiModel,
SendMessageParameters,
ThinkingConfig,
Tool
} from '@google/genai'
import OpenAI, { AzureOpenAI } from 'openai'
@ -90,10 +91,7 @@ export type ReasoningEffortOptionalParams = {
}
extra_body?: {
google?: {
thinking_config: {
thinking_budget: number
include_thoughts?: boolean
}
thinking_config: ThinkingConfig
}
}
// Add any other potential reasoning-related keys here if they exist