diff --git a/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts b/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts index 247dc8e5c8..ab4a5d8c48 100644 --- a/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts +++ b/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts @@ -1,6 +1,6 @@ import type { WebSearchPluginConfig } from '@cherrystudio/ai-core/built-in/plugins' import { loggerService } from '@logger' -import { isGemini3Model, isSupportedThinkingTokenQwenModel } from '@renderer/config/models' +import { isAnthropicModel, isGemini3Model, isSupportedThinkingTokenQwenModel } from '@renderer/config/models' import type { McpMode, MCPTool } from '@renderer/types' import { type Assistant, type Message, type Model, type Provider, SystemProviderIds } from '@renderer/types' import type { Chunk } from '@renderer/types/chunk' @@ -10,6 +10,7 @@ import { extractReasoningMiddleware, simulateStreamingMiddleware } from 'ai' import { getAiSdkProviderId } from '../provider/factory' import { isOpenRouterGeminiGenerateImageModel } from '../utils/image' +import { anthropicCacheMiddleware } from './anthropicCacheMiddleware' import { noThinkMiddleware } from './noThinkMiddleware' import { openrouterGenerateImageMiddleware } from './openrouterGenerateImageMiddleware' import { openrouterReasoningMiddleware } from './openrouterReasoningMiddleware' @@ -179,7 +180,12 @@ function addProviderSpecificMiddlewares(builder: AiSdkMiddlewareBuilder, config: // 根据不同provider添加特定中间件 switch (config.provider.type) { case 'anthropic': - // Anthropic特定中间件 + if (isAnthropicModel(config.model) && config.provider.anthropicCacheControl?.tokenThreshold) { + builder.add({ + name: 'anthropic-cache', + middleware: anthropicCacheMiddleware(config.provider) + }) + } break case 'openai': case 'azure-openai': { diff --git a/src/renderer/src/aiCore/middleware/anthropicCacheMiddleware.ts b/src/renderer/src/aiCore/middleware/anthropicCacheMiddleware.ts new file mode 100644 index 
0000000000..df50798940
--- /dev/null
+++ b/src/renderer/src/aiCore/middleware/anthropicCacheMiddleware.ts
@@ -0,0 +1,124 @@
+/**
+ * Anthropic Prompt Caching Middleware
+ *
+ * Tags selected prompt messages with Anthropic `cacheControl` provider options
+ * so that they become prompt-cache breakpoints.
+ * @see https://ai-sdk.dev/providers/ai-sdk-providers/anthropic#cache-control
+ */
+import { estimateTextTokens } from '@renderer/services/TokenService'
+import type { Provider } from '@renderer/types'
+import type { LanguageModelMiddleware } from 'ai'
+
+/** Anthropic prompt caching accepts at most four cache breakpoints per request. */
+const MAX_CACHE_BREAKPOINTS = 4
+
+const cacheProviderOptions = {
+  anthropic: { cacheControl: { type: 'ephemeral' } }
+}
+
+/**
+ * Merges the ephemeral cache marker into `existing` provider options.
+ * Options for other providers and other `anthropic` keys are kept, so metadata
+ * already attached to a message or part is not discarded.
+ */
+function withCacheControl(existing: unknown): Record<string, unknown> {
+  const current = typeof existing === 'object' && existing !== null ? (existing as Record<string, unknown>) : {}
+  const anthropic =
+    typeof current.anthropic === 'object' && current.anthropic !== null
+      ? (current.anthropic as Record<string, unknown>)
+      : {}
+  return { ...current, anthropic: { ...anthropic, ...cacheProviderOptions.anthropic } }
+}
+
+/**
+ * Estimates the token count of a message's content.
+ * A string is measured directly; for an array, only parts carrying a string
+ * `text` field are summed. Anything else counts as 0.
+ */
+function estimateContentTokens(content: unknown): number {
+  if (typeof content === 'string') return estimateTextTokens(content)
+  if (!Array.isArray(content)) return 0
+
+  let total = 0
+  for (const part of content) {
+    if (typeof part === 'object' && part !== null && typeof part.text === 'string') {
+      total += estimateTextTokens(part.text)
+    }
+  }
+  return total
+}
+
+/**
+ * Returns a copy of `content` whose last part carries the cache marker.
+ * String content is wrapped in a single text part; an empty array or any
+ * other value is returned unchanged.
+ */
+function addCacheToContentParts(content: unknown): unknown {
+  if (typeof content === 'string') {
+    return [{ type: 'text', text: content, providerOptions: cacheProviderOptions }]
+  }
+  if (Array.isArray(content) && content.length > 0) {
+    const result = [...content]
+    const last = result[result.length - 1]
+    if (typeof last === 'object' && last !== null) {
+      // Merge rather than overwrite so existing part-level options survive.
+      result[result.length - 1] = { ...last, providerOptions: withCacheControl(last.providerOptions) }
+    }
+    return result
+  }
+  return content
+}
+
+/**
+ * Builds a middleware that adds cache markers to the prompt according to
+ * `provider.anthropicCacheControl`.
+ *
+ * Nothing is changed when `tokenThreshold` is unset or 0, or when the prompt is
+ * empty. Otherwise the first system message that reaches the threshold and up
+ * to `cacheLastNMessages` of the most recent non-system messages that reach it
+ * are marked. The total number of markers added is capped at
+ * MAX_CACHE_BREAKPOINTS.
+ *
+ * @param provider Provider whose `anthropicCacheControl` settings are read on every call.
+ * @returns Middleware whose `transformParams` returns new params; the input prompt array is not mutated.
+ */
+export function anthropicCacheMiddleware(provider: Provider): LanguageModelMiddleware {
+  return {
+    middlewareVersion: 'v2',
+    transformParams: async ({ params }) => {
+      const settings = provider.anthropicCacheControl
+      if (!settings?.tokenThreshold || !Array.isArray(params.prompt) || params.prompt.length === 0) {
+        return params
+      }
+
+      const { tokenThreshold, cacheSystemMessage, cacheLastNMessages } = settings
+      const messages = [...params.prompt]
+      let budget = MAX_CACHE_BREAKPOINTS
+
+      // Cache system message (providerOptions on message object)
+      if (cacheSystemMessage) {
+        for (let i = 0; i < messages.length; i++) {
+          const msg = messages[i] as any
+          if (msg.role === 'system' && estimateContentTokens(msg.content) >= tokenThreshold) {
+            messages[i] = { ...msg, providerOptions: withCacheControl(msg.providerOptions) }
+            budget--
+            break
+          }
+        }
+      }
+
+      // Cache last N non-system messages (providerOptions on content parts)
+      const limit = Math.min(cacheLastNMessages ?? 0, budget)
+      let cachedCount = 0
+      for (let i = messages.length - 1; i >= 0 && cachedCount < limit; i--) {
+        const msg = messages[i] as any
+        if (msg.role !== 'system' && estimateContentTokens(msg.content) >= tokenThreshold) {
+          messages[i] = { ...msg, content: addCacheToContentParts(msg.content) }
+          cachedCount++
+        }
+      }
+
+      return { ...params, prompt: messages }
+    }
+  }
+}
diff --git a/src/renderer/src/i18n/locales/en-us.json b/src/renderer/src/i18n/locales/en-us.json
index 200899f7fd..0632d83941 100644
--- a/src/renderer/src/i18n/locales/en-us.json
+++ b/src/renderer/src/i18n/locales/en-us.json
@@ -4475,6 +4475,14 @@
           }
         },
         "options": {
+          "anthropic_cache": {
+            "cache_last_n": "Cache Last N Messages",
+            "cache_last_n_help": "Cache the last N conversation messages (excluding system messages)",
+            "cache_system": "Cache System Message",
+            "cache_system_help": "Whether to cache the system prompt",
+            "token_threshold": "Cache Token Threshold",
+            "token_threshold_help": "Messages exceeding this token count will be cached. Set to 0 to disable caching."
+ }, "array_content": { "help": "Does the provider support the content field of the message being of array type?", "label": "Supports array format message content" diff --git a/src/renderer/src/i18n/locales/zh-cn.json b/src/renderer/src/i18n/locales/zh-cn.json index 533819faef..caf4e90ce6 100644 --- a/src/renderer/src/i18n/locales/zh-cn.json +++ b/src/renderer/src/i18n/locales/zh-cn.json @@ -4475,6 +4475,14 @@ } }, "options": { + "anthropic_cache": { + "cache_last_n": "缓存最后 N 条消息", + "cache_last_n_help": "缓存最后的 N 条对话消息(不含系统消息)", + "cache_system": "缓存系统消息", + "cache_system_help": "是否缓存系统提示词", + "token_threshold": "缓存 Token 阈值", + "token_threshold_help": "消息超过此 Token 数才会被缓存,设为 0 禁用缓存" + }, "array_content": { "help": "该提供商是否支持 message 的 content 字段为 array 类型", "label": "支持数组格式的 message content" diff --git a/src/renderer/src/i18n/locales/zh-tw.json b/src/renderer/src/i18n/locales/zh-tw.json index b828ca4812..928d783d53 100644 --- a/src/renderer/src/i18n/locales/zh-tw.json +++ b/src/renderer/src/i18n/locales/zh-tw.json @@ -4475,6 +4475,14 @@ } }, "options": { + "anthropic_cache": { + "cache_last_n": "[to be translated]:Cache Last N Messages", + "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)", + "cache_system": "[to be translated]:Cache System Message", + "cache_system_help": "[to be translated]:Whether to cache the system prompt", + "token_threshold": "[to be translated]:Cache Token Threshold", + "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching." 
+ }, "array_content": { "help": "該供應商是否支援 message 的 content 欄位為 array 類型", "label": "支援陣列格式的 message content" diff --git a/src/renderer/src/i18n/translate/de-de.json b/src/renderer/src/i18n/translate/de-de.json index 41bc6a2611..16680f070a 100644 --- a/src/renderer/src/i18n/translate/de-de.json +++ b/src/renderer/src/i18n/translate/de-de.json @@ -4475,6 +4475,14 @@ } }, "options": { + "anthropic_cache": { + "cache_last_n": "[to be translated]:Cache Last N Messages", + "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)", + "cache_system": "[to be translated]:Cache System Message", + "cache_system_help": "[to be translated]:Whether to cache the system prompt", + "token_threshold": "[to be translated]:Cache Token Threshold", + "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching." + }, "array_content": { "help": "Unterstützt Array-Format für message content", "label": "Unterstützt Array-Format für message content" diff --git a/src/renderer/src/i18n/translate/el-gr.json b/src/renderer/src/i18n/translate/el-gr.json index 79b4517656..137e8ad5ab 100644 --- a/src/renderer/src/i18n/translate/el-gr.json +++ b/src/renderer/src/i18n/translate/el-gr.json @@ -4475,6 +4475,14 @@ } }, "options": { + "anthropic_cache": { + "cache_last_n": "[to be translated]:Cache Last N Messages", + "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)", + "cache_system": "[to be translated]:Cache System Message", + "cache_system_help": "[to be translated]:Whether to cache the system prompt", + "token_threshold": "[to be translated]:Cache Token Threshold", + "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching." 
+ }, "array_content": { "help": "Εάν ο πάροχος υποστηρίζει το πεδίο περιεχομένου του μηνύματος ως τύπο πίνακα", "label": "Υποστήριξη για περιεχόμενο μηνύματος με μορφή πίνακα" diff --git a/src/renderer/src/i18n/translate/es-es.json b/src/renderer/src/i18n/translate/es-es.json index 9c90ac3996..f8721c5bba 100644 --- a/src/renderer/src/i18n/translate/es-es.json +++ b/src/renderer/src/i18n/translate/es-es.json @@ -4475,6 +4475,14 @@ } }, "options": { + "anthropic_cache": { + "cache_last_n": "[to be translated]:Cache Last N Messages", + "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)", + "cache_system": "[to be translated]:Cache System Message", + "cache_system_help": "[to be translated]:Whether to cache the system prompt", + "token_threshold": "[to be translated]:Cache Token Threshold", + "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching." + }, "array_content": { "help": "¿Admite el proveedor que el campo content del mensaje sea de tipo array?", "label": "Contenido del mensaje compatible con formato de matriz" diff --git a/src/renderer/src/i18n/translate/fr-fr.json b/src/renderer/src/i18n/translate/fr-fr.json index af743c5784..42bc37ad0b 100644 --- a/src/renderer/src/i18n/translate/fr-fr.json +++ b/src/renderer/src/i18n/translate/fr-fr.json @@ -4475,6 +4475,14 @@ } }, "options": { + "anthropic_cache": { + "cache_last_n": "[to be translated]:Cache Last N Messages", + "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)", + "cache_system": "[to be translated]:Cache System Message", + "cache_system_help": "[to be translated]:Whether to cache the system prompt", + "token_threshold": "[to be translated]:Cache Token Threshold", + "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching." 
+ }, "array_content": { "help": "Ce fournisseur prend-il en charge le champ content du message sous forme de tableau ?", "label": "Prise en charge du format de tableau pour le contenu du message" diff --git a/src/renderer/src/i18n/translate/ja-jp.json b/src/renderer/src/i18n/translate/ja-jp.json index 8972411ea5..2971e36be6 100644 --- a/src/renderer/src/i18n/translate/ja-jp.json +++ b/src/renderer/src/i18n/translate/ja-jp.json @@ -4475,6 +4475,14 @@ } }, "options": { + "anthropic_cache": { + "cache_last_n": "[to be translated]:Cache Last N Messages", + "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)", + "cache_system": "[to be translated]:Cache System Message", + "cache_system_help": "[to be translated]:Whether to cache the system prompt", + "token_threshold": "[to be translated]:Cache Token Threshold", + "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching." + }, "array_content": { "help": "このプロバイダーは、message の content フィールドが配列型であることをサポートしていますか", "label": "配列形式のメッセージコンテンツをサポート" diff --git a/src/renderer/src/i18n/translate/pt-pt.json b/src/renderer/src/i18n/translate/pt-pt.json index 8e9c0a59e4..633ce438cb 100644 --- a/src/renderer/src/i18n/translate/pt-pt.json +++ b/src/renderer/src/i18n/translate/pt-pt.json @@ -4475,6 +4475,14 @@ } }, "options": { + "anthropic_cache": { + "cache_last_n": "[to be translated]:Cache Last N Messages", + "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)", + "cache_system": "[to be translated]:Cache System Message", + "cache_system_help": "[to be translated]:Whether to cache the system prompt", + "token_threshold": "[to be translated]:Cache Token Threshold", + "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching." 
+ }, "array_content": { "help": "O fornecedor suporta que o campo content da mensagem seja do tipo array?", "label": "suporta o formato de matriz do conteúdo da mensagem" diff --git a/src/renderer/src/i18n/translate/ro-ro.json b/src/renderer/src/i18n/translate/ro-ro.json index 9c0ba398c9..ae05e564fe 100644 --- a/src/renderer/src/i18n/translate/ro-ro.json +++ b/src/renderer/src/i18n/translate/ro-ro.json @@ -4475,6 +4475,14 @@ } }, "options": { + "anthropic_cache": { + "cache_last_n": "[to be translated]:Cache Last N Messages", + "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)", + "cache_system": "[to be translated]:Cache System Message", + "cache_system_help": "[to be translated]:Whether to cache the system prompt", + "token_threshold": "[to be translated]:Cache Token Threshold", + "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching." + }, "array_content": { "help": "Furnizorul acceptă ca câmpul content al mesajului să fie de tip array?", "label": "Acceptă conținut mesaj în format array" diff --git a/src/renderer/src/i18n/translate/ru-ru.json b/src/renderer/src/i18n/translate/ru-ru.json index cde0f0e6ca..320ecb4741 100644 --- a/src/renderer/src/i18n/translate/ru-ru.json +++ b/src/renderer/src/i18n/translate/ru-ru.json @@ -1311,6 +1311,7 @@ "backup": { "file_format": "Ошибка формата файла резервной копии" }, + "base64DataTruncated": "[to be translated]:Base64 image data truncated, size", "base64DataTruncated": "Данные изображения в формате Base64 усечены, размер", "boundary": { "default": { @@ -1392,7 +1393,9 @@ "text": "текст", "toolInput": "ввод инструмента", "toolName": "имя инструмента", + "truncated": "[to be translated]:Data truncated, original size", "truncated": "Данные усечены, исходный размер", + "truncatedBadge": "[to be translated]:Truncated", "truncatedBadge": "Усечённый", "unknown": "Неизвестная ошибка", "usage": 
"Дозировка", @@ -4475,6 +4478,14 @@ } }, "options": { + "anthropic_cache": { + "cache_last_n": "[to be translated]:Cache Last N Messages", + "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)", + "cache_system": "[to be translated]:Cache System Message", + "cache_system_help": "[to be translated]:Whether to cache the system prompt", + "token_threshold": "[to be translated]:Cache Token Threshold", + "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching." + }, "array_content": { "help": "Поддерживает ли данный провайдер тип массива для поля content в сообщении", "label": "поддержка формата массива для содержимого сообщения" diff --git a/src/renderer/src/pages/settings/ProviderSettings/ApiOptionsSettings/ApiOptionsSettings.tsx b/src/renderer/src/pages/settings/ProviderSettings/ApiOptionsSettings/ApiOptionsSettings.tsx index 3e4aa4c7c7..de883ccf9e 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/ApiOptionsSettings/ApiOptionsSettings.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/ApiOptionsSettings/ApiOptionsSettings.tsx @@ -1,8 +1,9 @@ import { HStack } from '@renderer/components/Layout' import { InfoTooltip } from '@renderer/components/TooltipIcons' import { useProvider } from '@renderer/hooks/useProvider' -import type { Provider } from '@renderer/types' -import { Flex, Switch } from 'antd' +import { type AnthropicCacheControlSettings, type Provider } from '@renderer/types' +import { isSupportAnthropicPromptCacheProvider } from '@renderer/utils/provider' +import { Divider, Flex, InputNumber, Switch } from 'antd' import { startTransition, useCallback, useMemo } from 'react' import { useTranslation } from 'react-i18next' @@ -114,6 +115,27 @@ const ApiOptionsSettings = ({ providerId }: Props) => { return items }, [openAIOptions, provider.apiOptions, provider.type, t, updateProviderTransition]) + const 
isSupportAnthropicPromptCache = isSupportAnthropicPromptCacheProvider(provider) + + const cacheSettings = useMemo( + () => + provider.anthropicCacheControl ?? { + tokenThreshold: 0, + cacheSystemMessage: true, + cacheLastNMessages: 0 + }, + [provider.anthropicCacheControl] + ) + + const updateCacheSettings = useCallback( + (updates: Partial) => { + updateProviderTransition({ + anthropicCacheControl: { ...cacheSettings, ...updates } + }) + }, + [cacheSettings, updateProviderTransition] + ) + return ( {options.map((item) => ( @@ -127,6 +149,52 @@ const ApiOptionsSettings = ({ providerId }: Props) => { ))} + + {isSupportAnthropicPromptCache && ( + <> + + + + {t('settings.provider.api.options.anthropic_cache.token_threshold')} + + + updateCacheSettings({ tokenThreshold: v ?? 0 })} + style={{ width: 100 }} + /> + + {cacheSettings.tokenThreshold > 0 && ( + <> + + + {t('settings.provider.api.options.anthropic_cache.cache_system')} + + + updateCacheSettings({ cacheSystemMessage: v })} + /> + + + + {t('settings.provider.api.options.anthropic_cache.cache_last_n')} + + + updateCacheSettings({ cacheLastNMessages: v ?? 0 })} + style={{ width: 100 }} + /> + + + )} + + )} ) } diff --git a/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx b/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx index 91c65a7e3e..9d770dd578 100644 --- a/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx +++ b/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx @@ -31,6 +31,7 @@ import { isOllamaProvider, isOpenAICompatibleProvider, isOpenAIProvider, + isSupportAnthropicPromptCacheProvider, isVertexProvider } from '@renderer/utils/provider' import { Button, Divider, Flex, Input, Select, Space, Switch, Tooltip } from 'antd' @@ -400,7 +401,7 @@ const ProviderSetting: FC = ({ providerId }) => {