Add Anthropic Cache (#12333)

* add anthropic cache

* i18n: sync

* fix: condition check

* lg

* ag

---------

Co-authored-by: suyao <sy20010504@gmail.com>
花月喵梦 2026-01-07 23:05:30 +08:00 committed by GitHub
parent 1d07e89e38
commit 81ea847989
17 changed files with 269 additions and 5 deletions
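In short: for Anthropic-type providers, a new AI SDK middleware estimates message token counts and, above a user-configured threshold, tags the system message and/or the last N conversation messages with Anthropic's ephemeral cache control, enabling prompt caching. As a minimal sketch of the resulting request shape (not part of the diff; assumes the AI SDK Anthropic provider, and the model id is illustrative):

import { anthropic } from '@ai-sdk/anthropic'
import { generateText } from 'ai'

// Inside an async function. The providerOptions shape below is exactly the
// cacheProviderOptions constant this commit introduces; Anthropic places a
// prompt-cache breakpoint at the tagged block.
const { text } = await generateText({
  model: anthropic('claude-sonnet-4-5'), // illustrative model id
  messages: [
    {
      role: 'system',
      content: 'A long, stable system prompt…',
      providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } }
    },
    { role: 'user', content: 'First question against the cached prefix' }
  ]
})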

View File

@@ -1,6 +1,6 @@
import type { WebSearchPluginConfig } from '@cherrystudio/ai-core/built-in/plugins'
import { loggerService } from '@logger'
import { isGemini3Model, isSupportedThinkingTokenQwenModel } from '@renderer/config/models'
import { isAnthropicModel, isGemini3Model, isSupportedThinkingTokenQwenModel } from '@renderer/config/models'
import type { McpMode, MCPTool } from '@renderer/types'
import { type Assistant, type Message, type Model, type Provider, SystemProviderIds } from '@renderer/types'
import type { Chunk } from '@renderer/types/chunk'
@@ -10,6 +10,7 @@ import { extractReasoningMiddleware, simulateStreamingMiddleware } from 'ai'
import { getAiSdkProviderId } from '../provider/factory'
import { isOpenRouterGeminiGenerateImageModel } from '../utils/image'
import { anthropicCacheMiddleware } from './anthropicCacheMiddleware'
import { noThinkMiddleware } from './noThinkMiddleware'
import { openrouterGenerateImageMiddleware } from './openrouterGenerateImageMiddleware'
import { openrouterReasoningMiddleware } from './openrouterReasoningMiddleware'
@@ -179,7 +180,12 @@ function addProviderSpecificMiddlewares(builder: AiSdkMiddlewareBuilder, config:
// Add middleware specific to each provider type
switch (config.provider.type) {
case 'anthropic':
// Anthropic-specific middleware
if (isAnthropicModel(config.model) && config.provider.anthropicCacheControl?.tokenThreshold) {
builder.add({
name: 'anthropic-cache',
middleware: anthropicCacheMiddleware(config.provider)
})
}
break
case 'openai':
case 'azure-openai': {

View File

@@ -0,0 +1,79 @@
/**
* Anthropic Prompt Caching Middleware
* @see https://ai-sdk.dev/providers/ai-sdk-providers/anthropic#cache-control
*/
import { estimateTextTokens } from '@renderer/services/TokenService'
import type { Provider } from '@renderer/types'
import type { LanguageModelMiddleware } from 'ai'
const cacheProviderOptions = {
anthropic: { cacheControl: { type: 'ephemeral' } }
}
// Roughly estimate token count for string or array-of-parts message content
function estimateContentTokens(content: unknown): number {
if (typeof content === 'string') return estimateTextTokens(content)
if (Array.isArray(content)) {
return content.reduce((acc, part) => {
if (typeof part === 'object' && part !== null && 'text' in part) {
return acc + estimateTextTokens(part.text as string)
}
return acc
}, 0)
}
return 0
}
// Attach cache-control provider options to the last content part,
// wrapping bare string content as a single text part first
function addCacheToContentParts(content: unknown): unknown {
if (typeof content === 'string') {
return [{ type: 'text', text: content, providerOptions: cacheProviderOptions }]
}
if (Array.isArray(content) && content.length > 0) {
const result = [...content]
const last = result[result.length - 1]
if (typeof last === 'object' && last !== null) {
result[result.length - 1] = { ...last, providerOptions: cacheProviderOptions }
}
return result
}
return content
}
export function anthropicCacheMiddleware(provider: Provider): LanguageModelMiddleware {
return {
middlewareVersion: 'v2',
transformParams: async ({ params }) => {
const settings = provider.anthropicCacheControl
if (!settings?.tokenThreshold || !Array.isArray(params.prompt) || params.prompt.length === 0) {
return params
}
const { tokenThreshold, cacheSystemMessage, cacheLastNMessages } = settings
const messages = [...params.prompt]
let cachedCount = 0
// Cache system message (providerOptions on message object)
if (cacheSystemMessage) {
for (let i = 0; i < messages.length; i++) {
const msg = messages[i] as any
if (msg.role === 'system' && estimateContentTokens(msg.content) >= tokenThreshold) {
messages[i] = { ...msg, providerOptions: cacheProviderOptions }
break
}
}
}
// Cache last N non-system messages (providerOptions on content parts)
if (cacheLastNMessages > 0) {
for (let i = messages.length - 1; i >= 0 && cachedCount < cacheLastNMessages; i--) {
const msg = messages[i] as any
if (msg.role !== 'system' && estimateContentTokens(msg.content) >= tokenThreshold) {
messages[i] = { ...msg, content: addCacheToContentParts(msg.content) }
cachedCount++
}
}
}
return { ...params, prompt: messages }
}
}
}
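To make the transform concrete, a hypothetical before/after, assuming settings { tokenThreshold: 1024, cacheSystemMessage: true, cacheLastNMessages: 1 } with both messages estimated above the threshold:

// Input prompt as received by transformParams:
const before = [
  { role: 'system', content: 'a long system prompt…' },
  { role: 'user', content: [{ type: 'text', text: 'a long question…' }] }
]

// Output: the system message is tagged on the message object itself, while the
// last non-system message is tagged on its final content part:
const after = [
  {
    role: 'system',
    content: 'a long system prompt…',
    providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } }
  },
  {
    role: 'user',
    content: [
      {
        type: 'text',
        text: 'a long question…',
        providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } }
      }
    ]
  }
]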

View File

@@ -4475,6 +4475,14 @@
}
},
"options": {
"anthropic_cache": {
"cache_last_n": "Cache Last N Messages",
"cache_last_n_help": "Cache the last N conversation messages (excluding system messages)",
"cache_system": "Cache System Message",
"cache_system_help": "Whether to cache the system prompt",
"token_threshold": "Cache Token Threshold",
"token_threshold_help": "Messages exceeding this token count will be cached. Set to 0 to disable caching."
},
"array_content": {
"help": "Does the provider support the content field of the message being of array type?",
"label": "Supports array format message content"

View File

@@ -4475,6 +4475,14 @@
}
},
"options": {
"anthropic_cache": {
"cache_last_n": "缓存最后 N 条消息",
"cache_last_n_help": "缓存最后的 N 条对话消息(不含系统消息)",
"cache_system": "缓存系统消息",
"cache_system_help": "是否缓存系统提示词",
"token_threshold": "缓存 Token 阈值",
"token_threshold_help": "消息超过此 Token 数才会被缓存,设为 0 禁用缓存"
},
"array_content": {
"help": "该提供商是否支持 message 的 content 字段为 array 类型",
"label": "支持数组格式的 message content"

View File

@@ -4475,6 +4475,14 @@
}
},
"options": {
"anthropic_cache": {
"cache_last_n": "[to be translated]:Cache Last N Messages",
"cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
"cache_system": "[to be translated]:Cache System Message",
"cache_system_help": "[to be translated]:Whether to cache the system prompt",
"token_threshold": "[to be translated]:Cache Token Threshold",
"token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
},
"array_content": {
"help": "該供應商是否支援 message 的 content 欄位為 array 類型",
"label": "支援陣列格式的 message content"

View File

@@ -4475,6 +4475,14 @@
}
},
"options": {
"anthropic_cache": {
"cache_last_n": "[to be translated]:Cache Last N Messages",
"cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
"cache_system": "[to be translated]:Cache System Message",
"cache_system_help": "[to be translated]:Whether to cache the system prompt",
"token_threshold": "[to be translated]:Cache Token Threshold",
"token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
},
"array_content": {
"help": "Unterstützt Array-Format für message content",
"label": "Unterstützt Array-Format für message content"

View File

@@ -4475,6 +4475,14 @@
}
},
"options": {
"anthropic_cache": {
"cache_last_n": "[to be translated]:Cache Last N Messages",
"cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
"cache_system": "[to be translated]:Cache System Message",
"cache_system_help": "[to be translated]:Whether to cache the system prompt",
"token_threshold": "[to be translated]:Cache Token Threshold",
"token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
},
"array_content": {
"help": "Εάν ο πάροχος υποστηρίζει το πεδίο περιεχομένου του μηνύματος ως τύπο πίνακα",
"label": "Υποστήριξη για περιεχόμενο μηνύματος με μορφή πίνακα"

View File

@@ -4475,6 +4475,14 @@
}
},
"options": {
"anthropic_cache": {
"cache_last_n": "[to be translated]:Cache Last N Messages",
"cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
"cache_system": "[to be translated]:Cache System Message",
"cache_system_help": "[to be translated]:Whether to cache the system prompt",
"token_threshold": "[to be translated]:Cache Token Threshold",
"token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
},
"array_content": {
"help": "¿Admite el proveedor que el campo content del mensaje sea de tipo array?",
"label": "Contenido del mensaje compatible con formato de matriz"

View File

@@ -4475,6 +4475,14 @@
}
},
"options": {
"anthropic_cache": {
"cache_last_n": "[to be translated]:Cache Last N Messages",
"cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
"cache_system": "[to be translated]:Cache System Message",
"cache_system_help": "[to be translated]:Whether to cache the system prompt",
"token_threshold": "[to be translated]:Cache Token Threshold",
"token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
},
"array_content": {
"help": "Ce fournisseur prend-il en charge le champ content du message sous forme de tableau ?",
"label": "Prise en charge du format de tableau pour le contenu du message"

View File

@@ -4475,6 +4475,14 @@
}
},
"options": {
"anthropic_cache": {
"cache_last_n": "[to be translated]:Cache Last N Messages",
"cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
"cache_system": "[to be translated]:Cache System Message",
"cache_system_help": "[to be translated]:Whether to cache the system prompt",
"token_threshold": "[to be translated]:Cache Token Threshold",
"token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
},
"array_content": {
"help": "このプロバイダーは、message の content フィールドが配列型であることをサポートしていますか",
"label": "配列形式のメッセージコンテンツをサポート"

View File

@@ -4475,6 +4475,14 @@
}
},
"options": {
"anthropic_cache": {
"cache_last_n": "[to be translated]:Cache Last N Messages",
"cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
"cache_system": "[to be translated]:Cache System Message",
"cache_system_help": "[to be translated]:Whether to cache the system prompt",
"token_threshold": "[to be translated]:Cache Token Threshold",
"token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
},
"array_content": {
"help": "O fornecedor suporta que o campo content da mensagem seja do tipo array?",
"label": "suporta o formato de matriz do conteúdo da mensagem"

View File

@@ -4475,6 +4475,14 @@
}
},
"options": {
"anthropic_cache": {
"cache_last_n": "[to be translated]:Cache Last N Messages",
"cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
"cache_system": "[to be translated]:Cache System Message",
"cache_system_help": "[to be translated]:Whether to cache the system prompt",
"token_threshold": "[to be translated]:Cache Token Threshold",
"token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
},
"array_content": {
"help": "Furnizorul acceptă ca câmpul content al mesajului să fie de tip array?",
"label": "Acceptă conținut mesaj în format array"

View File

@@ -1311,6 +1311,7 @@
"backup": {
"file_format": "Ошибка формата файла резервной копии"
},
"base64DataTruncated": "[to be translated]:Base64 image data truncated, size",
"base64DataTruncated": "Данные изображения в формате Base64 усечены, размер",
"boundary": {
"default": {
@@ -1392,7 +1393,9 @@
"text": "текст",
"toolInput": "ввод инструмента",
"toolName": "имя инструмента",
"truncated": "[to be translated]:Data truncated, original size",
"truncated": "Данные усечены, исходный размер",
"truncatedBadge": "[to be translated]:Truncated",
"truncatedBadge": "Усечённый",
"unknown": "Неизвестная ошибка",
"usage": "Дозировка",
@@ -4475,6 +4478,14 @@
}
},
"options": {
"anthropic_cache": {
"cache_last_n": "[to be translated]:Cache Last N Messages",
"cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
"cache_system": "[to be translated]:Cache System Message",
"cache_system_help": "[to be translated]:Whether to cache the system prompt",
"token_threshold": "[to be translated]:Cache Token Threshold",
"token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
},
"array_content": {
"help": "Поддерживает ли данный провайдер тип массива для поля content в сообщении",
"label": "поддержка формата массива для содержимого сообщения"

View File

@@ -1,8 +1,9 @@
import { HStack } from '@renderer/components/Layout'
import { InfoTooltip } from '@renderer/components/TooltipIcons'
import { useProvider } from '@renderer/hooks/useProvider'
import type { Provider } from '@renderer/types'
import { Flex, Switch } from 'antd'
import { type AnthropicCacheControlSettings, type Provider } from '@renderer/types'
import { isSupportAnthropicPromptCacheProvider } from '@renderer/utils/provider'
import { Divider, Flex, InputNumber, Switch } from 'antd'
import { startTransition, useCallback, useMemo } from 'react'
import { useTranslation } from 'react-i18next'
@@ -114,6 +115,27 @@ const ApiOptionsSettings = ({ providerId }: Props) => {
return items
}, [openAIOptions, provider.apiOptions, provider.type, t, updateProviderTransition])
const isSupportAnthropicPromptCache = isSupportAnthropicPromptCacheProvider(provider)
const cacheSettings = useMemo(
() =>
provider.anthropicCacheControl ?? {
tokenThreshold: 0,
cacheSystemMessage: true,
cacheLastNMessages: 0
},
[provider.anthropicCacheControl]
)
const updateCacheSettings = useCallback(
(updates: Partial<AnthropicCacheControlSettings>) => {
updateProviderTransition({
anthropicCacheControl: { ...cacheSettings, ...updates }
})
},
[cacheSettings, updateProviderTransition]
)
return (
<Flex vertical gap="middle">
{options.map((item) => (
@@ -127,6 +149,52 @@ const ApiOptionsSettings = ({ providerId }: Props) => {
<Switch id={item.key} checked={item.checked} onChange={item.onChange} />
</HStack>
))}
{isSupportAnthropicPromptCache && (
<>
<Divider style={{ margin: '8px 0' }} />
<HStack justifyContent="space-between">
<HStack alignItems="center" gap={6}>
<span>{t('settings.provider.api.options.anthropic_cache.token_threshold')}</span>
<InfoTooltip title={t('settings.provider.api.options.anthropic_cache.token_threshold_help')} />
</HStack>
<InputNumber
min={0}
max={100000}
value={cacheSettings.tokenThreshold}
onChange={(v) => updateCacheSettings({ tokenThreshold: v ?? 0 })}
style={{ width: 100 }}
/>
</HStack>
{cacheSettings.tokenThreshold > 0 && (
<>
<HStack justifyContent="space-between">
<HStack alignItems="center" gap={6}>
<span>{t('settings.provider.api.options.anthropic_cache.cache_system')}</span>
<InfoTooltip title={t('settings.provider.api.options.anthropic_cache.cache_system_help')} />
</HStack>
<Switch
checked={cacheSettings.cacheSystemMessage}
onChange={(v) => updateCacheSettings({ cacheSystemMessage: v })}
/>
</HStack>
<HStack justifyContent="space-between">
<HStack alignItems="center" gap={6}>
<span>{t('settings.provider.api.options.anthropic_cache.cache_last_n')}</span>
<InfoTooltip title={t('settings.provider.api.options.anthropic_cache.cache_last_n_help')} />
</HStack>
<InputNumber
min={0}
max={10}
value={cacheSettings.cacheLastNMessages}
onChange={(v) => updateCacheSettings({ cacheLastNMessages: v ?? 0 })}
style={{ width: 100 }}
/>
</HStack>
</>
)}
</>
)}
</Flex>
)
}
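Worth noting in the component above: updateCacheSettings merges a partial update over the current (or default) settings, so each control writes one field without clobbering the others. Illustrative call:

// Starting from the defaults { tokenThreshold: 0, cacheSystemMessage: true, cacheLastNMessages: 0 }:
updateCacheSettings({ tokenThreshold: 1024 })
// → updateProviderTransition({
//     anthropicCacheControl: { tokenThreshold: 1024, cacheSystemMessage: true, cacheLastNMessages: 0 }
//   })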

View File

@@ -31,6 +31,7 @@ import {
isOllamaProvider,
isOpenAICompatibleProvider,
isOpenAIProvider,
isSupportAnthropicPromptCacheProvider,
isVertexProvider
} from '@renderer/utils/provider'
import { Button, Divider, Flex, Input, Select, Space, Switch, Tooltip } from 'antd'
@@ -400,7 +401,7 @@ const ProviderSetting: FC<Props> = ({ providerId }) => {
<Button type="text" size="small" icon={<SquareArrowOutUpRight size={14} />} />
</Link>
)}
{!isSystemProvider(provider) && (
{(!isSystemProvider(provider) || isSupportAnthropicPromptCacheProvider(provider)) && (
<Tooltip title={t('settings.provider.api.options.label')}>
<Button
type="text"

View File

@@ -79,6 +79,12 @@ export function isGroqServiceTier(tier: string | undefined | null): tier is GroqServiceTier {
export type ServiceTier = OpenAIServiceTier | GroqServiceTier
export type AnthropicCacheControlSettings = {
tokenThreshold: number
cacheSystemMessage: boolean
cacheLastNMessages: number
}
export function isServiceTier(tier: string | null | undefined): tier is ServiceTier {
return isGroqServiceTier(tier) || isOpenAIServiceTier(tier)
}
@@ -127,6 +133,9 @@ export type Provider = {
isVertex?: boolean
notes?: string
extra_headers?: Record<string, string>
// Anthropic prompt caching settings
anthropicCacheControl?: AnthropicCacheControlSettings
}
export const SystemProviderIdSchema = z.enum([

View File

@@ -198,3 +198,13 @@ export const NOT_SUPPORT_API_KEY_PROVIDERS: readonly SystemProviderId[] = [
]
export const NOT_SUPPORT_API_KEY_PROVIDER_TYPES: readonly ProviderType[] = ['vertexai', 'aws-bedrock']
// https://platform.claude.com/docs/en/build-with-claude/prompt-caching#1-hour-cache-duration
export const isSupportAnthropicPromptCacheProvider = (provider: Provider) => {
return (
provider.type === 'anthropic' ||
isNewApiProvider(provider) ||
provider.id === SystemProviderIds.aihubmix ||
isAzureOpenAIProvider(provider)
)
}
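End to end: this predicate decides which providers expose the cache settings UI, the settings persist on Provider.anthropicCacheControl, and the middleware is only registered when the provider type is 'anthropic', the model passes isAnthropicModel, and tokenThreshold is non-zero. A hypothetical configured provider (values illustrative):

const provider: Provider = {
  ...anthropicProvider, // hypothetical existing provider of type 'anthropic'
  anthropicCacheControl: {
    tokenThreshold: 1024, // messages estimated below this are never tagged
    cacheSystemMessage: true,
    cacheLastNMessages: 2 // tag the last two non-system messages
  }
}
isSupportAnthropicPromptCacheProvider(provider) // → true, so the options button is shown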