Add Anthropic Cache (#12333)

* add anthropic cache
* i18n: sync
* fix: condition judgment
* lg
* ag

---------

Co-authored-by: suyao <sy20010504@gmail.com>
2026-01-10 23:59:45 +08:00 · 2026-01-07 23:05:30 +08:00 · 2026-01-07 23:05:30 +08:00 · 81ea847989
commit 81ea847989
parent 1d07e89e38
17 changed files with 269 additions and 5 deletions
--- a/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts
+++ b/src/renderer/src/aiCore/middleware/AiSdkMiddlewareBuilder.ts
@ -1,6 +1,6 @@
 import type { WebSearchPluginConfig } from '@cherrystudio/ai-core/built-in/plugins'
 import { loggerService } from '@logger'
-import { isGemini3Model, isSupportedThinkingTokenQwenModel } from '@renderer/config/models'
+import { isAnthropicModel, isGemini3Model, isSupportedThinkingTokenQwenModel } from '@renderer/config/models'
 import type { McpMode, MCPTool } from '@renderer/types'
 import { type Assistant, type Message, type Model, type Provider, SystemProviderIds } from '@renderer/types'
 import type { Chunk } from '@renderer/types/chunk'
@ -10,6 +10,7 @@ import { extractReasoningMiddleware, simulateStreamingMiddleware } from 'ai'
 import { getAiSdkProviderId } from '../provider/factory'
 import { isOpenRouterGeminiGenerateImageModel } from '../utils/image'
 import { anthropicCacheMiddleware } from './anthropicCacheMiddleware'
 import { noThinkMiddleware } from './noThinkMiddleware'
 import { openrouterGenerateImageMiddleware } from './openrouterGenerateImageMiddleware'
 import { openrouterReasoningMiddleware } from './openrouterReasoningMiddleware'
@ -179,7 +180,12 @@ function addProviderSpecificMiddlewares(builder: AiSdkMiddlewareBuilder, config:
  // 根据不同provider添加特定中间件
  switch (config.provider.type) {
    case 'anthropic':
-      // Anthropic特定中间件
+      if (isAnthropicModel(config.model) && config.provider.anthropicCacheControl?.tokenThreshold) {
        builder.add({
          name: 'anthropic-cache',
          middleware: anthropicCacheMiddleware(config.provider)
        })
      }
      break
    case 'openai':
    case 'azure-openai': {
--- a/src/renderer/src/aiCore/middleware/anthropicCacheMiddleware.ts
+++ b/src/renderer/src/aiCore/middleware/anthropicCacheMiddleware.ts
@ -0,0 +1,79 @@
 /**
 * Anthropic Prompt Caching Middleware
 * @see https://ai-sdk.dev/providers/ai-sdk-providers/anthropic#cache-control
 */
 import { estimateTextTokens } from '@renderer/services/TokenService'
 import type { Provider } from '@renderer/types'
 import type { LanguageModelMiddleware } from 'ai'
 /** Provider options that tag a message or content part with an Anthropic `cacheControl` of type `ephemeral`. */
 const cacheProviderOptions = { anthropic: { cacheControl: { type: 'ephemeral' } } }
 /**
 * Estimates the token count of a prompt message's content.
 *
 * A string is measured directly. For an array, only parts that are objects
 * carrying a string `text` field contribute; every other part is ignored.
 * Any other content shape counts as 0.
 *
 * @param content - Message content: a string, an array of parts, or anything else.
 * @returns The summed token estimate of the textual content.
 */
 function estimateContentTokens(content: unknown): number {
  if (typeof content === 'string') return estimateTextTokens(content)
  if (!Array.isArray(content)) return 0
  let total = 0
  for (const part of content as unknown[]) {
    if (typeof part !== 'object' || part === null) continue
    const text = (part as { text?: unknown }).text
    // Narrow instead of asserting: a part whose `text` is present but not a
    // string must not be handed to the estimator.
    if (typeof text === 'string') total += estimateTextTokens(text)
  }
  return total
 }
 /**
 * Returns message content with the Anthropic cache marker placed on its last part.
 *
 * - A string becomes a single text part carrying the marker.
 * - A non-empty array is shallow-copied and its last element, when it is an
 *   object, receives the marker. The input array and its parts are not mutated.
 * - Anything else (including an empty array) is returned unchanged.
 *
 * @param content - Message content: a string, an array of parts, or anything else.
 * @returns The content with the cache marker applied where possible.
 */
 function addCacheToContentParts(content: unknown): unknown {
  if (typeof content === 'string') {
    return [{ type: 'text', text: content, providerOptions: cacheProviderOptions }]
  }
  if (Array.isArray(content) && content.length > 0) {
    const parts: unknown[] = [...content]
    const lastIndex = parts.length - 1
    const last = parts[lastIndex]
    if (typeof last === 'object' && last !== null) {
      const existing = (last as { providerOptions?: Record<string, Record<string, unknown> | undefined> })
        .providerOptions
      // Merge rather than overwrite so provider options already present on the
      // part (for any provider, including other anthropic keys) are kept.
      parts[lastIndex] = {
        ...last,
        providerOptions: {
          ...existing,
          anthropic: { ...existing?.anthropic, ...cacheProviderOptions.anthropic }
        }
      }
    }
    return parts
  }
  return content
 }
 /**
 * Creates an AI SDK middleware that marks prompt messages for Anthropic prompt caching.
 *
 * Driven by `provider.anthropicCacheControl`:
 * - `tokenThreshold`: when falsy, or when the prompt is not a non-empty array,
 *   the params are returned untouched. Otherwise only messages whose estimated
 *   token count is at least this value are marked.
 * - `cacheSystemMessage`: marks the first qualifying system message (marker on
 *   the message object itself).
 * - `cacheLastNMessages`: walking from the end of the prompt, marks up to N
 *   qualifying non-system messages (marker on the last content part).
 *
 * The total number of markers is capped at 4, the maximum number of cache
 * breakpoints Anthropic accepts in one request.
 *
 * @param provider - Provider whose `anthropicCacheControl` settings drive the marking.
 * @returns A `v2` language-model middleware whose `transformParams` returns new
 *   params with a copied prompt; the incoming prompt array is not mutated.
 */
 export function anthropicCacheMiddleware(provider: Provider): LanguageModelMiddleware {
  // Anthropic rejects requests that carry more than 4 cache_control breakpoints.
  const maxCacheBreakpoints = 4
  return {
    middlewareVersion: 'v2',
    transformParams: async ({ params }) => {
      const settings = provider.anthropicCacheControl
      if (!settings?.tokenThreshold || !Array.isArray(params.prompt) || params.prompt.length === 0) {
        return params
      }
      const { tokenThreshold, cacheSystemMessage, cacheLastNMessages } = settings
      // Shallow copy so the caller's prompt array is left as it was.
      const messages = [...params.prompt]
      let remainingBreakpoints = maxCacheBreakpoints
      // Cache system message (providerOptions on message object)
      if (cacheSystemMessage) {
        const systemIndex = messages.findIndex(
          (msg: any) => msg.role === 'system' && estimateContentTokens(msg.content) >= tokenThreshold
        )
        if (systemIndex !== -1) {
          const systemMessage = messages[systemIndex] as any
          const existing = systemMessage.providerOptions
          // Merge so provider options already set on the message are preserved.
          messages[systemIndex] = {
            ...systemMessage,
            providerOptions: {
              ...existing,
              anthropic: { ...existing?.anthropic, ...cacheProviderOptions.anthropic }
            }
          }
          remainingBreakpoints--
        }
      }
      // Cache last N non-system messages (providerOptions on content parts)
      if (cacheLastNMessages > 0) {
        // Never hand out more markers than the breakpoint budget still allows.
        const budget = Math.min(cacheLastNMessages, remainingBreakpoints)
        let cachedCount = 0
        for (let i = messages.length - 1; i >= 0 && cachedCount < budget; i--) {
          const msg = messages[i] as any
          if (msg.role !== 'system' && estimateContentTokens(msg.content) >= tokenThreshold) {
            messages[i] = { ...msg, content: addCacheToContentParts(msg.content) }
            cachedCount++
          }
        }
      }
      return { ...params, prompt: messages }
    }
  }
 }
--- a/src/renderer/src/i18n/locales/en-us.json
+++ b/src/renderer/src/i18n/locales/en-us.json
@ -4475,6 +4475,14 @@
          }
        },
        "options": {
          "anthropic_cache": {
            "cache_last_n": "Cache Last N Messages",
            "cache_last_n_help": "Cache the last N conversation messages (excluding system messages)",
            "cache_system": "Cache System Message",
            "cache_system_help": "Whether to cache the system prompt",
            "token_threshold": "Cache Token Threshold",
            "token_threshold_help": "Messages exceeding this token count will be cached. Set to 0 to disable caching."
          },
          "array_content": {
            "help": "Does the provider support the content field of the message being of array type?",
            "label": "Supports array format message content"
--- a/src/renderer/src/i18n/locales/zh-cn.json
+++ b/src/renderer/src/i18n/locales/zh-cn.json
@ -4475,6 +4475,14 @@
          }
        },
        "options": {
          "anthropic_cache": {
            "cache_last_n": "缓存最后 N 条消息",
            "cache_last_n_help": "缓存最后的 N 条对话消息（不含系统消息）",
            "cache_system": "缓存系统消息",
            "cache_system_help": "是否缓存系统提示词",
            "token_threshold": "缓存 Token 阈值",
            "token_threshold_help": "消息超过此 Token 数才会被缓存，设为 0 禁用缓存"
          },
          "array_content": {
            "help": "该提供商是否支持 message 的 content 字段为 array 类型",
            "label": "支持数组格式的 message content"
--- a/src/renderer/src/i18n/locales/zh-tw.json
+++ b/src/renderer/src/i18n/locales/zh-tw.json
@ -4475,6 +4475,14 @@
          }
        },
        "options": {
          "anthropic_cache": {
            "cache_last_n": "[to be translated]:Cache Last N Messages",
            "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
            "cache_system": "[to be translated]:Cache System Message",
            "cache_system_help": "[to be translated]:Whether to cache the system prompt",
            "token_threshold": "[to be translated]:Cache Token Threshold",
            "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
          },
          "array_content": {
            "help": "該供應商是否支援 message 的 content 欄位為 array 類型",
            "label": "支援陣列格式的 message content"
--- a/src/renderer/src/i18n/translate/de-de.json
+++ b/src/renderer/src/i18n/translate/de-de.json
@ -4475,6 +4475,14 @@
          }
        },
        "options": {
          "anthropic_cache": {
            "cache_last_n": "[to be translated]:Cache Last N Messages",
            "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
            "cache_system": "[to be translated]:Cache System Message",
            "cache_system_help": "[to be translated]:Whether to cache the system prompt",
            "token_threshold": "[to be translated]:Cache Token Threshold",
            "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
          },
          "array_content": {
            "help": "Unterstützt Array-Format für message content",
            "label": "Unterstützt Array-Format für message content"
--- a/src/renderer/src/i18n/translate/el-gr.json
+++ b/src/renderer/src/i18n/translate/el-gr.json
@ -4475,6 +4475,14 @@
          }
        },
        "options": {
          "anthropic_cache": {
            "cache_last_n": "[to be translated]:Cache Last N Messages",
            "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
            "cache_system": "[to be translated]:Cache System Message",
            "cache_system_help": "[to be translated]:Whether to cache the system prompt",
            "token_threshold": "[to be translated]:Cache Token Threshold",
            "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
          },
          "array_content": {
            "help": "Εάν ο πάροχος υποστηρίζει το πεδίο περιεχομένου του μηνύματος ως τύπο πίνακα",
            "label": "Υποστήριξη για περιεχόμενο μηνύματος με μορφή πίνακα"
--- a/src/renderer/src/i18n/translate/es-es.json
+++ b/src/renderer/src/i18n/translate/es-es.json
@ -4475,6 +4475,14 @@
          }
        },
        "options": {
          "anthropic_cache": {
            "cache_last_n": "[to be translated]:Cache Last N Messages",
            "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
            "cache_system": "[to be translated]:Cache System Message",
            "cache_system_help": "[to be translated]:Whether to cache the system prompt",
            "token_threshold": "[to be translated]:Cache Token Threshold",
            "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
          },
          "array_content": {
            "help": "¿Admite el proveedor que el campo content del mensaje sea de tipo array?",
            "label": "Contenido del mensaje compatible con formato de matriz"
--- a/src/renderer/src/i18n/translate/fr-fr.json
+++ b/src/renderer/src/i18n/translate/fr-fr.json
@ -4475,6 +4475,14 @@
          }
        },
        "options": {
          "anthropic_cache": {
            "cache_last_n": "[to be translated]:Cache Last N Messages",
            "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
            "cache_system": "[to be translated]:Cache System Message",
            "cache_system_help": "[to be translated]:Whether to cache the system prompt",
            "token_threshold": "[to be translated]:Cache Token Threshold",
            "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
          },
          "array_content": {
            "help": "Ce fournisseur prend-il en charge le champ content du message sous forme de tableau ?",
            "label": "Prise en charge du format de tableau pour le contenu du message"
--- a/src/renderer/src/i18n/translate/ja-jp.json
+++ b/src/renderer/src/i18n/translate/ja-jp.json
@ -4475,6 +4475,14 @@
          }
        },
        "options": {
          "anthropic_cache": {
            "cache_last_n": "[to be translated]:Cache Last N Messages",
            "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
            "cache_system": "[to be translated]:Cache System Message",
            "cache_system_help": "[to be translated]:Whether to cache the system prompt",
            "token_threshold": "[to be translated]:Cache Token Threshold",
            "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
          },
          "array_content": {
            "help": "このプロバイダーは、message の content フィールドが配列型であることをサポートしていますか",
            "label": "配列形式のメッセージコンテンツをサポート"
--- a/src/renderer/src/i18n/translate/pt-pt.json
+++ b/src/renderer/src/i18n/translate/pt-pt.json
@ -4475,6 +4475,14 @@
          }
        },
        "options": {
          "anthropic_cache": {
            "cache_last_n": "[to be translated]:Cache Last N Messages",
            "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
            "cache_system": "[to be translated]:Cache System Message",
            "cache_system_help": "[to be translated]:Whether to cache the system prompt",
            "token_threshold": "[to be translated]:Cache Token Threshold",
            "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
          },
          "array_content": {
            "help": "O fornecedor suporta que o campo content da mensagem seja do tipo array?",
            "label": "suporta o formato de matriz do conteúdo da mensagem"
--- a/src/renderer/src/i18n/translate/ro-ro.json
+++ b/src/renderer/src/i18n/translate/ro-ro.json
@ -4475,6 +4475,14 @@
          }
        },
        "options": {
          "anthropic_cache": {
            "cache_last_n": "[to be translated]:Cache Last N Messages",
            "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
            "cache_system": "[to be translated]:Cache System Message",
            "cache_system_help": "[to be translated]:Whether to cache the system prompt",
            "token_threshold": "[to be translated]:Cache Token Threshold",
            "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
          },
          "array_content": {
            "help": "Furnizorul acceptă ca câmpul content al mesajului să fie de tip array?",
            "label": "Acceptă conținut mesaj în format array"
--- a/src/renderer/src/i18n/translate/ru-ru.json
+++ b/src/renderer/src/i18n/translate/ru-ru.json
@ -1311,6 +1311,7 @@
    "backup": {
      "file_format": "Ошибка формата файла резервной копии"
    },
    "base64DataTruncated": "Данные изображения в формате Base64 усечены, размер",
    "boundary": {
      "default": {
@ -1392,7 +1393,9 @@
    "text": "текст",
    "toolInput": "ввод инструмента",
    "toolName": "имя инструмента",
    "truncated": "Данные усечены, исходный размер",
    "truncatedBadge": "Усечённый",
    "unknown": "Неизвестная ошибка",
    "usage": "Дозировка",
@ -4475,6 +4478,14 @@
          }
        },
        "options": {
          "anthropic_cache": {
            "cache_last_n": "[to be translated]:Cache Last N Messages",
            "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
            "cache_system": "[to be translated]:Cache System Message",
            "cache_system_help": "[to be translated]:Whether to cache the system prompt",
            "token_threshold": "[to be translated]:Cache Token Threshold",
            "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
          },
          "array_content": {
            "help": "Поддерживает ли данный провайдер тип массива для поля content в сообщении",
            "label": "поддержка формата массива для содержимого сообщения"
--- a/src/renderer/src/pages/settings/ProviderSettings/ApiOptionsSettings/ApiOptionsSettings.tsx
+++ b/src/renderer/src/pages/settings/ProviderSettings/ApiOptionsSettings/ApiOptionsSettings.tsx
@ -1,8 +1,9 @@
 import { HStack } from '@renderer/components/Layout'
 import { InfoTooltip } from '@renderer/components/TooltipIcons'
 import { useProvider } from '@renderer/hooks/useProvider'
-import type { Provider } from '@renderer/types'
+import { type AnthropicCacheControlSettings, type Provider } from '@renderer/types'
-import { Flex, Switch } from 'antd'
+import { isSupportAnthropicPromptCacheProvider } from '@renderer/utils/provider'
 import { Divider, Flex, InputNumber, Switch } from 'antd'
 import { startTransition, useCallback, useMemo } from 'react'
 import { useTranslation } from 'react-i18next'
@ -114,6 +115,27 @@ const ApiOptionsSettings = ({ providerId }: Props) => {
    return items
  }, [openAIOptions, provider.apiOptions, provider.type, t, updateProviderTransition])
  const isSupportAnthropicPromptCache = isSupportAnthropicPromptCacheProvider(provider)
  const cacheSettings = useMemo(
    () =>
      provider.anthropicCacheControl ?? {
        tokenThreshold: 0,
        cacheSystemMessage: true,
        cacheLastNMessages: 0
      },
    [provider.anthropicCacheControl]
  )
  const updateCacheSettings = useCallback(
    (updates: Partial<AnthropicCacheControlSettings>) => {
      updateProviderTransition({
        anthropicCacheControl: { ...cacheSettings, ...updates }
      })
    },
    [cacheSettings, updateProviderTransition]
  )
  return (
    <Flex vertical gap="middle">
      {options.map((item) => (
@ -127,6 +149,52 @@ const ApiOptionsSettings = ({ providerId }: Props) => {
          <Switch id={item.key} checked={item.checked} onChange={item.onChange} />
        </HStack>
      ))}
      {isSupportAnthropicPromptCache && (
        <>
          <Divider style={{ margin: '8px 0' }} />
          <HStack justifyContent="space-between">
            <HStack alignItems="center" gap={6}>
              <span>{t('settings.provider.api.options.anthropic_cache.token_threshold')}</span>
              <InfoTooltip title={t('settings.provider.api.options.anthropic_cache.token_threshold_help')} />
            </HStack>
            <InputNumber
              min={0}
              max={100000}
              value={cacheSettings.tokenThreshold}
              onChange={(v) => updateCacheSettings({ tokenThreshold: v ?? 0 })}
              style={{ width: 100 }}
            />
          </HStack>
          {cacheSettings.tokenThreshold > 0 && (
            <>
              <HStack justifyContent="space-between">
                <HStack alignItems="center" gap={6}>
                  <span>{t('settings.provider.api.options.anthropic_cache.cache_system')}</span>
                  <InfoTooltip title={t('settings.provider.api.options.anthropic_cache.cache_system_help')} />
                </HStack>
                <Switch
                  checked={cacheSettings.cacheSystemMessage}
                  onChange={(v) => updateCacheSettings({ cacheSystemMessage: v })}
                />
              </HStack>
              <HStack justifyContent="space-between">
                <HStack alignItems="center" gap={6}>
                  <span>{t('settings.provider.api.options.anthropic_cache.cache_last_n')}</span>
                  <InfoTooltip title={t('settings.provider.api.options.anthropic_cache.cache_last_n_help')} />
                </HStack>
                <InputNumber
                  min={0}
                  max={10}
                  value={cacheSettings.cacheLastNMessages}
                  onChange={(v) => updateCacheSettings({ cacheLastNMessages: v ?? 0 })}
                  style={{ width: 100 }}
                />
              </HStack>
            </>
          )}
        </>
      )}
    </Flex>
  )
 }
--- a/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx
+++ b/src/renderer/src/pages/settings/ProviderSettings/ProviderSetting.tsx
@ -31,6 +31,7 @@ import {
  isOllamaProvider,
  isOpenAICompatibleProvider,
  isOpenAIProvider,
  isSupportAnthropicPromptCacheProvider,
  isVertexProvider
 } from '@renderer/utils/provider'
 import { Button, Divider, Flex, Input, Select, Space, Switch, Tooltip } from 'antd'
@ -400,7 +401,7 @@ const ProviderSetting: FC<Props> = ({ providerId }) => {
              <Button type="text" size="small" icon={<SquareArrowOutUpRight size={14} />} />
            </Link>
          )}
-          {!isSystemProvider(provider) && (
+          {(!isSystemProvider(provider) || isSupportAnthropicPromptCacheProvider(provider)) && (
            <Tooltip title={t('settings.provider.api.options.label')}>
              <Button
                type="text"
--- a/src/renderer/src/types/provider.ts
+++ b/src/renderer/src/types/provider.ts
@ -79,6 +79,12 @@ export function isGroqServiceTier(tier: string | undefined | null): tier is Groq
 export type ServiceTier = OpenAIServiceTier | GroqServiceTier
 /** Per-provider settings read by `anthropicCacheMiddleware` to decide which prompt messages get a cache marker. */
 export type AnthropicCacheControlSettings = {
  // Estimated token count a message must reach to be marked; a falsy value (0) makes the middleware leave params untouched.
  tokenThreshold: number
  // When true, the first system message that reaches the threshold is marked.
  cacheSystemMessage: boolean
  // Maximum number of non-system messages, scanned from the end of the prompt, to mark; 0 skips them.
  cacheLastNMessages: number
 }
 export function isServiceTier(tier: string | null | undefined): tier is ServiceTier {
  return isGroqServiceTier(tier) || isOpenAIServiceTier(tier)
 }
@ -127,6 +133,9 @@ export type Provider = {
  isVertex?: boolean
  notes?: string
  extra_headers?: Record<string, string>
  // Anthropic prompt caching settings
  anthropicCacheControl?: AnthropicCacheControlSettings
 }
 export const SystemProviderIdSchema = z.enum([
--- a/src/renderer/src/utils/provider.ts
+++ b/src/renderer/src/utils/provider.ts
@ -198,3 +198,13 @@ export const NOT_SUPPORT_API_KEY_PROVIDERS: readonly SystemProviderId[] = [
 ]
 export const NOT_SUPPORT_API_KEY_PROVIDER_TYPES: readonly ProviderType[] = ['vertexai', 'aws-bedrock']
 // https://platform.claude.com/docs/en/build-with-claude/prompt-caching#1-hour-cache-duration
 /**
 * Tells whether the Anthropic prompt-cache options apply to a provider.
 *
 * True when the provider's type is `anthropic`, when `isNewApiProvider`
 * accepts it, when its id is the aihubmix system provider id, or when
 * `isAzureOpenAIProvider` accepts it. Checks run in that order and stop at
 * the first match.
 */
 export const isSupportAnthropicPromptCacheProvider = (provider: Provider) => {
  if (provider.type === 'anthropic') return true
  if (isNewApiProvider(provider)) return true
  if (provider.id === SystemProviderIds.aihubmix) return true
  return isAzureOpenAIProvider(provider)
 }