mirror of https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-09 23:10:20 +08:00
Add Anthropic Cache (#12333)
* add anthropic cache
* i18n: sync
* fix: condition judgment
* lg
* ag

---------

Co-authored-by: suyao <sy20010504@gmail.com>
This commit is contained in:
parent
1d07e89e38
commit
81ea847989
Middleware builder:

@@ -1,6 +1,6 @@
 import type { WebSearchPluginConfig } from '@cherrystudio/ai-core/built-in/plugins'
 import { loggerService } from '@logger'
-import { isGemini3Model, isSupportedThinkingTokenQwenModel } from '@renderer/config/models'
+import { isAnthropicModel, isGemini3Model, isSupportedThinkingTokenQwenModel } from '@renderer/config/models'
 import type { McpMode, MCPTool } from '@renderer/types'
 import { type Assistant, type Message, type Model, type Provider, SystemProviderIds } from '@renderer/types'
 import type { Chunk } from '@renderer/types/chunk'
@@ -10,6 +10,7 @@ import { extractReasoningMiddleware, simulateStreamingMiddleware } from 'ai'

 import { getAiSdkProviderId } from '../provider/factory'
 import { isOpenRouterGeminiGenerateImageModel } from '../utils/image'
+import { anthropicCacheMiddleware } from './anthropicCacheMiddleware'
 import { noThinkMiddleware } from './noThinkMiddleware'
 import { openrouterGenerateImageMiddleware } from './openrouterGenerateImageMiddleware'
 import { openrouterReasoningMiddleware } from './openrouterReasoningMiddleware'
@@ -179,7 +180,12 @@ function addProviderSpecificMiddlewares(builder: AiSdkMiddlewareBuilder, config:
   // Add provider-specific middlewares per provider type
   switch (config.provider.type) {
     case 'anthropic':
       // Anthropic-specific middleware
+      if (isAnthropicModel(config.model) && config.provider.anthropicCacheControl?.tokenThreshold) {
+        builder.add({
+          name: 'anthropic-cache',
+          middleware: anthropicCacheMiddleware(config.provider)
+        })
+      }
       break
     case 'openai':
     case 'azure-openai': {
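For reference, a middleware registered through the builder is ultimately applied by wrapping the language model. A minimal sketch of that composition using the AI SDK's `wrapLanguageModel` (the model id and the inline provider object are illustrative, not Cherry Studio's actual wiring):

```ts
import { createAnthropic } from '@ai-sdk/anthropic'
import { wrapLanguageModel } from 'ai'

import { anthropicCacheMiddleware } from './anthropicCacheMiddleware'

// Illustrative provider settings; in Cherry Studio these come from the
// user's provider configuration (see the Provider type change below).
const provider = {
  type: 'anthropic',
  anthropicCacheControl: { tokenThreshold: 1024, cacheSystemMessage: true, cacheLastNMessages: 2 }
} as any

// wrapLanguageModel runs transformParams before each generate/stream call,
// so the cache markers are injected transparently.
const model = wrapLanguageModel({
  model: createAnthropic()('claude-sonnet-4-20250514'), // hypothetical model id
  middleware: anthropicCacheMiddleware(provider)
})
```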
New file, anthropicCacheMiddleware:

@@ -0,0 +1,79 @@
+/**
+ * Anthropic Prompt Caching Middleware
+ * @see https://ai-sdk.dev/providers/ai-sdk-providers/anthropic#cache-control
+ */
+import { estimateTextTokens } from '@renderer/services/TokenService'
+import type { Provider } from '@renderer/types'
+import type { LanguageModelMiddleware } from 'ai'
+
+const cacheProviderOptions = {
+  anthropic: { cacheControl: { type: 'ephemeral' } }
+}
+
+function estimateContentTokens(content: unknown): number {
+  if (typeof content === 'string') return estimateTextTokens(content)
+  if (Array.isArray(content)) {
+    return content.reduce((acc, part) => {
+      if (typeof part === 'object' && part !== null && 'text' in part) {
+        return acc + estimateTextTokens(part.text as string)
+      }
+      return acc
+    }, 0)
+  }
+  return 0
+}
+
+function addCacheToContentParts(content: unknown): unknown {
+  if (typeof content === 'string') {
+    return [{ type: 'text', text: content, providerOptions: cacheProviderOptions }]
+  }
+  if (Array.isArray(content) && content.length > 0) {
+    const result = [...content]
+    const last = result[result.length - 1]
+    if (typeof last === 'object' && last !== null) {
+      result[result.length - 1] = { ...last, providerOptions: cacheProviderOptions }
+    }
+    return result
+  }
+  return content
+}
+
+export function anthropicCacheMiddleware(provider: Provider): LanguageModelMiddleware {
+  return {
+    middlewareVersion: 'v2',
+    transformParams: async ({ params }) => {
+      const settings = provider.anthropicCacheControl
+      if (!settings?.tokenThreshold || !Array.isArray(params.prompt) || params.prompt.length === 0) {
+        return params
+      }
+
+      const { tokenThreshold, cacheSystemMessage, cacheLastNMessages } = settings
+      const messages = [...params.prompt]
+      let cachedCount = 0
+
+      // Cache system message (providerOptions on message object)
+      if (cacheSystemMessage) {
+        for (let i = 0; i < messages.length; i++) {
+          const msg = messages[i] as any
+          if (msg.role === 'system' && estimateContentTokens(msg.content) >= tokenThreshold) {
+            messages[i] = { ...msg, providerOptions: cacheProviderOptions }
+            break
+          }
+        }
+      }
+
+      // Cache last N non-system messages (providerOptions on content parts)
+      if (cacheLastNMessages > 0) {
+        for (let i = messages.length - 1; i >= 0 && cachedCount < cacheLastNMessages; i--) {
+          const msg = messages[i] as any
+          if (msg.role !== 'system' && estimateContentTokens(msg.content) >= tokenThreshold) {
+            messages[i] = { ...msg, content: addCacheToContentParts(msg.content) }
+            cachedCount++
+          }
+        }
+      }
+
+      return { ...params, prompt: messages }
+    }
+  }
+}
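To make the transform concrete, here is an illustrative before/after of `transformParams` (the message text is invented; assume both messages exceed `tokenThreshold`, with `cacheSystemMessage: true` and `cacheLastNMessages: 1`):

```ts
// Prompt as the middleware receives it (illustrative).
const before = [
  { role: 'system', content: 'You are a meticulous assistant. <long, stable preamble...>' },
  { role: 'user', content: 'Summarize the attached report.' }
]

// Prompt after transformParams (conceptually).
const after = [
  {
    role: 'system',
    content: 'You are a meticulous assistant. <long, stable preamble...>',
    // System messages are cached at the message level.
    providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } }
  },
  {
    role: 'user',
    // String content is promoted to a text part so the cache marker
    // can attach to the final content part.
    content: [
      {
        type: 'text',
        text: 'Summarize the attached report.',
        providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } }
      }
    ]
  }
]
```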
English locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "Cache Last N Messages",
+          "cache_last_n_help": "Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "Cache System Message",
+          "cache_system_help": "Whether to cache the system prompt",
+          "token_threshold": "Cache Token Threshold",
+          "token_threshold_help": "Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "Does the provider support the content field of the message being of array type?",
           "label": "Supports array format message content"
Simplified Chinese locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "缓存最后 N 条消息",
+          "cache_last_n_help": "缓存最后的 N 条对话消息(不含系统消息)",
+          "cache_system": "缓存系统消息",
+          "cache_system_help": "是否缓存系统提示词",
+          "token_threshold": "缓存 Token 阈值",
+          "token_threshold_help": "消息超过此 Token 数才会被缓存,设为 0 禁用缓存"
+        },
         "array_content": {
           "help": "该提供商是否支持 message 的 content 字段为 array 类型",
           "label": "支持数组格式的 message content"
Traditional Chinese locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "該供應商是否支援 message 的 content 欄位為 array 類型",
           "label": "支援陣列格式的 message content"
German locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "Unterstützt Array-Format für message content",
           "label": "Unterstützt Array-Format für message content"
Greek locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "Εάν ο πάροχος υποστηρίζει το πεδίο περιεχομένου του μηνύματος ως τύπο πίνακα",
           "label": "Υποστήριξη για περιεχόμενο μηνύματος με μορφή πίνακα"
Spanish locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "¿Admite el proveedor que el campo content del mensaje sea de tipo array?",
           "label": "Contenido del mensaje compatible con formato de matriz"
French locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "Ce fournisseur prend-il en charge le champ content du message sous forme de tableau ?",
           "label": "Prise en charge du format de tableau pour le contenu du message"
Japanese locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "このプロバイダーは、message の content フィールドが配列型であることをサポートしていますか",
           "label": "配列形式のメッセージコンテンツをサポート"
Portuguese locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "O fornecedor suporta que o campo content da mensagem seja do tipo array?",
           "label": "suporta o formato de matriz do conteúdo da mensagem"
Romanian locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "Furnizorul acceptă ca câmpul content al mesajului să fie de tip array?",
           "label": "Acceptă conținut mesaj în format array"
Russian locale:

@@ -1311,6 +1311,7 @@
     "backup": {
       "file_format": "Ошибка формата файла резервной копии"
     },
-    "base64DataTruncated": "[to be translated]:Base64 image data truncated, size",
+    "base64DataTruncated": "Данные изображения в формате Base64 усечены, размер",
     "boundary": {
       "default": {
@@ -1392,7 +1393,9 @@
       "text": "текст",
       "toolInput": "ввод инструмента",
       "toolName": "имя инструмента",
-      "truncated": "[to be translated]:Data truncated, original size",
-      "truncatedBadge": "[to be translated]:Truncated",
+      "truncated": "Данные усечены, исходный размер",
+      "truncatedBadge": "Усечённый",
       "unknown": "Неизвестная ошибка",
       "usage": "Дозировка",
@@ -4475,6 +4478,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "Поддерживает ли данный провайдер тип массива для поля content в сообщении",
           "label": "поддержка формата массива для содержимого сообщения"
ApiOptionsSettings component:

@@ -1,8 +1,9 @@
 import { HStack } from '@renderer/components/Layout'
 import { InfoTooltip } from '@renderer/components/TooltipIcons'
 import { useProvider } from '@renderer/hooks/useProvider'
-import type { Provider } from '@renderer/types'
-import { Flex, Switch } from 'antd'
+import { type AnthropicCacheControlSettings, type Provider } from '@renderer/types'
+import { isSupportAnthropicPromptCacheProvider } from '@renderer/utils/provider'
+import { Divider, Flex, InputNumber, Switch } from 'antd'
 import { startTransition, useCallback, useMemo } from 'react'
 import { useTranslation } from 'react-i18next'
@@ -114,6 +115,27 @@ const ApiOptionsSettings = ({ providerId }: Props) => {
     return items
   }, [openAIOptions, provider.apiOptions, provider.type, t, updateProviderTransition])

+  const isSupportAnthropicPromptCache = isSupportAnthropicPromptCacheProvider(provider)
+
+  const cacheSettings = useMemo(
+    () =>
+      provider.anthropicCacheControl ?? {
+        tokenThreshold: 0,
+        cacheSystemMessage: true,
+        cacheLastNMessages: 0
+      },
+    [provider.anthropicCacheControl]
+  )
+
+  const updateCacheSettings = useCallback(
+    (updates: Partial<AnthropicCacheControlSettings>) => {
+      updateProviderTransition({
+        anthropicCacheControl: { ...cacheSettings, ...updates }
+      })
+    },
+    [cacheSettings, updateProviderTransition]
+  )
+
   return (
     <Flex vertical gap="middle">
       {options.map((item) => (
@@ -127,6 +149,52 @@ const ApiOptionsSettings = ({ providerId }: Props) => {
           <Switch id={item.key} checked={item.checked} onChange={item.onChange} />
         </HStack>
       ))}
+
+      {isSupportAnthropicPromptCache && (
+        <>
+          <Divider style={{ margin: '8px 0' }} />
+          <HStack justifyContent="space-between">
+            <HStack alignItems="center" gap={6}>
+              <span>{t('settings.provider.api.options.anthropic_cache.token_threshold')}</span>
+              <InfoTooltip title={t('settings.provider.api.options.anthropic_cache.token_threshold_help')} />
+            </HStack>
+            <InputNumber
+              min={0}
+              max={100000}
+              value={cacheSettings.tokenThreshold}
+              onChange={(v) => updateCacheSettings({ tokenThreshold: v ?? 0 })}
+              style={{ width: 100 }}
+            />
+          </HStack>
+          {cacheSettings.tokenThreshold > 0 && (
+            <>
+              <HStack justifyContent="space-between">
+                <HStack alignItems="center" gap={6}>
+                  <span>{t('settings.provider.api.options.anthropic_cache.cache_system')}</span>
+                  <InfoTooltip title={t('settings.provider.api.options.anthropic_cache.cache_system_help')} />
+                </HStack>
+                <Switch
+                  checked={cacheSettings.cacheSystemMessage}
+                  onChange={(v) => updateCacheSettings({ cacheSystemMessage: v })}
+                />
+              </HStack>
+              <HStack justifyContent="space-between">
+                <HStack alignItems="center" gap={6}>
+                  <span>{t('settings.provider.api.options.anthropic_cache.cache_last_n')}</span>
+                  <InfoTooltip title={t('settings.provider.api.options.anthropic_cache.cache_last_n_help')} />
+                </HStack>
+                <InputNumber
+                  min={0}
+                  max={10}
+                  value={cacheSettings.cacheLastNMessages}
+                  onChange={(v) => updateCacheSettings({ cacheLastNMessages: v ?? 0 })}
+                  style={{ width: 100 }}
+                />
+              </HStack>
+            </>
+          )}
+        </>
+      )}
     </Flex>
   )
 }
ProviderSetting component:

@@ -31,6 +31,7 @@ import {
   isOllamaProvider,
   isOpenAICompatibleProvider,
   isOpenAIProvider,
+  isSupportAnthropicPromptCacheProvider,
   isVertexProvider
 } from '@renderer/utils/provider'
 import { Button, Divider, Flex, Input, Select, Space, Switch, Tooltip } from 'antd'
@@ -400,7 +401,7 @@ const ProviderSetting: FC<Props> = ({ providerId }) => {
             <Button type="text" size="small" icon={<SquareArrowOutUpRight size={14} />} />
           </Link>
         )}
-        {!isSystemProvider(provider) && (
+        {(!isSystemProvider(provider) || isSupportAnthropicPromptCacheProvider(provider)) && (
          <Tooltip title={t('settings.provider.api.options.label')}>
            <Button
              type="text"
Provider types (@renderer/types):

@@ -79,6 +79,12 @@ export function isGroqServiceTier(tier: string | undefined | null): tier is GroqServiceTier {

 export type ServiceTier = OpenAIServiceTier | GroqServiceTier

+export type AnthropicCacheControlSettings = {
+  tokenThreshold: number
+  cacheSystemMessage: boolean
+  cacheLastNMessages: number
+}
+
 export function isServiceTier(tier: string | null | undefined): tier is ServiceTier {
   return isGroqServiceTier(tier) || isOpenAIServiceTier(tier)
 }
@@ -127,6 +133,9 @@ export type Provider = {
   isVertex?: boolean
   notes?: string
   extra_headers?: Record<string, string>
+
+  // Anthropic prompt caching settings
+  anthropicCacheControl?: AnthropicCacheControlSettings
 }

 export const SystemProviderIdSchema = z.enum([
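A hedged usage sketch of the new field (values illustrative): a provider that caches the system prompt and the last two conversation messages once a message is estimated at 1024 tokens or more.

```ts
import type { Provider } from '@renderer/types'

// Illustrative settings; all three fields are required on the settings object.
const anthropicProvider: Partial<Provider> = {
  type: 'anthropic',
  anthropicCacheControl: {
    tokenThreshold: 1024,     // only messages estimated at >= 1024 tokens are cached
    cacheSystemMessage: true, // attach the cache marker to the system message
    cacheLastNMessages: 2     // and to the last 2 non-system messages
  }
}
```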
Provider utils (@renderer/utils/provider):

@@ -198,3 +198,13 @@ export const NOT_SUPPORT_API_KEY_PROVIDERS: readonly SystemProviderId[] = [
 ]

 export const NOT_SUPPORT_API_KEY_PROVIDER_TYPES: readonly ProviderType[] = ['vertexai', 'aws-bedrock']
+
+// https://platform.claude.com/docs/en/build-with-claude/prompt-caching#1-hour-cache-duration
+export const isSupportAnthropicPromptCacheProvider = (provider: Provider) => {
+  return (
+    provider.type === 'anthropic' ||
+    isNewApiProvider(provider) ||
+    provider.id === SystemProviderIds.aihubmix ||
+    isAzureOpenAIProvider(provider)
+  )
+}