mirror of https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-09 23:10:20 +08:00
Add Anthropic Cache (#12333)
* add anthropic cache
* i18n: sync
* fix: condition judgment
* lg
* ag

---------

Co-authored-by: suyao <sy20010504@gmail.com>
This commit is contained in:
parent
1d07e89e38
commit
81ea847989
Middleware builder:

@@ -1,6 +1,6 @@
 import type { WebSearchPluginConfig } from '@cherrystudio/ai-core/built-in/plugins'
 import { loggerService } from '@logger'
-import { isGemini3Model, isSupportedThinkingTokenQwenModel } from '@renderer/config/models'
+import { isAnthropicModel, isGemini3Model, isSupportedThinkingTokenQwenModel } from '@renderer/config/models'
 import type { McpMode, MCPTool } from '@renderer/types'
 import { type Assistant, type Message, type Model, type Provider, SystemProviderIds } from '@renderer/types'
 import type { Chunk } from '@renderer/types/chunk'
@@ -10,6 +10,7 @@ import { extractReasoningMiddleware, simulateStreamingMiddleware } from 'ai'

 import { getAiSdkProviderId } from '../provider/factory'
 import { isOpenRouterGeminiGenerateImageModel } from '../utils/image'
+import { anthropicCacheMiddleware } from './anthropicCacheMiddleware'
 import { noThinkMiddleware } from './noThinkMiddleware'
 import { openrouterGenerateImageMiddleware } from './openrouterGenerateImageMiddleware'
 import { openrouterReasoningMiddleware } from './openrouterReasoningMiddleware'
@@ -179,7 +180,12 @@ function addProviderSpecificMiddlewares(builder: AiSdkMiddlewareBuilder, config:
   // Add provider-specific middlewares per provider type
   switch (config.provider.type) {
     case 'anthropic':
       // Anthropic-specific middleware
+      if (isAnthropicModel(config.model) && config.provider.anthropicCacheControl?.tokenThreshold) {
+        builder.add({
+          name: 'anthropic-cache',
+          middleware: anthropicCacheMiddleware(config.provider)
+        })
+      }
       break
     case 'openai':
     case 'azure-openai': {
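For reference, a middleware registered through the builder is ultimately applied by wrapping the language model. A minimal sketch of that composition using the AI SDK's `wrapLanguageModel` (the model id and the inline provider object are illustrative, not Cherry Studio's actual wiring):

```ts
import { createAnthropic } from '@ai-sdk/anthropic'
import { wrapLanguageModel } from 'ai'

import { anthropicCacheMiddleware } from './anthropicCacheMiddleware'

// Illustrative provider settings; in Cherry Studio these come from the
// user's provider configuration (see the Provider type change below).
const provider = {
  type: 'anthropic',
  anthropicCacheControl: { tokenThreshold: 1024, cacheSystemMessage: true, cacheLastNMessages: 2 }
} as any

// wrapLanguageModel runs transformParams before each generate/stream call,
// so the cache markers are injected transparently.
const model = wrapLanguageModel({
  model: createAnthropic()('claude-sonnet-4-20250514'), // hypothetical model id
  middleware: anthropicCacheMiddleware(provider)
})
```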
New file, anthropicCacheMiddleware:

@@ -0,0 +1,79 @@
+/**
+ * Anthropic Prompt Caching Middleware
+ * @see https://ai-sdk.dev/providers/ai-sdk-providers/anthropic#cache-control
+ */
+import { estimateTextTokens } from '@renderer/services/TokenService'
+import type { Provider } from '@renderer/types'
+import type { LanguageModelMiddleware } from 'ai'
+
+const cacheProviderOptions = {
+  anthropic: { cacheControl: { type: 'ephemeral' } }
+}
+
+function estimateContentTokens(content: unknown): number {
+  if (typeof content === 'string') return estimateTextTokens(content)
+  if (Array.isArray(content)) {
+    return content.reduce((acc, part) => {
+      if (typeof part === 'object' && part !== null && 'text' in part) {
+        return acc + estimateTextTokens(part.text as string)
+      }
+      return acc
+    }, 0)
+  }
+  return 0
+}
+
+function addCacheToContentParts(content: unknown): unknown {
+  if (typeof content === 'string') {
+    return [{ type: 'text', text: content, providerOptions: cacheProviderOptions }]
+  }
+  if (Array.isArray(content) && content.length > 0) {
+    const result = [...content]
+    const last = result[result.length - 1]
+    if (typeof last === 'object' && last !== null) {
+      result[result.length - 1] = { ...last, providerOptions: cacheProviderOptions }
+    }
+    return result
+  }
+  return content
+}
+
+export function anthropicCacheMiddleware(provider: Provider): LanguageModelMiddleware {
+  return {
+    middlewareVersion: 'v2',
+    transformParams: async ({ params }) => {
+      const settings = provider.anthropicCacheControl
+      if (!settings?.tokenThreshold || !Array.isArray(params.prompt) || params.prompt.length === 0) {
+        return params
+      }
+
+      const { tokenThreshold, cacheSystemMessage, cacheLastNMessages } = settings
+      const messages = [...params.prompt]
+      let cachedCount = 0
+
+      // Cache system message (providerOptions on message object)
+      if (cacheSystemMessage) {
+        for (let i = 0; i < messages.length; i++) {
+          const msg = messages[i] as any
+          if (msg.role === 'system' && estimateContentTokens(msg.content) >= tokenThreshold) {
+            messages[i] = { ...msg, providerOptions: cacheProviderOptions }
+            break
+          }
+        }
+      }
+
+      // Cache last N non-system messages (providerOptions on content parts)
+      if (cacheLastNMessages > 0) {
+        for (let i = messages.length - 1; i >= 0 && cachedCount < cacheLastNMessages; i--) {
+          const msg = messages[i] as any
+          if (msg.role !== 'system' && estimateContentTokens(msg.content) >= tokenThreshold) {
+            messages[i] = { ...msg, content: addCacheToContentParts(msg.content) }
+            cachedCount++
+          }
+        }
+      }
+
+      return { ...params, prompt: messages }
+    }
+  }
+}
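To make the transform concrete, here is an illustrative before/after of `transformParams` (the message text is invented; assume both messages exceed `tokenThreshold`, with `cacheSystemMessage: true` and `cacheLastNMessages: 1`):

```ts
// Prompt as the middleware receives it (illustrative).
const before = [
  { role: 'system', content: 'You are a meticulous assistant. <long, stable preamble...>' },
  { role: 'user', content: 'Summarize the attached report.' }
]

// Prompt after transformParams (conceptually).
const after = [
  {
    role: 'system',
    content: 'You are a meticulous assistant. <long, stable preamble...>',
    // System messages are cached at the message level.
    providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } }
  },
  {
    role: 'user',
    // String content is promoted to a text part so the cache marker
    // can attach to the final content part.
    content: [
      {
        type: 'text',
        text: 'Summarize the attached report.',
        providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } }
      }
    ]
  }
]
```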
English locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "Cache Last N Messages",
+          "cache_last_n_help": "Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "Cache System Message",
+          "cache_system_help": "Whether to cache the system prompt",
+          "token_threshold": "Cache Token Threshold",
+          "token_threshold_help": "Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "Does the provider support the content field of the message being of array type?",
           "label": "Supports array format message content"
Simplified Chinese locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "缓存最后 N 条消息",
+          "cache_last_n_help": "缓存最后的 N 条对话消息(不含系统消息)",
+          "cache_system": "缓存系统消息",
+          "cache_system_help": "是否缓存系统提示词",
+          "token_threshold": "缓存 Token 阈值",
+          "token_threshold_help": "消息超过此 Token 数才会被缓存,设为 0 禁用缓存"
+        },
         "array_content": {
           "help": "该提供商是否支持 message 的 content 字段为 array 类型",
           "label": "支持数组格式的 message content"
Traditional Chinese locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "該供應商是否支援 message 的 content 欄位為 array 類型",
           "label": "支援陣列格式的 message content"
German locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "Unterstützt Array-Format für message content",
           "label": "Unterstützt Array-Format für message content"
Greek locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "Εάν ο πάροχος υποστηρίζει το πεδίο περιεχομένου του μηνύματος ως τύπο πίνακα",
           "label": "Υποστήριξη για περιεχόμενο μηνύματος με μορφή πίνακα"
Spanish locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "¿Admite el proveedor que el campo content del mensaje sea de tipo array?",
           "label": "Contenido del mensaje compatible con formato de matriz"
French locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "Ce fournisseur prend-il en charge le champ content du message sous forme de tableau ?",
           "label": "Prise en charge du format de tableau pour le contenu du message"
Japanese locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "このプロバイダーは、message の content フィールドが配列型であることをサポートしていますか",
           "label": "配列形式のメッセージコンテンツをサポート"
Portuguese locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "O fornecedor suporta que o campo content da mensagem seja do tipo array?",
           "label": "suporta o formato de matriz do conteúdo da mensagem"
Romanian locale:

@@ -4475,6 +4475,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "Furnizorul acceptă ca câmpul content al mesajului să fie de tip array?",
           "label": "Acceptă conținut mesaj în format array"
Russian locale:

@@ -1311,6 +1311,7 @@
     "backup": {
       "file_format": "Ошибка формата файла резервной копии"
     },
-    "base64DataTruncated": "[to be translated]:Base64 image data truncated, size",
+    "base64DataTruncated": "Данные изображения в формате Base64 усечены, размер",
     "boundary": {
       "default": {
@@ -1392,7 +1393,9 @@
       "text": "текст",
       "toolInput": "ввод инструмента",
       "toolName": "имя инструмента",
-      "truncated": "[to be translated]:Data truncated, original size",
-      "truncatedBadge": "[to be translated]:Truncated",
+      "truncated": "Данные усечены, исходный размер",
+      "truncatedBadge": "Усечённый",
       "unknown": "Неизвестная ошибка",
       "usage": "Дозировка",
@@ -4475,6 +4478,14 @@
         }
       },
       "options": {
+        "anthropic_cache": {
+          "cache_last_n": "[to be translated]:Cache Last N Messages",
+          "cache_last_n_help": "[to be translated]:Cache the last N conversation messages (excluding system messages)",
+          "cache_system": "[to be translated]:Cache System Message",
+          "cache_system_help": "[to be translated]:Whether to cache the system prompt",
+          "token_threshold": "[to be translated]:Cache Token Threshold",
+          "token_threshold_help": "[to be translated]:Messages exceeding this token count will be cached. Set to 0 to disable caching."
+        },
         "array_content": {
           "help": "Поддерживает ли данный провайдер тип массива для поля content в сообщении",
           "label": "поддержка формата массива для содержимого сообщения"
ApiOptionsSettings component:

@@ -1,8 +1,9 @@
 import { HStack } from '@renderer/components/Layout'
 import { InfoTooltip } from '@renderer/components/TooltipIcons'
 import { useProvider } from '@renderer/hooks/useProvider'
-import type { Provider } from '@renderer/types'
-import { Flex, Switch } from 'antd'
+import { type AnthropicCacheControlSettings, type Provider } from '@renderer/types'
+import { isSupportAnthropicPromptCacheProvider } from '@renderer/utils/provider'
+import { Divider, Flex, InputNumber, Switch } from 'antd'
 import { startTransition, useCallback, useMemo } from 'react'
 import { useTranslation } from 'react-i18next'
@@ -114,6 +115,27 @@ const ApiOptionsSettings = ({ providerId }: Props) => {
     return items
   }, [openAIOptions, provider.apiOptions, provider.type, t, updateProviderTransition])

+  const isSupportAnthropicPromptCache = isSupportAnthropicPromptCacheProvider(provider)
+
+  const cacheSettings = useMemo(
+    () =>
+      provider.anthropicCacheControl ?? {
+        tokenThreshold: 0,
+        cacheSystemMessage: true,
+        cacheLastNMessages: 0
+      },
+    [provider.anthropicCacheControl]
+  )
+
+  const updateCacheSettings = useCallback(
+    (updates: Partial<AnthropicCacheControlSettings>) => {
+      updateProviderTransition({
+        anthropicCacheControl: { ...cacheSettings, ...updates }
+      })
+    },
+    [cacheSettings, updateProviderTransition]
+  )
+
   return (
     <Flex vertical gap="middle">
       {options.map((item) => (
@@ -127,6 +149,52 @@ const ApiOptionsSettings = ({ providerId }: Props) => {
           <Switch id={item.key} checked={item.checked} onChange={item.onChange} />
         </HStack>
       ))}
+
+      {isSupportAnthropicPromptCache && (
+        <>
+          <Divider style={{ margin: '8px 0' }} />
+          <HStack justifyContent="space-between">
+            <HStack alignItems="center" gap={6}>
+              <span>{t('settings.provider.api.options.anthropic_cache.token_threshold')}</span>
+              <InfoTooltip title={t('settings.provider.api.options.anthropic_cache.token_threshold_help')} />
+            </HStack>
+            <InputNumber
+              min={0}
+              max={100000}
+              value={cacheSettings.tokenThreshold}
+              onChange={(v) => updateCacheSettings({ tokenThreshold: v ?? 0 })}
+              style={{ width: 100 }}
+            />
+          </HStack>
+          {cacheSettings.tokenThreshold > 0 && (
+            <>
+              <HStack justifyContent="space-between">
+                <HStack alignItems="center" gap={6}>
+                  <span>{t('settings.provider.api.options.anthropic_cache.cache_system')}</span>
+                  <InfoTooltip title={t('settings.provider.api.options.anthropic_cache.cache_system_help')} />
+                </HStack>
+                <Switch
+                  checked={cacheSettings.cacheSystemMessage}
+                  onChange={(v) => updateCacheSettings({ cacheSystemMessage: v })}
+                />
+              </HStack>
+              <HStack justifyContent="space-between">
+                <HStack alignItems="center" gap={6}>
+                  <span>{t('settings.provider.api.options.anthropic_cache.cache_last_n')}</span>
+                  <InfoTooltip title={t('settings.provider.api.options.anthropic_cache.cache_last_n_help')} />
+                </HStack>
+                <InputNumber
+                  min={0}
+                  max={10}
+                  value={cacheSettings.cacheLastNMessages}
+                  onChange={(v) => updateCacheSettings({ cacheLastNMessages: v ?? 0 })}
+                  style={{ width: 100 }}
+                />
+              </HStack>
+            </>
+          )}
+        </>
+      )}
     </Flex>
   )
 }
ProviderSetting component:

@@ -31,6 +31,7 @@ import {
   isOllamaProvider,
   isOpenAICompatibleProvider,
   isOpenAIProvider,
+  isSupportAnthropicPromptCacheProvider,
   isVertexProvider
 } from '@renderer/utils/provider'
 import { Button, Divider, Flex, Input, Select, Space, Switch, Tooltip } from 'antd'
@@ -400,7 +401,7 @@ const ProviderSetting: FC<Props> = ({ providerId }) => {
             <Button type="text" size="small" icon={<SquareArrowOutUpRight size={14} />} />
           </Link>
         )}
-        {!isSystemProvider(provider) && (
+        {(!isSystemProvider(provider) || isSupportAnthropicPromptCacheProvider(provider)) && (
          <Tooltip title={t('settings.provider.api.options.label')}>
            <Button
              type="text"
Provider types (@renderer/types):

@@ -79,6 +79,12 @@ export function isGroqServiceTier(tier: string | undefined | null): tier is GroqServiceTier {

 export type ServiceTier = OpenAIServiceTier | GroqServiceTier

+export type AnthropicCacheControlSettings = {
+  tokenThreshold: number
+  cacheSystemMessage: boolean
+  cacheLastNMessages: number
+}
+
 export function isServiceTier(tier: string | null | undefined): tier is ServiceTier {
   return isGroqServiceTier(tier) || isOpenAIServiceTier(tier)
 }
@@ -127,6 +133,9 @@ export type Provider = {
   isVertex?: boolean
   notes?: string
   extra_headers?: Record<string, string>
+
+  // Anthropic prompt caching settings
+  anthropicCacheControl?: AnthropicCacheControlSettings
 }

 export const SystemProviderIdSchema = z.enum([
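A hedged usage sketch of the new field (values illustrative): a provider that caches the system prompt and the last two conversation messages once a message is estimated at 1024 tokens or more.

```ts
import type { Provider } from '@renderer/types'

// Illustrative settings; all three fields are required on the settings object.
const anthropicProvider: Partial<Provider> = {
  type: 'anthropic',
  anthropicCacheControl: {
    tokenThreshold: 1024,     // only messages estimated at >= 1024 tokens are cached
    cacheSystemMessage: true, // attach the cache marker to the system message
    cacheLastNMessages: 2     // and to the last 2 non-system messages
  }
}
```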
Provider utils (@renderer/utils/provider):

@@ -198,3 +198,13 @@ export const NOT_SUPPORT_API_KEY_PROVIDERS: readonly SystemProviderId[] = [
 ]

 export const NOT_SUPPORT_API_KEY_PROVIDER_TYPES: readonly ProviderType[] = ['vertexai', 'aws-bedrock']
+
+// https://platform.claude.com/docs/en/build-with-claude/prompt-caching#1-hour-cache-duration
+export const isSupportAnthropicPromptCacheProvider = (provider: Provider) => {
+  return (
+    provider.type === 'anthropic' ||
+    isNewApiProvider(provider) ||
+    provider.id === SystemProviderIds.aihubmix ||
+    isAzureOpenAIProvider(provider)
+  )
+}