feat: support image

This commit is contained in:
suyao 2025-07-07 14:27:03 +08:00
parent 9e252d7eb0
commit c72156b2da
No known key found for this signature in database
5 changed files with 144 additions and 41 deletions

View File

@ -60,6 +60,10 @@ export type {
LanguageModelUsage, // AI SDK 4.0 中 TokenUsage 改名为 LanguageModelUsage
// 消息相关类型
ModelMessage,
TextPart,
FilePart,
ImagePart,
ToolCallPart,
// 错误类型
NoSuchToolError,
StreamTextResult,
@ -75,6 +79,7 @@ export type {
ToolSet,
UserModelMessage
} from 'ai'
export type { ReasoningPart } from '@ai-sdk/provider-utils'
export {
defaultSettingsMiddleware,
extractReasoningMiddleware,

View File

@ -3,7 +3,19 @@
* apiClient
*/
import { type ModelMessage, stepCountIs, type StreamTextParams } from '@cherrystudio/ai-core'
import {
AssistantModelMessage,
FilePart,
ImagePart,
ModelMessage,
ReasoningPart,
stepCountIs,
type StreamTextParams,
TextPart,
ToolCallPart,
ToolResultPart,
UserModelMessage
} from '@cherrystudio/ai-core'
import { DEFAULT_MAX_TOKENS } from '@renderer/config/constant'
import {
isGenerateImageModel,
@ -14,18 +26,25 @@ import {
isSupportedFlexServiceTier,
isSupportedReasoningEffortModel,
isSupportedThinkingTokenModel,
isVisionModel,
isWebSearchModel
} from '@renderer/config/models'
import { getAssistantSettings, getDefaultModel } from '@renderer/services/AssistantService'
import type { Assistant, MCPTool, Message, Model } from '@renderer/types'
import { FileTypes } from '@renderer/types'
import { findFileBlocks, findImageBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find'
import {
findFileBlocks,
findImageBlocks,
findThinkingBlocks,
getMainTextContent
} from '@renderer/utils/messageUtils/find'
import { buildSystemPrompt } from '@renderer/utils/prompt'
import { defaultTimeout } from '@shared/config/constant'
// import { jsonSchemaToZod } from 'json-schema-to-zod'
import { setupToolsConfig } from './utils/mcp'
import { buildProviderOptions } from './utils/options'
import { FileMessageBlock, ImageMessageBlock, ThinkingMessageBlock } from '@renderer/types/newMessage'
/**
*
@ -94,22 +113,25 @@ export async function extractFileContent(message: Message): Promise<string> {
* AI SDK
* OpenAI
*/
export async function convertMessageToSdkParam(message: Message, isVisionModel = false): Promise<any> {
export async function convertMessageToSdkParam(message: Message, isVisionModel = false): Promise<ModelMessage> {
const content = getMainTextContent(message)
const fileBlocks = findFileBlocks(message)
const imageBlocks = findImageBlocks(message)
// 简单消息(无文件无图片)
if (fileBlocks.length === 0 && imageBlocks.length === 0) {
return {
role: message.role === 'system' ? 'user' : message.role,
content
}
const reasoningBlocks = findThinkingBlocks(message)
if (message.role === 'user' || message.role === 'system') {
return convertMessageToUserModelMessage(content, fileBlocks, imageBlocks, isVisionModel)
} else {
return convertMessageToAssistantModelMessage(content, fileBlocks, reasoningBlocks)
}
}
// 复杂消息(包含文件或图片)
const parts: any[] = []
async function convertMessageToUserModelMessage(
content: string,
fileBlocks: FileMessageBlock[],
imageBlocks: ImageMessageBlock[],
isVisionModel = false
): Promise<UserModelMessage> {
const parts: Array<TextPart | FilePart | ImagePart> = []
if (content) {
parts.push({ type: 'text', text: content })
}
@ -121,16 +143,17 @@ export async function convertMessageToSdkParam(message: Message, isVisionModel =
try {
const image = await window.api.file.base64Image(imageBlock.file.id + imageBlock.file.ext)
parts.push({
type: 'image_url',
image_url: { url: image.data }
type: 'image',
image: image.base64,
mediaType: image.mime
})
} catch (error) {
console.warn('Failed to load image:', error)
}
} else if (imageBlock.url && imageBlock.url.startsWith('data:')) {
} else if (imageBlock.url) {
parts.push({
type: 'image_url',
image_url: { url: imageBlock.url }
type: 'image',
image: imageBlock.url
})
}
}
@ -138,28 +161,63 @@ export async function convertMessageToSdkParam(message: Message, isVisionModel =
// 处理文件
for (const fileBlock of fileBlocks) {
const file = fileBlock.file
if (!file) continue
if ([FileTypes.TEXT, FileTypes.DOCUMENT].includes(file.type)) {
try {
const fileContent = await window.api.file.read(file.id + file.ext)
parts.push({
type: 'text',
text: `${file.origin_name}\n${fileContent.trim()}`
})
} catch (error) {
console.warn('Failed to read file:', error)
}
const textPart = await convertFileBlockToTextPart(fileBlock)
if (textPart) {
parts.push(textPart)
}
}
return {
role: message.role === 'system' ? 'user' : message.role,
content: parts.length === 1 && parts[0].type === 'text' ? parts[0].text : parts
role: 'user',
content: parts
}
}
/**
 * Builds an assistant-role ModelMessage from a message's main text content,
 * its thinking (reasoning) blocks and its attached file blocks.
 *
 * Thinking content and readable file contents are flattened into plain text
 * parts, appended in that order after the main text.
 */
async function convertMessageToAssistantModelMessage(
  content: string,
  fileBlocks: FileMessageBlock[],
  thinkingBlocks: ThinkingMessageBlock[]
): Promise<AssistantModelMessage> {
  const parts: Array<TextPart | FilePart> = []

  if (content) {
    parts.push({ type: 'text', text: content })
  }

  // Reasoning output is forwarded as ordinary text parts.
  parts.push(...thinkingBlocks.map((block): TextPart => ({ type: 'text', text: block.content })))

  // Text/document files are inlined as text parts; unreadable files are skipped.
  for (const block of fileBlocks) {
    const filePart = await convertFileBlockToTextPart(block)
    if (filePart !== null) {
      parts.push(filePart)
    }
  }

  return {
    role: 'assistant',
    content: parts
  }
}
/**
 * Converts a file block into a text part by inlining the file's content.
 *
 * Returns null for blocks whose file reference is missing, for non-readable
 * file types, or when reading the file fails — callers skip null results.
 */
async function convertFileBlockToTextPart(fileBlock: FileMessageBlock): Promise<TextPart | null> {
  const file = fileBlock.file
  // Guard against a missing file reference; the pre-refactor inline loop
  // had `if (!file) continue`, which was dropped when this helper was
  // extracted and would otherwise throw on `file.type` below.
  if (!file) {
    return null
  }
  // Only text and document files can be inlined as text.
  if ([FileTypes.TEXT, FileTypes.DOCUMENT].includes(file.type)) {
    try {
      const fileContent = await window.api.file.read(file.id + file.ext)
      return {
        type: 'text',
        text: `${file.origin_name}\n${fileContent.trim()}`
      }
    } catch (error) {
      // Best-effort: a failed read degrades to "no part" rather than aborting
      // the whole message conversion.
      console.warn('Failed to read file:', error)
    }
  }
  return null
}
/**
* Cherry Studio AI SDK
*/
@ -168,7 +226,7 @@ export async function convertMessagesToSdkMessages(
model: Model
): Promise<StreamTextParams['messages']> {
const sdkMessages: StreamTextParams['messages'] = []
const isVision = model.id.includes('vision') || model.id.includes('gpt-4') // 简单的视觉模型检测
const isVision = isVisionModel(model)
for (const message of messages) {
const sdkMessage = await convertMessageToSdkParam(message, isVision)

View File

@ -30,6 +30,9 @@ export function buildProviderOptions(
// 构建 provider 特定的选项
let providerSpecificOptions: Record<string, any> = {}
console.log('buildProviderOptions', providerId)
console.log('buildProviderOptions', provider)
// 根据 provider 类型分离构建逻辑
switch (provider.type) {
case 'openai-response':

View File

@ -42,7 +42,7 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
if (isSupportedThinkingTokenGeminiModel(model)) {
if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
return { reasoningEffort: 'none' }
return { reasoning_effort: 'none' }
}
return {}
}
@ -71,6 +71,12 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
}
if (!reasoningEffort) {
if (model.provider === 'openrouter') {
if (isSupportedThinkingTokenGeminiModel(model) && !GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
return {}
}
return { reasoning: { enabled: false, exclude: true } }
}
if (isSupportedThinkingTokenQwenModel(model)) {
return { enable_thinking: false }
}
@ -80,12 +86,16 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
}
if (isSupportedThinkingTokenGeminiModel(model)) {
// openrouter没有提供一个不推理的选项先隐藏
if (provider.id === 'openrouter') {
return { reasoning: { max_tokens: 0, exclude: true } }
}
if (GEMINI_FLASH_MODEL_REGEX.test(model.id)) {
return { reasoningEffort: 'none' }
return {
extra_body: {
google: {
thinking_config: {
thinking_budget: 0
}
}
}
}
}
return {}
}
@ -128,12 +138,37 @@ export function getReasoningEffort(assistant: Assistant, model: Model): Reasonin
}
// OpenAI models
if (isSupportedReasoningEffortOpenAIModel(model) || isSupportedThinkingTokenGeminiModel(model)) {
if (isSupportedReasoningEffortOpenAIModel(model)) {
return {
reasoningEffort: reasoningEffort
}
}
if (isSupportedThinkingTokenGeminiModel(model)) {
if (reasoningEffort === 'auto') {
return {
extra_body: {
google: {
thinking_config: {
thinking_budget: -1,
include_thoughts: true
}
}
}
}
}
return {
extra_body: {
google: {
thinking_config: {
thinking_budget: budgetTokens,
include_thoughts: true
}
}
}
}
}
// Claude models
if (isSupportedThinkingTokenClaudeModel(model)) {
const maxTokens = assistant.settings?.maxTokens

View File

@ -50,9 +50,11 @@ export type ReasoningEffortOptionalParams = {
thinking?: { type: 'disabled' | 'enabled' | 'auto'; budget_tokens?: number }
reasoning?: { max_tokens?: number; exclude?: boolean; effort?: string; enabled?: boolean } | OpenAI.Reasoning
reasoningEffort?: OpenAI.Chat.Completions.ChatCompletionCreateParams['reasoning_effort'] | 'none' | 'auto'
reasoning_effort?: OpenAI.Chat.Completions.ChatCompletionCreateParams['reasoning_effort'] | 'none' | 'auto'
enable_thinking?: boolean
thinking_budget?: number
enable_reasoning?: boolean
extra_body?: Record<string, any>
// Add any other potential reasoning-related keys here if they exist
}