diff --git a/src/renderer/src/aiCore/prepareParams/__tests__/message-converter.test.ts b/src/renderer/src/aiCore/prepareParams/__tests__/message-converter.test.ts index 2433192cd..cb0c5cf9a 100644 --- a/src/renderer/src/aiCore/prepareParams/__tests__/message-converter.test.ts +++ b/src/renderer/src/aiCore/prepareParams/__tests__/message-converter.test.ts @@ -137,6 +137,73 @@ describe('messageConverter', () => { }) }) + it('extracts base64 data from data URLs and preserves mediaType', async () => { + const model = createModel() + const message = createMessage('user') + message.__mockContent = 'Check this image' + message.__mockImageBlocks = [createImageBlock(message.id, { url: 'data:image/png;base64,iVBORw0KGgoAAAANS' })] + + const result = await convertMessageToSdkParam(message, true, model) + + expect(result).toEqual({ + role: 'user', + content: [ + { type: 'text', text: 'Check this image' }, + { type: 'image', image: 'iVBORw0KGgoAAAANS', mediaType: 'image/png' } + ] + }) + }) + + it('handles data URLs without mediaType gracefully', async () => { + const model = createModel() + const message = createMessage('user') + message.__mockContent = 'Check this' + message.__mockImageBlocks = [createImageBlock(message.id, { url: 'data:;base64,AAABBBCCC' })] + + const result = await convertMessageToSdkParam(message, true, model) + + expect(result).toEqual({ + role: 'user', + content: [ + { type: 'text', text: 'Check this' }, + { type: 'image', image: 'AAABBBCCC' } + ] + }) + }) + + it('skips malformed data URLs without comma separator', async () => { + const model = createModel() + const message = createMessage('user') + message.__mockContent = 'Malformed data url' + message.__mockImageBlocks = [createImageBlock(message.id, { url: 'data:image/pngAAABBB' })] + + const result = await convertMessageToSdkParam(message, true, model) + + expect(result).toEqual({ + role: 'user', + content: [ + { type: 'text', text: 'Malformed data url' } + // Malformed data URL is excluded from the 
content + ] + }) + }) + + it('handles multiple large base64 images without stack overflow', async () => { + const model = createModel() + const message = createMessage('user') + // Create large base64 strings (~500KB each) to simulate real-world large images + const largeBase64 = 'A'.repeat(500_000) + message.__mockContent = 'Check these images' + message.__mockImageBlocks = [ + createImageBlock(message.id, { url: `data:image/png;base64,${largeBase64}` }), + createImageBlock(message.id, { url: `data:image/png;base64,${largeBase64}` }), + createImageBlock(message.id, { url: `data:image/png;base64,${largeBase64}` }) + ] + + // Should not throw RangeError: Maximum call stack size exceeded + await expect(convertMessageToSdkParam(message, true, model)).resolves.toBeDefined() + }) + it('returns file instructions as a system message when native uploads succeed', async () => { const model = createModel() const message = createMessage('user') @@ -165,7 +232,7 @@ describe('messageConverter', () => { }) describe('convertMessagesToSdkMessages', () => { - it('appends assistant images to the final user message for image enhancement models', async () => { + it('collapses to [system?, user(image)] for image enhancement models', async () => { const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) const initialUser = createMessage('user') initialUser.__mockContent = 'Start editing' @@ -180,14 +247,6 @@ describe('messageConverter', () => { const result = await convertMessagesToSdkMessages([initialUser, assistant, finalUser], model) expect(result).toEqual([ - { - role: 'user', - content: [{ type: 'text', text: 'Start editing' }] - }, - { - role: 'assistant', - content: [{ type: 'text', text: 'Here is the current preview' }] - }, { role: 'user', content: [ @@ -198,7 +257,7 @@ describe('messageConverter', () => { ]) }) - it('preserves preceding system instructions when building enhancement payloads', async () => { + it('preserves 
system messages and collapses others for enhancement payloads', async () => { const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) const fileUser = createMessage('user') fileUser.__mockContent = 'Use this document as inspiration' @@ -221,11 +280,6 @@ describe('messageConverter', () => { expect(result).toEqual([ { role: 'system', content: 'fileid://reference' }, - { role: 'user', content: [{ type: 'text', text: 'Use this document as inspiration' }] }, - { - role: 'assistant', - content: [{ type: 'text', text: 'Generated previews ready' }] - }, { role: 'user', content: [ @@ -235,5 +289,120 @@ describe('messageConverter', () => { } ]) }) + + it('handles no previous assistant message with images', async () => { + const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) + const user1 = createMessage('user') + user1.__mockContent = 'Start' + + const user2 = createMessage('user') + user2.__mockContent = 'Continue without images' + + const result = await convertMessagesToSdkMessages([user1, user2], model) + + expect(result).toEqual([ + { + role: 'user', + content: [{ type: 'text', text: 'Continue without images' }] + } + ]) + }) + + it('handles assistant message without images', async () => { + const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) + const user1 = createMessage('user') + user1.__mockContent = 'Start' + + const assistant = createMessage('assistant') + assistant.__mockContent = 'Text only response' + assistant.__mockImageBlocks = [] + + const user2 = createMessage('user') + user2.__mockContent = 'Follow up' + + const result = await convertMessagesToSdkMessages([user1, assistant, user2], model) + + expect(result).toEqual([ + { + role: 'user', + content: [{ type: 'text', text: 'Follow up' }] + } + ]) + }) + + it('handles multiple assistant messages by using the most recent one', async () => { + 
const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) + const user1 = createMessage('user') + user1.__mockContent = 'Start' + + const assistant1 = createMessage('assistant') + assistant1.__mockContent = 'First response' + assistant1.__mockImageBlocks = [createImageBlock(assistant1.id, { url: 'https://example.com/old.png' })] + + const user2 = createMessage('user') + user2.__mockContent = 'Continue' + + const assistant2 = createMessage('assistant') + assistant2.__mockContent = 'Second response' + assistant2.__mockImageBlocks = [createImageBlock(assistant2.id, { url: 'https://example.com/new.png' })] + + const user3 = createMessage('user') + user3.__mockContent = 'Final request' + + const result = await convertMessagesToSdkMessages([user1, assistant1, user2, assistant2, user3], model) + + expect(result).toEqual([ + { + role: 'user', + content: [ + { type: 'text', text: 'Final request' }, + { type: 'image', image: 'https://example.com/new.png' } + ] + } + ]) + }) + + it('handles conversation ending with assistant message', async () => { + const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image Edit', provider: 'qwen', group: 'qwen' }) + const user = createMessage('user') + user.__mockContent = 'Start' + + const assistant = createMessage('assistant') + assistant.__mockContent = 'Response with image' + assistant.__mockImageBlocks = [createImageBlock(assistant.id, { url: 'https://example.com/image.png' })] + + const result = await convertMessagesToSdkMessages([user, assistant], model) + + // The user message is the last user message, but since the assistant comes after, + // there's no "previous" assistant message (search starts from messages.length-2 backwards) + expect(result).toEqual([ + { + role: 'user', + content: [{ type: 'text', text: 'Start' }] + } + ]) + }) + + it('handles empty content in last user message', async () => { + const model = createModel({ id: 'qwen-image-edit', name: 'Qwen Image 
Edit', provider: 'qwen', group: 'qwen' }) + const user1 = createMessage('user') + user1.__mockContent = 'Start' + + const assistant = createMessage('assistant') + assistant.__mockContent = 'Here is the preview' + assistant.__mockImageBlocks = [createImageBlock(assistant.id, { url: 'https://example.com/preview.png' })] + + const user2 = createMessage('user') + user2.__mockContent = '' + + const result = await convertMessagesToSdkMessages([user1, assistant, user2], model) + + expect(result).toEqual([ + { + role: 'user', + content: [{ type: 'image', image: 'https://example.com/preview.png' }] + } + ]) + }) }) }) diff --git a/src/renderer/src/aiCore/prepareParams/messageConverter.ts b/src/renderer/src/aiCore/prepareParams/messageConverter.ts index b0c432ef8..328a10b94 100644 --- a/src/renderer/src/aiCore/prepareParams/messageConverter.ts +++ b/src/renderer/src/aiCore/prepareParams/messageConverter.ts @@ -7,6 +7,7 @@ import { loggerService } from '@logger' import { isImageEnhancementModel, isVisionModel } from '@renderer/config/models' import type { Message, Model } from '@renderer/types' import type { FileMessageBlock, ImageMessageBlock, ThinkingMessageBlock } from '@renderer/types/newMessage' +import { parseDataUrlMediaType } from '@renderer/utils/image' import { findFileBlocks, findImageBlocks, @@ -59,23 +60,29 @@ async function convertImageBlockToImagePart(imageBlocks: ImageMessageBlock[]): P mediaType: image.mime }) } catch (error) { - logger.warn('Failed to load image:', error as Error) + logger.error('Failed to load image file, image will be excluded from message:', { + fileId: imageBlock.file.id, + fileName: imageBlock.file.origin_name, + error: error as Error + }) } } else if (imageBlock.url) { - const isBase64 = imageBlock.url.startsWith('data:') - if (isBase64) { - const base64 = imageBlock.url.match(/^data:[^;]*;base64,(.+)$/)![1] - const mimeMatch = imageBlock.url.match(/^data:([^;]+)/) - parts.push({ - type: 'image', - image: base64, - mediaType: mimeMatch 
? mimeMatch[1] : 'image/png' - }) + const url = imageBlock.url + const isDataUrl = url.startsWith('data:') + if (isDataUrl) { + const { mediaType } = parseDataUrlMediaType(url) + const commaIndex = url.indexOf(',') + if (commaIndex === -1) { + logger.error('Malformed data URL detected (missing comma separator), image will be excluded:', { + urlPrefix: url.slice(0, 50) + '...' + }) + continue + } + const base64Data = url.slice(commaIndex + 1) + parts.push({ type: 'image', image: base64Data, ...(mediaType ? { mediaType } : {}) }) } else { - parts.push({ - type: 'image', - image: imageBlock.url - }) + // For remote URLs we keep payload minimal to match existing expectations. + parts.push({ type: 'image', image: url }) } } } @@ -194,17 +201,20 @@ async function convertMessageToAssistantModelMessage( * This function processes messages and transforms them into the format required by the SDK. * It handles special cases for vision models and image enhancement models. * - * @param messages - Array of messages to convert. Must contain at least 3 messages when using image enhancement models for special handling. + * @param messages - Array of messages to convert. 
* @param model - The model configuration that determines conversion behavior * * @returns A promise that resolves to an array of SDK-compatible model messages * * @remarks - * For image enhancement models with 3+ messages: - * - Examines the last 2 messages to find an assistant message containing image blocks - * - If found, extracts images from the assistant message and appends them to the last user message content - * - Returns all converted messages (not just the last two) with the images merged into the user message - * - Typical pattern: [system?, assistant(image), user] -> [system?, assistant, user(image)] + * For image enhancement models: + * - Collapses the conversation into [system?, user(image)] format + * - Searches backwards through all messages to find the most recent assistant message with images + * - Preserves all system messages (including ones generated from file uploads like 'fileid://...') + * - Extracts the last user message content and merges images from the previous assistant message + * - Returns only the collapsed messages: system messages (if any) followed by a single user message + * - If no user message is found, returns only system messages + * - Typical pattern: [system?, user, assistant(image), user] -> [system?, user(image)] * * For other models: * - Returns all converted messages in order without special image handling @@ -220,25 +230,66 @@ export async function convertMessagesToSdkMessages(messages: Message[], model: M sdkMessages.push(...(Array.isArray(sdkMessage) ? 
sdkMessage : [sdkMessage])) } // Special handling for image enhancement models - // Only merge images into the user message - // [system?, assistant(image), user] -> [system?, assistant, user(image)] - if (isImageEnhancementModel(model) && messages.length >= 3) { - const needUpdatedMessages = messages.slice(-2) - const assistantMessage = needUpdatedMessages.find((m) => m.role === 'assistant') - const userSdkMessage = sdkMessages[sdkMessages.length - 1] + // Target behavior: Collapse the conversation into [system?, user(image)]. + // Explanation of why we don't simply use slice: + // 1) We need to preserve all system messages: During the convertMessageToSdkParam process, native file uploads may insert `system(fileid://...)`. + // Directly slicing the original messages or already converted sdkMessages could easily result in missing these system instructions. + // Therefore, we first perform a full conversion and then aggregate the system messages afterward. + // 2) The conversion process may split messages: A single user message might be broken into two SDK messages—[system, user]. + // Slicing either side could lead to obtaining semantically incorrect fragments (e.g., only the split-out system message). + // 3) The “previous assistant message” is not necessarily the second-to-last one: There might be system messages or other message blocks inserted in between, + // making a simple slice(-2) assumption too rigid. Here, we trace back from the end of the original messages to locate the most recent assistant message, which better aligns with business semantics. + // 4) This is a “collapse” rather than a simple “slice”: Ultimately, we need to synthesize a new user message + // (with text from the last user message and images from the previous assistant message). Using slice can only extract subarrays, + // which still require reassembly; constructing directly according to the target structure is clearer and more reliable. 
+ if (isImageEnhancementModel(model)) { + // Collect all system messages (including ones generated from file uploads) + const systemMessages = sdkMessages.filter((m): m is SystemModelMessage => m.role === 'system') - if (assistantMessage && userSdkMessage?.role === 'user') { - const imageBlocks = findImageBlocks(assistantMessage) - const imageParts = await convertImageBlockToImagePart(imageBlocks) + // Find the last user message (SDK converted) + const lastUserSdkIndex = (() => { + for (let i = sdkMessages.length - 1; i >= 0; i--) { + if (sdkMessages[i].role === 'user') return i + } + return -1 + })() - if (imageParts.length > 0) { - if (typeof userSdkMessage.content === 'string') { - userSdkMessage.content = [{ type: 'text', text: userSdkMessage.content }, ...imageParts] - } else if (Array.isArray(userSdkMessage.content)) { - userSdkMessage.content.push(...imageParts) - } + const lastUserSdk = lastUserSdkIndex >= 0 ? (sdkMessages[lastUserSdkIndex] as UserModelMessage) : null + + // Find the nearest preceding assistant message in original messages + let prevAssistant: Message | null = null + for (let i = messages.length - 2; i >= 0; i--) { + if (messages[i].role === 'assistant') { + prevAssistant = messages[i] + break } } + + // Build the final user content parts + let finalUserParts: Array = [] + if (lastUserSdk) { + if (typeof lastUserSdk.content === 'string') { + finalUserParts.push({ type: 'text', text: lastUserSdk.content }) + } else if (Array.isArray(lastUserSdk.content)) { + finalUserParts = [...lastUserSdk.content] + } + } + + // Append images from the previous assistant message if any + if (prevAssistant) { + const imageBlocks = findImageBlocks(prevAssistant) + const imageParts = await convertImageBlockToImagePart(imageBlocks) + if (imageParts.length > 0) { + finalUserParts.push(...imageParts) + } + } + + // If we couldn't find a last user message, fall back to returning collected system messages only + if (!lastUserSdk) { + return systemMessages + } + + 
return [...systemMessages, { role: 'user', content: finalUserParts }] } return sdkMessages diff --git a/src/renderer/src/services/ApiService.ts b/src/renderer/src/services/ApiService.ts index 0d9e8cd0b..3c081c3da 100644 --- a/src/renderer/src/services/ApiService.ts +++ b/src/renderer/src/services/ApiService.ts @@ -22,7 +22,7 @@ import { isPromptToolUse, isSupportedToolUse } from '@renderer/utils/mcp-tools' import { findFileBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find' import { containsSupportedVariables, replacePromptVariables } from '@renderer/utils/prompt' import { NOT_SUPPORT_API_KEY_PROVIDER_TYPES, NOT_SUPPORT_API_KEY_PROVIDERS } from '@renderer/utils/provider' -import { cloneDeep, isEmpty, takeRight } from 'lodash' +import { isEmpty, takeRight } from 'lodash' import type { ModernAiProviderConfig } from '../aiCore/index_new' import AiProviderNew from '../aiCore/index_new' @@ -99,9 +99,11 @@ export async function fetchChatCompletion({ }) // Get base provider and apply API key rotation + // NOTE: Shallow copy is intentional. Provider objects are not mutated by downstream code. + // Nested properties (if any) are never modified after creation. const baseProvider = getProviderByModel(assistant.model || getDefaultModel()) const providerWithRotatedKey = { - ...cloneDeep(baseProvider), + ...baseProvider, apiKey: getRotatedApiKey(baseProvider) } @@ -183,8 +185,10 @@ export async function fetchMessagesSummary({ messages, assistant }: { messages: } // Apply API key rotation + // NOTE: Shallow copy is intentional. Provider objects are not mutated by downstream code. + // Nested properties (if any) are never modified after creation. const providerWithRotatedKey = { - ...cloneDeep(provider), + ...provider, apiKey: getRotatedApiKey(provider) } @@ -288,8 +292,10 @@ export async function fetchNoteSummary({ content, assistant }: { content: string } // Apply API key rotation + // NOTE: Shallow copy is intentional. 
Provider objects are not mutated by downstream code. + // Nested properties (if any) are never modified after creation. const providerWithRotatedKey = { - ...cloneDeep(provider), + ...provider, apiKey: getRotatedApiKey(provider) } @@ -382,8 +388,10 @@ export async function fetchGenerate({ } // Apply API key rotation + // NOTE: Shallow copy is intentional. Provider objects are not mutated by downstream code. + // Nested properties (if any) are never modified after creation. const providerWithRotatedKey = { - ...cloneDeep(provider), + ...provider, apiKey: getRotatedApiKey(provider) } @@ -492,8 +500,10 @@ function getRotatedApiKey(provider: Provider): string { export async function fetchModels(provider: Provider): Promise { // Apply API key rotation + // NOTE: Shallow copy is intentional. Provider objects are not mutated by downstream code. + // Nested properties (if any) are never modified after creation. const providerWithRotatedKey = { - ...cloneDeep(provider), + ...provider, apiKey: getRotatedApiKey(provider) } diff --git a/src/renderer/src/utils/__tests__/image.test.ts b/src/renderer/src/utils/__tests__/image.test.ts index 0e0295739..3f35e7c73 100644 --- a/src/renderer/src/utils/__tests__/image.test.ts +++ b/src/renderer/src/utils/__tests__/image.test.ts @@ -7,7 +7,8 @@ import { captureScrollableAsDataURL, compressImage, convertToBase64, - makeSvgSizeAdaptive + makeSvgSizeAdaptive, + parseDataUrlMediaType } from '../image' // mock 依赖 @@ -201,4 +202,36 @@ describe('utils/image', () => { expect(result.outerHTML).toBe(originalOuterHTML) }) }) + + describe('parseDataUrlMediaType', () => { + it('extracts media type and base64 flag from standard data url', () => { + const r = parseDataUrlMediaType('data:image/png;base64,AAA') + expect(r.mediaType).toBe('image/png') + expect(r.isBase64).toBe(true) + }) + + it('handles additional parameters in header', () => { + const r = parseDataUrlMediaType('data:image/jpeg;name=foo;base64,AAA') + 
expect(r.mediaType).toBe('image/jpeg') + expect(r.isBase64).toBe(true) + }) + + it('returns undefined media type when missing and detects non-base64', () => { + const r = parseDataUrlMediaType('data:text/plain,hello') + expect(r.mediaType).toBe('text/plain') + expect(r.isBase64).toBe(false) + }) + + it('handles empty mediatype header', () => { + const r = parseDataUrlMediaType('data:;base64,AAA') + expect(r.mediaType).toBeUndefined() + expect(r.isBase64).toBe(true) + }) + + it('gracefully handles non data urls', () => { + const r = parseDataUrlMediaType('https://example.com/x.png') + expect(r.mediaType).toBeUndefined() + expect(r.isBase64).toBe(false) + }) + }) }) diff --git a/src/renderer/src/utils/image.ts b/src/renderer/src/utils/image.ts index 3d4824549..6f8d8c3d1 100644 --- a/src/renderer/src/utils/image.ts +++ b/src/renderer/src/utils/image.ts @@ -617,3 +617,23 @@ export const convertImageToPng = async (blob: Blob): Promise => { img.src = url }) } + +/** + * Parse media type from a data URL without using heavy regular expressions. + * + * data:[<mediatype>][;base64],<data> + * - mediatype may be empty (defaults to text/plain;charset=US-ASCII per spec) + * - we only care about extracting media type and whether it's base64 + */ +export function parseDataUrlMediaType(url: string): { mediaType?: string; isBase64: boolean } { + if (!url.startsWith('data:')) return { isBase64: false } + const comma = url.indexOf(',') + if (comma === -1) return { isBase64: false } + // strip leading 'data:' and take header portion only + const header = url.slice(5, comma) + const semi = header.indexOf(';') + const mediaType = (semi === -1 ? header : header.slice(0, semi)).trim() || undefined + // base64 flag may appear anywhere after mediatype in the header + const isBase64 = header.indexOf(';base64') !== -1 + return { mediaType, isBase64 } +}