From c5e746b6c619519520b56f5ecbf92413b9cfcda7 Mon Sep 17 00:00:00 2001 From: Phantom <59059173+EurFelux@users.noreply.github.com> Date: Sat, 30 Aug 2025 18:24:44 +0800 Subject: [PATCH] fix: filter inline base64 image in messages summary (#9687) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(markdown): 添加清理base64图片链接的功能 添加purifyMarkdownImages函数用于将Markdown中的base64图片链接替换为普通链接格式 * fix(utils): 清理markdown中的base64图片链接并应用到消息摘要 在ApiService中调用purifyMarkdownImages清理消息摘要中的base64图片链接 --- src/renderer/src/services/ApiService.ts | 3 +- .../src/utils/__tests__/markdown.test.ts | 67 +++++++++++++++++++ src/renderer/src/utils/markdown.ts | 18 +++++ 3 files changed, 87 insertions(+), 1 deletion(-) diff --git a/src/renderer/src/services/ApiService.ts b/src/renderer/src/services/ApiService.ts index d86854acec..d1e2a18664 100644 --- a/src/renderer/src/services/ApiService.ts +++ b/src/renderer/src/services/ApiService.ts @@ -42,6 +42,7 @@ import { removeSpecialCharactersForTopicName, uuid } from '@renderer/utils' import { abortCompletion } from '@renderer/utils/abortController' import { isAbortError } from '@renderer/utils/error' import { extractInfoFromXML, ExtractResults } from '@renderer/utils/extract' +import { purifyMarkdownImages } from '@renderer/utils/markdown' import { filterAdjacentUserMessaegs, filterLastAssistantMessage } from '@renderer/utils/messageUtils/filters' import { findFileBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find' import { @@ -699,7 +700,7 @@ export async function fetchMessagesSummary({ messages, assistant }: { messages: const structredMessages = contextMessages.map((message) => { const structredMessage = { role: message.role, - mainText: getMainTextContent(message) + mainText: purifyMarkdownImages(getMainTextContent(message)) } // 让LLM知道消息中包含的文件,但只提供文件名 diff --git a/src/renderer/src/utils/__tests__/markdown.test.ts b/src/renderer/src/utils/__tests__/markdown.test.ts index 3b3c4f6cd6..9418af0dc5 100644 --- a/src/renderer/src/utils/__tests__/markdown.test.ts +++ b/src/renderer/src/utils/__tests__/markdown.test.ts @@ -9,6 +9,7 @@ import { isHtmlCode, markdownToPlainText, processLatexBrackets, + purifyMarkdownImages, removeTrailingDoubleSpaces, updateCodeBlock } from '../markdown' @@ -634,4 +635,70 @@ $$ expect(isHtmlCode('a < b')).toBe(false) }) }) + + describe('purifyMarkdownImages', () => { + it('should replace base64 image with placeholder', () => { + const input = '![cat](data:image/png;base64,iVBORw0KGgo)' + const expected = '![cat](image_url)' + expect(purifyMarkdownImages(input)).toBe(expected) + }) + + it('should handle multiple base64 images', () => { + const input = ` + ![dog](data:image/jpeg;base64,ABC123) + Some text + ![avatar](data:image/png;base64,XYZ789) + ` + const expected = ` + ![dog](image_url) + Some text + ![avatar](image_url) + ` + expect(purifyMarkdownImages(input)).toBe(expected) + }) + + it('should ignore normal image links', () => { + const input = '![cat](https://example.com/cat.png)' + expect(purifyMarkdownImages(input)).toBe(input) + }) + + it('should handle whitespace in base64 url', () => { + const input = '![logo]( data:image/svg+xml;base64,CONTENT )' + const expected = '![logo](image_url)' + expect(purifyMarkdownImages(input)).toBe(expected) + }) + + it('should preserve alt text', () => { + const input = '![User Avatar](data:image/png;base64,xxx)' + const expected = '![User Avatar](image_url)' + expect(purifyMarkdownImages(input)).toBe(expected) + }) + + it('should handle uppercase data URL', () => { + const input = '![test](DATA:IMAGE/PNG;BASE64,ABC)' + const expected = '![test](image_url)' + expect(purifyMarkdownImages(input)).toBe(expected) + }) + + it('should not modify text that is not image', () => { + const input = 'This is a data:image/png;base64,iVBORw line of text' + expect(purifyMarkdownImages(input)).toBe(input) + }) + + it('should handle mixed content', () => { + const input = ` + Regular: ![cat](https://example.com/cat.png) + Base64: ![dog](data:image/jpeg;base64,BASE64DATA) + Another: ![bird](https://example.com/bird.gif) + Inline: ![icon]( data:image/x-icon;base64,ICONDATA ) + ` + const expected = ` + Regular: ![cat](https://example.com/cat.png) + Base64: ![dog](image_url) + Another: ![bird](https://example.com/bird.gif) + Inline: ![icon](image_url) + ` + expect(purifyMarkdownImages(input)).toBe(expected) + }) + }) }) diff --git a/src/renderer/src/utils/markdown.ts b/src/renderer/src/utils/markdown.ts index 60bdaaac32..bbeb2a2f74 100644 --- a/src/renderer/src/utils/markdown.ts +++ b/src/renderer/src/utils/markdown.ts @@ -312,3 +312,21 @@ export const markdownToPlainText = (markdown: string): string => { // 直接用 remove-markdown 库,使用默认的 removeMarkdown 参数 return removeMarkdown(markdown) } + +/** + * 清理 Markdown 中的 base64 图片链接 + * + * 将 Markdown 中的 base64 格式图片链接替换为普通链接格式。 + * + * @param {string} markdown - 包含图片链接的 Markdown 文本 + * @returns {string} 处理后的 Markdown 文本,所有 base64 图片链接都被替换为普通链接 + * @example + * - 输入: `![image](data:image/png;base64,iVBORw0...)` + * - 输出: `![image](image_url)` + */ +export const purifyMarkdownImages = (markdown: string): string => { + return markdown.replace( + /(!\[[^\]]*\]\()\s*data:image\/[\w+.-]+;base64\s*,[\w+/=]+(?:\s*[\w+/=]+)*\s*\)/gi, + '$1image_url)' + ) +}