fix: filter inline base64 image in messages summary (#9687)

* feat(markdown): 添加清理base64图片链接的功能

添加purifyMarkdownImages函数用于将Markdown中的base64图片链接替换为普通链接格式

* fix(utils): 清理markdown中的base64图片链接并应用到消息摘要

在ApiService中调用purifyMarkdownImages清理消息摘要中的base64图片链接
This commit is contained in:
Phantom 2025-08-30 18:24:44 +08:00 committed by GitHub
parent e5327aba78
commit c5e746b6c6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 87 additions and 1 deletions

View File

@ -42,6 +42,7 @@ import { removeSpecialCharactersForTopicName, uuid } from '@renderer/utils'
import { abortCompletion } from '@renderer/utils/abortController'
import { isAbortError } from '@renderer/utils/error'
import { extractInfoFromXML, ExtractResults } from '@renderer/utils/extract'
import { purifyMarkdownImages } from '@renderer/utils/markdown'
import { filterAdjacentUserMessaegs, filterLastAssistantMessage } from '@renderer/utils/messageUtils/filters'
import { findFileBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find'
import {
@ -699,7 +700,7 @@ export async function fetchMessagesSummary({ messages, assistant }: { messages:
const structredMessages = contextMessages.map((message) => {
const structredMessage = {
role: message.role,
mainText: getMainTextContent(message)
mainText: purifyMarkdownImages(getMainTextContent(message))
}
// 让LLM知道消息中包含的文件但只提供文件名

View File

@ -9,6 +9,7 @@ import {
isHtmlCode,
markdownToPlainText,
processLatexBrackets,
purifyMarkdownImages,
removeTrailingDoubleSpaces,
updateCodeBlock
} from '../markdown'
@ -634,4 +635,70 @@ $$
expect(isHtmlCode('a < b')).toBe(false)
})
})
describe('purifyMarkdownImages', () => {
  // Joins rows with '\n'. Empty first/last rows reproduce the newline that
  // opens and closes a multi-line fixture.
  const lines = (...rows: string[]): string => rows.join('\n')

  // One entry per scenario: the test title, the Markdown fed to the function,
  // and the exact string it must return.
  const scenarios: Array<{ title: string; input: string; expected: string }> = [
    {
      title: 'should replace base64 image with placeholder',
      input: '![cat](data:image/png;base64,iVBORw0KGgo)',
      expected: '![cat](image_url)'
    },
    {
      title: 'should handle multiple base64 images',
      input: lines('', '![dog](data:image/jpeg;base64,ABC123)', 'Some text', '![avatar](data:image/png;base64,XYZ789)', ''),
      expected: lines('', '![dog](image_url)', 'Some text', '![avatar](image_url)', '')
    },
    {
      // Non-data URLs must pass through untouched.
      title: 'should ignore normal image links',
      input: '![cat](https://example.com/cat.png)',
      expected: '![cat](https://example.com/cat.png)'
    },
    {
      title: 'should handle whitespace in base64 url',
      input: '![logo]( data:image/svg+xml;base64,CONTENT )',
      expected: '![logo](image_url)'
    },
    {
      title: 'should preserve alt text',
      input: '![User Avatar](data:image/png;base64,xxx)',
      expected: '![User Avatar](image_url)'
    },
    {
      title: 'should handle uppercase data URL',
      input: '![test](DATA:IMAGE/PNG;BASE64,ABC)',
      expected: '![test](image_url)'
    },
    {
      // A bare data URI outside of image syntax is not rewritten.
      title: 'should not modify text that is not image',
      input: 'This is a data:image/png;base64,iVBORw line of text',
      expected: 'This is a data:image/png;base64,iVBORw line of text'
    },
    {
      title: 'should handle mixed content',
      input: lines(
        '',
        'Regular: ![cat](https://example.com/cat.png)',
        'Base64: ![dog](data:image/jpeg;base64,BASE64DATA)',
        'Another: ![bird](https://example.com/bird.gif)',
        'Inline: ![icon]( data:image/x-icon;base64,ICONDATA )',
        ''
      ),
      expected: lines(
        '',
        'Regular: ![cat](https://example.com/cat.png)',
        'Base64: ![dog](image_url)',
        'Another: ![bird](https://example.com/bird.gif)',
        'Inline: ![icon](image_url)',
        ''
      )
    }
  ]

  for (const { title, input, expected } of scenarios) {
    it(title, () => {
      expect(purifyMarkdownImages(input)).toBe(expected)
    })
  }
})
})

View File

@ -312,3 +312,21 @@ export const markdownToPlainText = (markdown: string): string => {
// 直接用 remove-markdown 库,使用默认的 removeMarkdown 参数
return removeMarkdown(markdown)
}
/**
 * Replaces inline base64 image data in Markdown image links with a short placeholder.
 *
 * Every `![alt](data:image/<type>;base64,<payload>)` occurrence becomes
 * `![alt](image_url)`: the alt text is kept and the payload is dropped.
 * Matching is case-insensitive and tolerates whitespace around and inside the
 * payload. Regular image URLs, and `data:` URIs that are not the target of an
 * image link, are left untouched.
 *
 * @param {string} markdown - Markdown text that may contain base64-embedded images
 * @returns {string} The Markdown text with each base64 image target replaced by `image_url`
 * @example
 * - Input: `![image](data:image/png;base64,iVBORw0...)`
 * - Output: `![image](image_url)`
 */
export const purifyMarkdownImages = (markdown: string): string => {
  return markdown.replace(
    // The payload is matched with one character class (base64 characters plus
    // whitespace) instead of nested quantifiers. It accepts the same strings,
    // but a link that never reaches a closing `)` now fails in linear time
    // rather than backtracking exponentially on a large payload.
    /(!\[[^\]]*\]\()\s*data:image\/[\w+.-]+;base64\s*,[\w+/=][\w+/=\s]*\)/gi,
    '$1image_url)'
  )
}