mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-28 13:31:32 +08:00
fix: filter inline base64 image in messages summary (#9687)
* feat(markdown): 添加清理base64图片链接的功能
  添加purifyMarkdownImages函数用于将Markdown中的base64图片链接替换为普通链接格式
* fix(utils): 清理markdown中的base64图片链接并应用到消息摘要
  在ApiService中调用purifyMarkdownImages清理消息摘要中的base64图片链接
This commit is contained in:
parent
e5327aba78
commit
c5e746b6c6
@ -42,6 +42,7 @@ import { removeSpecialCharactersForTopicName, uuid } from '@renderer/utils'
|
||||
import { abortCompletion } from '@renderer/utils/abortController'
|
||||
import { isAbortError } from '@renderer/utils/error'
|
||||
import { extractInfoFromXML, ExtractResults } from '@renderer/utils/extract'
|
||||
import { purifyMarkdownImages } from '@renderer/utils/markdown'
|
||||
import { filterAdjacentUserMessaegs, filterLastAssistantMessage } from '@renderer/utils/messageUtils/filters'
|
||||
import { findFileBlocks, getMainTextContent } from '@renderer/utils/messageUtils/find'
|
||||
import {
|
||||
@ -699,7 +700,7 @@ export async function fetchMessagesSummary({ messages, assistant }: { messages:
|
||||
const structredMessages = contextMessages.map((message) => {
|
||||
const structredMessage = {
|
||||
role: message.role,
|
||||
mainText: getMainTextContent(message)
|
||||
mainText: purifyMarkdownImages(getMainTextContent(message))
|
||||
}
|
||||
|
||||
// 让LLM知道消息中包含的文件,但只提供文件名
|
||||
|
||||
@ -9,6 +9,7 @@ import {
|
||||
isHtmlCode,
|
||||
markdownToPlainText,
|
||||
processLatexBrackets,
|
||||
purifyMarkdownImages,
|
||||
removeTrailingDoubleSpaces,
|
||||
updateCodeBlock
|
||||
} from '../markdown'
|
||||
@ -634,4 +635,70 @@ $$
|
||||
expect(isHtmlCode('a < b')).toBe(false)
|
||||
})
|
||||
})
|
||||
|
||||
// Covers purifyMarkdownImages: Markdown images whose target is a base64 data URL
// must have that target replaced by the `image_url` placeholder, while every
// other piece of text is returned unchanged.
describe('purifyMarkdownImages', () => {
  it('should replace base64 image with placeholder', () => {
    const input = '![cat](data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==)'
    const expected = '![cat](image_url)'
    expect(purifyMarkdownImages(input)).toBe(expected)
  })

  it('should handle multiple base64 images', () => {
    const input = `
![img1](data:image/png;base64,iVBORw0KGgo=)
Some text
![img2](data:image/jpeg;base64,/9j/4AAQSkZJRg==)
`
    const expected = `
![img1](image_url)
Some text
![img2](image_url)
`
    expect(purifyMarkdownImages(input)).toBe(expected)
  })

  it('should ignore normal image links', () => {
    const input = '![photo](https://example.com/photo.png)'
    expect(purifyMarkdownImages(input)).toBe(input)
  })

  it('should handle whitespace in base64 url', () => {
    // Whitespace before the data URL, inside the payload and before the
    // closing parenthesis is all tolerated by the pattern.
    const input = '![logo]( data:image/png;base64,iVBOR w0KGgo= )'
    const expected = '![logo](image_url)'
    expect(purifyMarkdownImages(input)).toBe(expected)
  })

  it('should preserve alt text', () => {
    const input = '![A very detailed alt text](data:image/gif;base64,R0lGODlhAQABAAAAACw=)'
    const expected = '![A very detailed alt text](image_url)'
    expect(purifyMarkdownImages(input)).toBe(expected)
  })

  it('should handle uppercase data URL', () => {
    const input = '![img](DATA:IMAGE/PNG;BASE64,iVBORw0KGgo=)'
    const expected = '![img](image_url)'
    expect(purifyMarkdownImages(input)).toBe(expected)
  })

  it('should not modify text that is not image', () => {
    const input = 'This is a data:image/png;base64,iVBORw line of text'
    expect(purifyMarkdownImages(input)).toBe(input)
  })

  it('should handle mixed content', () => {
    const input = `
Regular: ![regular](https://example.com/a.png)
Base64: ![encoded](data:image/png;base64,iVBORw0KGgo=)
Another: ![another](https://example.com/b.jpg)
Inline: ![inline](data:image/jpeg;base64,/9j/4AAQSkZJRg==)
`
    const expected = `
Regular: ![regular](https://example.com/a.png)
Base64: ![encoded](image_url)
Another: ![another](https://example.com/b.jpg)
Inline: ![inline](image_url)
`
    expect(purifyMarkdownImages(input)).toBe(expected)
  })
})
|
||||
})
|
||||
|
||||
@ -312,3 +312,21 @@ export const markdownToPlainText = (markdown: string): string => {
|
||||
// 直接用 remove-markdown 库,使用默认的 removeMarkdown 参数
|
||||
return removeMarkdown(markdown)
|
||||
}
|
||||
|
||||
/**
 * Strips inline base64 image payloads from Markdown.
 *
 * Every Markdown image whose target is a `data:image/...;base64,...` URL has
 * that target replaced with the literal placeholder `image_url`; the alt text
 * is kept as-is. Images that point at ordinary URLs, and data URLs that are not
 * wrapped in Markdown image syntax, are left unchanged.
 *
 * @param markdown - Markdown text that may contain inline base64 images
 * @returns The Markdown text with every base64 image target replaced by `image_url`
 * @example
 * - Input: `![image](data:image/png;base64,iVBORw0KGgo=)`
 * - Output: `![image](image_url)`
 */
export const purifyMarkdownImages = (markdown: string): string => {
  // The payload is matched as "one base64 character, then any mix of base64
  // characters and whitespace" using a single character class. This accepts
  // exactly the same strings as a nested `[...]+(?:\s*[...]+)*` form, but it
  // cannot backtrack exponentially when a long payload is not followed by `)`.
  return markdown.replace(
    /(!\[[^\]]*\]\()\s*data:image\/[\w+.-]+;base64\s*,[\w+/=][\w+/=\s]*\)/gi,
    '$1image_url)'
  )
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user