fix(Markdown): improve latex brackets handling (#7358)

This commit is contained in:
one 2025-06-23 15:19:21 +08:00 committed by GitHub
parent a2e2eb3b73
commit 4f2c8bd905
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 298 additions and 64 deletions

View File

@ -112,6 +112,7 @@
"@testing-library/jest-dom": "^6.6.3",
"@testing-library/react": "^16.3.0",
"@tryfabric/martian": "^1.2.4",
"@types/balanced-match": "^3",
"@types/diff": "^7",
"@types/fs-extra": "^11",
"@types/lodash": "^4.17.5",
@ -138,6 +139,7 @@
"archiver": "^7.0.1",
"async-mutex": "^0.5.0",
"axios": "^1.7.3",
"balanced-match": "^3.0.1",
"browser-image-compression": "^2.0.2",
"color": "^5.0.0",
"dayjs": "^1.11.11",

View File

@ -8,8 +8,8 @@ import { useSettings } from '@renderer/hooks/useSettings'
import { EVENT_NAMES, EventEmitter } from '@renderer/services/EventService'
import type { MainTextMessageBlock, ThinkingMessageBlock, TranslationMessageBlock } from '@renderer/types/newMessage'
import { parseJSON } from '@renderer/utils'
import { escapeBrackets, removeSvgEmptyLines } from '@renderer/utils/formats'
import { findCitationInChildren, getCodeBlockId } from '@renderer/utils/markdown'
import { removeSvgEmptyLines } from '@renderer/utils/formats'
import { findCitationInChildren, getCodeBlockId, processLatexBrackets } from '@renderer/utils/markdown'
import { isEmpty } from 'lodash'
import { type FC, memo, useCallback, useMemo } from 'react'
import { useTranslation } from 'react-i18next'
@ -52,7 +52,7 @@ const Markdown: FC<Props> = ({ block }) => {
const empty = isEmpty(block.content)
const paused = block.status === 'paused'
const content = empty && paused ? t('message.chat.completion.paused') : block.content
return removeSvgEmptyLines(escapeBrackets(content))
return removeSvgEmptyLines(processLatexBrackets(content))
}, [block, t])
const rehypePlugins = useMemo(() => {

View File

@ -42,13 +42,13 @@ vi.mock('@renderer/utils', () => ({
}))
vi.mock('@renderer/utils/formats', () => ({
escapeBrackets: vi.fn((str) => str),
removeSvgEmptyLines: vi.fn((str) => str)
}))
vi.mock('@renderer/utils/markdown', () => ({
findCitationInChildren: vi.fn(() => '{"id": 1, "url": "https://example.com"}'),
getCodeBlockId: vi.fn(() => 'code-block-1')
getCodeBlockId: vi.fn(() => 'code-block-1'),
processLatexBrackets: vi.fn((str) => str)
}))
// Mock components with more realistic behavior
@ -212,16 +212,6 @@ describe('Markdown', () => {
expect(markdown).not.toHaveTextContent('Paused')
})
// Checks that the raw block content is handed to both format helpers.
// NOTE(review): this relies on the pass-through vi.mock of '@renderer/utils/formats'
// declared earlier in this file, which returns its input unchanged.
it('should process content through format utilities', async () => {
// Pull the mocked helpers so their recorded calls can be inspected
const { escapeBrackets, removeSvgEmptyLines } = await import('@renderer/utils/formats')
const content = 'Content with [brackets] and SVG'
render(<Markdown block={createMainTextBlock({ content })} />)
// Both helpers are expected to have been called with the unmodified content
expect(escapeBrackets).toHaveBeenCalledWith(content)
expect(removeSvgEmptyLines).toHaveBeenCalledWith(content)
})
it('should match snapshot', () => {
const { container } = render(<Markdown block={createMainTextBlock()} />)
expect(container.firstChild).toMatchSnapshot()

View File

@ -6,7 +6,6 @@ import { describe, expect, it, vi } from 'vitest'
import {
addImageFileToContents,
encodeHTML,
escapeBrackets,
escapeDollarNumber,
extractTitle,
removeSvgEmptyLines,
@ -180,36 +179,6 @@ describe('formats', () => {
})
})
// Tests for escapeBrackets: LaTeX bracket delimiters are rewritten to dollar
// delimiters, while anything inside backtick code spans is left as written.
describe('escapeBrackets', () => {
// Display math is wrapped in `$$` lines with a newline before and after each marker
it('should convert \\[...\\] to display math format', () => {
expect(escapeBrackets('The formula is \\[a+b=c\\]')).toBe('The formula is \n$$\na+b=c\n$$\n')
})
// Inline math stays on the same line and is wrapped in single `$`
it('should convert \\(...\\) to inline math format', () => {
expect(escapeBrackets('The formula is \\(a+b=c\\)')).toBe('The formula is $a+b=c$')
})
// Triple-backtick spans are returned unchanged
it('should not affect code blocks', () => {
const codeBlock = 'This is text with a code block ```const x = \\[1, 2, 3\\]```'
expect(escapeBrackets(codeBlock)).toBe(codeBlock)
})
// Single-backtick spans are returned unchanged
it('should not affect inline code', () => {
const inlineCode = 'This is text with `const x = \\[1, 2, 3\\]` inline code'
expect(escapeBrackets(inlineCode)).toBe(inlineCode)
})
// Every occurrence in the string is converted, not just the first
it('should handle multiple occurrences', () => {
const input = 'Formula 1: \\[a+b=c\\] and formula 2: \\(x+y=z\\)'
const expected = 'Formula 1: \n$$\na+b=c\n$$\n and formula 2: $x+y=z$'
expect(escapeBrackets(input)).toBe(expected)
})
it('should handle empty string', () => {
expect(escapeBrackets('')).toBe('')
})
})
describe('extractTitle', () => {
it('should extract title from HTML string', () => {
const html = '<html><head><title>Page Title</title></head><body>Content</body></html>'

View File

@ -9,6 +9,7 @@ import {
getCodeBlockId,
getExtensionByLanguage,
markdownToPlainText,
processLatexBrackets,
removeTrailingDoubleSpaces,
updateCodeBlock
} from '../markdown'
@ -461,4 +462,198 @@ describe('markdown', () => {
expect(markdownToPlainText('This is plain text.')).toBe('This is plain text.')
})
})
describe('processLatexBrackets', () => {
// Baseline conversions: the delimiters are swapped and the formula body is kept verbatim.
describe('basic LaTeX conversion', () => {
// `\[...\]` becomes `$$...$$` on the same line (no newlines are inserted)
it('should convert display math \\[...\\] to $$...$$', () => {
expect(processLatexBrackets('The formula is \\[a+b=c\\]')).toBe('The formula is $$a+b=c$$')
})
// `\(...\)` becomes `$...$`
it('should convert inline math \\(...\\) to $...$', () => {
expect(processLatexBrackets('The formula is \\(a+b=c\\)')).toBe('The formula is $a+b=c$')
})
})
// Code spans (fenced blocks and inline code) must come back byte-for-byte,
// even when they contain text that looks like LaTeX delimiters.
describe('code block protection', () => {
  it('should not affect multi-line code blocks', () => {
    // `\n` is a real line break here, so the fenced block actually spans two lines
    const input = 'Text ```const arr = \\[1, 2, 3\\]\nconst func = \\(x\\) => x``` more text'
    expect(processLatexBrackets(input)).toBe(input)
  })

  it('should not affect inline code', () => {
    const input = 'This is text with `const x = \\[1, 2, 3\\]` inline code'
    expect(processLatexBrackets(input)).toBe(input)
  })

  // Math outside the code span is converted; the same delimiters inside it are not
  it('should handle mixed code and LaTeX', () => {
    const input = 'Math: \\[x + y\\] and code: `arr = \\[1, 2\\]` and more math: \\(z\\)'
    const expected = 'Math: $$x + y$$ and code: `arr = \\[1, 2\\]` and more math: $z$'
    expect(processLatexBrackets(input)).toBe(expected)
  })

  it('should protect complex code blocks', () => {
    // Each pair is [input, expected]; the fenced fixtures use real line breaks
    for (const [input, expected] of new Map([
      [
        '```javascript\nconst latex = "\\\\[formula\\\\]"\n```',
        '```javascript\nconst latex = "\\\\[formula\\\\]"\n```'
      ],
      ['`\\[escaped brackets\\]`', '`\\[escaped brackets\\]`'],
      [
        '```\narray = \\[\n \\(item1\\),\n \\(item2\\)\n\\]\n```',
        '```\narray = \\[\n \\(item1\\),\n \\(item2\\)\n\\]\n```'
      ]
    ])) {
      expect(processLatexBrackets(input)).toBe(expected)
    }
  })
})
// Markdown links `[text](url)` must come back unchanged, whether the
// LaTeX-looking delimiters sit in the link text or in the URL.
describe('link protection', () => {
it('should not affect LaTeX in link text', () => {
const input = '[\\[pdf\\] Document](https://example.com/doc.pdf)'
expect(processLatexBrackets(input)).toBe(input)
})
it('should not affect LaTeX in link URLs', () => {
const input = '[Click here](https://example.com/path\\[with\\]brackets)'
expect(processLatexBrackets(input)).toBe(input)
})
// The link is preserved while the formula after it is still converted
it('should handle mixed links and LaTeX', () => {
const input = 'See [\\[pdf\\] file](url) for formula \\[x + y = z\\]'
const expected = 'See [\\[pdf\\] file](url) for formula $$x + y = z$$'
expect(processLatexBrackets(input)).toBe(expected)
})
it('should protect complex link patterns', () => {
// Each pair is [input, expected]
for (const [input, expected] of new Map([
['[Title with \\(math\\)](https://example.com)', '[Title with \\(math\\)](https://example.com)'],
['[Link](https://example.com/\\[path\\]/file)', '[Link](https://example.com/\\[path\\]/file)'],
[
'[\\[Section 1\\] Overview](url) and \\[math formula\\]',
'[\\[Section 1\\] Overview](url) and $$math formula$$'
]
])) {
expect(processLatexBrackets(input)).toBe(expected)
}
})
})
// Inputs at the boundaries: empty text, text with no LaTeX, unpaired
// delimiters, empty formulas and nested delimiter pairs.
describe('edge cases', () => {
it('should handle empty string', () => {
expect(processLatexBrackets('')).toBe('')
})
// Plain brackets and parentheses (no backslash) are not delimiters
it('should handle content without LaTeX', () => {
for (const [input, expected] of new Map([
['Regular text without math', 'Regular text without math'],
['Text with [regular] brackets', 'Text with [regular] brackets'],
['Text with (parentheses)', 'Text with (parentheses)'],
['No special characters here', 'No special characters here']
])) {
expect(processLatexBrackets(input)).toBe(expected)
}
})
// An opener without a closer (or the reverse) is left exactly as written
it('should handle malformed LaTeX patterns', () => {
for (const [input, expected] of new Map([
['\\[unclosed bracket', '\\[unclosed bracket'],
['unopened bracket\\]', 'unopened bracket\\]'],
['\\(unclosed paren', '\\(unclosed paren'],
['unopened paren\\)', 'unopened paren\\)'],
['\\[\\]', '$$$$'], // Empty LaTeX block
['\\(\\)', '$$'] // Empty LaTeX inline
])) {
expect(processLatexBrackets(input)).toBe(expected)
}
})
// Only the outermost pair is converted; the inner pair stays inside the body untouched
it('should handle nested brackets', () => {
for (const [input, expected] of new Map([
['\\[outer \\[inner\\] formula\\]', '$$outer \\[inner\\] formula$$'],
['\\(a + \\(b + c\\)\\)', '$a + \\(b + c\\)$']
])) {
expect(processLatexBrackets(input)).toBe(expected)
}
})
})
// End-to-end check on one document that mixes headings, list items, inline and
// display math (single-line and multi-line), a fenced code block, links and inline code.
describe('complex cases', () => {
it('should handle complex mixed content', () => {
// Input document: math uses \( \) and \[ \]; the fenced block, the links and
// the inline code contain the same delimiters but must not be converted.
const complexInput = `
# Mathematical Document
Here's a simple formula \\(E = mc^2\\) in text.
## Section 1: Equations
The quadratic formula is \\[x = \\frac{-b \\pm \\sqrt{b^2-4ac}}{2a}\\].
- Item 1: See formula \\(\\alpha + \\beta = \\gamma\\) in this list
- Item 2: Check [\\[PDF\\] Complex Analysis](https://example.com/math.pdf)
- Subitem 2.1: Basic concepts and definitions
- Subitem 2.2: The Cauchy-Riemann equations \\[\\frac{\\partial u}{\\partial x} = \\frac{\\partial v}{\\partial y}, \\quad \\frac{\\partial u}{\\partial y} = -\\frac{\\partial v}{\\partial x}\\]
- Subitem 2.3: Green's theorem connects line integrals and double integrals
\\[
\\oint_C (P dx + Q dy) = \\iint_D \\left(\\frac{\\partial Q}{\\partial x} - \\frac{\\partial P}{\\partial y}\\right) dx dy
\\]
- Subitem 2.4: Applications in engineering and physics
- Item 3: The sum \\[\\sum_{i=1}^{n} \\frac{1}{i^2} = \\frac{\\pi^2}{6}\\] is famous
\`\`\`javascript
// Code should not be affected
const matrix = \\[
\\[1, 2\\],
\\[3, 4\\]
\\];
const func = \\(x\\) => x * 2;
\`\`\`
Read more in [Section \\[3.2\\]: Advanced Topics](url) and see inline code \`\\[array\\]\`.
Final thoughts on \\(\\nabla \\cdot \\vec{F} = \\rho\\) and display math:
\\[\\int_0^\\infty e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}\\]
`
// Expected document: identical except that math outside code and links now
// uses $ and $$ delimiters.
const expectedOutput = `
# Mathematical Document
Here's a simple formula $E = mc^2$ in text.
## Section 1: Equations
The quadratic formula is $$x = \\frac{-b \\pm \\sqrt{b^2-4ac}}{2a}$$.
- Item 1: See formula $\\alpha + \\beta = \\gamma$ in this list
- Item 2: Check [\\[PDF\\] Complex Analysis](https://example.com/math.pdf)
- Subitem 2.1: Basic concepts and definitions
- Subitem 2.2: The Cauchy-Riemann equations $$\\frac{\\partial u}{\\partial x} = \\frac{\\partial v}{\\partial y}, \\quad \\frac{\\partial u}{\\partial y} = -\\frac{\\partial v}{\\partial x}$$
- Subitem 2.3: Green's theorem connects line integrals and double integrals
$$
\\oint_C (P dx + Q dy) = \\iint_D \\left(\\frac{\\partial Q}{\\partial x} - \\frac{\\partial P}{\\partial y}\\right) dx dy
$$
- Subitem 2.4: Applications in engineering and physics
- Item 3: The sum $$\\sum_{i=1}^{n} \\frac{1}{i^2} = \\frac{\\pi^2}{6}$$ is famous
\`\`\`javascript
// Code should not be affected
const matrix = \\[
\\[1, 2\\],
\\[3, 4\\]
\\];
const func = \\(x\\) => x * 2;
\`\`\`
Read more in [Section \\[3.2\\]: Advanced Topics](url) and see inline code \`\\[array\\]\`.
Final thoughts on $\\nabla \\cdot \\vec{F} = \\rho$ and display math:
$$\\int_0^\\infty e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}$$
`
expect(processLatexBrackets(complexInput)).toBe(expectedOutput)
})
})
})
})

View File

@ -53,24 +53,6 @@ export function escapeDollarNumber(text: string) {
return escapedText
}
/**
 * Rewrites LaTeX bracket delimiters as dollar delimiters.
 *
 * - `\[...\]` becomes the body on its own line between two `$$` lines, with a
 *   newline before the first marker and after the last one.
 * - `\(...\)` becomes `$...$` on the same line.
 * - Triple-backtick and single-backtick code spans are returned unchanged.
 *
 * @param text Text that may contain LaTeX bracket delimiters
 * @returns The text with the delimiters rewritten
 */
export function escapeBrackets(text: string) {
  // Code spans come first in the alternation so they win over the math patterns
  const latexOrCode = /(```[\s\S]*?```|`.*?`)|\\\[([\s\S]*?[^\\])\\]|\\\((.*?)\\\)/g

  // Decide the replacement from whichever capture group took part in the match
  const rewrite = (whole: string, code?: string, display?: string, inline?: string): string => {
    if (code) {
      return code
    }
    if (display) {
      return ['', '$$', display, '$$', ''].join('\n')
    }
    // An empty inline body (`\(\)`) falls through and keeps the original text
    return inline ? '$' + inline + '$' : whole
  }

  return text.replace(latexOrCode, rewrite)
}
export function extractTitle(html: string): string | null {
if (!html) return null

View File

@ -1,4 +1,5 @@
import { languages } from '@shared/config/languages'
import balanced from 'balanced-match'
import remarkParse from 'remark-parse'
import remarkStringify from 'remark-stringify'
import removeMarkdown from 'remove-markdown'
@ -29,6 +30,85 @@ export const findCitationInChildren = (children: any): string => {
return ''
}
// Cheap pre-check for text that may contain LaTeX. The `s` (dotAll) flag lets `.`
// cross line breaks, so formulas written over several lines are detected too.
const containsLatexRegex = /\\\(.*?\\\)|\\\[.*?\\\]|\$.*?\$|\\begin\{equation\}.*?\\end\{equation\}/s

/**
 * Converts LaTeX bracket delimiters into Markdown dollar delimiters:
 * `\[...\]` becomes `$$...$$` and `\(...\)` becomes `$...$`.
 *
 * How it works:
 * - Fenced code blocks, inline code and `[text](url)` links are swapped for
 *   placeholder tokens before the conversion and restored afterwards, so
 *   their contents are never rewritten.
 * - Delimiters are paired with balanced matching, so a nested pair stays
 *   inside the body of the outer pair and is left as written.
 * - Delimiters are matched as plain substrings; a preceding backslash is not
 *   treated as an escape, so `\\(x\\)` becomes `\$x\$` and `\\[x\\]` becomes `\$$x\$$`.
 *
 * @see https://github.com/remarkjs/remark-math/issues/39
 * @param text Markdown source that may contain LaTeX bracket delimiters
 * @returns The text with bracket delimiters replaced; the input is returned
 *   unchanged when the pre-check finds nothing that looks like LaTeX
 */
export const processLatexBrackets = (text: string): string => {
  // Nothing that looks like LaTeX: return the input untouched
  if (!containsLatexRegex.test(text)) {
    return text
  }

  // Original text of every protected span; its index is embedded in the placeholder
  const protectedItems: string[] = []

  // Swap placeholder tokens back for the text they stand for. A token whose
  // index is not in the list is left as it is.
  const restoreProtected = (content: string): string =>
    content.replace(
      /__CHERRY_STUDIO_PROTECTED_(\d+)__/g,
      (placeholder: string, indexStr: string) => protectedItems[parseInt(indexStr, 10)] ?? placeholder
    )

  // Remember a span and return the placeholder token that replaces it
  const protect = (original: string): string => {
    const index = protectedItems.length
    protectedItems.push(original)
    return `__CHERRY_STUDIO_PROTECTED_${index}__`
  }

  const processedContent = text
    // Protect code (fenced blocks and inline code)
    .replace(/(```[\s\S]*?```|`[^`]*`)/g, (match) => protect(match))
    // Protect links [text](url). By now inline code inside a link has already
    // been turned into a placeholder, so expand it before storing the link;
    // otherwise the final single-pass restore would leave that token in the output.
    .replace(/\[([^[\]]*(?:\[[^\]]*\][^[\]]*)*)\]\([^)]*?\)/g, (match) => protect(restoreProtected(match)))

  // Replace every balanced openDelim...closeDelim pair with wrapper + body + wrapper
  const processMath = (content: string, openDelim: string, closeDelim: string, wrapper: string): string => {
    let result = ''
    let remaining = content

    while (remaining.length > 0) {
      const match = balanced(openDelim, closeDelim, remaining)
      if (!match) {
        // No further pair: keep the rest as it is
        result += remaining
        break
      }

      result += match.pre
      result += `${wrapper}${match.body}${wrapper}`
      remaining = match.post
    }

    return result
  }

  // Display math first, then inline math
  const withDisplayMath = processMath(processedContent, '\\[', '\\]', '$$')
  const withInlineMath = processMath(withDisplayMath, '\\(', '\\)', '$')

  // Put the protected spans back
  return restoreProtected(withInlineMath)
}
/**
*
* - LaTeX '\\[' '\\]' '$$$$'

View File

@ -4115,6 +4115,13 @@ __metadata:
languageName: node
linkType: hard
"@types/balanced-match@npm:^3":
version: 3.0.2
resolution: "@types/balanced-match@npm:3.0.2"
checksum: 10c0/833f6499609363537026c4ec2770af5c5a36e71b80f7b5b23884b15296301bfcf974cd40bc75fda940dea4994acd96c9222b284c248383a1ade59bf8835940b0
languageName: node
linkType: hard
"@types/cacheable-request@npm:^6.0.1":
version: 6.0.3
resolution: "@types/cacheable-request@npm:6.0.3"
@ -5639,6 +5646,7 @@ __metadata:
"@testing-library/jest-dom": "npm:^6.6.3"
"@testing-library/react": "npm:^16.3.0"
"@tryfabric/martian": "npm:^1.2.4"
"@types/balanced-match": "npm:^3"
"@types/diff": "npm:^7"
"@types/fs-extra": "npm:^11"
"@types/lodash": "npm:^4.17.5"
@ -5665,6 +5673,7 @@ __metadata:
archiver: "npm:^7.0.1"
async-mutex: "npm:^0.5.0"
axios: "npm:^1.7.3"
balanced-match: "npm:^3.0.1"
browser-image-compression: "npm:^2.0.2"
color: "npm:^5.0.0"
dayjs: "npm:^1.11.11"
@ -6297,6 +6306,13 @@ __metadata:
languageName: node
linkType: hard
"balanced-match@npm:^3.0.1":
version: 3.0.1
resolution: "balanced-match@npm:3.0.1"
checksum: 10c0/ac8dd63a5b260610c2cbda982f436e964c1b9ae8764d368a523769da40a31710abd6e19f0fdf1773c4ad7b2ea7ba7b285d547375dc723f6e754369835afc8e9f
languageName: node
linkType: hard
"bare-events@npm:^2.2.0":
version: 2.5.4
resolution: "bare-events@npm:2.5.4"