mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-24 18:50:56 +08:00
feat(history-search): show keyword-adjacent snippets and align matching text (#12034)
* fix(history-search): show keyword-adjacent snippets and align matching text - Limit search results to title plus nearby lines with ellipses - Merge multi-keyword hit ranges and truncate long lines - Match against sanitized visible text to avoid URL/image false hits * fix(history): 针对review 的改进:避免搜索高亮嵌套并优化命名与省略逻辑注释
This commit is contained in:
parent
c4f94848e8
commit
9a435b8abb
@ -17,6 +17,7 @@ type SearchResult = {
|
||||
message: Message
|
||||
topic: Topic
|
||||
content: string
|
||||
snippet: string
|
||||
}
|
||||
|
||||
interface Props extends React.HTMLAttributes<HTMLDivElement> {
|
||||
@ -25,6 +26,158 @@ interface Props extends React.HTMLAttributes<HTMLDivElement> {
|
||||
onTopicClick: (topic: Topic) => void
|
||||
}
|
||||
|
||||
/** How many neighbouring lines are kept above and below each matching line. */
const SEARCH_SNIPPET_CONTEXT_LINES = 1

/** Upper bound on the number of lines collected into one snippet. */
const SEARCH_SNIPPET_MAX_LINES = 12

/** Lines up to this many characters are shown as-is; longer lines are condensed. */
const SEARCH_SNIPPET_MAX_LINE_LENGTH = 160

/** Characters kept on each side of a match when a long line is condensed. */
const SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS = 40

/** Maximum number of separate match fragments shown for one long line. */
const SEARCH_SNIPPET_MAX_LINE_FRAGMENTS = 3
|
||||
|
||||
/**
 * Removes common Markdown and HTML markup from `text`, keeping the wrapped
 * content (code, link/image labels, emphasised text) so that searching and
 * snippet building operate on text rather than on URLs or tags.
 *
 * This is a regex-based approximation, not a Markdown parser; the rules are
 * applied in the order written below.
 *
 * @param text Raw Markdown source.
 * @returns The text with the recognised markup stripped.
 */
const stripMarkdownFormatting = (text: string) => {
  return (
    text
      // Fenced code: drop the fences and an optional info string ("```ts"),
      // keep the code. The info string must not contain a backtick, otherwise
      // an inline "```a```" would have its closing fence swallowed as part of
      // the info line and the match would run on to a later fence.
      .replace(/```(?:[^\n`]*\n)?([\s\S]*?)```/g, '$1')
      // Images: keep the alt text, drop the URL.
      .replace(/!\[(.*?)\]\((.*?)\)/g, '$1')
      // Links: keep the label, drop the URL.
      .replace(/\[(.*?)\]\((.*?)\)/g, '$1')
      // Bold, then italic, then inline code: keep the inner text.
      .replace(/\*\*(.*?)\*\*/g, '$1')
      .replace(/\*(.*?)\*/g, '$1')
      .replace(/`(.*?)`/g, '$1')
      // Heading markers only at the start of a line (up to three leading
      // spaces are preserved), so a "#" inside running text such as "C# is"
      // is left alone. Only spaces/tabs are consumed so lines never merge.
      .replace(/^( {0,3})#{1,6}[ \t]+/gm, '$1')
      // HTML tags.
      .replace(/<[^>]*>/g, '')
  )
}
|
||||
|
||||
/**
 * Unifies line endings: every CRLF pair and every remaining lone CR becomes a
 * single LF.
 */
const normalizeText = (text: string) => {
  const withoutCrlf = text.split('\r\n').join('\n')
  return withoutCrlf.split('\r').join('\n')
}
|
||||
|
||||
/**
 * Backslash-escapes every regular-expression metacharacter in `text` so the
 * result can be embedded in a RegExp and match the input literally.
 */
const escapeRegex = (text: string) => {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g
  return text.replace(metaCharacters, (character) => `\\${character}`)
}
|
||||
|
||||
/**
 * Sorts `[start, end]` ranges by start and merges those that overlap or lie
 * within `maxGap` of each other.
 *
 * The input is not modified; the returned tuples are new arrays.
 *
 * @param ranges Ranges in any order.
 * @param maxGap Largest allowed difference between one range's end and the
 *   next range's start for the two to be merged. The default of 1 merges
 *   ranges that touch or are directly adjacent (e.g. `[0, 1]` and `[2, 3]`).
 * @returns The merged ranges in ascending order of start.
 */
const mergeRanges = (
  ranges: ReadonlyArray<readonly [number, number]>,
  maxGap = 1
): Array<[number, number]> => {
  const merged: Array<[number, number]> = []
  const sorted = [...ranges].sort((a, b) => a[0] - b[0])

  for (const [start, end] of sorted) {
    const last = merged[merged.length - 1]
    if (last === undefined || start > last[1] + maxGap) {
      // Too far from the previous range (or nothing collected yet): start a new one.
      merged.push([start, end])
    } else {
      // Close enough: extend the previous range, never shrinking it.
      last[1] = Math.max(last[1], end)
    }
  }

  return merged
}
|
||||
|
||||
/**
 * Condenses a single line for display in a search snippet.
 *
 * Lines no longer than SEARCH_SNIPPET_MAX_LINE_LENGTH are returned unchanged.
 * Longer lines are reduced to the text surrounding each regex match
 * (SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS characters on each side), with nearby
 * fragments merged, at most SEARCH_SNIPPET_MAX_LINE_FRAGMENTS fragments kept,
 * and "..." marking every omission.
 *
 * @param line A single line of text (no line breaks expected).
 * @param regexes Patterns whose matches should stay visible. They are not
 *   modified: scanning uses a private copy, so any flags are accepted.
 * @returns The original line, or a condensed version of it.
 */
const buildLineSnippet = (line: string, regexes: RegExp[]) => {
  if (line.length <= SEARCH_SNIPPET_MAX_LINE_LENGTH) {
    return line
  }

  const matchRanges: Array<[number, number]> = []
  for (const regex of regexes) {
    // Scan with a global copy. An exec() loop over a regex lacking the "g"
    // flag would return the first match forever, and scanning the caller's
    // own object would clobber its lastIndex.
    const scanner = new RegExp(regex, regex.global ? regex.flags : `${regex.flags}g`)
    for (const match of line.matchAll(scanner)) {
      const start = match.index ?? 0
      matchRanges.push([start, start + match[0].length])
    }
  }

  if (matchRanges.length === 0) {
    // Nothing to centre on: keep the head of the line.
    return `${line.slice(0, SEARCH_SNIPPET_MAX_LINE_LENGTH)}...`
  }

  const expandedRanges: Array<[number, number]> = matchRanges.map(([start, end]) => [
    Math.max(0, start - SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS),
    Math.min(line.length, end + SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS)
  ])
  const mergedRanges = mergeRanges(expandedRanges)
  const limitedRanges = mergedRanges.slice(0, SEARCH_SNIPPET_MAX_LINE_FRAGMENTS)

  let result = limitedRanges.map(([start, end]) => line.slice(start, end)).join(' ... ')

  // The first fragment does not start at the beginning of the line: add a leading ellipsis.
  if (limitedRanges[0][0] > 0) {
    result = `...${result}`
  }
  // The last fragment does not reach the end of the line: add a trailing ellipsis.
  if (limitedRanges[limitedRanges.length - 1][1] < line.length) {
    result = `${result}...`
  }
  // Some match fragments were dropped by the fragment limit: add a further ellipsis as a hint.
  if (mergedRanges.length > SEARCH_SNIPPET_MAX_LINE_FRAGMENTS) {
    result = `${result}...`
  }
  // Still over the length limit: hard-truncate and mark the cut.
  if (result.length > SEARCH_SNIPPET_MAX_LINE_LENGTH) {
    result = `${result.slice(0, SEARCH_SNIPPET_MAX_LINE_LENGTH)}...`
  }

  return result
}
|
||||
|
||||
/**
 * Builds a multi-line preview of `text` focused on the lines that contain any
 * of `terms`.
 *
 * The text is first stripped of Markdown/HTML markup and its line endings are
 * normalised. Each matching line is shown together with
 * SEARCH_SNIPPET_CONTEXT_LINES lines of context on either side; overlapping or
 * adjacent groups are merged, skipped regions are marked with a "..." line,
 * and at most SEARCH_SNIPPET_MAX_LINES lines are collected before a closing
 * "..." is added. When no term matches, the first SEARCH_SNIPPET_MAX_LINES
 * lines are used instead.
 *
 * @param text Raw message content (Markdown).
 * @param terms Search terms, matched literally and case-insensitively; empty
 *   strings and duplicates are ignored.
 * @returns The snippet lines joined with "\n", or "" for blank input.
 */
const buildSearchSnippet = (text: string, terms: string[]) => {
  const normalized = normalizeText(stripMarkdownFormatting(text))
  // split() always yields at least one element, so emptiness has to be tested
  // on the text itself rather than on the resulting array.
  if (normalized.trim().length === 0) {
    return ''
  }
  const lines = normalized.split('\n')

  const uniqueTerms = Array.from(new Set(terms.filter((term) => term.length > 0)))
  const regexes = uniqueTerms.map((term) => new RegExp(escapeRegex(term), 'gi'))

  const matchedLineIndexes: number[] = []
  if (regexes.length > 0) {
    for (let i = 0; i < lines.length; i += 1) {
      const line = lines[i]
      const isMatch = regexes.some((regex) => {
        // Global regexes remember their position between test() calls.
        regex.lastIndex = 0
        return regex.test(line)
      })
      if (isMatch) {
        matchedLineIndexes.push(i)
      }
    }
  }

  // Inclusive [firstLine, lastLine] windows around each hit, or the head of
  // the text when nothing matched.
  const ranges: Array<[number, number]> =
    matchedLineIndexes.length > 0
      ? mergeRanges(
          matchedLineIndexes.map((index) => [
            Math.max(0, index - SEARCH_SNIPPET_CONTEXT_LINES),
            Math.min(lines.length - 1, index + SEARCH_SNIPPET_CONTEXT_LINES)
          ])
        )
      : [[0, Math.min(lines.length - 1, SEARCH_SNIPPET_MAX_LINES - 1)]]

  const outputLines: string[] = []
  let truncated = false

  // The first window does not start at the first line: mark the skipped head.
  if (ranges[0][0] > 0) {
    outputLines.push('...')
  }

  for (const [start, end] of ranges) {
    if (outputLines.length >= SEARCH_SNIPPET_MAX_LINES) {
      truncated = true
      break
    }
    // Separate this window from the previous one unless a marker is already there.
    if (outputLines.length > 0 && outputLines[outputLines.length - 1] !== '...') {
      outputLines.push('...')
    }
    for (let i = start; i <= end; i += 1) {
      if (outputLines.length >= SEARCH_SNIPPET_MAX_LINES) {
        truncated = true
        break
      }
      outputLines.push(buildLineSnippet(lines[i], regexes))
    }
    if (truncated) {
      break
    }
  }

  // Mark omitted trailing content, avoiding two consecutive marker lines.
  const endsBeforeLastLine = ranges[ranges.length - 1][1] < lines.length - 1
  if ((truncated || endsBeforeLastLine) && outputLines[outputLines.length - 1] !== '...') {
    outputLines.push('...')
  }

  return outputLines.join('\n')
}
|
||||
|
||||
const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...props }) => {
|
||||
const { handleScroll, containerRef } = useScrollPosition('SearchResults')
|
||||
const observerRef = useRef<MutationObserver | null>(null)
|
||||
@ -44,17 +197,6 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
||||
const [searchStats, setSearchStats] = useState({ count: 0, time: 0 })
|
||||
const [isLoading, setIsLoading] = useState(false)
|
||||
|
||||
const removeMarkdown = (text: string) => {
|
||||
return text
|
||||
.replace(/\*\*(.*?)\*\*/g, '$1')
|
||||
.replace(/\*(.*?)\*/g, '$1')
|
||||
.replace(/\[(.*?)\]\((.*?)\)/g, '$1')
|
||||
.replace(/```[\s\S]*?```/g, '')
|
||||
.replace(/`(.*?)`/g, '$1')
|
||||
.replace(/#+\s/g, '')
|
||||
.replace(/<[^>]*>/g, '')
|
||||
}
|
||||
|
||||
const onSearch = useCallback(async () => {
|
||||
setSearchResults([])
|
||||
setIsLoading(true)
|
||||
@ -69,13 +211,16 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
||||
const startTime = performance.now()
|
||||
const newSearchTerms = keywords
|
||||
.toLowerCase()
|
||||
.split(' ')
|
||||
.split(/\s+/)
|
||||
.filter((term) => term.length > 0)
|
||||
const searchRegexes = newSearchTerms.map((term) => new RegExp(term, 'i'))
|
||||
const searchRegexes = newSearchTerms.map((term) => new RegExp(escapeRegex(term), 'i'))
|
||||
|
||||
const blocks = (await db.message_blocks.toArray())
|
||||
.filter((block) => block.type === MessageBlockType.MAIN_TEXT)
|
||||
.filter((block) => searchRegexes.some((regex) => regex.test(block.content)))
|
||||
.filter((block) => {
|
||||
const searchableContent = stripMarkdownFormatting(block.content)
|
||||
return searchRegexes.some((regex) => regex.test(searchableContent))
|
||||
})
|
||||
|
||||
const messages = topics?.flatMap((topic) => topic.messages)
|
||||
|
||||
@ -85,7 +230,12 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
||||
if (message) {
|
||||
const topic = storeTopicsMap.get(message.topicId)
|
||||
if (topic) {
|
||||
return { message, topic, content: block.content }
|
||||
return {
|
||||
message,
|
||||
topic,
|
||||
content: block.content,
|
||||
snippet: buildSearchSnippet(block.content, newSearchTerms)
|
||||
}
|
||||
}
|
||||
}
|
||||
return null
|
||||
@ -103,15 +253,17 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
||||
}, [keywords, storeTopicsMap, topics])
|
||||
|
||||
const highlightText = (text: string) => {
|
||||
let highlightedText = removeMarkdown(text)
|
||||
searchTerms.forEach((term) => {
|
||||
try {
|
||||
const regex = new RegExp(term, 'gi')
|
||||
highlightedText = highlightedText.replace(regex, (match) => `<mark>${match}</mark>`)
|
||||
} catch (error) {
|
||||
//
|
||||
}
|
||||
})
|
||||
const uniqueTerms = Array.from(new Set(searchTerms.filter((term) => term.length > 0)))
|
||||
if (uniqueTerms.length === 0) {
|
||||
return <span dangerouslySetInnerHTML={{ __html: text }} />
|
||||
}
|
||||
|
||||
const pattern = uniqueTerms
|
||||
.sort((a, b) => b.length - a.length)
|
||||
.map((term) => escapeRegex(term))
|
||||
.join('|')
|
||||
const regex = new RegExp(pattern, 'gi')
|
||||
const highlightedText = text.replace(regex, (match) => `<mark>${match}</mark>`)
|
||||
return <span dangerouslySetInnerHTML={{ __html: highlightedText }} />
|
||||
}
|
||||
|
||||
@ -150,7 +302,7 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
||||
hideOnSinglePage: true
|
||||
}}
|
||||
style={{ opacity: isLoading ? 0 : 1 }}
|
||||
renderItem={({ message, topic, content }) => (
|
||||
renderItem={({ message, topic, snippet }) => (
|
||||
<List.Item>
|
||||
<Title
|
||||
level={5}
|
||||
@ -159,7 +311,7 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
||||
{topic.name}
|
||||
</Title>
|
||||
<div style={{ cursor: 'pointer' }} onClick={() => onMessageClick(message)}>
|
||||
<Text>{highlightText(content)}</Text>
|
||||
<Text style={{ whiteSpace: 'pre-line' }}>{highlightText(snippet)}</Text>
|
||||
</div>
|
||||
<SearchResultTime>
|
||||
<Text type="secondary">{new Date(message.createdAt).toLocaleString()}</Text>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user