feat(history-search): show keyword-adjacent snippets and align matching text (#12034)

* fix(history-search): show keyword-adjacent snippets and align matching text

  - Limit search results to title plus nearby lines with ellipses
  - Merge multi-keyword hit ranges and truncate long lines
  - Match against sanitized visible text to avoid URL/image false hits

* fix(history): 针对review 的改进:避免搜索高亮嵌套并优化命名与省略逻辑注释
This commit is contained in:
atoz03 2025-12-21 17:32:32 +08:00 committed by GitHub
parent c4f94848e8
commit 9a435b8abb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -17,6 +17,7 @@ type SearchResult = {
message: Message
topic: Topic
content: string
snippet: string
}
interface Props extends React.HTMLAttributes<HTMLDivElement> {
@ -25,6 +26,158 @@ interface Props extends React.HTMLAttributes<HTMLDivElement> {
onTopicClick: (topic: Topic) => void
}
// Number of lines kept before and after each matching line as context.
const SEARCH_SNIPPET_CONTEXT_LINES = 1
// Cap on the lines collected for one snippet ("..." marker lines count toward it);
// a closing "..." may still be appended once the cap has been hit.
const SEARCH_SNIPPET_MAX_LINES = 12
// Lines up to this many characters are shown whole; longer lines are reduced to
// fragments around their matches (or to this many leading characters if none match).
const SEARCH_SNIPPET_MAX_LINE_LENGTH = 160
// Characters kept on each side of a match when a long line is cut into fragments.
const SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS = 40
// Maximum number of merged match fragments shown for a single long line.
const SEARCH_SNIPPET_MAX_LINE_FRAGMENTS = 3
/**
 * Reduces Markdown/HTML source to an approximation of the text a reader sees,
 * so that searching and snippet building do not hit URLs, image paths or markup.
 *
 * This is a regex-based heuristic, not a Markdown parser: nested or unbalanced
 * markup may be only partially removed.
 *
 * @param text Raw message content.
 * @returns The content with common Markdown and HTML syntax removed.
 */
const stripMarkdownFormatting = (text: string): string => {
  return (
    text
      // Fenced code blocks: drop the fences and the optional info string, keep the code.
      .replace(/```(?:[^\n]*\n)?([\s\S]*?)```/g, '$1')
      // Images: keep the alt text, drop the URL. Must run before the link rule,
      // which would otherwise leave the leading "!" behind.
      .replace(/!\[(.*?)\]\((.*?)\)/g, '$1')
      // Links: keep the label, drop the URL.
      .replace(/\[(.*?)\]\((.*?)\)/g, '$1')
      // Bold before italic so "**x**" is not consumed as two italic markers.
      .replace(/\*\*(.*?)\*\*/g, '$1')
      .replace(/\*(.*?)\*/g, '$1')
      // Inline code: keep the code, drop the backticks.
      .replace(/`(.*?)`/g, '$1')
      // Heading markers. Anchored to the start of a line (keeping any indentation)
      // so that a "#" inside running text, e.g. "C# is great", is left intact.
      .replace(/^([ \t]*)#+\s/gm, '$1')
      // HTML tags.
      .replace(/<[^>]*>/g, '')
  )
}
/**
 * Normalizes line endings so the text can be split on "\n":
 * CRLF pairs are collapsed first, then any remaining lone CR is converted.
 */
const normalizeText = (text: string) => {
  const withoutCrLf = text.replace(/\r\n/g, '\n')
  return withoutCrLf.replace(/\r/g, '\n')
}
/**
 * Escapes regular-expression metacharacters in `text` with a backslash so the
 * result can be embedded in a RegExp pattern and match the input literally.
 */
const escapeRegex = (text: string) => {
  const specialChars = /[.*+?^${}()|[\]\\]/g
  return text.replace(specialChars, '\\$&')
}
/**
 * Merges `[start, end]` ranges that overlap or sit directly next to each other.
 *
 * The input is not modified: it is copied before sorting by start, and every
 * range in the result is a fresh tuple. A range is folded into the previous
 * one unless it starts more than one position past the previous end.
 *
 * @param ranges Ranges in any order.
 * @returns Merged ranges ordered by start.
 */
const mergeRanges = (ranges: Array<[number, number]>) => {
  const byStart = [...ranges].sort((left, right) => left[0] - right[0])

  return byStart.reduce<Array<[number, number]>>((merged, [start, end]) => {
    const previous = merged[merged.length - 1]
    if (!previous || start > previous[1] + 1) {
      // First range, or a gap of more than one position: start a new entry.
      merged.push([start, end])
    } else {
      // Overlapping or adjacent: extend the previous entry if needed.
      previous[1] = Math.max(previous[1], end)
    }
    return merged
  }, [])
}
/**
 * Shortens a single line for display in a search snippet.
 *
 * - Lines no longer than SEARCH_SNIPPET_MAX_LINE_LENGTH are returned unchanged.
 * - A long line without any match is cut to its first
 *   SEARCH_SNIPPET_MAX_LINE_LENGTH characters followed by "...".
 * - Otherwise each match is widened by SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS
 *   characters on both sides, overlapping fragments are merged, at most
 *   SEARCH_SNIPPET_MAX_LINE_FRAGMENTS fragments are kept and joined with " ... ",
 *   and "..." is added at either end where text was left out.
 *
 * @param line One line of already-sanitized text.
 * @param regexes Patterns to locate. Each regex's `lastIndex` is reset and
 *   advanced by this function. Global regexes yield all their matches; a
 *   non-global regex contributes only its first match.
 * @returns The line, or a shortened version of it.
 */
const buildLineSnippet = (line: string, regexes: RegExp[]) => {
  if (line.length <= SEARCH_SNIPPET_MAX_LINE_LENGTH) {
    return line
  }

  // Collect [start, end) offsets of every match of every pattern.
  const matchRanges: Array<[number, number]> = []
  for (const regex of regexes) {
    regex.lastIndex = 0
    let match: RegExpExecArray | null
    while ((match = regex.exec(line)) !== null) {
      matchRanges.push([match.index, match.index + match[0].length])
      // exec() only advances lastIndex for global/sticky regexes; without this
      // guard a non-global regex would return the same match forever.
      if (!regex.global && !regex.sticky) {
        break
      }
      // A zero-length match does not move lastIndex; step over it manually.
      if (match[0].length === 0) {
        regex.lastIndex += 1
      }
    }
  }

  if (matchRanges.length === 0) {
    return `${line.slice(0, SEARCH_SNIPPET_MAX_LINE_LENGTH)}...`
  }

  const expandedRanges: Array<[number, number]> = matchRanges.map(([start, end]) => [
    Math.max(0, start - SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS),
    Math.min(line.length, end + SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS)
  ])
  const mergedRanges = mergeRanges(expandedRanges)
  const limitedRanges = mergedRanges.slice(0, SEARCH_SNIPPET_MAX_LINE_FRAGMENTS)

  let result = limitedRanges.map(([start, end]) => line.slice(start, end)).join(' ... ')

  // The first fragment does not start at the beginning of the line: add a leading ellipsis.
  if (limitedRanges[0][0] > 0) {
    result = `...${result}`
  }

  // Add a single trailing ellipsis when the shown fragments stop before the end
  // of the line or when further match fragments were dropped. Both conditions
  // are checked together so the marker is never doubled into "......".
  const lastShownEnd = limitedRanges[limitedRanges.length - 1][1]
  const hasHiddenFragments = mergedRanges.length > limitedRanges.length
  if (lastShownEnd < line.length || hasHiddenFragments) {
    result = `${result}...`
  }

  // The assembled text is still too long: hard-truncate and add an ellipsis.
  if (result.length > SEARCH_SNIPPET_MAX_LINE_LENGTH) {
    result = `${result.slice(0, SEARCH_SNIPPET_MAX_LINE_LENGTH)}...`
  }
  return result
}
/**
 * Builds the multi-line preview shown for one search hit.
 *
 * The text is stripped of Markdown/HTML and split into lines. Every line that
 * contains at least one term (case-insensitive, literal match) is kept together
 * with SEARCH_SNIPPET_CONTEXT_LINES lines around it; neighbouring windows are
 * merged. When nothing matches, the snippet falls back to the first
 * SEARCH_SNIPPET_MAX_LINES lines. A "..." line marks every place where lines
 * were skipped, and collection stops once SEARCH_SNIPPET_MAX_LINES lines
 * (markers included) have been gathered.
 *
 * @param text Raw message content.
 * @param terms Search terms; empty strings are ignored.
 * @returns The snippet, with lines joined by "\n".
 */
const buildSearchSnippet = (text: string, terms: string[]) => {
  const lines = normalizeText(stripMarkdownFormatting(text)).split('\n')
  // Defensive guard; split() always yields at least one element.
  if (lines.length === 0) {
    return ''
  }
  const lastLineIndex = lines.length - 1

  const regexes = terms.filter((term) => term.length > 0).map((term) => new RegExp(escapeRegex(term), 'gi'))

  // Global regexes keep state between calls, so rewind before each test.
  const lineHasMatch = (line: string) =>
    regexes.some((regex) => {
      regex.lastIndex = 0
      return regex.test(line)
    })

  const matchedLineIndexes: number[] = []
  lines.forEach((line, index) => {
    if (lineHasMatch(line)) {
      matchedLineIndexes.push(index)
    }
  })

  // Line windows to show: context around each hit, or the head of the text when nothing matched.
  let ranges: Array<[number, number]>
  if (matchedLineIndexes.length === 0) {
    ranges = [[0, Math.min(lastLineIndex, SEARCH_SNIPPET_MAX_LINES - 1)]]
  } else {
    ranges = mergeRanges(
      matchedLineIndexes.map((index): [number, number] => [
        Math.max(0, index - SEARCH_SNIPPET_CONTEXT_LINES),
        Math.min(lastLineIndex, index + SEARCH_SNIPPET_CONTEXT_LINES)
      ])
    )
  }

  const outputLines: string[] = []
  const lastOutputLine = () => outputLines[outputLines.length - 1]
  const isFull = () => outputLines.length >= SEARCH_SNIPPET_MAX_LINES
  let truncated = false

  // Lines before the first window were skipped.
  if (ranges[0][0] > 0) {
    outputLines.push('...')
  }

  collect: for (const [start, end] of ranges) {
    if (isFull()) {
      truncated = true
      break
    }
    // Separate this window from the previous one unless a marker is already there.
    if (outputLines.length > 0 && lastOutputLine() !== '...') {
      outputLines.push('...')
    }
    for (let lineIndex = start; lineIndex <= end; lineIndex += 1) {
      if (isFull()) {
        truncated = true
        break collect
      }
      outputLines.push(buildLineSnippet(lines[lineIndex], regexes))
    }
  }

  // Close with a marker when output was cut short or lines remain after the last window.
  const stopsBeforeEnd = ranges[ranges.length - 1][1] < lastLineIndex
  if ((truncated || stopsBeforeEnd) && lastOutputLine() !== '...') {
    outputLines.push('...')
  }

  return outputLines.join('\n')
}
const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...props }) => {
const { handleScroll, containerRef } = useScrollPosition('SearchResults')
const observerRef = useRef<MutationObserver | null>(null)
@ -44,17 +197,6 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
const [searchStats, setSearchStats] = useState({ count: 0, time: 0 })
const [isLoading, setIsLoading] = useState(false)
const removeMarkdown = (text: string) => {
return text
.replace(/\*\*(.*?)\*\*/g, '$1')
.replace(/\*(.*?)\*/g, '$1')
.replace(/\[(.*?)\]\((.*?)\)/g, '$1')
.replace(/```[\s\S]*?```/g, '')
.replace(/`(.*?)`/g, '$1')
.replace(/#+\s/g, '')
.replace(/<[^>]*>/g, '')
}
const onSearch = useCallback(async () => {
setSearchResults([])
setIsLoading(true)
@ -69,13 +211,16 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
const startTime = performance.now()
const newSearchTerms = keywords
.toLowerCase()
.split(' ')
.split(/\s+/)
.filter((term) => term.length > 0)
const searchRegexes = newSearchTerms.map((term) => new RegExp(term, 'i'))
const searchRegexes = newSearchTerms.map((term) => new RegExp(escapeRegex(term), 'i'))
const blocks = (await db.message_blocks.toArray())
.filter((block) => block.type === MessageBlockType.MAIN_TEXT)
.filter((block) => searchRegexes.some((regex) => regex.test(block.content)))
.filter((block) => {
const searchableContent = stripMarkdownFormatting(block.content)
return searchRegexes.some((regex) => regex.test(searchableContent))
})
const messages = topics?.flatMap((topic) => topic.messages)
@ -85,7 +230,12 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
if (message) {
const topic = storeTopicsMap.get(message.topicId)
if (topic) {
return { message, topic, content: block.content }
return {
message,
topic,
content: block.content,
snippet: buildSearchSnippet(block.content, newSearchTerms)
}
}
}
return null
@ -103,15 +253,17 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
}, [keywords, storeTopicsMap, topics])
const highlightText = (text: string) => {
let highlightedText = removeMarkdown(text)
searchTerms.forEach((term) => {
try {
const regex = new RegExp(term, 'gi')
highlightedText = highlightedText.replace(regex, (match) => `<mark>${match}</mark>`)
} catch (error) {
//
}
})
const uniqueTerms = Array.from(new Set(searchTerms.filter((term) => term.length > 0)))
if (uniqueTerms.length === 0) {
return <span dangerouslySetInnerHTML={{ __html: text }} />
}
const pattern = uniqueTerms
.sort((a, b) => b.length - a.length)
.map((term) => escapeRegex(term))
.join('|')
const regex = new RegExp(pattern, 'gi')
const highlightedText = text.replace(regex, (match) => `<mark>${match}</mark>`)
return <span dangerouslySetInnerHTML={{ __html: highlightedText }} />
}
@ -150,7 +302,7 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
hideOnSinglePage: true
}}
style={{ opacity: isLoading ? 0 : 1 }}
renderItem={({ message, topic, content }) => (
renderItem={({ message, topic, snippet }) => (
<List.Item>
<Title
level={5}
@ -159,7 +311,7 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
{topic.name}
</Title>
<div style={{ cursor: 'pointer' }} onClick={() => onMessageClick(message)}>
<Text>{highlightText(content)}</Text>
<Text style={{ whiteSpace: 'pre-line' }}>{highlightText(snippet)}</Text>
</div>
<SearchResultTime>
<Text type="secondary">{new Date(message.createdAt).toLocaleString()}</Text>