mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-08 14:29:15 +08:00
feat(history-search): show keyword-adjacent snippets and align matching text (#12034)
* fix(history-search): show keyword-adjacent snippets and align matching text - Limit search results to title plus nearby lines with ellipses - Merge multi-keyword hit ranges and truncate long lines - Match against sanitized visible text to avoid URL/image false hits * fix(history): 针对review 的改进:避免搜索高亮嵌套并优化命名与省略逻辑注释
This commit is contained in:
parent
c4f94848e8
commit
9a435b8abb
@ -17,6 +17,7 @@ type SearchResult = {
|
|||||||
message: Message
|
message: Message
|
||||||
topic: Topic
|
topic: Topic
|
||||||
content: string
|
content: string
|
||||||
|
snippet: string
|
||||||
}
|
}
|
||||||
|
|
||||||
interface Props extends React.HTMLAttributes<HTMLDivElement> {
|
interface Props extends React.HTMLAttributes<HTMLDivElement> {
|
||||||
@ -25,6 +26,158 @@ interface Props extends React.HTMLAttributes<HTMLDivElement> {
|
|||||||
onTopicClick: (topic: Topic) => void
|
onTopicClick: (topic: Topic) => void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Number of neighbouring lines kept before and after every line that contains a match.
const SEARCH_SNIPPET_CONTEXT_LINES = 1
|
||||||
|
// Cap on the number of entries in a snippet; '...' separator lines count towards it.
const SEARCH_SNIPPET_MAX_LINES = 12
|
||||||
|
// Lines at most this long are shown verbatim; longer lines are condensed around their matches.
const SEARCH_SNIPPET_MAX_LINE_LENGTH = 160
|
||||||
|
// Characters of context kept on each side of a match when a long line is condensed.
const SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS = 40
|
||||||
|
// Maximum number of (merged) match fragments shown for a single condensed line.
const SEARCH_SNIPPET_MAX_LINE_FRAGMENTS = 3
|
||||||
|
|
||||||
|
/**
 * Reduces Markdown/HTML source to an approximation of the text a reader sees,
 * so that searching and snippet building do not hit URLs, image targets or tags.
 *
 * The replacements run in a fixed order: fenced code first (so its content is
 * kept but its fences are not mistaken for inline code), images before links
 * (an image is a link prefixed with `!`), bold before italic (`**` before `*`).
 *
 * @param text Raw message content, possibly containing Markdown and HTML.
 * @returns The text with formatting markers removed; line breaks are preserved.
 */
const stripMarkdownFormatting = (text: string): string => {
  return (
    text
      // Fenced code block: drop the fences and the optional info line, keep the code.
      .replace(/```(?:[^\n]*\n)?([\s\S]*?)```/g, '$1')
      // Image: keep only the alt text.
      .replace(/!\[(.*?)\]\((.*?)\)/g, '$1')
      // Link: keep only the label.
      .replace(/\[(.*?)\]\((.*?)\)/g, '$1')
      // Bold, then italic, then inline code: keep the inner text.
      .replace(/\*\*(.*?)\*\*/g, '$1')
      .replace(/\*(.*?)\*/g, '$1')
      .replace(/`(.*?)`/g, '$1')
      // ATX heading markers. Anchored to the start of a line (optionally after
      // indentation or blockquote markers) so that a '#' inside running text,
      // e.g. "C# is", is left untouched. [ \t]+ is used instead of \s so the
      // match can never swallow a line break.
      .replace(/^([ \t>]*)#{1,6}[ \t]+/gm, '$1')
      // HTML tags.
      .replace(/<[^>]*>/g, '')
  )
}
|
||||||
|
|
||||||
|
/**
 * Converts every line ending to a single `\n`.
 *
 * Windows pairs (`\r\n`) are collapsed first so that the second pass only
 * sees lone carriage returns and does not double the line breaks.
 *
 * @param text Text with arbitrary line endings.
 * @returns The same text using `\n` exclusively.
 */
const normalizeText = (text: string) => {
  const withoutWindowsBreaks = text.replace(/\r\n/g, '\n')
  const withoutLoneReturns = withoutWindowsBreaks.replace(/\r/g, '\n')
  return withoutLoneReturns
}
|
||||||
|
|
||||||
|
/**
 * Escapes every regular-expression metacharacter in `text` so the result can
 * be embedded in a `RegExp` source and match the input literally.
 *
 * @param text Arbitrary user-supplied text.
 * @returns `text` with each metacharacter prefixed by a backslash.
 */
const escapeRegex = (text: string) => {
  const metacharacters = /[.*+?^${}()|[\]\\]/g
  return text.replace(metacharacters, (metacharacter) => `\\${metacharacter}`)
}
|
||||||
|
|
||||||
|
/**
 * Sorts ranges by their start and fuses any that overlap or touch.
 *
 * Two ranges are fused when the later one starts no further than one past the
 * end of the earlier one (so `[0, 2]` and `[3, 4]` become `[0, 4]`).
 * The input array and its tuples are never modified.
 *
 * @param ranges `[start, end]` pairs in any order.
 * @returns A new, start-ordered list of non-adjacent ranges.
 */
const mergeRanges = (ranges: Array<[number, number]>) => {
  const ordered = [...ranges].sort((left, right) => left[0] - right[0])

  return ordered.reduce<Array<[number, number]>>((groups, [start, end]) => {
    const tail = groups[groups.length - 1]
    const startsNewGroup = tail === undefined || start > tail[1] + 1

    if (startsNewGroup) {
      // Push a fresh tuple so later extension never writes into the caller's data.
      groups.push([start, end])
    } else {
      tail[1] = Math.max(tail[1], end)
    }
    return groups
  }, [])
}
|
||||||
|
|
||||||
|
/**
 * Produces the display form of one line of a search snippet.
 *
 * Lines no longer than SEARCH_SNIPPET_MAX_LINE_LENGTH are returned unchanged.
 * Longer lines are condensed to the text surrounding each match: every match
 * is widened by SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS characters on both sides,
 * overlapping windows are merged, and at most SEARCH_SNIPPET_MAX_LINE_FRAGMENTS
 * windows are joined with ' ... '.
 *
 * @param line    A single line of already-sanitized text.
 * @param regexes Search patterns; each one is rewound (`lastIndex = 0`) and
 *                then stepped with `exec`, so they are expected to carry the
 *                global flag.
 * @returns The original line, or a condensed version with '...' markers.
 */
const buildLineSnippet = (line: string, regexes: RegExp[]) => {
  const lineLength = line.length
  if (lineLength <= SEARCH_SNIPPET_MAX_LINE_LENGTH) {
    return line
  }

  // Gather the [start, end) offsets of every hit of every pattern.
  const hits: Array<[number, number]> = []
  regexes.forEach((pattern) => {
    pattern.lastIndex = 0
    for (let found = pattern.exec(line); found !== null; found = pattern.exec(line)) {
      const hitLength = found[0].length
      hits.push([found.index, found.index + hitLength])
      // An empty match does not advance lastIndex by itself; step manually to avoid looping forever.
      if (hitLength === 0) {
        pattern.lastIndex += 1
      }
    }
  })

  // Long line without any hit (e.g. a context line): show its beginning only.
  if (hits.length === 0) {
    return `${line.slice(0, SEARCH_SNIPPET_MAX_LINE_LENGTH)}...`
  }

  const windows = mergeRanges(
    hits.map(([from, to]): [number, number] => [
      Math.max(0, from - SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS),
      Math.min(lineLength, to + SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS)
    ])
  )
  const shownWindows = windows.slice(0, SEARCH_SNIPPET_MAX_LINE_FRAGMENTS)
  const firstWindow = shownWindows[0]
  const lastWindow = shownWindows[shownWindows.length - 1]

  // The first fragment does not start at the beginning of the line: leading ellipsis.
  const leadingMark = firstWindow[0] > 0 ? '...' : ''
  // The last fragment does not reach the end of the line: trailing ellipsis.
  const trailingMark = lastWindow[1] < lineLength ? '...' : ''
  // Some match fragments were left out: add a further ellipsis as a hint.
  const omittedMark = windows.length > SEARCH_SNIPPET_MAX_LINE_FRAGMENTS ? '...' : ''

  const fragments = shownWindows.map(([from, to]) => line.slice(from, to)).join(' ... ')
  const assembled = `${leadingMark}${fragments}${trailingMark}${omittedMark}`

  // Still over the limit after condensing: hard-cut and mark the cut.
  return assembled.length > SEARCH_SNIPPET_MAX_LINE_LENGTH
    ? `${assembled.slice(0, SEARCH_SNIPPET_MAX_LINE_LENGTH)}...`
    : assembled
}
|
||||||
|
|
||||||
|
/**
 * Builds the multi-line preview shown for one search hit.
 *
 * The text is stripped of Markdown/HTML and split into lines. Every line that
 * matches at least one term is kept together with
 * SEARCH_SNIPPET_CONTEXT_LINES lines of context on either side; neighbouring
 * groups are merged and separated groups are joined by a '...' line. When no
 * line matches (or no usable term is given) the first
 * SEARCH_SNIPPET_MAX_LINES lines are used instead. The result never holds more
 * than SEARCH_SNIPPET_MAX_LINES entries before the closing '...' is appended.
 *
 * @param text  Raw message content.
 * @param terms Search terms; empty strings are ignored, matching is case-insensitive.
 * @returns The snippet, lines joined with '\n'.
 */
const buildSearchSnippet = (text: string, terms: string[]) => {
  const ELLIPSIS = '...'

  const lines = normalizeText(stripMarkdownFormatting(text)).split('\n')
  if (lines.length === 0) {
    return ''
  }
  const lastLineIndex = lines.length - 1

  const regexes = terms
    .filter((term) => term.length > 0)
    .map((term) => new RegExp(escapeRegex(term), 'gi'))

  // Global regexes remember their position, so rewind before every test.
  const containsAnyTerm = (candidate: string) =>
    regexes.some((regex) => {
      regex.lastIndex = 0
      return regex.test(candidate)
    })

  const matchedLineIndexes = lines.flatMap((candidate, index) => (containsAnyTerm(candidate) ? [index] : []))

  // Line ranges to show: context windows around the hits, or the head of the text when nothing matched.
  const ranges: Array<[number, number]> =
    matchedLineIndexes.length === 0
      ? [[0, Math.min(lastLineIndex, SEARCH_SNIPPET_MAX_LINES - 1)]]
      : mergeRanges(
          matchedLineIndexes.map((index): [number, number] => [
            Math.max(0, index - SEARCH_SNIPPET_CONTEXT_LINES),
            Math.min(lastLineIndex, index + SEARCH_SNIPPET_CONTEXT_LINES)
          ])
        )

  // Text was skipped before the first range: open with an ellipsis line.
  const outputLines: string[] = ranges[0][0] > 0 ? [ELLIPSIS] : []
  const isFull = () => outputLines.length >= SEARCH_SNIPPET_MAX_LINES
  const lastOutput = () => outputLines[outputLines.length - 1]
  let truncated = false

  for (let rangeIndex = 0; rangeIndex < ranges.length && !truncated; rangeIndex += 1) {
    if (isFull()) {
      truncated = true
      continue
    }

    // Separate this group from the previous one unless a separator is already in place.
    if (outputLines.length > 0 && lastOutput() !== ELLIPSIS) {
      outputLines.push(ELLIPSIS)
    }

    const [start, end] = ranges[rangeIndex]
    for (let lineIndex = start; lineIndex <= end && !truncated; lineIndex += 1) {
      if (isFull()) {
        truncated = true
      } else {
        outputLines.push(buildLineSnippet(lines[lineIndex], regexes))
      }
    }
  }

  // Close with an ellipsis line when output was cut short or text remains after the last range.
  const stopsBeforeEnd = ranges[ranges.length - 1][1] < lastLineIndex
  if ((truncated || stopsBeforeEnd) && lastOutput() !== ELLIPSIS) {
    outputLines.push(ELLIPSIS)
  }

  return outputLines.join('\n')
}
|
||||||
|
|
||||||
const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...props }) => {
|
const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...props }) => {
|
||||||
const { handleScroll, containerRef } = useScrollPosition('SearchResults')
|
const { handleScroll, containerRef } = useScrollPosition('SearchResults')
|
||||||
const observerRef = useRef<MutationObserver | null>(null)
|
const observerRef = useRef<MutationObserver | null>(null)
|
||||||
@ -44,17 +197,6 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
|||||||
const [searchStats, setSearchStats] = useState({ count: 0, time: 0 })
|
const [searchStats, setSearchStats] = useState({ count: 0, time: 0 })
|
||||||
const [isLoading, setIsLoading] = useState(false)
|
const [isLoading, setIsLoading] = useState(false)
|
||||||
|
|
||||||
const removeMarkdown = (text: string) => {
|
|
||||||
return text
|
|
||||||
.replace(/\*\*(.*?)\*\*/g, '$1')
|
|
||||||
.replace(/\*(.*?)\*/g, '$1')
|
|
||||||
.replace(/\[(.*?)\]\((.*?)\)/g, '$1')
|
|
||||||
.replace(/```[\s\S]*?```/g, '')
|
|
||||||
.replace(/`(.*?)`/g, '$1')
|
|
||||||
.replace(/#+\s/g, '')
|
|
||||||
.replace(/<[^>]*>/g, '')
|
|
||||||
}
|
|
||||||
|
|
||||||
const onSearch = useCallback(async () => {
|
const onSearch = useCallback(async () => {
|
||||||
setSearchResults([])
|
setSearchResults([])
|
||||||
setIsLoading(true)
|
setIsLoading(true)
|
||||||
@ -69,13 +211,16 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
|||||||
const startTime = performance.now()
|
const startTime = performance.now()
|
||||||
const newSearchTerms = keywords
|
const newSearchTerms = keywords
|
||||||
.toLowerCase()
|
.toLowerCase()
|
||||||
.split(' ')
|
.split(/\s+/)
|
||||||
.filter((term) => term.length > 0)
|
.filter((term) => term.length > 0)
|
||||||
const searchRegexes = newSearchTerms.map((term) => new RegExp(term, 'i'))
|
const searchRegexes = newSearchTerms.map((term) => new RegExp(escapeRegex(term), 'i'))
|
||||||
|
|
||||||
const blocks = (await db.message_blocks.toArray())
|
const blocks = (await db.message_blocks.toArray())
|
||||||
.filter((block) => block.type === MessageBlockType.MAIN_TEXT)
|
.filter((block) => block.type === MessageBlockType.MAIN_TEXT)
|
||||||
.filter((block) => searchRegexes.some((regex) => regex.test(block.content)))
|
.filter((block) => {
|
||||||
|
const searchableContent = stripMarkdownFormatting(block.content)
|
||||||
|
return searchRegexes.some((regex) => regex.test(searchableContent))
|
||||||
|
})
|
||||||
|
|
||||||
const messages = topics?.flatMap((topic) => topic.messages)
|
const messages = topics?.flatMap((topic) => topic.messages)
|
||||||
|
|
||||||
@ -85,7 +230,12 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
|||||||
if (message) {
|
if (message) {
|
||||||
const topic = storeTopicsMap.get(message.topicId)
|
const topic = storeTopicsMap.get(message.topicId)
|
||||||
if (topic) {
|
if (topic) {
|
||||||
return { message, topic, content: block.content }
|
return {
|
||||||
|
message,
|
||||||
|
topic,
|
||||||
|
content: block.content,
|
||||||
|
snippet: buildSearchSnippet(block.content, newSearchTerms)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null
|
return null
|
||||||
@ -103,15 +253,17 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
|||||||
}, [keywords, storeTopicsMap, topics])
|
}, [keywords, storeTopicsMap, topics])
|
||||||
|
|
||||||
const highlightText = (text: string) => {
|
const highlightText = (text: string) => {
|
||||||
let highlightedText = removeMarkdown(text)
|
const uniqueTerms = Array.from(new Set(searchTerms.filter((term) => term.length > 0)))
|
||||||
searchTerms.forEach((term) => {
|
if (uniqueTerms.length === 0) {
|
||||||
try {
|
return <span dangerouslySetInnerHTML={{ __html: text }} />
|
||||||
const regex = new RegExp(term, 'gi')
|
}
|
||||||
highlightedText = highlightedText.replace(regex, (match) => `<mark>${match}</mark>`)
|
|
||||||
} catch (error) {
|
const pattern = uniqueTerms
|
||||||
//
|
.sort((a, b) => b.length - a.length)
|
||||||
}
|
.map((term) => escapeRegex(term))
|
||||||
})
|
.join('|')
|
||||||
|
const regex = new RegExp(pattern, 'gi')
|
||||||
|
const highlightedText = text.replace(regex, (match) => `<mark>${match}</mark>`)
|
||||||
return <span dangerouslySetInnerHTML={{ __html: highlightedText }} />
|
return <span dangerouslySetInnerHTML={{ __html: highlightedText }} />
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -150,7 +302,7 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
|||||||
hideOnSinglePage: true
|
hideOnSinglePage: true
|
||||||
}}
|
}}
|
||||||
style={{ opacity: isLoading ? 0 : 1 }}
|
style={{ opacity: isLoading ? 0 : 1 }}
|
||||||
renderItem={({ message, topic, content }) => (
|
renderItem={({ message, topic, snippet }) => (
|
||||||
<List.Item>
|
<List.Item>
|
||||||
<Title
|
<Title
|
||||||
level={5}
|
level={5}
|
||||||
@ -159,7 +311,7 @@ const SearchResults: FC<Props> = ({ keywords, onMessageClick, onTopicClick, ...p
|
|||||||
{topic.name}
|
{topic.name}
|
||||||
</Title>
|
</Title>
|
||||||
<div style={{ cursor: 'pointer' }} onClick={() => onMessageClick(message)}>
|
<div style={{ cursor: 'pointer' }} onClick={() => onMessageClick(message)}>
|
||||||
<Text>{highlightText(content)}</Text>
|
<Text style={{ whiteSpace: 'pre-line' }}>{highlightText(snippet)}</Text>
|
||||||
</div>
|
</div>
|
||||||
<SearchResultTime>
|
<SearchResultTime>
|
||||||
<Text type="secondary">{new Date(message.createdAt).toLocaleString()}</Text>
|
<Text type="secondary">{new Date(message.createdAt).toLocaleString()}</Text>
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user