From 9a435b8abbe141c1eed36d396abc094a8e926203 Mon Sep 17 00:00:00 2001 From: atoz03 <31232741+atoz03@users.noreply.github.com> Date: Sun, 21 Dec 2025 17:32:32 +0800 Subject: [PATCH] feat(history-search): show keyword-adjacent snippets and align matching text (#12034) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(history-search): show keyword-adjacent snippets and align matching text - Limit search results to title plus nearby lines with ellipses - Merge multi-keyword hit ranges and truncate long lines - Match against sanitized visible text to avoid URL/image false hits * fix(history): 针对review 的改进:避免搜索高亮嵌套并优化命名与省略逻辑注释 --- .../history/components/SearchResults.tsx | 204 +++++++++++++++--- 1 file changed, 178 insertions(+), 26 deletions(-) diff --git a/src/renderer/src/pages/history/components/SearchResults.tsx b/src/renderer/src/pages/history/components/SearchResults.tsx index e0ffba8b2a..7189e78e5a 100644 --- a/src/renderer/src/pages/history/components/SearchResults.tsx +++ b/src/renderer/src/pages/history/components/SearchResults.tsx @@ -17,6 +17,7 @@ type SearchResult = { message: Message topic: Topic content: string + snippet: string } interface Props extends React.HTMLAttributes { @@ -25,6 +26,158 @@ interface Props extends React.HTMLAttributes { onTopicClick: (topic: Topic) => void } +const SEARCH_SNIPPET_CONTEXT_LINES = 1 +const SEARCH_SNIPPET_MAX_LINES = 12 +const SEARCH_SNIPPET_MAX_LINE_LENGTH = 160 +const SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS = 40 +const SEARCH_SNIPPET_MAX_LINE_FRAGMENTS = 3 + +const stripMarkdownFormatting = (text: string) => { + return text + .replace(/```(?:[^\n]*\n)?([\s\S]*?)```/g, '$1') + .replace(/!\[(.*?)\]\((.*?)\)/g, '$1') + .replace(/\[(.*?)\]\((.*?)\)/g, '$1') + .replace(/\*\*(.*?)\*\*/g, '$1') + .replace(/\*(.*?)\*/g, '$1') + .replace(/`(.*?)`/g, '$1') + .replace(/#+\s/g, '') + .replace(/<[^>]*>/g, '') +} + +const normalizeText = (text: string) => text.replace(/\r\n/g, '\n').replace(/\r/g, '\n') + +const escapeRegex = (text: string) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + +const mergeRanges = (ranges: Array<[number, number]>) => { + const sorted = ranges.slice().sort((a, b) => a[0] - b[0]) + const merged: Array<[number, number]> = [] + for (const range of sorted) { + const last = merged[merged.length - 1] + if (!last || range[0] > last[1] + 1) { + merged.push([range[0], range[1]]) + continue + } + last[1] = Math.max(last[1], range[1]) + } + return merged +} + +const buildLineSnippet = (line: string, regexes: RegExp[]) => { + if (line.length <= SEARCH_SNIPPET_MAX_LINE_LENGTH) { + return line + } + + const matchRanges: Array<[number, number]> = [] + for (const regex of regexes) { + regex.lastIndex = 0 + let match: RegExpExecArray | null + while ((match = regex.exec(line)) !== null) { + matchRanges.push([match.index, match.index + match[0].length]) + if (match[0].length === 0) { + regex.lastIndex += 1 + } + } + } + + if (matchRanges.length === 0) { + return `${line.slice(0, SEARCH_SNIPPET_MAX_LINE_LENGTH)}...` + } + + const expandedRanges: Array<[number, number]> = matchRanges.map(([start, end]) => [ + Math.max(0, start - SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS), + Math.min(line.length, end + SEARCH_SNIPPET_LINE_FRAGMENT_RADIUS) + ]) + const mergedRanges = mergeRanges(expandedRanges) + const limitedRanges = mergedRanges.slice(0, SEARCH_SNIPPET_MAX_LINE_FRAGMENTS) + + let result = limitedRanges.map(([start, end]) => line.slice(start, end)).join(' ... ') + // 片段未从行首开始,补前置省略号。 + if (limitedRanges[0][0] > 0) { + result = `...${result}` + } + // 片段未覆盖到行尾,补后置省略号。 + if (limitedRanges[limitedRanges.length - 1][1] < line.length) { + result = `${result}...` + } + // 还有未展示的匹配片段,提示省略。 + if (mergedRanges.length > SEARCH_SNIPPET_MAX_LINE_FRAGMENTS) { + result = `${result}...` + } + // 最终长度超限,强制截断并补省略号。 + if (result.length > SEARCH_SNIPPET_MAX_LINE_LENGTH) { + result = `${result.slice(0, SEARCH_SNIPPET_MAX_LINE_LENGTH)}...` + } + return result +} + +const buildSearchSnippet = (text: string, terms: string[]) => { + const normalized = normalizeText(stripMarkdownFormatting(text)) + const lines = normalized.split('\n') + if (lines.length === 0) { + return '' + } + + const nonEmptyTerms = terms.filter((term) => term.length > 0) + const regexes = nonEmptyTerms.map((term) => new RegExp(escapeRegex(term), 'gi')) + const matchedLineIndexes: number[] = [] + + if (regexes.length > 0) { + for (let i = 0; i < lines.length; i += 1) { + const line = lines[i] + const isMatch = regexes.some((regex) => { + regex.lastIndex = 0 + return regex.test(line) + }) + if (isMatch) { + matchedLineIndexes.push(i) + } + } + } + + const ranges: Array<[number, number]> = + matchedLineIndexes.length > 0 + ? mergeRanges( + matchedLineIndexes.map((index) => [ + Math.max(0, index - SEARCH_SNIPPET_CONTEXT_LINES), + Math.min(lines.length - 1, index + SEARCH_SNIPPET_CONTEXT_LINES) + ]) + ) + : [[0, Math.min(lines.length - 1, SEARCH_SNIPPET_MAX_LINES - 1)]] + + const outputLines: string[] = [] + let truncated = false + + if (ranges[0][0] > 0) { + outputLines.push('...') + } + + for (const [start, end] of ranges) { + if (outputLines.length >= SEARCH_SNIPPET_MAX_LINES) { + truncated = true + break + } + if (outputLines.length > 0 && outputLines[outputLines.length - 1] !== '...') { + outputLines.push('...') + } + for (let i = start; i <= end; i += 1) { + if (outputLines.length >= SEARCH_SNIPPET_MAX_LINES) { + truncated = true + break + } + outputLines.push(buildLineSnippet(lines[i], regexes)) + } + if (truncated) { + break + } + } + + if ((truncated || ranges[ranges.length - 1][1] < lines.length - 1) && outputLines.at(-1) !== '...') { + outputLines.push('...') + } + + return outputLines.join('\n') +} + const SearchResults: FC = ({ keywords, onMessageClick, onTopicClick, ...props }) => { const { handleScroll, containerRef } = useScrollPosition('SearchResults') const observerRef = useRef(null) @@ -44,17 +197,6 @@ const SearchResults: FC = ({ keywords, onMessageClick, onTopicClick, ...p const [searchStats, setSearchStats] = useState({ count: 0, time: 0 }) const [isLoading, setIsLoading] = useState(false) - const removeMarkdown = (text: string) => { - return text - .replace(/\*\*(.*?)\*\*/g, '$1') - .replace(/\*(.*?)\*/g, '$1') - .replace(/\[(.*?)\]\((.*?)\)/g, '$1') - .replace(/```[\s\S]*?```/g, '') - .replace(/`(.*?)`/g, '$1') - .replace(/#+\s/g, '') - .replace(/<[^>]*>/g, '') - } - const onSearch = useCallback(async () => { setSearchResults([]) setIsLoading(true) @@ -69,13 +211,16 @@ const SearchResults: FC = ({ keywords, onMessageClick, onTopicClick, ...p const startTime = performance.now() const newSearchTerms = keywords .toLowerCase() - .split(' ') + .split(/\s+/) .filter((term) => term.length > 0) - const searchRegexes = newSearchTerms.map((term) => new RegExp(term, 'i')) + const searchRegexes = newSearchTerms.map((term) => new RegExp(escapeRegex(term), 'i')) const blocks = (await db.message_blocks.toArray()) .filter((block) => block.type === MessageBlockType.MAIN_TEXT) - .filter((block) => searchRegexes.some((regex) => regex.test(block.content))) + .filter((block) => { + const searchableContent = stripMarkdownFormatting(block.content) + return searchRegexes.some((regex) => regex.test(searchableContent)) + }) const messages = topics?.flatMap((topic) => topic.messages) @@ -85,7 +230,12 @@ const SearchResults: FC = ({ keywords, onMessageClick, onTopicClick, ...p if (message) { const topic = storeTopicsMap.get(message.topicId) if (topic) { - return { message, topic, content: block.content } + return { + message, + topic, + content: block.content, + snippet: buildSearchSnippet(block.content, newSearchTerms) + } } } return null @@ -103,15 +253,17 @@ const SearchResults: FC = ({ keywords, onMessageClick, onTopicClick, ...p }, [keywords, storeTopicsMap, topics]) const highlightText = (text: string) => { - let highlightedText = removeMarkdown(text) - searchTerms.forEach((term) => { - try { - const regex = new RegExp(term, 'gi') - highlightedText = highlightedText.replace(regex, (match) => `${match}`) - } catch (error) { - // - } - }) + const uniqueTerms = Array.from(new Set(searchTerms.filter((term) => term.length > 0))) + if (uniqueTerms.length === 0) { + return + } + + const pattern = uniqueTerms + .sort((a, b) => b.length - a.length) + .map((term) => escapeRegex(term)) + .join('|') + const regex = new RegExp(pattern, 'gi') + const highlightedText = text.replace(regex, (match) => `${match}`) return } @@ -150,7 +302,7 @@ const SearchResults: FC = ({ keywords, onMessageClick, onTopicClick, ...p hideOnSinglePage: true }} style={{ opacity: isLoading ? 0 : 1 }} - renderItem={({ message, topic, content }) => ( + renderItem={({ message, topic, snippet }) => ( = ({ keywords, onMessageClick, onTopicClick, ...p {topic.name}
onMessageClick(message)}> - {highlightText(content)} + {highlightText(snippet)}
{new Date(message.createdAt).toLocaleString()}