mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-01 01:30:51 +08:00
Add custom markdown tokenizer with logging for YAML front matter
- Implement a markdown tokenizer with start() and tokenize() methods for parsing YAML front matter
- Add comprehensive logging throughout the tokenization, parsing, and serialization processes
- Improve markdown serialization to ensure proper --- delimiter handling
This commit is contained in:
parent
a66c0860b2
commit
54503c0e62
@ -1,8 +1,11 @@
|
||||
import { loggerService } from '@logger'
|
||||
import { mergeAttributes, Node } from '@tiptap/core'
|
||||
import { ReactNodeViewRenderer } from '@tiptap/react'
|
||||
|
||||
import YamlFrontMatterNodeView from '../components/YamlFrontMatterNodeView'
|
||||
|
||||
const logger = loggerService.withContext('YamlFrontMatterExtension')
|
||||
|
||||
declare module '@tiptap/core' {
|
||||
interface Commands<ReturnType> {
|
||||
yamlFrontMatter: {
|
||||
@ -17,8 +20,113 @@ export const YamlFrontMatter = Node.create({
|
||||
atom: true,
|
||||
draggable: false,
|
||||
|
||||
// Markdown token name for custom parsing
|
||||
markdownTokenName: 'yamlFrontMatter',
|
||||
// Custom tokenizer for YAML front matter
|
||||
markdownTokenizer: {
|
||||
name: 'yamlFrontMatter',
|
||||
level: 'block',
|
||||
// Optimization: check if content starts with ---
|
||||
/**
 * Report the offset at which a front matter block may begin in `src`.
 *
 * @param src - Remaining markdown source handed over by the lexer.
 * @returns `0` when `src` begins with a `---` line, `-1` otherwise.
 */
start(src: string) {
  // Evaluate the prefix test once; it feeds both the log entry and the result.
  const opensWithDelimiter = src.startsWith('---\n')

  logger.info('🔍 Tokenizer start() called', {
    srcLength: src.length,
    srcPrefix: src.substring(0, 60).replace(/\n/g, '\\n'),
    startsWithDashes: opensWithDelimiter
  })

  const result = opensWithDelimiter ? 0 : -1
  logger.info('✅ Tokenizer start() result:', { result })
  return result
},
|
||||
// Parse YAML front matter
|
||||
/**
 * Try to consume a YAML front matter block from the very beginning of `src`.
 *
 * @param src - Remaining markdown source handed over by the lexer.
 * @returns A `yamlFrontMatter` token whose `raw` is the full matched span
 *   (delimiters included) and whose `text` is the YAML between the
 *   delimiters, or `undefined` when `src` does not open with such a block.
 */
tokenize(src: string) {
  logger.info('🔍 Tokenizer tokenize() called', {
    srcLength: src.length,
    srcPrefix: src.substring(0, 120).replace(/\n/g, '\\n')
  })

  // Opening `---` line, lazily captured body, then a closing `---` that is
  // followed by a newline or the end of the input.
  const frontMatter = src.match(/^---\n([\s\S]*?)\n---(?:\n|$)/)
  if (frontMatter === null) {
    logger.warn('❌ Tokenizer tokenize() - NO MATCH FOUND')
    return undefined
  }

  const [raw, text] = frontMatter
  const token = {
    type: 'yamlFrontMatter',
    raw,
    text // YAML content without delimiters
  }

  logger.info('✅ Tokenizer tokenize() - MATCH FOUND', {
    rawLength: token.raw.length,
    textLength: token.text.length,
    textPreview: token.text.substring(0, 100).replace(/\n/g, '\\n')
  })

  return token
}
|
||||
},
|
||||
|
||||
// Parse markdown token to Tiptap JSON
|
||||
/**
 * Convert a `yamlFrontMatter` markdown token into Tiptap JSON.
 *
 * @param token - Token produced by the markdown tokenizer; its `text` is
 *   stored as the node's `content` attribute.
 * @param helpers - Markdown parse helpers; `parseChildren` is applied to the
 *   token's child tokens (an empty list when the token has none).
 * @returns JSON for this node type with `attrs.content` set to the token text
 *   (empty string when absent).
 */
parseMarkdown(token, helpers) {
  const yamlText = token.text
  const childTokens = token.tokens

  logger.info('🔍 parseMarkdown() called', {
    tokenType: token.type,
    hasText: !!yamlText,
    textLength: yamlText?.length || 0,
    textPreview: yamlText?.substring(0, 100).replace(/\n/g, '\\n'),
    hasTokens: !!childTokens,
    tokensLength: childTokens?.length || 0
  })

  const attrs = { content: yamlText || '' }
  const children = helpers.parseChildren(childTokens || [])
  const result = { type: this.name, attrs, content: children }

  logger.info('✅ parseMarkdown() result', {
    type: result.type,
    contentLength: result.attrs.content.length,
    hasContent: !!result.content
  })

  return result
},
|
||||
|
||||
// Serialize Tiptap node to markdown
|
||||
/**
 * Serialize the front matter node back to markdown.
 *
 * The tokenizer captures the YAML without its `---` delimiters and
 * parseMarkdown() stores that text unchanged, so the serializer has to emit
 * both the opening and the closing delimiter. Content that already carries a
 * delimiter line is left alone on that side, which keeps previously stored
 * values serializing as before.
 *
 * @param node - Node JSON; the YAML is read from `node.attrs.content`.
 * @returns The front matter block followed by a blank line, or an empty
 *   string when the node has no non-whitespace content.
 */
renderMarkdown(node) {
  logger.info('🔍 renderMarkdown() called', {
    nodeType: node.type,
    hasContent: !!node.attrs?.content,
    contentLength: node.attrs?.content?.length || 0,
    contentPreview: node.attrs?.content?.substring(0, 100).replace(/\n/g, '\\n')
  })

  const content = node.attrs?.content || ''
  const trimmedContent = content.trim()

  if (!trimmedContent) {
    logger.info('⚠️ renderMarkdown() - empty content, returning empty string')
    return ''
  }

  // Delimiters only count when they occupy a whole line: a value such as
  // `key: a---` must not be mistaken for an already-closed block.
  const hasOpeningDelimiter = /^---(?:\r?\n|$)/.test(trimmedContent)
  const hasClosingDelimiter = /(?:^|\n)---$/.test(trimmedContent)

  let result = trimmedContent
  if (!hasOpeningDelimiter) {
    // Without the opening line the output would not be recognised as front
    // matter when the markdown is parsed again.
    result = '---\n' + result
  }
  if (!hasClosingDelimiter) {
    result = result + '\n---'
  }
  result = result + '\n\n'

  logger.info('✅ renderMarkdown() result', {
    resultLength: result.length,
    resultPreview: result.substring(0, 120).replace(/\n/g, '\\n')
  })

  return result
},
|
||||
|
||||
addOptions() {
|
||||
return {
|
||||
@ -78,30 +186,6 @@ export const YamlFrontMatter = Node.create({
|
||||
]
|
||||
},
|
||||
|
||||
// Parse markdown token to Tiptap JSON
|
||||
/**
 * Convert a markdown token into Tiptap JSON for this node.
 *
 * @param token - Markdown token; `text` is preferred and `raw` is the fallback
 *   source of the YAML.
 * @returns JSON for this node type with the trimmed YAML in `attrs.content`.
 */
parseMarkdown(token: any) {
  // Prefer the token's text, then its raw source, then an empty string.
  const yamlSource = token.text || token.raw || ''
  const attrs = { content: yamlSource.trim() }
  return { type: this.name, attrs }
},
|
||||
|
||||
// Serialize Tiptap node to markdown
|
||||
/**
 * Serialize the node's YAML content to markdown.
 *
 * @param node - Node whose `attrs.content` holds the YAML.
 * @returns The trimmed content, closed with `---` when it does not already end
 *   with one, followed by a blank line; an empty string for empty content.
 */
renderMarkdown(node: any) {
  const yaml = (node.attrs.content || '').trim()
  if (!yaml) {
    return ''
  }
  // Only append the closing delimiter when the content lacks it.
  const closing = yaml.endsWith('---') ? '' : '\n---'
  return yaml + closing + '\n\n'
},
|
||||
|
||||
addCommands() {
|
||||
return {
|
||||
insertYamlFrontMatter:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user