diff --git a/docs/en/references/data/v2-migration-guide.md b/docs/en/references/data/v2-migration-guide.md index 86d597223e..8d08dd8d3a 100644 --- a/docs/en/references/data/v2-migration-guide.md +++ b/docs/en/references/data/v2-migration-guide.md @@ -31,9 +31,10 @@ src/main/data/migration/v2/ - `execute(ctx)`: perform inserts/updates; manage your own transactions; report progress via `reportProgress` - `validate(ctx)`: verify counts and integrity; return `ValidateResult` with stats (`sourceCount`, `targetCount`, `skippedCount`) and any `errors` - Registration: list migrators (in order) in `migrators/index.ts` so the engine can sort and run them. -- Current migrators: +- Current migrators (see `migrators/README-<MigratorName>.md` for detailed documentation): - `PreferencesMigrator` (implemented): maps ElectronStore + Redux settings to the `preference` table using `mappings/PreferencesMappings.ts`. - - `AssistantMigrator`, `KnowledgeMigrator`, `ChatMigrator` (placeholders): scaffolding and TODO notes for future tables. + - `ChatMigrator` (implemented): migrates topics and messages from Dexie to SQLite. See [`README-ChatMigrator.md`](../../../src/main/data/migration/v2/migrators/README-ChatMigrator.md). + - `AssistantMigrator`, `KnowledgeMigrator` (placeholders): scaffolding and TODO notes for future tables. - Conventions: - All logging goes through `loggerService` with a migrator-specific context. - Use `MigrationContext.sources` instead of accessing raw files/stores directly. @@ -62,3 +63,10 @@ src/main/data/migration/v2/ - [ ] Wire progress updates through `reportProgress` so UI shows per-migrator progress. - [ ] Register the migrator in `migrators/index.ts` with the correct `order`. - [ ] Add any new target tables to `MigrationEngine.verifyAndClearNewTables` once those tables exist. +- [ ] Include detailed comments for maintainability (file-level, function-level, logic blocks). 
+- [ ] **Create/update `migrators/README-<MigratorName>.md`** with detailed documentation including: + - Data sources and target tables + - Key transformations + - Field mappings (source → target) + - Dropped fields and rationale + - Code quality notes diff --git a/src/main/data/migration/v2/core/MigrationEngine.ts b/src/main/data/migration/v2/core/MigrationEngine.ts index 1b004d38e7..77bc4afd92 100644 --- a/src/main/data/migration/v2/core/MigrationEngine.ts +++ b/src/main/data/migration/v2/core/MigrationEngine.ts @@ -5,7 +5,9 @@ import { dbService } from '@data/db/DbService' import { appStateTable } from '@data/db/schemas/appState' +import { messageTable } from '@data/db/schemas/message' import { preferenceTable } from '@data/db/schemas/preference' +import { topicTable } from '@data/db/schemas/topic' import { loggerService } from '@logger' import type { MigrationProgress, @@ -24,8 +26,6 @@ import { createMigrationContext } from './MigrationContext' // TODO: Import these tables when they are created in user data schema // import { assistantTable } from '../../db/schemas/assistant' -// import { topicTable } from '../../db/schemas/topic' -// import { messageTable } from '../../db/schemas/message' // import { fileTable } from '../../db/schemas/file' // import { knowledgeBaseTable } from '../../db/schemas/knowledgeBase' @@ -197,12 +197,13 @@ export class MigrationEngine { const db = dbService.getDb() // Tables to clear - add more as they are created + // Order matters: child tables must be cleared before parent tables const tables = [ + { table: messageTable, name: 'message' }, // Must clear before topic (FK reference) + { table: topicTable, name: 'topic' }, { table: preferenceTable, name: 'preference' } // TODO: Add these when tables are created // { table: assistantTable, name: 'assistant' }, - // { table: topicTable, name: 'topic' }, - // { table: messageTable, name: 'message' }, // { table: fileTable, name: 'file' }, // { table: knowledgeBaseTable, name: 'knowledge_base' } ] @@ 
-216,14 +217,15 @@ export class MigrationEngine { } } - // Clear tables in reverse dependency order + // Clear tables in dependency order (children before parents) + // Messages reference topics, so delete messages first + await db.delete(messageTable) + await db.delete(topicTable) + await db.delete(preferenceTable) // TODO: Add these when tables are created (in correct order) - // await db.delete(messageTable) - // await db.delete(topicTable) // await db.delete(fileTable) // await db.delete(knowledgeBaseTable) // await db.delete(assistantTable) - await db.delete(preferenceTable) logger.info('All new architecture tables cleared successfully') } diff --git a/src/main/data/migration/v2/migrators/ChatMigrator.ts b/src/main/data/migration/v2/migrators/ChatMigrator.ts index 5a9b845a00..7e622739dd 100644 --- a/src/main/data/migration/v2/migrators/ChatMigrator.ts +++ b/src/main/data/migration/v2/migrators/ChatMigrator.ts @@ -1,81 +1,623 @@ /** - * Chat migrator - migrates topics and messages from Dexie to SQLite + * Chat Migrator - Migrates topics and messages from Dexie to SQLite * - * TODO: Implement when chat tables are created - * Data source: Dexie topics table (messages are embedded in topics) - * Target tables: topic, message + * ## Overview * - * Note: This migrator handles the largest amount of data (potentially millions of messages) - * and uses streaming JSON reading with batch inserts for memory efficiency. + * This migrator handles the largest data migration task: transferring all chat topics + * and their messages from the old Dexie/IndexedDB storage to the new SQLite database. 
+ * + * ## Data Sources + * + * | Data | Source | File/Path | + * |------|--------|-----------| + * | Topics with messages | Dexie `topics` table | `topics.json` → `{ id, messages[] }` | + * | Message blocks | Dexie `message_blocks` table | `message_blocks.json` | + * | Assistants (for meta) | Redux `assistants` slice | `ReduxStateReader.getCategory('assistants')` | + * + * ## Target Tables + * + * - `topicTable` - Stores conversation topics/threads + * - `messageTable` - Stores chat messages with tree structure + * + * ## Key Transformations + * + * 1. **Linear → Tree Structure** + * - Old: Messages stored as linear array in `topic.messages[]` + * - New: Tree via `parentId` + `siblingsGroupId` + * + * 2. **Multi-model Responses** + * - Old: `askId` links responses to user message, `foldSelected` marks active + * - New: Shared `parentId` + non-zero `siblingsGroupId` groups siblings + * + * 3. **Block Inlining** + * - Old: `message.blocks: string[]` (IDs) + separate `message_blocks` table + * - New: `message.data.blocks: MessageDataBlock[]` (inline JSON) + * + * 4. **Citation Migration** + * - Old: Separate `CitationMessageBlock` + * - New: Merged into `MainTextBlock.references` as ContentReference[] + * + * 5. 
**Mention Migration** + * - Old: `message.mentions: Model[]` + * - New: `MentionReference[]` in `MainTextBlock.references` + * + * ## Performance Considerations + * + * - Uses streaming JSON reader for large data sets (potentially millions of messages) + * - Processes topics in batches to control memory usage + * - Pre-loads all blocks into memory map for O(1) lookup (blocks table is smaller) + * - Uses database transactions for atomicity and performance + * + * @since v2.0.0 */ +import { messageTable } from '@data/db/schemas/message' +import { topicTable } from '@data/db/schemas/topic' import { loggerService } from '@logger' -import type { ExecuteResult, PrepareResult, ValidateResult } from '@shared/data/migration/v2/types' +import type { ExecuteResult, PrepareResult, ValidateResult, ValidationError } from '@shared/data/migration/v2/types' +import { eq, sql } from 'drizzle-orm' +import { v4 as uuidv4 } from 'uuid' +import type { MigrationContext } from '../core/MigrationContext' import { BaseMigrator } from './BaseMigrator' +import { + buildBlockLookup, + buildMessageTree, + type NewMessage, + type NewTopic, + type OldAssistant, + type OldBlock, + type OldTopic, + type OldTopicMeta, + resolveBlocks, + transformMessage, + transformTopic +} from './mappings/ChatMappings' const logger = loggerService.withContext('ChatMigrator') +/** + * Batch size for processing topics + * Chosen to balance memory usage and transaction overhead + */ +const TOPIC_BATCH_SIZE = 50 + +/** + * Batch size for inserting messages + * SQLite has limits on the number of parameters per statement + */ +const MESSAGE_INSERT_BATCH_SIZE = 100 + +/** + * Assistant data from Redux for generating AssistantMeta + */ +interface AssistantState { + assistants: OldAssistant[] +} + +/** + * Prepared data for execution phase + */ +interface PreparedTopicData { + topic: NewTopic + messages: NewMessage[] +} + export class ChatMigrator extends BaseMigrator { readonly id = 'chat' readonly name = 'ChatData' - 
readonly description = 'Migrate chat data' + readonly description = 'Migrate chat topics and messages' readonly order = 4 - async prepare(): Promise { - logger.info('ChatMigrator.prepare - placeholder implementation') + // Prepared data for execution + private topicCount = 0 + private messageCount = 0 + private blockLookup: Map = new Map() + private assistantLookup: Map = new Map() + // Topic metadata from Redux (name, pinned, etc.) - Dexie only has messages + private topicMetaLookup: Map = new Map() + // Topic → AssistantId mapping from Redux (Dexie topics don't store assistantId) + private topicAssistantLookup: Map = new Map() + private skippedTopics = 0 + private skippedMessages = 0 + // Track seen message IDs to handle duplicates across topics + private seenMessageIds = new Set() + // Block statistics for diagnostics + private blockStats = { requested: 0, resolved: 0, messagesWithMissingBlocks: 0, messagesWithEmptyBlocks: 0 } - // TODO: Implement when chat tables are created - // 1. Check if topics.json export file exists - // 2. Validate JSON format with sample read - // 3. Count total topics and estimate message count - // 4. Check for data integrity (e.g., messages have valid topic references) + /** + * Prepare phase - validate source data and count items + * + * Steps: + * 1. Check if topics.json and message_blocks.json exist + * 2. Load all blocks into memory for fast lookup + * 3. Load assistant data for generating meta + * 4. Count topics and estimate message count + * 5. 
Validate sample data for integrity + */ + async prepare(ctx: MigrationContext): Promise { + const warnings: string[] = [] - return { - success: true, - itemCount: 0, - warnings: ['ChatMigrator not yet implemented - waiting for chat tables'] - } - } + try { + // Step 1: Verify export files exist + const topicsExist = await ctx.sources.dexieExport.tableExists('topics') + if (!topicsExist) { + logger.warn('topics.json not found, skipping chat migration') + return { + success: true, + itemCount: 0, + warnings: ['topics.json not found - no chat data to migrate'] + } + } - async execute(): Promise { - logger.info('ChatMigrator.execute - placeholder implementation') + const blocksExist = await ctx.sources.dexieExport.tableExists('message_blocks') + if (!blocksExist) { + warnings.push('message_blocks.json not found - messages will have empty blocks') + } - // TODO: Implement when chat tables are created - // Use streaming JSON reader for large message files: - // - // const streamReader = _ctx.sources.dexieExport.createStreamReader('topics') - // await streamReader.readInBatches( - // BATCH_SIZE, - // async (topics, batchIndex) => { - // // 1. Insert topics - // // 2. Extract and insert messages from each topic - // // 3. 
Report progress - // } - // ) + // Step 2: Load all blocks into lookup map + // Blocks table is typically smaller than messages, safe to load entirely + if (blocksExist) { + logger.info('Loading message blocks into memory...') + const blocks = await ctx.sources.dexieExport.readTable('message_blocks') + this.blockLookup = buildBlockLookup(blocks) + logger.info(`Loaded ${this.blockLookup.size} blocks into lookup map`) + } - return { - success: true, - processedCount: 0 - } - } + // Step 3: Load assistant data for generating AssistantMeta + // Also extract topic metadata from assistants (Redux stores topic metadata in assistants.topics[]) + const assistantState = ctx.sources.reduxState.getCategory('assistants') + if (assistantState?.assistants) { + for (const assistant of assistantState.assistants) { + this.assistantLookup.set(assistant.id, assistant) - async validate(): Promise { - logger.info('ChatMigrator.validate - placeholder implementation') + // Extract topic metadata from this assistant's topics array + // Redux stores topic metadata (name, pinned, etc.) but with messages: [] + // Also track topic → assistantId mapping (Dexie doesn't store assistantId) + if (assistant.topics && Array.isArray(assistant.topics)) { + for (const topic of assistant.topics) { + if (topic.id) { + this.topicMetaLookup.set(topic.id, topic) + this.topicAssistantLookup.set(topic.id, assistant.id) + } + } + } + } + logger.info( + `Loaded ${this.assistantLookup.size} assistants and ${this.topicMetaLookup.size} topic metadata entries` + ) + } else { + warnings.push('No assistant data found - topics will have null assistantMeta and missing names') + } - // TODO: Implement when chat tables are created - // 1. Count validation for topics and messages - // 2. Sample validation (check a few topics have correct message counts) - // 3. 
Reference integrity validation + // Step 4: Count topics and estimate messages + const topicReader = ctx.sources.dexieExport.createStreamReader('topics') + this.topicCount = await topicReader.count() + logger.info(`Found ${this.topicCount} topics to migrate`) - return { - success: true, - errors: [], - stats: { - sourceCount: 0, - targetCount: 0, - skippedCount: 0 + // Estimate message count from sample + if (this.topicCount > 0) { + const sampleTopics = await topicReader.readSample(10) + const avgMessagesPerTopic = + sampleTopics.reduce((sum, t) => sum + (t.messages?.length || 0), 0) / sampleTopics.length + this.messageCount = Math.round(this.topicCount * avgMessagesPerTopic) + logger.info(`Estimated ${this.messageCount} messages based on sample`) + } + + // Step 5: Validate sample data + if (this.topicCount > 0) { + const sampleTopics = await topicReader.readSample(5) + for (const topic of sampleTopics) { + if (!topic.id) { + warnings.push(`Found topic without id - will be skipped`) + } + if (!topic.messages || !Array.isArray(topic.messages)) { + warnings.push(`Topic ${topic.id} has invalid messages array`) + } + } + } + + logger.info('Prepare phase completed', { + topics: this.topicCount, + estimatedMessages: this.messageCount, + blocks: this.blockLookup.size, + assistants: this.assistantLookup.size + }) + + return { + success: true, + itemCount: this.topicCount, + warnings: warnings.length > 0 ? warnings : undefined + } + } catch (error) { + logger.error('Prepare failed', error as Error) + return { + success: false, + itemCount: 0, + warnings: [error instanceof Error ? error.message : String(error)] } } } + + /** + * Execute phase - perform the actual data migration + * + * Processing strategy: + * 1. Stream topics in batches to control memory + * 2. For each topic batch: + * a. Transform topics and their messages + * b. Build message tree structure + * c. Insert topics in single transaction + * d. Insert messages in batched transactions + * 3. 
Report progress throughout + */ + async execute(ctx: MigrationContext): Promise { + if (this.topicCount === 0) { + logger.info('No topics to migrate') + return { success: true, processedCount: 0 } + } + + let processedTopics = 0 + let processedMessages = 0 + + try { + const db = ctx.db + const topicReader = ctx.sources.dexieExport.createStreamReader('topics') + + // Process topics in batches + await topicReader.readInBatches(TOPIC_BATCH_SIZE, async (topics, batchIndex) => { + logger.debug(`Processing topic batch ${batchIndex + 1}`, { count: topics.length }) + + // Transform all topics and messages in this batch + const preparedData: PreparedTopicData[] = [] + + for (const oldTopic of topics) { + try { + const prepared = this.prepareTopicData(oldTopic) + if (prepared) { + preparedData.push(prepared) + } else { + this.skippedTopics++ + } + } catch (error) { + logger.warn(`Failed to transform topic ${oldTopic.id}`, { error }) + this.skippedTopics++ + } + } + + // Insert topics in a transaction + if (preparedData.length > 0) { + await db.transaction(async (tx) => { + // Insert topics + const topicValues = preparedData.map((d) => d.topic) + await tx.insert(topicTable).values(topicValues) + + // Collect all messages, handling duplicate IDs by generating new ones + const allMessages: NewMessage[] = [] + for (const data of preparedData) { + for (const msg of data.messages) { + if (this.seenMessageIds.has(msg.id)) { + const newId = uuidv4() + logger.warn(`Duplicate message ID found: ${msg.id}, assigning new ID: ${newId}`) + msg.id = newId + } + this.seenMessageIds.add(msg.id) + allMessages.push(msg) + } + } + + // Insert messages in batches (SQLite parameter limit) + for (let i = 0; i < allMessages.length; i += MESSAGE_INSERT_BATCH_SIZE) { + const batch = allMessages.slice(i, i + MESSAGE_INSERT_BATCH_SIZE) + await tx.insert(messageTable).values(batch) + } + + processedMessages += allMessages.length + }) + + processedTopics += preparedData.length + } + + // Report progress + 
const progress = Math.round((processedTopics / this.topicCount) * 100) + this.reportProgress( + progress, + `已迁移 ${processedTopics}/${this.topicCount} 个对话,${processedMessages} 条消息` + ) + }) + + logger.info('Execute completed', { + processedTopics, + processedMessages, + skippedTopics: this.skippedTopics, + skippedMessages: this.skippedMessages + }) + + // Log block statistics for diagnostics + logger.info('Block migration statistics', { + blocksRequested: this.blockStats.requested, + blocksResolved: this.blockStats.resolved, + blocksMissing: this.blockStats.requested - this.blockStats.resolved, + messagesWithEmptyBlocks: this.blockStats.messagesWithEmptyBlocks, + messagesWithMissingBlocks: this.blockStats.messagesWithMissingBlocks + }) + + return { + success: true, + processedCount: processedTopics + } + } catch (error) { + logger.error('Execute failed', error as Error) + return { + success: false, + processedCount: processedTopics, + error: error instanceof Error ? error.message : String(error) + } + } + } + + /** + * Validate phase - verify migrated data integrity + * + * Validation checks: + * 1. Topic count matches source (minus skipped) + * 2. Message count is within expected range + * 3. Sample topics have correct structure + * 4. Foreign key integrity (messages belong to existing topics) + */ + async validate(ctx: MigrationContext): Promise { + const errors: ValidationError[] = [] + const db = ctx.db + + try { + // Count topics in target + const topicResult = await db.select({ count: sql`count(*)` }).from(topicTable).get() + const targetTopicCount = topicResult?.count ?? 0 + + // Count messages in target + const messageResult = await db.select({ count: sql`count(*)` }).from(messageTable).get() + const targetMessageCount = messageResult?.count ?? 
0 + + logger.info('Validation counts', { + sourceTopics: this.topicCount, + targetTopics: targetTopicCount, + skippedTopics: this.skippedTopics, + targetMessages: targetMessageCount + }) + + // Validate topic count + const expectedTopics = this.topicCount - this.skippedTopics + if (targetTopicCount < expectedTopics) { + errors.push({ + key: 'topic_count', + message: `Topic count mismatch: expected ${expectedTopics}, got ${targetTopicCount}` + }) + } + + // Sample validation: check a few topics have messages + const sampleTopics = await db.select().from(topicTable).limit(5).all() + for (const topic of sampleTopics) { + const msgCount = await db + .select({ count: sql`count(*)` }) + .from(messageTable) + .where(eq(messageTable.topicId, topic.id)) + .get() + + if (msgCount?.count === 0) { + // This is a warning, not an error - some topics may legitimately have no messages + logger.warn(`Topic ${topic.id} has no messages after migration`) + } + } + + // Check for orphan messages (messages without valid topic) + // This shouldn't happen due to foreign key constraints, but verify anyway + const orphanCheck = await db + .select({ count: sql`count(*)` }) + .from(messageTable) + .where(sql`${messageTable.topicId} NOT IN (SELECT id FROM ${topicTable})`) + .get() + + if (orphanCheck && orphanCheck.count > 0) { + errors.push({ + key: 'orphan_messages', + message: `Found ${orphanCheck.count} orphan messages without valid topics` + }) + } + + return { + success: errors.length === 0, + errors, + stats: { + sourceCount: this.topicCount, + targetCount: targetTopicCount, + skippedCount: this.skippedTopics + } + } + } catch (error) { + logger.error('Validation failed', error as Error) + return { + success: false, + errors: [ + { + key: 'validation', + message: error instanceof Error ? 
error.message : String(error) + } + ], + stats: { + sourceCount: this.topicCount, + targetCount: 0, + skippedCount: this.skippedTopics + } + } + } + } + + /** + * Prepare a single topic and its messages for migration + * + * @param oldTopic - Source topic from Dexie (has messages, may lack metadata) + * @returns Prepared data or null if topic should be skipped + * + * ## Data Merging + * + * Topic data comes from two sources: + * - Dexie `topics` table: Has `id`, `messages[]`, `assistantId` + * - Redux `assistants[].topics[]`: Has metadata (`name`, `pinned`, `prompt`, etc.) + * + * We merge Redux metadata into the Dexie topic before transformation. + */ + private prepareTopicData(oldTopic: OldTopic): PreparedTopicData | null { + // Validate required fields + if (!oldTopic.id) { + logger.warn('Topic missing id, skipping') + return null + } + + // Merge topic metadata from Redux (name, pinned, etc.) + // Dexie topics may have stale or missing metadata; Redux is authoritative for these fields + const topicMeta = this.topicMetaLookup.get(oldTopic.id) + if (topicMeta) { + // Merge Redux metadata into Dexie topic + // Note: Redux topic.name can also be empty from ancient version migrations (see store/migrate.ts:303-305) + oldTopic.name = topicMeta.name || oldTopic.name + oldTopic.pinned = topicMeta.pinned ?? oldTopic.pinned + oldTopic.prompt = topicMeta.prompt ?? oldTopic.prompt + oldTopic.isNameManuallyEdited = topicMeta.isNameManuallyEdited ?? 
oldTopic.isNameManuallyEdited + // Use Redux timestamps if available and Dexie lacks them + if (topicMeta.createdAt && !oldTopic.createdAt) { + oldTopic.createdAt = topicMeta.createdAt + } + if (topicMeta.updatedAt && !oldTopic.updatedAt) { + oldTopic.updatedAt = topicMeta.updatedAt + } + } + + // Fallback: If name is still empty after merge, use a default name + // This handles cases where both Dexie and Redux have empty names (ancient version bug) + if (!oldTopic.name) { + oldTopic.name = 'Unnamed Topic' // Default fallback for topics with no name + } + + // Get assistantId from Redux mapping (Dexie topics don't store assistantId) + // Fall back to oldTopic.assistantId in case Dexie did store it (defensive) + const assistantId = this.topicAssistantLookup.get(oldTopic.id) || oldTopic.assistantId + if (assistantId && !oldTopic.assistantId) { + oldTopic.assistantId = assistantId + } + + // Get assistant for meta generation + const assistant = this.assistantLookup.get(assistantId) || null + + // Get messages array (may be empty or undefined) + const oldMessages = oldTopic.messages || [] + + // Build message tree structure + const messageTree = buildMessageTree(oldMessages) + + // === First pass: identify messages to skip (no blocks) === + const skippedMessageIds = new Set() + const messageParentMap = new Map() // messageId -> parentId + + for (const oldMsg of oldMessages) { + const blockIds = oldMsg.blocks || [] + const blocks = resolveBlocks(blockIds, this.blockLookup) + + // Track block statistics for diagnostics + this.blockStats.requested += blockIds.length + this.blockStats.resolved += blocks.length + if (blockIds.length === 0) { + this.blockStats.messagesWithEmptyBlocks++ + } else if (blocks.length < blockIds.length) { + this.blockStats.messagesWithMissingBlocks++ + if (blocks.length === 0) { + logger.warn(`Message ${oldMsg.id} has ${blockIds.length} block IDs but none found in message_blocks`) + } + } + + // Store parent info from tree + const treeInfo = 
messageTree.get(oldMsg.id) + messageParentMap.set(oldMsg.id, treeInfo?.parentId ?? null) + + // Mark for skipping if no blocks + if (blocks.length === 0) { + skippedMessageIds.add(oldMsg.id) + this.skippedMessages++ + } + } + + // === Helper: resolve parent through skipped messages === + // If parentId points to a skipped message, follow the chain to find a non-skipped ancestor + const resolveParentId = (parentId: string | null): string | null => { + let currentParent = parentId + const visited = new Set() // Prevent infinite loops + + while (currentParent && skippedMessageIds.has(currentParent)) { + if (visited.has(currentParent)) { + // Circular reference, break out + return null + } + visited.add(currentParent) + currentParent = messageParentMap.get(currentParent) ?? null + } + + return currentParent + } + + // === Second pass: transform messages that have blocks === + const newMessages: NewMessage[] = [] + for (const oldMsg of oldMessages) { + // Skip messages marked for skipping + if (skippedMessageIds.has(oldMsg.id)) { + continue + } + + try { + const treeInfo = messageTree.get(oldMsg.id) + if (!treeInfo) { + logger.warn(`Message ${oldMsg.id} not found in tree, using defaults`) + continue + } + + // Resolve blocks for this message (we know it has blocks from first pass) + const blockIds = oldMsg.blocks || [] + const blocks = resolveBlocks(blockIds, this.blockLookup) + + // Resolve parentId through any skipped messages + const resolvedParentId = resolveParentId(treeInfo.parentId) + + // Get assistant for this message (may differ from topic's assistant) + const msgAssistant = this.assistantLookup.get(oldMsg.assistantId) || assistant + + const newMsg = transformMessage( + oldMsg, + resolvedParentId, // Use resolved parent instead of original + treeInfo.siblingsGroupId, + blocks, + msgAssistant, + oldTopic.id + ) + + newMessages.push(newMsg) + } catch (error) { + logger.warn(`Failed to transform message ${oldMsg.id}`, { error }) + this.skippedMessages++ + } + } + 
+ // Calculate activeNodeId based on migrated messages (not original messages) + // If no messages were migrated, set to null + let activeNodeId: string | null = null + if (newMessages.length > 0) { + // Use the last migrated message as active node + activeNodeId = newMessages[newMessages.length - 1].id + } + + // Transform topic with correct activeNodeId + const newTopic = transformTopic(oldTopic, assistant, activeNodeId) + + return { + topic: newTopic, + messages: newMessages + } + } } diff --git a/src/main/data/migration/v2/migrators/README-ChatMigrator.md b/src/main/data/migration/v2/migrators/README-ChatMigrator.md new file mode 100644 index 0000000000..a3a0640ccd --- /dev/null +++ b/src/main/data/migration/v2/migrators/README-ChatMigrator.md @@ -0,0 +1,138 @@ +# ChatMigrator + +The `ChatMigrator` handles the largest data migration task: topics and messages from Dexie/IndexedDB to SQLite. + +## Data Sources + +| Data | Source | File/Path | +|------|--------|-----------| +| Topics with messages | Dexie `topics` table | `topics.json` | +| Topic metadata (name, pinned, etc.) | Redux `assistants[].topics[]` | `ReduxStateReader.getCategory('assistants')` | +| Message blocks | Dexie `message_blocks` table | `message_blocks.json` | +| Assistants (for meta) | Redux `assistants` slice | `ReduxStateReader.getCategory('assistants')` | + +### Topic Data Split (Important!) + +The old system stores topic data in **two separate locations**: + +1. **Dexie `topics` table**: Contains only `id` and `messages[]` array (NO `assistantId`!) +2. **Redux `assistants[].topics[]`**: Contains metadata (`name`, `pinned`, `prompt`, `isNameManuallyEdited`) and implicitly the `assistantId` (from parent assistant) + +Redux deliberately clears `messages[]` to reduce storage size. The migrator merges these sources: +- Messages come from Dexie +- Metadata (name, pinned, etc.) 
comes from Redux +- `assistantId` comes from Redux structure (each assistant owns its topics) + +## Key Transformations + +1. **Linear → Tree Structure** + - Old: Messages stored as linear array in `topic.messages[]` + - New: Tree via `parentId` + `siblingsGroupId` + +2. **Multi-model Responses** + - Old: `askId` links responses to user message, `foldSelected` marks active + - New: Shared `parentId` + non-zero `siblingsGroupId` groups siblings + +3. **Block Inlining** + - Old: `message.blocks: string[]` (IDs) + separate `message_blocks` table + - New: `message.data.blocks: MessageDataBlock[]` (inline JSON) + +4. **Citation Migration** + - Old: Separate `CitationMessageBlock` with `response`, `knowledge`, `memories` + - New: Merged into `MainTextBlock.references` as `ContentReference[]` + +5. **Mention Migration** + - Old: `message.mentions: Model[]` + - New: `MentionReference[]` in `MainTextBlock.references` + +## Data Quality Handling + +The migrator handles potential data inconsistencies from the old system: + +| Issue | Detection | Handling | +|-------|-----------|----------| +| **Duplicate message ID** | Same ID appears in multiple topics | Generate new UUID, log warning | +| **TopicId mismatch** | `message.topicId` ≠ parent `topic.id` | Use correct parent topic.id (silent fix) | +| **Missing blocks** | Block ID not found in `message_blocks` | Skip missing block (silent) | +| **Invalid topic** | Topic missing required `id` field | Skip entire topic | +| **Missing topic metadata** | Topic not found in Redux `assistants[].topics[]` | Use Dexie values, fallback name if empty | +| **Missing assistantId** | Topic not in any `assistant.topics[]` | `assistantId` and `assistantMeta` will be null | +| **Empty topic name** | Both Dexie and Redux have empty `name` (ancient bug) | Use fallback "Unnamed Topic" | +| **Message with no blocks** | `blocks` array is empty after resolution | Skip message, re-link children to parent's parent | +| **Topic with no messages** | All 
messages skipped (no blocks) | Keep topic, set `activeNodeId` to null | + +## Field Mappings + +### Topic Mapping + +Topic data is merged from Dexie + Redux before transformation: + +| Source | Target (topicTable) | Notes | +|--------|---------------------|-------| +| Dexie: `id` | `id` | Direct copy | +| Redux: `name` | `name` | Merged from Redux `assistants[].topics[]` | +| Redux: `isNameManuallyEdited` | `isNameManuallyEdited` | Merged from Redux | +| Redux: (parent assistant.id) | `assistantId` | From `topicAssistantLookup` mapping | +| (from Assistant) | `assistantMeta` | Generated from assistant entity | +| Redux: `prompt` | `prompt` | Merged from Redux | +| (computed) | `activeNodeId` | Last message ID or foldSelected | +| (none) | `groupId` | null (new field) | +| (none) | `sortOrder` | 0 (new field) | +| Redux: `pinned` | `isPinned` | Merged from Redux, renamed | +| (none) | `pinnedOrder` | 0 (new field) | +| `createdAt` | `createdAt` | ISO string → timestamp | +| `updatedAt` | `updatedAt` | ISO string → timestamp | + +**Dropped fields**: `type` ('chat' | 'session') + +### Message Mapping + +| Source (OldMessage) | Target (messageTable) | Notes | +|---------------------|----------------------|-------| +| `id` | `id` | Direct copy (new UUID if duplicate) | +| (computed) | `parentId` | From tree building algorithm | +| (from parent topic) | `topicId` | Uses parent topic.id for consistency | +| `role` | `role` | Direct copy | +| `blocks` + `mentions` + citations | `data` | Complex transformation | +| (extracted) | `searchableText` | Extracted from text blocks | +| `status` | `status` | Normalized to success/error/paused | +| (computed) | `siblingsGroupId` | From multi-model detection | +| `assistantId` | `assistantId` | Direct copy | +| `modelId` | `modelId` | Direct copy | +| (from Message.model) | `modelMeta` | Generated from model entity | +| `traceId` | `traceId` | Direct copy | +| `usage` + `metrics` | `stats` | Merged into single stats object | +| 
`createdAt` | `createdAt` | ISO string → timestamp | +| `updatedAt` | `updatedAt` | ISO string → timestamp | + +**Dropped fields**: `type`, `useful`, `enabledMCPs`, `agentSessionId`, `providerMetadata`, `multiModelMessageStyle`, `askId` (replaced by parentId), `foldSelected` (replaced by siblingsGroupId) + +### Block Type Mapping + +| Old Type | New Type | Notes | +|----------|----------|-------| +| `main_text` | `MainTextBlock` | Direct, references added from citations/mentions | +| `thinking` | `ThinkingBlock` | `thinking_millsec` → `thinkingMs` | +| `translation` | `TranslationBlock` | Direct copy | +| `code` | `CodeBlock` | Direct copy | +| `image` | `ImageBlock` | `file.id` → `fileId` | +| `file` | `FileBlock` | `file.id` → `fileId` | +| `video` | `VideoBlock` | Direct copy | +| `tool` | `ToolBlock` | Direct copy | +| `citation` | (removed) | Converted to `MainTextBlock.references` | +| `error` | `ErrorBlock` | Direct copy | +| `compact` | `CompactBlock` | Direct copy | +| `unknown` | (skipped) | Placeholder blocks are dropped | + +## Implementation Files + +- `ChatMigrator.ts` - Main migrator class with prepare/execute/validate phases +- `mappings/ChatMappings.ts` - Pure transformation functions and type definitions + +## Code Quality + +All implementation code includes detailed comments: +- File-level comments: Describe purpose, data flow, and overview +- Function-level comments: Purpose, parameters, return values, side effects +- Logic block comments: Step-by-step explanations for complex logic +- Data transformation comments: Old field → new field mapping relationships diff --git a/src/main/data/migration/v2/migrators/mappings/ChatMappings.ts b/src/main/data/migration/v2/migrators/mappings/ChatMappings.ts new file mode 100644 index 0000000000..99b4023c08 --- /dev/null +++ b/src/main/data/migration/v2/migrators/mappings/ChatMappings.ts @@ -0,0 +1,1168 @@ +/** + * Chat Mappings - Topic and Message transformation functions for Dexie → SQLite migration + * + * 
This file contains pure transformation functions that convert old data structures + * to new SQLite-compatible formats. All functions are stateless and side-effect free. + * + * ## Data Flow Overview: + * + * ### Topics: + * - Source: Redux `assistants.topics[]` + Dexie `topics` table (for messages) + * - Target: SQLite `topicTable` + * + * ### Messages: + * - Source: Dexie `topics.messages[]` (embedded in topic) + `message_blocks` table + * - Target: SQLite `messageTable` with inline blocks in `data.blocks` + * + * ## Key Transformations: + * + * 1. **Message Order → Tree Structure** + * - Old: Linear array `topic.messages[]` with array index as order + * - New: Tree via `parentId` + `siblingsGroupId` + * + * 2. **Multi-model Responses** + * - Old: Multiple messages share same `askId`, `foldSelected` marks active + * - New: Same `parentId` + non-zero `siblingsGroupId` groups siblings + * + * 3. **Block Storage** + * - Old: `message.blocks: string[]` (IDs) + separate `message_blocks` table + * - New: `message.data.blocks: MessageDataBlock[]` (inline JSON) + * + * 4. **Citations → References** + * - Old: Separate `CitationMessageBlock` with response/knowledge/memories + * - New: Merged into `MainTextBlock.references` as typed ContentReference[] + * + * 5. 
**Mentions → References** + * - Old: `message.mentions: Model[]` + * - New: `MentionReference[]` in `MainTextBlock.references` + * + * @since v2.0.0 + */ + +import type { + BlockType, + CitationReference, + CitationType, + CodeBlock, + CompactBlock, + ContentReference, + ErrorBlock, + FileBlock, + ImageBlock, + MainTextBlock, + MentionReference, + MessageData, + MessageDataBlock, + MessageStats, + ReferenceCategory, + ThinkingBlock, + ToolBlock, + TranslationBlock, + VideoBlock +} from '@shared/data/types/message' +import type { AssistantMeta, ModelMeta } from '@shared/data/types/meta' + +// ============================================================================ +// Old Type Definitions (Source Data Structures) +// ============================================================================ + +/** + * Old Topic type from Redux assistants slice + * Source: src/renderer/src/types/index.ts + */ +export interface OldTopic { + id: string + type?: 'chat' | 'session' // Dropped in new schema + assistantId: string + name: string + createdAt: string + updatedAt: string + messages: OldMessage[] + pinned?: boolean + prompt?: string + isNameManuallyEdited?: boolean +} + +/** + * Old Assistant type for extracting AssistantMeta + * Note: In Redux state, assistant.topics[] contains topic metadata (but with messages: []) + */ +export interface OldAssistant { + id: string + name: string + emoji?: string + type: string + topics?: OldTopicMeta[] // Topics are nested inside assistants in Redux +} + +/** + * Old Topic metadata from Redux assistants.topics[] + * + * Redux stores topic metadata (name, pinned, etc.) but clears messages[] to reduce storage. + * Dexie stores topics with messages[] but may have stale metadata. + * Migration merges: Redux metadata + Dexie messages. 
+ */ +export interface OldTopicMeta { + id: string + name: string + pinned?: boolean + prompt?: string + isNameManuallyEdited?: boolean + createdAt?: string + updatedAt?: string +} + +/** + * Old Model type for extracting ModelMeta + */ +export interface OldModel { + id: string + name: string + provider: string + group: string +} + +/** + * Old Message type from Dexie topics table + * Source: src/renderer/src/types/newMessage.ts + */ +export interface OldMessage { + id: string + role: 'user' | 'assistant' | 'system' + assistantId: string + topicId: string + createdAt: string + updatedAt?: string + // Old status includes more values, we normalize to success/error/paused + status: 'sending' | 'pending' | 'searching' | 'processing' | 'success' | 'paused' | 'error' + + // Model info + modelId?: string + model?: OldModel + + // Multi-model response fields + askId?: string // Links to user message ID + foldSelected?: boolean // True if this is the selected response in fold view + multiModelMessageStyle?: string // UI state, dropped + + // Content + blocks: string[] // Block IDs referencing message_blocks table + + // Metadata + usage?: OldUsage + metrics?: OldMetrics + traceId?: string + + // Fields being transformed + mentions?: OldModel[] // → MentionReference in MainTextBlock.references + + // Dropped fields + type?: 'clear' | 'text' | '@' + useful?: boolean + enabledMCPs?: unknown[] + agentSessionId?: string + providerMetadata?: unknown +} + +/** + * Old Usage type for token consumption + */ +export interface OldUsage { + prompt_tokens?: number + completion_tokens?: number + total_tokens?: number + thoughts_tokens?: number + cost?: number +} + +/** + * Old Metrics type for performance measurement + */ +export interface OldMetrics { + completion_tokens?: number + time_completion_millsec?: number + time_first_token_millsec?: number + time_thinking_millsec?: number +} + +/** + * Old MessageBlock base type + */ +export interface OldMessageBlock { + id: string + 
messageId: string + type: string + createdAt: string + updatedAt?: string + status: string // Dropped in new schema + model?: OldModel // Dropped in new schema + metadata?: Record<string, unknown> + error?: unknown +} + +/** + * Old MainTextMessageBlock + */ +export interface OldMainTextBlock extends OldMessageBlock { + type: 'main_text' + content: string + knowledgeBaseIds?: string[] // Dropped (deprecated) + citationReferences?: Array<{ + citationBlockId?: string + citationBlockSource?: string + }> // Dropped (replaced by references) +} + +/** + * Old ThinkingMessageBlock + */ +export interface OldThinkingBlock extends OldMessageBlock { + type: 'thinking' + content: string + thinking_millsec: number // → thinkingMs +} + +/** + * Old TranslationMessageBlock + */ +export interface OldTranslationBlock extends OldMessageBlock { + type: 'translation' + content: string + sourceBlockId?: string + sourceLanguage?: string + targetLanguage: string +} + +/** + * Old CodeMessageBlock + */ +export interface OldCodeBlock extends OldMessageBlock { + type: 'code' + content: string + language: string +} + +/** + * Old ImageMessageBlock + */ +export interface OldImageBlock extends OldMessageBlock { + type: 'image' + url?: string + file?: { id: string; [key: string]: unknown } // file.id → fileId +} + +/** + * Old FileMessageBlock + */ +export interface OldFileBlock extends OldMessageBlock { + type: 'file' + file: { id: string; [key: string]: unknown } // file.id → fileId +} + +/** + * Old VideoMessageBlock + */ +export interface OldVideoBlock extends OldMessageBlock { + type: 'video' + url?: string + filePath?: string +} + +/** + * Old ToolMessageBlock + */ +export interface OldToolBlock extends OldMessageBlock { + type: 'tool' + toolId: string + toolName?: string + arguments?: Record<string, unknown> + content?: string | object +} + +/** + * Old CitationMessageBlock - contains web search, knowledge, and memory references + * This is the primary source for ContentReference transformation + */ +export interface
OldCitationBlock extends OldMessageBlock { + type: 'citation' + response?: { + results?: unknown + source: unknown + } + knowledge?: Array<{ + id: number + content: string + sourceUrl: string + type: string + file?: unknown + metadata?: Record<string, unknown> + }> + memories?: Array<{ + id: string + memory: string + hash?: string + createdAt?: string + updatedAt?: string + score?: number + metadata?: Record<string, unknown> + }> +} + +/** + * Old ErrorMessageBlock + */ +export interface OldErrorBlock extends OldMessageBlock { + type: 'error' +} + +/** + * Old CompactMessageBlock + */ +export interface OldCompactBlock extends OldMessageBlock { + type: 'compact' + content: string + compactedContent: string +} + +/** + * Union of all old block types + */ +export type OldBlock = + | OldMainTextBlock + | OldThinkingBlock + | OldTranslationBlock + | OldCodeBlock + | OldImageBlock + | OldFileBlock + | OldVideoBlock + | OldToolBlock + | OldCitationBlock + | OldErrorBlock + | OldCompactBlock + | OldMessageBlock + +// ============================================================================ +// New Type Definitions (Target Data Structures) +// ============================================================================ + +/** + * New Topic for SQLite insertion + * Matches topicTable schema + */ +export interface NewTopic { + id: string + name: string | null + isNameManuallyEdited: boolean + assistantId: string | null + assistantMeta: AssistantMeta | null + prompt: string | null + activeNodeId: string | null + groupId: string | null + sortOrder: number + isPinned: boolean + pinnedOrder: number + createdAt: number // timestamp + updatedAt: number // timestamp +} + +/** + * New Message for SQLite insertion + * Matches messageTable schema + */ +export interface NewMessage { + id: string + parentId: string | null + topicId: string + role: string + data: MessageData + searchableText: string | null + status: 'success' | 'error' | 'paused' + siblingsGroupId: number + assistantId: string | null + assistantMeta:
AssistantMeta | null + modelId: string | null + modelMeta: ModelMeta | null + traceId: string | null + stats: MessageStats | null + createdAt: number // timestamp + updatedAt: number // timestamp +} + +// ============================================================================ +// Topic Transformation Functions +// ============================================================================ + +/** + * Transform old Topic to new Topic format + * + * @param oldTopic - Source topic from Redux/Dexie + * @param assistant - Assistant entity for generating AssistantMeta + * @param activeNodeId - Last message ID to set as active node + * @returns New topic ready for SQLite insertion + * + * ## Field Mapping: + * | Source | Target | Notes | + * |--------|--------|-------| + * | id | id | Direct copy | + * | name | name | Direct copy | + * | isNameManuallyEdited | isNameManuallyEdited | Direct copy | + * | assistantId | assistantId | Direct copy | + * | (from Assistant) | assistantMeta | Generated from assistant entity | + * | prompt | prompt | Direct copy | + * | (computed) | activeNodeId | Last message ID | + * | (none) | groupId | null (new field) | + * | (none) | sortOrder | 0 (new field) | + * | pinned | isPinned | Renamed | + * | (none) | pinnedOrder | 0 (new field) | + * | createdAt | createdAt | ISO string → timestamp | + * | updatedAt | updatedAt | ISO string → timestamp | + * + * ## Dropped Fields: + * - type ('chat' | 'session'): No longer needed in new schema + */ +export function transformTopic( + oldTopic: OldTopic, + assistant: OldAssistant | null, + activeNodeId: string | null +): NewTopic { + return { + id: oldTopic.id, + name: oldTopic.name || null, + isNameManuallyEdited: oldTopic.isNameManuallyEdited ?? false, + assistantId: oldTopic.assistantId || null, + assistantMeta: assistant ? 
extractAssistantMeta(assistant) : null, + prompt: oldTopic.prompt || null, + activeNodeId, + groupId: null, // New field, no migration source + sortOrder: 0, // New field, default value + isPinned: oldTopic.pinned ?? false, + pinnedOrder: 0, // New field, default value + createdAt: parseTimestamp(oldTopic.createdAt), + updatedAt: parseTimestamp(oldTopic.updatedAt) + } +} + +/** + * Extract AssistantMeta from old Assistant entity + * + * AssistantMeta preserves display information when the original + * assistant is deleted, ensuring messages/topics remain readable. + * + * @param assistant - Source assistant entity + * @returns AssistantMeta for storage in topic/message + */ +export function extractAssistantMeta(assistant: OldAssistant): AssistantMeta { + return { + id: assistant.id, + name: assistant.name, + emoji: assistant.emoji, + type: assistant.type + } +} + +// ============================================================================ +// Message Transformation Functions +// ============================================================================ + +/** + * Transform old Message to new Message format + * + * This is the core message transformation function. 
It handles: + * - Status normalization + * - Block transformation (IDs → inline data) + * - Citation merging into references + * - Mention conversion to references + * - Stats merging (usage + metrics) + * + * @param oldMessage - Source message from Dexie + * @param parentId - Computed parent message ID (from tree building) + * @param siblingsGroupId - Computed siblings group ID (from multi-model detection) + * @param blocks - Resolved block data from message_blocks table + * @param assistant - Assistant entity for generating AssistantMeta + * @param correctTopicId - The correct topic ID (from parent topic, not from message) + * @returns New message ready for SQLite insertion + * + * ## Field Mapping: + * | Source | Target | Notes | + * |--------|--------|-------| + * | id | id | Direct copy | + * | (computed) | parentId | From tree building algorithm | + * | (parameter) | topicId | From correctTopicId param (ensures consistency) | + * | role | role | Direct copy | + * | blocks + mentions + citations | data | Complex transformation | + * | (extracted) | searchableText | Extracted from text blocks | + * | status | status | Normalized to success/error/paused | + * | (computed) | siblingsGroupId | From multi-model detection | + * | assistantId | assistantId | Direct copy | + * | (from Message.model) | assistantMeta | Generated if available | + * | modelId | modelId | Direct copy | + * | (from Message.model) | modelMeta | Generated from model entity | + * | traceId | traceId | Direct copy | + * | usage + metrics | stats | Merged into single stats object | + * | createdAt | createdAt | ISO string → timestamp | + * | updatedAt | updatedAt | ISO string → timestamp | + * + * ## Dropped Fields: + * - type ('clear' | 'text' | '@') + * - useful (boolean) + * - enabledMCPs (deprecated) + * - agentSessionId (session identifier) + * - providerMetadata (raw provider data) + * - multiModelMessageStyle (UI state) + * - askId (replaced by parentId) + * - foldSelected (replaced by 
siblingsGroupId) + */ +export function transformMessage( + oldMessage: OldMessage, + parentId: string | null, + siblingsGroupId: number, + blocks: OldBlock[], + assistant: OldAssistant | null, + correctTopicId: string +): NewMessage { + // Transform blocks and merge citations/mentions into references + const { dataBlocks, citationReferences, searchableText } = transformBlocks(blocks) + + // Convert mentions to MentionReferences + const mentionReferences = transformMentions(oldMessage.mentions) + + // Find the MainTextBlock and add references if any exist + const allReferences = [...citationReferences, ...mentionReferences] + if (allReferences.length > 0) { + const mainTextBlock = dataBlocks.find((b) => b.type === 'main_text') as MainTextBlock | undefined + if (mainTextBlock) { + mainTextBlock.references = allReferences + } + } + + return { + id: oldMessage.id, + parentId, + topicId: correctTopicId, + role: oldMessage.role, + data: { blocks: dataBlocks }, + searchableText: searchableText || null, + status: normalizeStatus(oldMessage.status), + siblingsGroupId, + assistantId: oldMessage.assistantId || null, + assistantMeta: assistant ? extractAssistantMeta(assistant) : null, + modelId: oldMessage.modelId || null, + modelMeta: oldMessage.model ? extractModelMeta(oldMessage.model) : null, + traceId: oldMessage.traceId || null, + stats: mergeStats(oldMessage.usage, oldMessage.metrics), + createdAt: parseTimestamp(oldMessage.createdAt), + updatedAt: parseTimestamp(oldMessage.updatedAt || oldMessage.createdAt) + } +} + +/** + * Extract ModelMeta from old Model entity + * + * ModelMeta preserves model display information when the original + * model configuration is removed or unavailable. 
+ * + * @param model - Source model entity + * @returns ModelMeta for storage in message + */ +export function extractModelMeta(model: OldModel): ModelMeta { + return { + id: model.id, + name: model.name, + provider: model.provider, + group: model.group + } +} + +/** + * Normalize old status values to new enum + * + * Old system has multiple transient states that don't apply to stored messages. + * We normalize these to the three final states in the new schema. + * + * @param oldStatus - Status from old message + * @returns Normalized status for new message + * + * ## Mapping: + * - 'success' → 'success' + * - 'error' → 'error' + * - 'paused' → 'paused' + * - 'sending', 'pending', 'searching', 'processing' → 'success' (completed states) + */ +export function normalizeStatus(oldStatus: OldMessage['status']): 'success' | 'error' | 'paused' { + switch (oldStatus) { + case 'error': + return 'error' + case 'paused': + return 'paused' + case 'success': + case 'sending': + case 'pending': + case 'searching': + case 'processing': + default: + // All transient states are treated as success for stored messages + // If a message was in a transient state during export, it completed + return 'success' + } +} + +/** + * Merge old usage and metrics into new MessageStats + * + * The old system stored token usage and performance metrics in separate objects. + * The new schema combines them into a single stats object. 
+ * + * @param usage - Token usage data from old message + * @param metrics - Performance metrics from old message + * @returns Combined MessageStats or null if no data + * + * ## Field Mapping: + * | Source | Target | + * |--------|--------| + * | usage.prompt_tokens | promptTokens | + * | usage.completion_tokens | completionTokens | + * | usage.total_tokens | totalTokens | + * | usage.thoughts_tokens | thoughtsTokens | + * | usage.cost | cost | + * | metrics.time_first_token_millsec | timeFirstTokenMs | + * | metrics.time_completion_millsec | timeCompletionMs | + * | metrics.time_thinking_millsec | timeThinkingMs | + */ +export function mergeStats(usage?: OldUsage, metrics?: OldMetrics): MessageStats | null { + if (!usage && !metrics) return null + + const stats: MessageStats = {} + + // Token usage + if (usage) { + if (usage.prompt_tokens !== undefined) stats.promptTokens = usage.prompt_tokens + if (usage.completion_tokens !== undefined) stats.completionTokens = usage.completion_tokens + if (usage.total_tokens !== undefined) stats.totalTokens = usage.total_tokens + if (usage.thoughts_tokens !== undefined) stats.thoughtsTokens = usage.thoughts_tokens + if (usage.cost !== undefined) stats.cost = usage.cost + } + + // Performance metrics + if (metrics) { + if (metrics.time_first_token_millsec !== undefined) stats.timeFirstTokenMs = metrics.time_first_token_millsec + if (metrics.time_completion_millsec !== undefined) stats.timeCompletionMs = metrics.time_completion_millsec + if (metrics.time_thinking_millsec !== undefined) stats.timeThinkingMs = metrics.time_thinking_millsec + } + + // Return null if no data was actually added + return Object.keys(stats).length > 0 ? 
stats : null +} + +// ============================================================================ +// Block Transformation Functions +// ============================================================================ + +/** + * Transform old blocks to new format and extract citation references + * + * This function: + * 1. Converts each old block to new format (removing id, messageId, status) + * 2. Extracts CitationMessageBlocks and converts to ContentReference[] + * 3. Extracts searchable text from text-based blocks + * + * @param oldBlocks - Array of old blocks from message_blocks table + * @returns Object containing: + * - dataBlocks: Transformed blocks (excluding CitationBlocks) + * - citationReferences: Extracted citation references + * - searchableText: Combined searchable text + * + * ## Block Type Mapping: + * | Old Type | New Type | Notes | + * |----------|----------|-------| + * | main_text | MainTextBlock | Direct, references added later | + * | thinking | ThinkingBlock | thinking_millsec → thinkingMs | + * | translation | TranslationBlock | Direct copy | + * | code | CodeBlock | Direct copy | + * | image | ImageBlock | file.id → fileId | + * | file | FileBlock | file.id → fileId | + * | video | VideoBlock | Direct copy | + * | tool | ToolBlock | Direct copy | + * | citation | (removed) | Converted to MainTextBlock.references | + * | error | ErrorBlock | Direct copy | + * | compact | CompactBlock | Direct copy | + * | unknown | (skipped) | Placeholder blocks are dropped | + */ +export function transformBlocks(oldBlocks: OldBlock[]): { + dataBlocks: MessageDataBlock[] + citationReferences: ContentReference[] + searchableText: string +} { + const dataBlocks: MessageDataBlock[] = [] + const citationReferences: ContentReference[] = [] + const searchableTexts: string[] = [] + + for (const oldBlock of oldBlocks) { + const transformed = transformSingleBlock(oldBlock) + + if (transformed.block) { + dataBlocks.push(transformed.block) + } + + if 
(transformed.citations) { + citationReferences.push(...transformed.citations) + } + + if (transformed.searchableText) { + searchableTexts.push(transformed.searchableText) + } + } + + return { + dataBlocks, + citationReferences, + searchableText: searchableTexts.join('\n') + } +} + +/** + * Transform a single old block to new format + * + * @param oldBlock - Single old block + * @returns Transformed block and extracted data + */ +function transformSingleBlock(oldBlock: OldBlock): { + block: MessageDataBlock | null + citations: ContentReference[] | null + searchableText: string | null +} { + const baseFields = { + createdAt: parseTimestamp(oldBlock.createdAt), + updatedAt: oldBlock.updatedAt ? parseTimestamp(oldBlock.updatedAt) : undefined, + metadata: oldBlock.metadata, + error: oldBlock.error as MessageDataBlock['error'] + } + + switch (oldBlock.type) { + case 'main_text': { + const block = oldBlock as OldMainTextBlock + return { + block: { + type: 'main_text' as BlockType.MAIN_TEXT, + content: block.content, + ...baseFields + // knowledgeBaseIds and citationReferences are intentionally dropped + // References will be added from CitationBlocks and mentions + } as MainTextBlock, + citations: null, + searchableText: block.content + } + } + + case 'thinking': { + const block = oldBlock as OldThinkingBlock + return { + block: { + type: 'thinking' as BlockType.THINKING, + content: block.content, + thinkingMs: block.thinking_millsec, // Field rename + ...baseFields + } as ThinkingBlock, + citations: null, + searchableText: block.content + } + } + + case 'translation': { + const block = oldBlock as OldTranslationBlock + return { + block: { + type: 'translation' as BlockType.TRANSLATION, + content: block.content, + sourceBlockId: block.sourceBlockId, + sourceLanguage: block.sourceLanguage, + targetLanguage: block.targetLanguage, + ...baseFields + } as TranslationBlock, + citations: null, + searchableText: block.content + } + } + + case 'code': { + const block = oldBlock as 
OldCodeBlock + return { + block: { + type: 'code' as BlockType.CODE, + content: block.content, + language: block.language, + ...baseFields + } as CodeBlock, + citations: null, + searchableText: block.content + } + } + + case 'image': { + const block = oldBlock as OldImageBlock + return { + block: { + type: 'image' as BlockType.IMAGE, + url: block.url, + fileId: block.file?.id, // file.id → fileId + ...baseFields + } as ImageBlock, + citations: null, + searchableText: null + } + } + + case 'file': { + const block = oldBlock as OldFileBlock + return { + block: { + type: 'file' as BlockType.FILE, + fileId: block.file.id, // file.id → fileId + ...baseFields + } as FileBlock, + citations: null, + searchableText: null + } + } + + case 'video': { + const block = oldBlock as OldVideoBlock + return { + block: { + type: 'video' as BlockType.VIDEO, + url: block.url, + filePath: block.filePath, + ...baseFields + } as VideoBlock, + citations: null, + searchableText: null + } + } + + case 'tool': { + const block = oldBlock as OldToolBlock + return { + block: { + type: 'tool' as BlockType.TOOL, + toolId: block.toolId, + toolName: block.toolName, + arguments: block.arguments, + content: block.content, + ...baseFields + } as ToolBlock, + citations: null, + searchableText: null + } + } + + case 'citation': { + // CitationBlocks are NOT converted to blocks + // Instead, their content is extracted as ContentReferences + const block = oldBlock as OldCitationBlock + const citations = extractCitationReferences(block) + return { + block: null, // No block output + citations, + searchableText: null + } + } + + case 'error': { + return { + block: { + type: 'error' as BlockType.ERROR, + ...baseFields + } as ErrorBlock, + citations: null, + searchableText: null + } + } + + case 'compact': { + const block = oldBlock as OldCompactBlock + return { + block: { + type: 'compact' as BlockType.COMPACT, + content: block.content, + compactedContent: block.compactedContent, + ...baseFields + } as 
CompactBlock, + citations: null, + searchableText: block.content + } + } + + case 'unknown': + default: + // Skip unknown/placeholder blocks + return { + block: null, + citations: null, + searchableText: null + } + } +} + +/** + * Extract ContentReferences from old CitationMessageBlock + * + * Old CitationBlocks contain three types of citations: + * - response (web search results) → WebCitationReference + * - knowledge (knowledge base refs) → KnowledgeCitationReference + * - memories (memory items) → MemoryCitationReference + * + * @param citationBlock - Old CitationMessageBlock + * @returns Array of ContentReferences + */ +export function extractCitationReferences(citationBlock: OldCitationBlock): ContentReference[] { + const references: ContentReference[] = [] + + // Web search citations + if (citationBlock.response) { + references.push({ + category: 'citation' as ReferenceCategory.CITATION, + citationType: 'web' as CitationType.WEB, + content: { + results: citationBlock.response.results, + source: citationBlock.response.source + } + } as CitationReference) + } + + // Knowledge base citations + if (citationBlock.knowledge && citationBlock.knowledge.length > 0) { + references.push({ + category: 'citation' as ReferenceCategory.CITATION, + citationType: 'knowledge' as CitationType.KNOWLEDGE, + content: citationBlock.knowledge.map((k) => ({ + id: k.id, + content: k.content, + sourceUrl: k.sourceUrl, + type: k.type, + file: k.file, + metadata: k.metadata + })) + } as CitationReference) + } + + // Memory citations + if (citationBlock.memories && citationBlock.memories.length > 0) { + references.push({ + category: 'citation' as ReferenceCategory.CITATION, + citationType: 'memory' as CitationType.MEMORY, + content: citationBlock.memories.map((m) => ({ + id: m.id, + memory: m.memory, + hash: m.hash, + createdAt: m.createdAt, + updatedAt: m.updatedAt, + score: m.score, + metadata: m.metadata + })) + } as CitationReference) + } + + return references +} + +/** + * Transform 
old mentions to MentionReferences + * + * Old system stored @mentions as a Model[] array on the message. + * New system stores them as MentionReference[] in MainTextBlock.references. + * + * @param mentions - Array of mentioned models from old message + * @returns Array of MentionReferences + * + * ## Transformation: + * | Old Field | New Field | + * |-----------|-----------| + * | model.id | modelId | + * | model.name | displayName | + */ +export function transformMentions(mentions?: OldModel[]): MentionReference[] { + if (!mentions || mentions.length === 0) return [] + + return mentions.map((model) => ({ + category: 'mention' as ReferenceCategory.MENTION, + modelId: model.id, + displayName: model.name + })) +} + +// ============================================================================ +// Tree Building Functions +// ============================================================================ + +/** + * Build message tree structure from linear message array + * + * The old system stores messages in a linear array. The new system uses + * a tree structure with parentId for navigation. + * + * ## Algorithm: + * 1. Process messages in array order (which is the conversation order) + * 2. 
For each message: + * - If it's a user message or first message, parent is the previous message + * - If it's an assistant message with askId, link to that user message + * - If multiple messages share same askId, they form a siblings group + * + * @param messages - Messages in array order from old topic + * @returns Map of messageId → { parentId, siblingsGroupId } + * + * ## Example: + * ``` + * Input: [u1, a1, u2, a2, a3(askId=u2,foldSelected), a4(askId=u2), u3] + * + * Output: + * u1: { parentId: null, siblingsGroupId: 0 } + * a1: { parentId: 'u1', siblingsGroupId: 0 } + * u2: { parentId: 'a1', siblingsGroupId: 0 } + * a2: { parentId: 'u2', siblingsGroupId: 1 } // Multi-model group + * a3: { parentId: 'u2', siblingsGroupId: 1 } // Selected one + * a4: { parentId: 'u2', siblingsGroupId: 1 } + * u3: { parentId: 'a3', siblingsGroupId: 0 } // Links to foldSelected + * ``` + */ +export function buildMessageTree( + messages: OldMessage[] +): Map<string, { parentId: string | null; siblingsGroupId: number }> { + const result = new Map<string, { parentId: string | null; siblingsGroupId: number }>() + + if (messages.length === 0) return result + + // Track askId → siblingsGroupId mapping + // Each unique askId with multiple responses gets a unique siblingsGroupId + const askIdToGroupId = new Map<string, number>() + const askIdCounts = new Map<string, number>() + + // First pass: count messages per askId to identify multi-model responses + for (const msg of messages) { + if (msg.askId) { + askIdCounts.set(msg.askId, (askIdCounts.get(msg.askId) || 0) + 1) + } + } + + // Assign group IDs to askIds with multiple responses + let nextGroupId = 1 + for (const [askId, count] of askIdCounts) { + if (count > 1) { + askIdToGroupId.set(askId, nextGroupId++) + } + } + + // Second pass: build parent/sibling relationships + let previousMessageId: string | null = null + let lastNonGroupMessageId: string | null = null // Last message not in a group, for linking subsequent user messages + + for (let i = 0; i < messages.length; i++) { + const msg = messages[i] + let parentId: string | null = null + let siblingsGroupId = 0 + + if (msg.askId &&
askIdToGroupId.has(msg.askId)) { + // This is part of a multi-model response group + parentId = msg.askId // Parent is the user message + siblingsGroupId = askIdToGroupId.get(msg.askId)! + + // If this is the selected response, update lastNonGroupMessageId for subsequent user messages + if (msg.foldSelected) { + lastNonGroupMessageId = msg.id + } + } else if (msg.role === 'user' && lastNonGroupMessageId) { + // User message after a multi-model group links to the selected response + parentId = lastNonGroupMessageId + lastNonGroupMessageId = null + } else { + // Normal sequential message - parent is previous message + parentId = previousMessageId + } + + result.set(msg.id, { parentId, siblingsGroupId }) + + // Update tracking for next iteration + previousMessageId = msg.id + + // Update lastNonGroupMessageId for non-group messages + if (siblingsGroupId === 0) { + lastNonGroupMessageId = msg.id + } + } + + return result +} + +/** + * Find the activeNodeId for a topic + * + * The activeNodeId should be the last message in the main conversation thread. + * For multi-model responses, it should be the foldSelected one. 
+ * + * @param messages - Messages in array order + * @returns The ID of the last message (or foldSelected if applicable) + */ +export function findActiveNodeId(messages: OldMessage[]): string | null { + if (messages.length === 0) return null + + // Find the last message + // If it's part of a multi-model group, find the foldSelected one + const lastMsg = messages[messages.length - 1] + + if (lastMsg.askId) { + // Check if there's a foldSelected message with the same askId + const selectedMsg = messages.find((m) => m.askId === lastMsg.askId && m.foldSelected) + if (selectedMsg) return selectedMsg.id + } + + return lastMsg.id +} + +// ============================================================================ +// Utility Functions +// ============================================================================ + +/** + * Parse ISO timestamp string to Unix timestamp (milliseconds) + * + * @param isoString - ISO 8601 timestamp string or undefined + * @returns Unix timestamp in milliseconds + */ +export function parseTimestamp(isoString: string | undefined): number { + if (!isoString) return Date.now() + + const parsed = new Date(isoString).getTime() + return isNaN(parsed) ? Date.now() : parsed +} + +/** + * Build block lookup map from message_blocks table + * + * Creates a Map of blockId → block for fast lookup during message transformation. 
+ * + * @param blocks - All blocks from message_blocks table + * @returns Map for O(1) block lookup + */ +export function buildBlockLookup(blocks: OldBlock[]): Map { + const lookup = new Map() + for (const block of blocks) { + lookup.set(block.id, block) + } + return lookup +} + +/** + * Resolve block IDs to actual block data + * + * @param blockIds - Array of block IDs from message.blocks + * @param blockLookup - Map of blockId → block + * @returns Array of resolved blocks (missing blocks are skipped) + */ +export function resolveBlocks(blockIds: string[], blockLookup: Map): OldBlock[] { + const resolved: OldBlock[] = [] + for (const id of blockIds) { + const block = blockLookup.get(id) + if (block) { + resolved.push(block) + } + } + return resolved +}