refactor(migration): improve ChatMigrator's handling of duplicate messages and active node selection

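- Enhanced duplicate message ID handling: duplicates are now also detected within the current batch, parentId references to remapped IDs are updated, and seenMessageIds is only updated after the transaction succeeds (transaction safety).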
- Implemented smart selection logic for activeNodeId, in priority order: the original active node (if migrated), then a migrated foldSelected message, then the last migrated message.
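- Updated documentation to reflect changes in duplicate handling and active node selection strategies.
- Renamed the topic count validation error key from `topic_count` to `topic_count_low`, and log a warning when the migrated topic count is higher than expected.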
This commit is contained in:
fullex 2026-01-01 23:26:28 +08:00
parent 4f4785396a
commit 71a7b1b7ea
2 changed files with 60 additions and 24 deletions

View File

@ -63,6 +63,7 @@ import { BaseMigrator } from './BaseMigrator'
import { import {
buildBlockLookup, buildBlockLookup,
buildMessageTree, buildMessageTree,
findActiveNodeId,
type NewMessage, type NewMessage,
type NewTopic, type NewTopic,
type OldAssistant, type OldAssistant,
@ -287,34 +288,52 @@ export class ChatMigrator extends BaseMigrator {
// Insert topics in a transaction // Insert topics in a transaction
if (preparedData.length > 0) { if (preparedData.length > 0) {
// Collect all messages and handle duplicates BEFORE transaction
// This ensures parentId references are updated correctly
const allMessages: NewMessage[] = []
const idRemapping = new Map<string, string>() // oldId → newId for duplicates
const batchMessageIds = new Set<string>() // IDs added in this batch (for transaction safety)
for (const data of preparedData) {
for (const msg of data.messages) {
if (this.seenMessageIds.has(msg.id) || batchMessageIds.has(msg.id)) {
const newId = uuidv4()
logger.warn(`Duplicate message ID found: ${msg.id}, assigning new ID: ${newId}`)
idRemapping.set(msg.id, newId)
msg.id = newId
}
batchMessageIds.add(msg.id)
allMessages.push(msg)
}
}
// Update parentId references for any remapped IDs
if (idRemapping.size > 0) {
for (const msg of allMessages) {
if (msg.parentId && idRemapping.has(msg.parentId)) {
msg.parentId = idRemapping.get(msg.parentId)!
}
}
}
// Execute transaction
await db.transaction(async (tx) => { await db.transaction(async (tx) => {
// Insert topics // Insert topics
const topicValues = preparedData.map((d) => d.topic) const topicValues = preparedData.map((d) => d.topic)
await tx.insert(topicTable).values(topicValues) await tx.insert(topicTable).values(topicValues)
// Collect all messages, handling duplicate IDs by generating new ones
const allMessages: NewMessage[] = []
for (const data of preparedData) {
for (const msg of data.messages) {
if (this.seenMessageIds.has(msg.id)) {
const newId = uuidv4()
logger.warn(`Duplicate message ID found: ${msg.id}, assigning new ID: ${newId}`)
msg.id = newId
}
this.seenMessageIds.add(msg.id)
allMessages.push(msg)
}
}
// Insert messages in batches (SQLite parameter limit) // Insert messages in batches (SQLite parameter limit)
for (let i = 0; i < allMessages.length; i += MESSAGE_INSERT_BATCH_SIZE) { for (let i = 0; i < allMessages.length; i += MESSAGE_INSERT_BATCH_SIZE) {
const batch = allMessages.slice(i, i + MESSAGE_INSERT_BATCH_SIZE) const batch = allMessages.slice(i, i + MESSAGE_INSERT_BATCH_SIZE)
await tx.insert(messageTable).values(batch) await tx.insert(messageTable).values(batch)
} }
processedMessages += allMessages.length
}) })
// Update state ONLY after transaction succeeds (transaction safety)
for (const id of batchMessageIds) {
this.seenMessageIds.add(id)
}
processedMessages += allMessages.length
processedTopics += preparedData.length processedTopics += preparedData.length
} }
@ -389,9 +408,12 @@ export class ChatMigrator extends BaseMigrator {
const expectedTopics = this.topicCount - this.skippedTopics const expectedTopics = this.topicCount - this.skippedTopics
if (targetTopicCount < expectedTopics) { if (targetTopicCount < expectedTopics) {
errors.push({ errors.push({
key: 'topic_count', key: 'topic_count_low',
message: `Topic count mismatch: expected ${expectedTopics}, got ${targetTopicCount}` message: `Topic count too low: expected ${expectedTopics}, got ${targetTopicCount}`
}) })
} else if (targetTopicCount > expectedTopics) {
// More topics than expected could indicate duplicate insertions or data corruption
logger.warn(`Topic count higher than expected: expected ${expectedTopics}, got ${targetTopicCount}`)
} }
// Sample validation: check a few topics have messages // Sample validation: check a few topics have messages
@ -604,12 +626,26 @@ export class ChatMigrator extends BaseMigrator {
} }
} }
// Calculate activeNodeId based on migrated messages (not original messages) // Calculate activeNodeId using smart selection logic
// If no messages were migrated, set to null // Priority: 1) Original activeNode if migrated, 2) foldSelected if migrated, 3) last migrated
let activeNodeId: string | null = null let activeNodeId: string | null = null
if (newMessages.length > 0) { if (newMessages.length > 0) {
// Use the last migrated message as active node const migratedIds = new Set(newMessages.map((m) => m.id))
activeNodeId = newMessages[newMessages.length - 1].id
// Try to use the original active node (handles foldSelected for multi-model)
const originalActiveId = findActiveNodeId(oldMessages)
if (originalActiveId && migratedIds.has(originalActiveId)) {
activeNodeId = originalActiveId
} else {
// Original active was skipped; find a foldSelected among migrated messages
const foldSelectedMsg = oldMessages.find((m) => m.foldSelected && migratedIds.has(m.id))
if (foldSelectedMsg) {
activeNodeId = foldSelectedMsg.id
} else {
// Fallback to last migrated message
activeNodeId = newMessages[newMessages.length - 1].id
}
}
} }
// Transform topic with correct activeNodeId // Transform topic with correct activeNodeId

View File

@ -51,7 +51,7 @@ The migrator handles potential data inconsistencies from the old system:
| Issue | Detection | Handling | | Issue | Detection | Handling |
|-------|-----------|----------| |-------|-----------|----------|
| **Duplicate message ID** | Same ID appears in multiple topics | Generate new UUID, log warning | | **Duplicate message ID** | Same ID appears in multiple topics | Generate new UUID, update parentId refs, log warning |
| **TopicId mismatch** | `message.topicId` ≠ parent `topic.id` | Use correct parent topic.id (silent fix) | | **TopicId mismatch** | `message.topicId` ≠ parent `topic.id` | Use correct parent topic.id (silent fix) |
| **Missing blocks** | Block ID not found in `message_blocks` | Skip missing block (silent) | | **Missing blocks** | Block ID not found in `message_blocks` | Skip missing block (silent) |
| **Invalid topic** | Topic missing required `id` field | Skip entire topic | | **Invalid topic** | Topic missing required `id` field | Skip entire topic |
@ -75,7 +75,7 @@ Topic data is merged from Dexie + Redux before transformation:
| Redux: (parent assistant.id) | `assistantId` | From `topicAssistantLookup` mapping | | Redux: (parent assistant.id) | `assistantId` | From `topicAssistantLookup` mapping |
| (from Assistant) | `assistantMeta` | Generated from assistant entity | | (from Assistant) | `assistantMeta` | Generated from assistant entity |
| Redux: `prompt` | `prompt` | Merged from Redux | | Redux: `prompt` | `prompt` | Merged from Redux |
| (computed) | `activeNodeId` | Last message ID or foldSelected | | (computed) | `activeNodeId` | Smart selection: original active → foldSelected → last migrated |
| (none) | `groupId` | null (new field) | | (none) | `groupId` | null (new field) |
| (none) | `sortOrder` | 0 (new field) | | (none) | `sortOrder` | 0 (new field) |
| Redux: `pinned` | `isPinned` | Merged from Redux, renamed | | Redux: `pinned` | `isPinned` | Merged from Redux, renamed |