fix(knowledge): prevent abnormal deletion of local SQL files during knowledge base removal (#8151)

* fix(knowledge): improve RAG app lifecycle and safe DB cleanup

* feat(knowledge): enhance database reclamation lifecycle

* fix(knowledge): update log messages for pending delete actions

* fix(knowledge): log info when knowledge base deletion fails on startup

* fix(knowledge): remove KnowledgeService cleanup and simplify app exit

* fix(logging): update message for pending knowledge base deletion

* fix(knowledge): optimize log level
This commit is contained in:
George·Dong 2025-07-23 11:26:49 +08:00 committed by GitHub
parent 736f73a726
commit f6f55e0609
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 135 additions and 13 deletions

View File

@ -180,13 +180,12 @@ if (!app.requestSingleInstanceLock()) {
})
app.on('will-quit', async () => {
// event.preventDefault()
// 简单的资源清理,不阻塞退出流程
try {
await mcpService.cleanup()
} catch (error) {
logger.error('Error cleaning up MCP service:', error)
logger.warn('Error cleaning up MCP service:', error)
}
// finish the logger
logger.finish()
})

View File

@ -97,10 +97,13 @@ const loaderTaskIntoOfSet = (loaderTask: LoaderTask): LoaderTaskOfSet => {
class KnowledgeService {
private storageDir = path.join(getDataPath(), 'KnowledgeBase')
private pendingDeleteFile = path.join(this.storageDir, 'knowledge_pending_delete.json')
// Byte based
private workload = 0
private processingItemCount = 0
private knowledgeItemProcessingQueueMappingPromise: Map<LoaderTaskOfSet, () => void> = new Map()
private ragApplications: Map<string, RAGApplication> = new Map()
private dbInstances: Map<string, LibSqlDb> = new Map()
private static MAXIMUM_WORKLOAD = 80 * MB
private static MAXIMUM_PROCESSING_ITEM_COUNT = 30
private static ERROR_LOADER_RETURN: LoaderReturn = {
@ -113,6 +116,7 @@ class KnowledgeService {
constructor() {
this.initStorageDir()
this.cleanupOnStartup()
}
private initStorageDir = (): void => {
@ -121,24 +125,137 @@ class KnowledgeService {
}
}
/**
* Clean up knowledge base resources (RAG applications and database connections in memory)
*/
private cleanupKnowledgeResources = async (id: string): Promise<void> => {
try {
// Remove RAG application instance
if (this.ragApplications.has(id)) {
const ragApp = this.ragApplications.get(id)!
await ragApp.reset()
this.ragApplications.delete(id)
logger.debug(`Cleaned up RAG application for id: ${id}`)
}
// Remove database instance reference
if (this.dbInstances.has(id)) {
this.dbInstances.delete(id)
logger.debug(`Removed database instance reference for id: ${id}`)
}
} catch (error) {
logger.warn(`Failed to cleanup resources for id: ${id}`, error)
}
}
/**
* Delete knowledge base file
*/
private deleteKnowledgeFile = (id: string): boolean => {
const dbPath = path.join(this.storageDir, id)
if (fs.existsSync(dbPath)) {
try {
fs.rmSync(dbPath, { recursive: true })
logger.debug(`Deleted knowledge base file with id: ${id}`)
return true
} catch (error) {
logger.warn(`Failed to delete knowledge base file with id: ${id}: ${error}`)
return false
}
}
return true // File does not exist, consider deletion successful
}
/**
* Manage persistent deletion list
*/
private pendingDeleteManager = {
load: (): string[] => {
try {
if (fs.existsSync(this.pendingDeleteFile)) {
return JSON.parse(fs.readFileSync(this.pendingDeleteFile, 'utf-8')) as string[]
}
} catch (error) {
logger.warn('Failed to load pending delete IDs:', error)
}
return []
},
save: (ids: string[]): void => {
try {
fs.writeFileSync(this.pendingDeleteFile, JSON.stringify(ids, null, 2))
logger.debug(`Total ${ids.length} knowledge bases pending delete`)
} catch (error) {
logger.warn('Failed to save pending delete IDs:', error)
}
},
add: (id: string): void => {
const existingIds = this.pendingDeleteManager.load()
const allIds = [...new Set([...existingIds, id])]
this.pendingDeleteManager.save(allIds)
},
clear: (): void => {
try {
if (fs.existsSync(this.pendingDeleteFile)) {
fs.unlinkSync(this.pendingDeleteFile)
}
} catch (error) {
logger.warn('Failed to clear pending delete file:', error)
}
}
}
/**
* Clean up databases marked for deletion on startup
*/
private cleanupOnStartup = (): void => {
const pendingDeleteIds = this.pendingDeleteManager.load()
if (pendingDeleteIds.length === 0) return
logger.info(`Found ${pendingDeleteIds.length} knowledge bases pending deletion from previous session`)
let deletedCount = 0
pendingDeleteIds.forEach((id) => {
if (this.deleteKnowledgeFile(id)) {
deletedCount++
} else {
logger.warn(`Failed to delete knowledge base ${id}, please delete it manually`)
}
})
this.pendingDeleteManager.clear()
logger.info(`Startup cleanup completed: ${deletedCount}/${pendingDeleteIds.length} knowledge bases deleted`)
}
private getRagApplication = async ({
id,
embedApiClient,
dimensions,
documentCount
}: KnowledgeBaseParams): Promise<RAGApplication> => {
if (this.ragApplications.has(id)) {
return this.ragApplications.get(id)!
}
let ragApplication: RAGApplication
const embeddings = new Embeddings({
embedApiClient,
dimensions
})
try {
const libSqlDb = new LibSqlDb({ path: path.join(this.storageDir, id) })
// Save database instance for later closing
this.dbInstances.set(id, libSqlDb)
ragApplication = await new RAGApplicationBuilder()
.setModel('NO_MODEL')
.setEmbeddingModel(embeddings)
.setVectorDatabase(new LibSqlDb({ path: path.join(this.storageDir, id) }))
.setVectorDatabase(libSqlDb)
.setSearchResultCount(documentCount || 30)
.build()
this.ragApplications.set(id, ragApplication)
} catch (e) {
logger.error('Failed to create RAGApplication:', e)
throw new Error(`Failed to create RAGApplication: ${e}`)
@ -148,7 +265,7 @@ class KnowledgeService {
}
public create = async (_: Electron.IpcMainInvokeEvent, base: KnowledgeBaseParams): Promise<void> => {
this.getRagApplication(base)
await this.getRagApplication(base)
}
public reset = async (_: Electron.IpcMainInvokeEvent, base: KnowledgeBaseParams): Promise<void> => {
@ -158,9 +275,15 @@ class KnowledgeService {
public async delete(_: Electron.IpcMainInvokeEvent, id: string): Promise<void> {
logger.debug('delete id', id)
const dbPath = path.join(this.storageDir, id)
if (fs.existsSync(dbPath)) {
fs.rmSync(dbPath, { recursive: true })
await this.cleanupKnowledgeResources(id)
await new Promise((resolve) => setTimeout(resolve, 100))
// Try to delete database file immediately
if (!this.deleteKnowledgeFile(id)) {
logger.debug(`Will delete knowledge base ${id} on next startup`)
this.pendingDeleteManager.add(id)
}
}
@ -183,10 +306,10 @@ class KnowledgeService {
state: LoaderTaskItemState.PENDING,
task: async () => {
try {
// 添加预处理逻辑
// Add preprocessing logic
const fileToProcess: FileMetadata = await this.preprocessing(file, base, item, userId)
// 使用处理后的文件进行加载
// Use processed file for loading
return addFileLoader(ragApplication, fileToProcess, base, forceReload)
.then((result) => {
loaderTask.loaderDoneReturn = result
@ -572,14 +695,14 @@ class KnowledgeService {
} else {
provider = new OcrProvider(base.preprocessOrOcrProvider.provider)
}
// 首先检查文件是否已经被预处理过
// Check if file has already been preprocessed
const alreadyProcessed = await provider.checkIfAlreadyProcessed(file)
if (alreadyProcessed) {
logger.debug(`File already preprocess processed, using cached result: ${file.path}`)
return alreadyProcessed
}
// 执行预处理
// Execute preprocessing
logger.debug(`Starting preprocess processing for scanned PDF: ${file.path}`)
const { processedFile, quota } = await provider.parseFile(item.id, file)
fileToProcess = processedFile
@ -590,7 +713,7 @@ class KnowledgeService {
})
} catch (err) {
logger.error(`Preprocess processing failed: ${err}`)
// 如果预处理失败,使用原始文件
// If preprocessing fails, use original file
// fileToProcess = file
throw new Error(`Preprocess processing failed: ${err}`)
}