mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2026-01-04 11:49:02 +08:00
fix(knowledge): prevent abnormal deletion of local SQL files during knowledge base removal (#8151)
* fix(knowledge): improve RAG app lifecycle and safe DB cleanup * feat(knowledge): enhance database reclamation lifecycle * fix(knowledge): update log messages for pending delete actions * fix(knowledge): log info when knowledge base deletion fails on startup * fix(knowledge): remove KnowledgeService cleanup and simplify app exit * fix(logging): update message for pending knowledge base deletion * fix(knowledge): optimize log level
This commit is contained in:
parent
736f73a726
commit
f6f55e0609
@ -180,13 +180,12 @@ if (!app.requestSingleInstanceLock()) {
|
||||
})
|
||||
|
||||
app.on('will-quit', async () => {
|
||||
// event.preventDefault()
|
||||
// 简单的资源清理,不阻塞退出流程
|
||||
try {
|
||||
await mcpService.cleanup()
|
||||
} catch (error) {
|
||||
logger.error('Error cleaning up MCP service:', error)
|
||||
logger.warn('Error cleaning up MCP service:', error)
|
||||
}
|
||||
|
||||
// finish the logger
|
||||
logger.finish()
|
||||
})
|
||||
|
||||
@ -97,10 +97,13 @@ const loaderTaskIntoOfSet = (loaderTask: LoaderTask): LoaderTaskOfSet => {
|
||||
|
||||
class KnowledgeService {
|
||||
private storageDir = path.join(getDataPath(), 'KnowledgeBase')
|
||||
private pendingDeleteFile = path.join(this.storageDir, 'knowledge_pending_delete.json')
|
||||
// Byte based
|
||||
private workload = 0
|
||||
private processingItemCount = 0
|
||||
private knowledgeItemProcessingQueueMappingPromise: Map<LoaderTaskOfSet, () => void> = new Map()
|
||||
private ragApplications: Map<string, RAGApplication> = new Map()
|
||||
private dbInstances: Map<string, LibSqlDb> = new Map()
|
||||
private static MAXIMUM_WORKLOAD = 80 * MB
|
||||
private static MAXIMUM_PROCESSING_ITEM_COUNT = 30
|
||||
private static ERROR_LOADER_RETURN: LoaderReturn = {
|
||||
@ -113,6 +116,7 @@ class KnowledgeService {
|
||||
|
||||
constructor() {
|
||||
this.initStorageDir()
|
||||
this.cleanupOnStartup()
|
||||
}
|
||||
|
||||
private initStorageDir = (): void => {
|
||||
@ -121,24 +125,137 @@ class KnowledgeService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up knowledge base resources (RAG applications and database connections in memory)
|
||||
*/
|
||||
private cleanupKnowledgeResources = async (id: string): Promise<void> => {
|
||||
try {
|
||||
// Remove RAG application instance
|
||||
if (this.ragApplications.has(id)) {
|
||||
const ragApp = this.ragApplications.get(id)!
|
||||
await ragApp.reset()
|
||||
this.ragApplications.delete(id)
|
||||
logger.debug(`Cleaned up RAG application for id: ${id}`)
|
||||
}
|
||||
|
||||
// Remove database instance reference
|
||||
if (this.dbInstances.has(id)) {
|
||||
this.dbInstances.delete(id)
|
||||
logger.debug(`Removed database instance reference for id: ${id}`)
|
||||
}
|
||||
} catch (error) {
|
||||
logger.warn(`Failed to cleanup resources for id: ${id}`, error)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete knowledge base file
|
||||
*/
|
||||
private deleteKnowledgeFile = (id: string): boolean => {
|
||||
const dbPath = path.join(this.storageDir, id)
|
||||
if (fs.existsSync(dbPath)) {
|
||||
try {
|
||||
fs.rmSync(dbPath, { recursive: true })
|
||||
logger.debug(`Deleted knowledge base file with id: ${id}`)
|
||||
return true
|
||||
} catch (error) {
|
||||
logger.warn(`Failed to delete knowledge base file with id: ${id}: ${error}`)
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true // File does not exist, consider deletion successful
|
||||
}
|
||||
|
||||
/**
|
||||
* Manage persistent deletion list
|
||||
*/
|
||||
private pendingDeleteManager = {
|
||||
load: (): string[] => {
|
||||
try {
|
||||
if (fs.existsSync(this.pendingDeleteFile)) {
|
||||
return JSON.parse(fs.readFileSync(this.pendingDeleteFile, 'utf-8')) as string[]
|
||||
}
|
||||
} catch (error) {
|
||||
logger.warn('Failed to load pending delete IDs:', error)
|
||||
}
|
||||
return []
|
||||
},
|
||||
|
||||
save: (ids: string[]): void => {
|
||||
try {
|
||||
fs.writeFileSync(this.pendingDeleteFile, JSON.stringify(ids, null, 2))
|
||||
logger.debug(`Total ${ids.length} knowledge bases pending delete`)
|
||||
} catch (error) {
|
||||
logger.warn('Failed to save pending delete IDs:', error)
|
||||
}
|
||||
},
|
||||
|
||||
add: (id: string): void => {
|
||||
const existingIds = this.pendingDeleteManager.load()
|
||||
const allIds = [...new Set([...existingIds, id])]
|
||||
this.pendingDeleteManager.save(allIds)
|
||||
},
|
||||
|
||||
clear: (): void => {
|
||||
try {
|
||||
if (fs.existsSync(this.pendingDeleteFile)) {
|
||||
fs.unlinkSync(this.pendingDeleteFile)
|
||||
}
|
||||
} catch (error) {
|
||||
logger.warn('Failed to clear pending delete file:', error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up databases marked for deletion on startup
|
||||
*/
|
||||
private cleanupOnStartup = (): void => {
|
||||
const pendingDeleteIds = this.pendingDeleteManager.load()
|
||||
if (pendingDeleteIds.length === 0) return
|
||||
|
||||
logger.info(`Found ${pendingDeleteIds.length} knowledge bases pending deletion from previous session`)
|
||||
|
||||
let deletedCount = 0
|
||||
pendingDeleteIds.forEach((id) => {
|
||||
if (this.deleteKnowledgeFile(id)) {
|
||||
deletedCount++
|
||||
} else {
|
||||
logger.warn(`Failed to delete knowledge base ${id}, please delete it manually`)
|
||||
}
|
||||
})
|
||||
|
||||
this.pendingDeleteManager.clear()
|
||||
logger.info(`Startup cleanup completed: ${deletedCount}/${pendingDeleteIds.length} knowledge bases deleted`)
|
||||
}
|
||||
|
||||
private getRagApplication = async ({
|
||||
id,
|
||||
embedApiClient,
|
||||
dimensions,
|
||||
documentCount
|
||||
}: KnowledgeBaseParams): Promise<RAGApplication> => {
|
||||
if (this.ragApplications.has(id)) {
|
||||
return this.ragApplications.get(id)!
|
||||
}
|
||||
|
||||
let ragApplication: RAGApplication
|
||||
const embeddings = new Embeddings({
|
||||
embedApiClient,
|
||||
dimensions
|
||||
})
|
||||
try {
|
||||
const libSqlDb = new LibSqlDb({ path: path.join(this.storageDir, id) })
|
||||
// Save database instance for later closing
|
||||
this.dbInstances.set(id, libSqlDb)
|
||||
|
||||
ragApplication = await new RAGApplicationBuilder()
|
||||
.setModel('NO_MODEL')
|
||||
.setEmbeddingModel(embeddings)
|
||||
.setVectorDatabase(new LibSqlDb({ path: path.join(this.storageDir, id) }))
|
||||
.setVectorDatabase(libSqlDb)
|
||||
.setSearchResultCount(documentCount || 30)
|
||||
.build()
|
||||
this.ragApplications.set(id, ragApplication)
|
||||
} catch (e) {
|
||||
logger.error('Failed to create RAGApplication:', e)
|
||||
throw new Error(`Failed to create RAGApplication: ${e}`)
|
||||
@ -148,7 +265,7 @@ class KnowledgeService {
|
||||
}
|
||||
|
||||
public create = async (_: Electron.IpcMainInvokeEvent, base: KnowledgeBaseParams): Promise<void> => {
|
||||
this.getRagApplication(base)
|
||||
await this.getRagApplication(base)
|
||||
}
|
||||
|
||||
public reset = async (_: Electron.IpcMainInvokeEvent, base: KnowledgeBaseParams): Promise<void> => {
|
||||
@ -158,9 +275,15 @@ class KnowledgeService {
|
||||
|
||||
public async delete(_: Electron.IpcMainInvokeEvent, id: string): Promise<void> {
|
||||
logger.debug('delete id', id)
|
||||
const dbPath = path.join(this.storageDir, id)
|
||||
if (fs.existsSync(dbPath)) {
|
||||
fs.rmSync(dbPath, { recursive: true })
|
||||
|
||||
await this.cleanupKnowledgeResources(id)
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 100))
|
||||
|
||||
// Try to delete database file immediately
|
||||
if (!this.deleteKnowledgeFile(id)) {
|
||||
logger.debug(`Will delete knowledge base ${id} on next startup`)
|
||||
this.pendingDeleteManager.add(id)
|
||||
}
|
||||
}
|
||||
|
||||
@ -183,10 +306,10 @@ class KnowledgeService {
|
||||
state: LoaderTaskItemState.PENDING,
|
||||
task: async () => {
|
||||
try {
|
||||
// 添加预处理逻辑
|
||||
// Add preprocessing logic
|
||||
const fileToProcess: FileMetadata = await this.preprocessing(file, base, item, userId)
|
||||
|
||||
// 使用处理后的文件进行加载
|
||||
// Use processed file for loading
|
||||
return addFileLoader(ragApplication, fileToProcess, base, forceReload)
|
||||
.then((result) => {
|
||||
loaderTask.loaderDoneReturn = result
|
||||
@ -572,14 +695,14 @@ class KnowledgeService {
|
||||
} else {
|
||||
provider = new OcrProvider(base.preprocessOrOcrProvider.provider)
|
||||
}
|
||||
// 首先检查文件是否已经被预处理过
|
||||
// Check if file has already been preprocessed
|
||||
const alreadyProcessed = await provider.checkIfAlreadyProcessed(file)
|
||||
if (alreadyProcessed) {
|
||||
logger.debug(`File already preprocess processed, using cached result: ${file.path}`)
|
||||
return alreadyProcessed
|
||||
}
|
||||
|
||||
// 执行预处理
|
||||
// Execute preprocessing
|
||||
logger.debug(`Starting preprocess processing for scanned PDF: ${file.path}`)
|
||||
const { processedFile, quota } = await provider.parseFile(item.id, file)
|
||||
fileToProcess = processedFile
|
||||
@ -590,7 +713,7 @@ class KnowledgeService {
|
||||
})
|
||||
} catch (err) {
|
||||
logger.error(`Preprocess processing failed: ${err}`)
|
||||
// 如果预处理失败,使用原始文件
|
||||
// If preprocessing fails, use original file
|
||||
// fileToProcess = file
|
||||
throw new Error(`Preprocess processing failed: ${err}`)
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user