cherry-studio/src/main/knowledge/ocr/MacSysOcrProvider.ts
fullex c2086fdb15
refactor[Logger]: strict type check for Logger (#8363)
* fix: strict type check of logger

* feat: logger format in renderer

* fix: error type
2025-07-23 13:24:03 +08:00

131 lines
4.2 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { loggerService } from '@logger'
import { isMac } from '@main/constant'
import { FileMetadata, OcrProvider } from '@types'
import * as fs from 'fs'
import * as path from 'path'
import { TextItem } from 'pdfjs-dist/types/src/display/api'
import BaseOcrProvider from './BaseOcrProvider'
const logger = loggerService.withContext('MacSysOcrProvider')
export default class MacSysOcrProvider extends BaseOcrProvider {
private readonly MIN_TEXT_LENGTH = 1000
private MacOCR: any
private async initMacOCR() {
if (!isMac) {
throw new Error('MacSysOcrProvider is only available on macOS')
}
if (!this.MacOCR) {
try {
// @ts-ignore This module is optional and only installed/available on macOS. Runtime checks prevent execution on other platforms.
const module = await import('@cherrystudio/mac-system-ocr')
this.MacOCR = module.default
} catch (error) {
logger.error('Failed to load mac-system-ocr:', error as Error)
throw error
}
}
return this.MacOCR
}
private getRecognitionLevel(level?: number) {
return level === 0 ? this.MacOCR.RECOGNITION_LEVEL_FAST : this.MacOCR.RECOGNITION_LEVEL_ACCURATE
}
constructor(provider: OcrProvider) {
super(provider)
}
private async processPages(
results: any,
totalPages: number,
sourceId: string,
writeStream: fs.WriteStream
): Promise<void> {
await this.initMacOCR()
// TODO: 下个版本后面使用批处理以及p-queue来优化
for (let i = 0; i < totalPages; i++) {
// Convert pages to buffers
const pageNum = i + 1
const pageBuffer = await results.getPage(pageNum)
// Process batch
const ocrResult = await this.MacOCR.recognizeFromBuffer(pageBuffer, {
ocrOptions: {
recognitionLevel: this.getRecognitionLevel(this.provider.options?.recognitionLevel),
minConfidence: this.provider.options?.minConfidence || 0.5
}
})
// Write results in order
writeStream.write(ocrResult.text + '\n')
// Update progress
await this.sendOcrProgress(sourceId, (pageNum / totalPages) * 100)
}
}
public async isScanPdf(buffer: Buffer): Promise<boolean> {
const doc = await this.readPdf(new Uint8Array(buffer))
const pageLength = doc.numPages
let counts = 0
const pagesToCheck = Math.min(pageLength, 10)
for (let i = 0; i < pagesToCheck; i++) {
const page = await doc.getPage(i + 1)
const pageData = await page.getTextContent()
const pageText = pageData.items.map((item) => (item as TextItem).str).join('')
counts += pageText.length
if (counts >= this.MIN_TEXT_LENGTH) {
return false
}
}
return true
}
public async parseFile(sourceId: string, file: FileMetadata): Promise<{ processedFile: FileMetadata }> {
logger.info(`Starting OCR process for file: ${file.name}`)
if (file.ext === '.pdf') {
try {
const { pdf } = await import('@cherrystudio/pdf-to-img-napi')
const pdfBuffer = await fs.promises.readFile(file.path)
const results = await pdf(pdfBuffer, {
scale: 2
})
const totalPages = results.length
const baseDir = path.dirname(file.path)
const baseName = path.basename(file.path, path.extname(file.path))
const txtFileName = `${baseName}.txt`
const txtFilePath = path.join(baseDir, txtFileName)
const writeStream = fs.createWriteStream(txtFilePath)
await this.processPages(results, totalPages, sourceId, writeStream)
await new Promise<void>((resolve, reject) => {
writeStream.end(() => {
logger.info(`OCR process completed successfully for ${file.origin_name}`)
resolve()
})
writeStream.on('error', reject)
})
const movedPaths = this.moveToAttachmentsDir(file.id, [txtFilePath])
return {
processedFile: {
...file,
name: txtFileName,
path: movedPaths[0],
ext: '.txt',
size: fs.statSync(movedPaths[0]).size
}
}
} catch (error) {
logger.error('Error during OCR process:', error as Error)
throw error
}
}
return { processedFile: file }
}
}