diff --git a/packages/shared/config/constant.ts b/packages/shared/config/constant.ts
index af1d8b57c4..9240bb73f5 100644
--- a/packages/shared/config/constant.ts
+++ b/packages/shared/config/constant.ts
@@ -199,7 +199,7 @@ export enum FeedUrl {
 export const tesseractLangs = ['chi_sim', 'chi_tra', 'eng']
 
 export enum TesseractLangsDownloadUrl {
-  CN = 'https://gitcode.com/beyondkmp/tessdata/blob/main/',
+  CN = 'https://gitcode.com/beyondkmp/tessdata/releases/download/4.1.0/',
   GLOBAL = 'https://github.com/tesseract-ocr/tessdata/raw/main/'
 }
 
diff --git a/src/main/services/ocr/tesseract/TesseractService.ts b/src/main/services/ocr/tesseract/TesseractService.ts
index 8440e48c79..82764c3a5b 100644
--- a/src/main/services/ocr/tesseract/TesseractService.ts
+++ b/src/main/services/ocr/tesseract/TesseractService.ts
@@ -2,6 +2,7 @@ import { loggerService } from '@logger'
 import { getIpCountry } from '@main/utils/ipService'
 import { TesseractLangsDownloadUrl } from '@shared/config/constant'
 import { app } from 'electron'
+import fs from 'fs'
 import path from 'path'
 import Tesseract, { createWorker } from 'tesseract.js'
 
@@ -120,7 +121,8 @@ export class TesseractService {
       // for now, only support limited languages
       this.worker = await createWorker(['chi_sim', 'chi_tra', 'eng'], undefined, {
         langPath: await this._getLangPath(),
-        cachePath: this._getCacheDir(),
+        cachePath: await this._getCacheDir(),
+        gzip: false,
         logger: (m) => logger.debug('From worker', m)
       })
     }
@@ -132,8 +134,12 @@ export class TesseractService {
     return country.toLowerCase() === 'cn' ? TesseractLangsDownloadUrl.CN : TesseractLangsDownloadUrl.GLOBAL
   }
 
-  private _getCacheDir(): string {
-    return path.join(app.getPath('userData'), 'tesseract')
+  private async _getCacheDir(): Promise<string> {
+    const cacheDir = path.join(app.getPath('userData'), 'tesseract')
+    if (!fs.existsSync(cacheDir)) {
+      await fs.promises.mkdir(cacheDir, { recursive: true })
+    }
+    return cacheDir
   }
 
   async dispose(): Promise {