diff --git a/src/constants/languages.d.ts b/src/constants/languages.d.ts
new file mode 100644
index 0000000000000000000000000000000000000000..6a2ba5086187622b8ca8887bcc7406018fba8a89
--- /dev/null
+++ b/src/constants/languages.d.ts
@@ -0,0 +1,43 @@
+/**
+ * Languages with existing tesseract traineddata
+ * https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016
+ */
+
+// Language codes (lowercase string literals) accepted wherever a language is named
+type LanguageCode =
+  | 'afr' | 'amh' | 'ara' | 'asm' | 'aze' | 'aze_cyrl' | 'bel' | 'ben' | 'bod' | 'bos'
+  | 'bul' | 'cat' | 'ceb' | 'ces' | 'chi_sim' | 'chi_tra' | 'chr' | 'cym' | 'dan' | 'deu'
+  | 'dzo' | 'ell' | 'eng' | 'enm' | 'epo' | 'est' | 'eus' | 'fas' | 'fin' | 'fra'
+  | 'frk' | 'frm' | 'gle' | 'glg' | 'grc' | 'guj' | 'hat' | 'heb' | 'hin' | 'hrv'
+  | 'hun' | 'iku' | 'ind' | 'isl' | 'ita' | 'ita_old' | 'jav' | 'jpn' | 'kan' | 'kat'
+  | 'kat_old' | 'kaz' | 'khm' | 'kir' | 'kor' | 'kur' | 'lao' | 'lat' | 'lav' | 'lit'
+  | 'mal' | 'mar' | 'mkd' | 'mlt' | 'msa' | 'mya' | 'nep' | 'nld' | 'nor' | 'ori'
+  | 'pan' | 'pol' | 'por' | 'pus' | 'ron' | 'rus' | 'san' | 'sin' | 'slk' | 'slv'
+  | 'spa' | 'spa_old' | 'sqi' | 'srp' | 'srp_latn' | 'swa' | 'swe' | 'syr' | 'tam' | 'tel'
+  | 'tgk' | 'tgl' | 'tha' | 'tir' | 'tur' | 'uig' | 'ukr' | 'urd' | 'uzb' | 'uzb_cyrl'
+  | 'vie' | 'yid';
+
+// Keys of the LANGUAGES table: the same codes as above, upper-cased
+type LanguageKey =
+  | 'AFR' | 'AMH' | 'ARA' | 'ASM' | 'AZE' | 'AZE_CYRL' | 'BEL' | 'BEN' | 'BOD' | 'BOS'
+  | 'BUL' | 'CAT' | 'CEB' | 'CES' | 'CHI_SIM' | 'CHI_TRA' | 'CHR' | 'CYM' | 'DAN' | 'DEU'
+  | 'DZO' | 'ELL' | 'ENG' | 'ENM' | 'EPO' | 'EST' | 'EUS' | 'FAS' | 'FIN' | 'FRA'
+  | 'FRK' | 'FRM' | 'GLE' | 'GLG' | 'GRC' | 'GUJ' | 'HAT' | 'HEB' | 'HIN' | 'HRV'
+  | 'HUN' | 'IKU' | 'IND' | 'ISL' | 'ITA' | 'ITA_OLD' | 'JAV' | 'JPN' | 'KAN' | 'KAT'
+  | 'KAT_OLD' | 'KAZ' | 'KHM' | 'KIR' | 'KOR' | 'KUR' | 'LAO' | 'LAT' | 'LAV' | 'LIT'
+  | 'MAL' | 'MAR' | 'MKD' | 'MLT' | 'MSA' | 'MYA' | 'NEP' | 'NLD' | 'NOR' | 'ORI'
+  | 'PAN' | 'POL' | 'POR' | 'PUS' | 'RON' | 'RUS' | 'SAN' | 'SIN' | 'SLK' | 'SLV'
+  | 'SPA' | 'SPA_OLD' | 'SQI' | 'SRP' | 'SRP_LATN' | 'SWA' | 'SWE' | 'SYR' | 'TAM' | 'TEL'
+  | 'TGK' | 'TGL' | 'THA' | 'TIR' | 'TUR' | 'UIG' | 'UKR' | 'URD' | 'UZB' | 'UZB_CYRL'
+  | 'VIE' | 'YID';
+
+// Mapped type tying each key to its own lowercase code (ENG -> 'eng'), not to the whole LanguageCode union
+type LanguagesMap = {
+  [K in LanguageKey]: Lowercase<K>;
+};
+
+// Declare the exported constant with the specific type
+export const LANGUAGES: LanguagesMap;
+
+// Export the individual types for use in other modules
+export type { LanguageCode, LanguageKey, LanguagesMap };
\ No newline at end of file
diff --git a/src/index.d.ts b/src/index.d.ts
index 1f5a9c8094fe4de7983467f9efb43bdb4de535f2..16dc95cf68663673e37e189b719cb74897b7735f 100644
--- a/src/index.d.ts
+++ b/src/index.d.ts
@@ -1,31 +1,76 @@
+// Import the languages types (type-only: nothing from this module is used as a value here)
+import type { LanguagesMap } from "./constants/languages";
+
+///
+
 declare namespace Tesseract {
-  function createScheduler(): Scheduler
-  function createWorker(langs?: string | string[] | Lang[], oem?: OEM, options?: Partial, config?: string | Partial): Promise
-  function setLogging(logging: boolean): void
-  function recognize(image: ImageLike, langs?: string, options?: Partial): Promise
-  function detect(image: ImageLike, options?: Partial): any
+  function createScheduler(): Scheduler;
+  function createWorker(
+    langs?: LanguageInput | LanguageInput[] | Lang[],
+    oem?: OEM,
+    options?: Partial,
+    config?: string | Partial
+  ): Promise;
+  function setLogging(logging: boolean): void;
+  function recognize(
+    image: ImageLike,
+    langs?: LanguageInput,
+    options?: Partial
+  ): Promise;
+  function detect(image: ImageLike, options?: Partial): any;
+
+  // Languages table exposed on the namespace, typed by LanguagesMap from ./constants/languages
+  const languages: LanguagesMap;
+
+  type LanguageCode = import("./constants/languages").LanguageCode;
+  type LanguageKey = import("./constants/languages").LanguageKey;
+  // Known codes keep editor completion; any other string the previous `string` signatures accepted still type-checks
+  type LanguageInput = LanguageCode | (string & {});
 
   interface Scheduler {
-    addWorker(worker: Worker): string
-    addJob(action: 'recognize', ...args: Parameters): Promise
-    addJob(action: 'detect', ...args: Parameters): Promise
-    terminate(): Promise
-    getQueueLen(): number
-    getNumWorkers(): number
+    addWorker(worker: Worker): string;
+    addJob(
+      action: "recognize",
+      ...args: Parameters
+    ): Promise;
+    addJob(
+      action: "detect",
+      ...args: Parameters
+    ): Promise;
+    terminate(): Promise;
+    getQueueLen(): number;
+    getNumWorkers(): number;
   }
 
   interface Worker {
-    load(jobId?: string): Promise
-    writeText(path: string, text: string, jobId?: string): Promise
-    readText(path: string, jobId?: string): Promise
-    removeText(path: string, jobId?: string): Promise
-    FS(method: string, args: any[], jobId?: string): Promise
-    reinitialize(langs?: string | Lang[], oem?: OEM, config?: string | Partial, jobId?: string): Promise
-    setParameters(params: Partial, jobId?: string): Promise
-    getImage(type: imageType): string
-    recognize(image: ImageLike, options?: Partial, output?: Partial, jobId?: string): Promise
-    detect(image: ImageLike, jobId?: string): Promise
-    terminate(jobId?: string): Promise
+    load(jobId?: string): Promise;
+    writeText(
+      path: string,
+      text: string,
+      jobId?: string
+    ): Promise;
+    readText(path: string, jobId?: string): Promise;
+    removeText(path: string, jobId?: string): Promise;
+    FS(method: string, args: any[], jobId?: string): Promise;
+    reinitialize(
+      langs?: LanguageInput | Lang[],
+      oem?: OEM,
+      config?: string | Partial,
+      jobId?: string
+    ): Promise;
+    setParameters(
+      params: Partial,
+      jobId?: string
+    ): Promise;
+    getImage(type: imageType): string;
+    recognize(
+      image: ImageLike,
+      options?: Partial,
+      output?: Partial,
+      jobId?: string
+    ): Promise;
+    detect(image: ImageLike, jobId?: string): Promise;
+    terminate(jobId?: string): Promise;
   }
 
   interface Lang {
@@ -34,43 +79,43 @@ declare namespace Tesseract {
   }
 
   interface InitOptions {
-    load_system_dawg: string
-    load_freq_dawg: string
-    load_unambig_dawg: string
-    load_punc_dawg: string
-    load_number_dawg: string
-    load_bigram_dawg: string
-  }
-
-  type LoggerMessage = {
-    jobId: string
-    progress: number
-    status: string
-    userJobId: string
-    workerId: string
+    load_system_dawg: string;
+    load_freq_dawg: string;
+    load_unambig_dawg: string;
+    load_punc_dawg: string;
+    load_number_dawg: string;
+    load_bigram_dawg: string;
   }
-  
+
+  type LoggerMessage = {
+    jobId: string;
+    progress: number;
+    status: string;
+    userJobId: string;
+    workerId: string;
+  };
+
   interface WorkerOptions {
-    corePath: string
-    langPath: string
-    cachePath: string
-    dataPath: string
-    workerPath: string
-    cacheMethod: string
-    workerBlobURL: boolean
-    gzip: boolean
-    legacyLang: boolean
-    legacyCore: boolean
-    logger: (arg: LoggerMessage) => void,
-    errorHandler: (arg: any) => void
+    corePath: string;
+    langPath: string;
+    cachePath: string;
+    dataPath: string;
+    workerPath: string;
+    cacheMethod: string;
+    workerBlobURL: boolean;
+    gzip: boolean;
+    legacyLang: boolean;
+    legacyCore: boolean;
+    logger: (arg: LoggerMessage) => void;
+    errorHandler: (arg: any) => void;
   }
   interface WorkerParams {
-    tessedit_pageseg_mode: PSM
-    tessedit_char_whitelist: string
-    tessedit_char_blacklist: string
-    preserve_interword_spaces: string
-    user_defined_dpi: string
-    [propName: string]: any
+    tessedit_pageseg_mode: PSM;
+    tessedit_char_whitelist: string;
+    tessedit_char_blacklist: string;
+    preserve_interword_spaces: string;
+    user_defined_dpi: string;
+    [propName: string]: any;
   }
   interface OutputFormats {
     text: boolean;
@@ -88,36 +133,36 @@ declare namespace Tesseract {
     debug: boolean;
   }
   interface RecognizeOptions {
-    rectangle: Rectangle
-    pdfTitle: string
-    pdfTextOnly: boolean
-    rotateAuto: boolean
-    rotateRadians: number
+    rectangle: Rectangle;
+    pdfTitle: string;
+    pdfTextOnly: boolean;
+    rotateAuto: boolean;
+    rotateRadians: number;
   }
   interface ConfigResult {
-    jobId: string
-    data: any
+    jobId: string;
+    data: any;
   }
   interface RecognizeResult {
-    jobId: string
-    data: Page
+    jobId: string;
+    data: Page;
   }
   interface DetectResult {
-    jobId: string
-    data: DetectData
+    jobId: string;
+    data: DetectData;
   }
   interface DetectData {
-    tesseract_script_id: number | null
-    script: string | null
-    script_confidence: number | null
-    orientation_degrees: number | null
-    orientation_confidence: number | null
+    tesseract_script_id: number | null;
+    script: string | null;
+    script_confidence: number | null;
+    orientation_degrees: number | null;
+    orientation_confidence: number | null;
   }
   interface Rectangle {
-    left: number
-    top: number
-    width: number
-    height: number
+    left: number;
+    top: number;
+    width: number;
+    height: number;
   }
   enum OEM {
     TESSERACT_ONLY,
@@ -126,28 +171,36 @@ declare namespace Tesseract {
     DEFAULT,
   }
   enum PSM {
-    OSD_ONLY = '0',
-    AUTO_OSD = '1',
-    AUTO_ONLY = '2',
-    AUTO = '3',
-    SINGLE_COLUMN = '4',
-    SINGLE_BLOCK_VERT_TEXT = '5',
-    SINGLE_BLOCK = '6',
-    SINGLE_LINE = '7',
-    SINGLE_WORD = '8',
-    CIRCLE_WORD = '9',
-    SINGLE_CHAR = '10',
-    SPARSE_TEXT = '11',
-    SPARSE_TEXT_OSD = '12',
-    RAW_LINE = '13'
+    OSD_ONLY = "0",
+    AUTO_OSD = "1",
+    AUTO_ONLY = "2",
+    AUTO = "3",
+    SINGLE_COLUMN = "4",
+    SINGLE_BLOCK_VERT_TEXT = "5",
+    SINGLE_BLOCK = "6",
+    SINGLE_LINE = "7",
+    SINGLE_WORD = "8",
+    CIRCLE_WORD = "9",
+    SINGLE_CHAR = "10",
+    SPARSE_TEXT = "11",
+    SPARSE_TEXT_OSD = "12",
+    RAW_LINE = "13",
   }
   const enum imageType {
     COLOR = 0,
     GREY = 1,
-    BINARY = 2
+    BINARY = 2,
   }
-  type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement
-    | CanvasRenderingContext2D | File | Blob | Buffer | OffscreenCanvas;
+  type ImageLike =
+    | string
+    | HTMLImageElement
+    | HTMLCanvasElement
+    | HTMLVideoElement
+    | CanvasRenderingContext2D
+    | File
+    | Blob
+    | Buffer
+    | OffscreenCanvas;
   interface Block {
     paragraphs: Paragraph[];
     text: string;
@@ -179,7 +232,7 @@ declare namespace Tesseract {
     text: string;
     confidence: number;
     baseline: Baseline;
-    rowAttributes: RowAttributes
+    rowAttributes: RowAttributes;
     bbox: Bbox;
   }
   interface Paragraph {