From 7de31d8cb6443ee74a4e4a638276b28d493d4e44 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 4 Sep 2025 17:13:58 +0800 Subject: [PATCH] feat: Add PaddleOCR as a new OCR provider (#9876) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: support PaddleOCR as an OCR provider * style: fix format * fix: update persistReducer version * update wrt comments * fix(ocr): 修复迁移147中OCR提供商的设置错误 将直接赋值改为使用addOcrProvider方法添加内置PaddleOCR提供商,确保正确初始化OCR服务 * Replace bare fetch with net.fetch * Use '\n' as delimiter * Optimize code wrt comments * Add tip --------- Co-authored-by: icarus --- src/main/services/ocr/OcrService.ts | 3 + src/main/services/ocr/builtin/PpocrService.ts | 100 ++++++++++++++++++ .../src/assets/images/providers/paddleocr.png | Bin 0 -> 16805 bytes src/renderer/src/config/ocr.ts | 16 ++- src/renderer/src/hooks/useOcrProvider.tsx | 3 + src/renderer/src/i18n/label.ts | 4 +- src/renderer/src/i18n/locales/en-us.json | 7 ++ src/renderer/src/i18n/locales/ja-jp.json | 7 ++ src/renderer/src/i18n/locales/ru-ru.json | 7 ++ src/renderer/src/i18n/locales/zh-cn.json | 7 ++ src/renderer/src/i18n/locales/zh-tw.json | 7 ++ src/renderer/src/i18n/translate/el-gr.json | 7 ++ src/renderer/src/i18n/translate/es-es.json | 7 ++ src/renderer/src/i18n/translate/fr-fr.json | 7 ++ src/renderer/src/i18n/translate/pt-pt.json | 7 ++ .../DocProcessSettings/OcrPpocrSettings.tsx | 83 +++++++++++++++ .../OcrProviderSettings.tsx | 3 + src/renderer/src/store/index.ts | 2 +- src/renderer/src/store/migrate.ts | 9 ++ src/renderer/src/types/ocr.ts | 22 +++- 20 files changed, 303 insertions(+), 5 deletions(-) create mode 100644 src/main/services/ocr/builtin/PpocrService.ts create mode 100644 src/renderer/src/assets/images/providers/paddleocr.png create mode 100644 src/renderer/src/pages/settings/DocProcessSettings/OcrPpocrSettings.tsx diff --git a/src/main/services/ocr/OcrService.ts b/src/main/services/ocr/OcrService.ts index dfd796346f..471d31edce 100644 --- a/src/main/services/ocr/OcrService.ts +++ b/src/main/services/ocr/OcrService.ts @@ -2,6 +2,7 @@ import { loggerService } from '@logger' import { isLinux } from '@main/constant' import { BuiltinOcrProviderIds, OcrHandler, OcrProvider, OcrResult, SupportedOcrFile } from '@types' +import { ppocrService } from './builtin/PpocrService' import { systemOcrService } from './builtin/SystemOcrService' import { tesseractService } from './builtin/TesseractService' @@ -36,3 +37,5 @@ export const ocrService = new OcrService() ocrService.register(BuiltinOcrProviderIds.tesseract, tesseractService.ocr.bind(tesseractService)) !isLinux && ocrService.register(BuiltinOcrProviderIds.system, systemOcrService.ocr.bind(systemOcrService)) + +ocrService.register(BuiltinOcrProviderIds.paddleocr, ppocrService.ocr.bind(ppocrService)) diff --git a/src/main/services/ocr/builtin/PpocrService.ts b/src/main/services/ocr/builtin/PpocrService.ts new file mode 100644 index 0000000000..2079f2d6b8 --- /dev/null +++ b/src/main/services/ocr/builtin/PpocrService.ts @@ -0,0 +1,100 @@ +import { loadOcrImage } from '@main/utils/ocr' +import { ImageFileMetadata, isImageFileMetadata, OcrPpocrConfig, OcrResult, SupportedOcrFile } from '@types' +import { net } from 'electron' +import { z } from 'zod' + +import { OcrBaseService } from './OcrBaseService' + +enum FileType { + PDF = 0, + Image = 1 +} + +// API Reference: https://www.paddleocr.ai/latest/version3.x/pipeline_usage/OCR.html#3 +interface OcrPayload { + file: string + fileType?: FileType | null + useDocOrientationClassify?: boolean | null + useDocUnwarping?: boolean | null + useTextlineOrientation?: boolean | null + textDetLimitSideLen?: number | null + textDetLimitType?: string | null + textDetThresh?: number | null + textDetBoxThresh?: number | null + textDetUnclipRatio?: number | null + textRecScoreThresh?: number | null + visualize?: boolean | null +} + +const OcrResponseSchema = z.object({ + result: z.object({ + ocrResults: z.array( + z.object({ + prunedResult: z.object({ + rec_texts: z.array(z.string()) + }) + }) + ) + }) +}) + +export class PpocrService extends OcrBaseService { + public ocr = async (file: SupportedOcrFile, options?: OcrPpocrConfig): Promise => { + if (!isImageFileMetadata(file)) { + throw new Error('Only image files are supported currently') + } + if (!options) { + throw new Error('config is required') + } + return this.imageOcr(file, options) + } + + private async imageOcr(file: ImageFileMetadata, options: OcrPpocrConfig): Promise { + if (!options.apiUrl) { + throw new Error('API URL is required') + } + const apiUrl = options.apiUrl + + const buffer = await loadOcrImage(file) + const base64 = buffer.toString('base64') + const payload = { + file: base64, + fileType: FileType.Image, + useDocOrientationClassify: false, + useDocUnwarping: false, + visualize: false + } satisfies OcrPayload + + const headers: Record = { + 'Content-Type': 'application/json' + } + + if (options.accessToken) { + headers['Authorization'] = `token ${options.accessToken}` + } + + try { + const response = await net.fetch(apiUrl, { + method: 'POST', + headers, + body: JSON.stringify(payload) + }) + + if (!response.ok) { + const text = await response.text() + throw new Error(`OCR service error: ${response.status} ${response.statusText} - ${text}`) + } + + const data = await response.json() + + const validatedResponse = OcrResponseSchema.parse(data) + const recTexts = validatedResponse.result.ocrResults[0].prunedResult.rec_texts + + return { text: recTexts.join('\n') } + } catch (error: any) { + throw new Error(`OCR service error: ${error.message}`) + } + } +} + +export const ppocrService = new PpocrService() diff --git a/src/renderer/src/assets/images/providers/paddleocr.png b/src/renderer/src/assets/images/providers/paddleocr.png new file mode 100644 index 0000000000000000000000000000000000000000..eec88c8e026eed0bf67bb16eb9fdbeef1cd7d99f GIT binary patch literal 16805 zcmbSyb8sfnvv!;}wrwYGY-eNJwzaYIh8x?)Zfx7OtqnF#HrCzW{chd+|2I>8o@c7M zPxYCZI_f@A%8F7*@c8gxU|>ix(h{owaOyt-fcaMnlgr=z18_H0DKW5`8N#!F4Sjb?03n`EWhoVCTlZiZliv|OqlK0fQe0l<- z48j955QH>_VW&c4UoF-$jt}$>dV3+6M5v03Wbk}a0q0q-`A-^>e%x#p{}|3E zN10YjY>SmwAFQb%5{ZV?PqBv#hO^<62PA<=_I`qi8^;c}H5@B^>ZX#-1%_HHexjjp;QujzNpKH<9jdG{1ZqDhF*~wP}4y%`4B( z1IOPxIV~kC(a4-*qhEEO=hp^uMrBMKv+ST!FKzY@%@ILjkBxcyGM=@a{AH8Jy{1fm zb6WBk6jBQYx%D93GQ2Or8qbJ!zkw*b_#ESswz z{0HNHc9>R9I~~n--etygM2s!Jbr$}Y%jjF#n7Tv;T~PA0mV&~pvF9F%^xgI`ryKD)qgmB>*U>v& zVcWaK1!F3O6@O^3iIOSe7&S2`Z3>qin#n>4RVNCGul(c>@nv7$#)=QMkz0RKIQ*0> z-%(+vDx;jrR#?H2#5qKp$+GTpJGNk5G!wSqyxE4!%gZF*3bZkFWWN0@UTXyhTk8%) zXc}_~=rnx%4lN)yvZ8a6PZoa7P=~yTmPCep!AWD);TQlp--+Uv*4|P5MOO;SkhAkg zXoxs2_dr0nTd<375D>N)*4L1tdF+%>TLRZ@jv$?u1qCe!Qf<^cG@;g|Wo-V@azw~% zE8?BGpdFQ}lM}DoJfW>F$VFy{(VFuc2o9b%)HRgN)TBMRI$8ABQWb4C>gejUg2FU3 zrxOy=B^fHh1tQ)6N@ol@d8h;NUHg&^ozOTJR8G%+HcuV%Se2nuy}FJ-LtFdHD4v3k z7B&0bPy@DQp`lpYKsFm$xVZ5*@IFL1O{Xo>;6S2 zFt8WF3dpl$Q4?^GWpUK;uP@Niwb|=uF`rQF`!y8~@pp*vJFiuijh&se2=u*V*$suC znriNZx$!s9IFt{y1*h*YrLV1_wJUJ<%J@M0Pp6m{y3;Uo^1KLdvOLd4sXYC^ADMcN zofctu(?oj`KP4kWgPcq9;Qg1{10=>S=71A-e-8tx#;kq1inG~@6??E2HG76kC$yF( zzy();2e}sfTk(;}&!zbg$gQy;rTm{)LFs?)>u3<5AZEltj4WddH_IYeF_`cxKGaOu z<~q~4!TX1qxn@HOL)$)Or2X7%*40b#AsfkwC-jlah1LhElKk%f`cMmKSB0vSl}Arr znw9~9{kp()*KM3BxtV?k%d=7iNcU;+35UG99uh>jscz30EilS4#9dx`v6UiPSAm*T>Tyq*z^c*^3h( zx?0(-J7=sA1mUTNCIQV9J;=S}Ai2AE4pf;ZiminwMWWhq=@R+LqDUY+h%i~B^7$rk zklAgxRu2YEZ3ymla^{f0m!`5YA&-Xe%`HfZeTG6vL7$cf&n|XzMXLe)-&Zz=jE4E1 zBcS>gop2YF;vAnz?ZN`85*ok|4-`^Z3Gwc>oDUbWKJNvXPz=lGQC~HAE;vbE{O&yy z2!O>L&HPUd$l{0iFq2z>Dvz$X$dJ-AD`aYX2xdzq|%d-IN^^r_^XgDYp z%)zN<7I2duzlP@H3qlW;lkA<=TF={{!z~1(4~w-*EthJ1%vNKS{TyRH57q-+gV8vR zK^P|AUj^dvmMAg(wtOz8+YkRnx?Yt5oksLq*HD%5w&Sz?8Fz$kXJQ!IBr;3Y8h8TzTt&;hL}vxw`=tIVWPq`N>jX*ECqj%{1$;X02Gmb2 zA3)ZKeH`tLR+j>~jreL9H%8$hdq9=@xSB{u{-`Q1L2_3(1LPj6!^P#bw^<~GQ&lRH zE+3r_;`aufeCd=D&&fOAn&`v1Wis}uPb9C;tGg(SBA?riCc{L$+0adF@q25cejnPS z0aiOvJ?(d^U&M&3?Zs662yBc}wq*PV5MVj1adY~yyL50GhL6K@Px74jTOMzQDjGk0 z4apH(Qt+RhwUPs&Xy2hSVhe;y8>il!q5Y(W$lzxO_J%7 z{oKcwr&@hEE3UI=X?(k#KeRM|{>~`Nl^s>_?Lcsn+ITXSGAOWnG7sA;in~=NT6}y< zaaQ8wc>Juu==MHZx@X6^NYGC7P8~bI*&5Il`97c0n9XJzA5EoP!}NRE9i?P3h_Q`q z-KnVMl%zh4n6nJ4UflZADmqFj31)M+af|cKG)jFc2mQ85fJSLuLwr>JLqm z@}CH*u-oX>4v#T1I@h1Fh1Y{?>@E&_@Hru7M$r6(0Rqg=y0e3U} zp#I$aw%Oe4Q3wtKg7I+<0uQghZ6tkE_c6*6)xrs-=SQSXhs}WF`+f=9M+F$0{I4E! z8k!PgPg43xnEQ>sIGfJh*wM|{31$O)Y*|6DJ*yBPg~CCOEF)Lzj!>0cuT5&y_dWMU zp9`R#x@1B|_Ee4OUIb@7$I$(=BI*yZwLwP-^dsU67EVr9oz~9#YOxdxKk>UuY#F{1 zh9KT2d!fbq<;_{E69RxD(im96ia@_coA>VtKE9F{C9MOK3u_HpV-BNaZV}LIDr((@ zc~YRq!yjpE)LRd`dt?*J)dzi0OsnbC3eYh$(AKzM@(Edj6n{UMEa;p4KDtv~#Ta4yb0j&sbR~Ts zBg-9>W3>`-?wRZT05wuHC>=o00shM zSCGO1e1sxk9BHys2JO7_2oq0%jQyL2+gC+*Yb$)Rzf;{k6h!zW14}(kn-e@9K@3J< zS2S`bdt#T1b0y-B+o$x%&scW7+fSmL$ zc=_npDLJT!)x+J|h?MRrt3tZQX$Da6j0?lz{(}s*&4i1RmyDIeEjoM51u;DnLs$r# zh%ZJ~{sGNdRKeWlOw~ku9i)%s2|{AGvj;;}7vK~x-l!c^4AXTe_4{-NF+~-0AtpptrkkHH zI}EHIGg9Q!Jp;&MEaS5p&4vOevvpDM85muFUm)UgG$q)B3xP#?G%Wp3Ll0ZIY72ig zPt4BWdLdN?Y%eaQetgX{ zKwz*xd!HLrrl(40WDXXb^P~pOb#)t?Q8yA#tr{44H@qqI+e)WpaRb!I*9L*eh+qgT z)U_|u>dc$NDM)8=XAlT;-)aF7AM14>@Ifnl{Edmw6*e8-Rs0>u&X#dQrPmw)a^#3y zZbXl?W>U}Mv4IsaX2XK#=M;OA4$TX4WbZ^tjdutzORwRN)fn@jJGi(NBspEL1-!&5 z@*h^{#z^3j_}VGFT|7oj&=Jj~jBT6vGd5}*hvJ4mqga_CFm;bi`FS+uT<>>kZvTNF z!?)ix9V5i1(2%9x!9dPfgH)iYJ)*M-?Nm#6&4B!S0^DC?l`d>UQqL^%av6f;KsBIOu8k23LOpS1MJ*`xE-AE*@G0(-E1i7<;Anpoq9L2 zXb`nv9I0K8{g;*gI?zjp?GuDK$Vk>}xeA-=HeI4`w43#9js9Ai-tVJtrpYMzX{6EF z@^$8}9~k34jX}3_CuEboieU1Mli~}f80yKw<7T9e-C8}G%?&NB2>u5S>CiUiA$n$R z{Ocg9Y~oJ>1VNh#C3P$mNE-Cy1`Cnp+#3!*UV^;vg{ZDf7aUmJ5RtbjYeHTI3B@$+VtZ35>O>iTzuW)5Dcjd z|8?taX-JA_{OZ<^P3E}z`_;m(kJYrS9ofiy>oa&k{JVaX8jtj2A1TOhM{p@<*Rpz{ z1nRFuS%!{$lE*%|!47~5zYFkI4|vzxi^;t@Dq3mdj1;y$OS(QV_L{D0 zWBM=8VgvL)SoK4@48*kwgW}~IoA@L3?#-J<22A396a$=lZQ!BaWX6J%lc31m*pEVF z`c7_f@@CpIe;YP~V(;-}Lyr9{rX0w%v-$F1;pR1HWn-ocCZQN;{zhM9XO1LnLgAiJ zkH5XRXDqv2jpsV3%uq(scOdk6j>zirJo?yW`(v*fAp|@y%!%qJHKk zlQmzbigTu0A^;xvm1-h>O|lFOKdL-GUQ>O5)l-$hbz*r`^ncMEga*d(17m65*INE~ ze6KhJS_r77K$kyl$@u32i0#9>4PtENf ztdt|1&LBOO{1mgSRWa1yD9mpq5mx|E4|-_jwN6}Qb1wF7N^kRjghdd8j0QhfLdLmF zVWr-(nt*e$jylVKS6Sn+W$$fn7vdpNM=mlvZK^$bTfA<$q8Zuj&i*E5rGxcC0xe!C zoV!NJp5TX)d`@I%JeF5H#A^41e{&9aT~+hEhip8P+)69LdlqiZ88tU?WF>nZuBak3 z5p6p2wAiEST78IjJrKUBxhZ@?Awb(hZ}wkGNXp7v*98z)Lutc8AoIV9xi!|`S>B1h z_(O8XIX7K2R37&J`1J)7U7C(BuS1eyPQ9zHqT<-p2m`j1(w(uKuF4GaeGhz!H;B>p z^m_ynh5L`MOPddybNg!Au7!O?o4J7HE1&Ol+F}>VP22cX zxHP+66=BetD^p*?KU2rq2B8m!+z@I|4q`$XjY8!{Gcjd;n_D;uMx;n;GDpgdj*+t0 zqQRs!BZM&ylWq7#h(;a$wy2r+@IzS9d0ki-xMaKSt`$4~@cem)L;(^7zepNbz$Vg% zERCcMrNz^XIwnBuqsdd0K9?;BZ&d$@0|B11-vizMH$8}B_>Nu4(^20_tyB&bhsaq?e+Mok<%bBM8B3Y;&<+(YX+vRd55SU8E4< zgW7VF^oNbB@*>#@Fm@z2_n7QG*#&N-s#HyUJ+rLdKY$dEO2Arxv*qfQ@@Qi0wA(;n zd9Gs3rqRwC48)C~+9Lo~OXAwOPA!8<8&2Jo*uPPSZUX68Dg}AWt826CVk$M^aSts+ zl87zDNcB<;JFRdzz&EWG0BJ$$P|8!6r80{4(eL>VVa;_f%KB7$nxCi`hS`#}E(xoJ zuQt}QBdG~1x)%}Vjj!%MvtD2Cc6iCA`_*jaFZ4K^vTJkc<&&eKW=U|*7Kj#}ZV$^D zEZl}SFt_Fs;_Zrv=z3fUK=MN&zay|zUpO4(&P0Mpvj$enY_w#if)=n2o(owrQnPZm zEDkobOwuiF-9XPR^H>4C8UNgqc!Ab^ErVmsVagQGy3CPtfE8{2en6>1_N-)HZn+)& z-Vii%HcJBj#&-jfPupanj$x`k~gM>4Ud9)o!9-DJR^vueF7Jh&%XSd%i zN=;}Y!GhBHniCi_rqh z(KHV76=Kw9^xEE$<-;->hih-1(-h~8c? znveq}>IocV}cG)-ZjS8=#fAT%Wosy39X5Hc3zOEqQLKEh(Z4&khxV@ip z{=6pRx%Dpz%~X$Z__I+zr?h&HXo4Im2f5rvutxL2lM@~YzK7|1l!AEz7i6}-{-oN*9&-VFZYL>DLX17{Y3GPU*-F5Fr z1-c3@%?=Kw02*WY0kdad0_5P$GPHy|cylpn@Mp5J0GGxU0%R>q&=6D-DWvALM;KQN z5H+X$Ua@;S^{kg{0TPN7<`60({>wkJ`vhqqg_Ies1TS1NoY>igaPOl{quk9$`9v&E zxGysmb%b;t2};1-qXJzFIaF%ZlbmrueSM*e;w%GyWq%)Mv1@8tDss*|1XyAM(fNlV zLu!W=ikOvBMJBqt~4P#!yRG|3sQJdLZr^bAXg9pJqR??XWE80IOU< zle_`^Q~dbAYYFD*gxT+?>0^6Mw6nsNuH@fZ(PcEPPqkfZ+<2YKE}yVI~MfliE?99$}wu1K%BQu7Hgz%j`Z2$ee~rU~?3Y;a2# zek7)CMFLcgx)a||LYxQ;R2l6O1{*?%5*LKA%2NoD>;f6N%Gc2{4h-2>p|hOdZ0{+_ z^hiQMh%=@rP`4!H#flj*M_BJ*oll9!JDw1ZaM-W)yYIa5z_ez3Q+$}PY#WFGo6gC! zTemW*6Z$~^L%bNZD(=ORMa&9E=SmwCno9!%Ph%WiQy&kqE3Gr_ud`xKZxd%3H_hHR z#;e`L^O%unNEAmZURJz!>YA_|sSN3FGMwouM&T}Iuu`v((v~RS9I2@-G)9hR!>;vz zpomjjU`Hh_vFPXIRedp|thQ~yyiajihmc_JtkP6+KjjU&1m7!V#9<7Lm@qze?A{&; z4A5S^lRIRU!Kr6mP4lm;v{OgG+A-0pQu894W04WG(7n1en)@zDN_RFS#W4NG z=(*n3o;AX*(SjKmBa~j~th-{{AnVdCFYrgbMd^`v=s35a=ra|)7)$gX={{&@KEM54 zCP1i-?^Zm>D0SZ57hTdr9F+DKK$4Fjwe*{NFTEoBIu*2=- zv?0lL$Ia0c2K%ruofHv>@d`PV&ZS3V*e5)_xl;SIo1G(xJN=t0N4y}Abzha#lIoCt zrb8*W;>jZT4~hQz44mYEYP|Sh2L{6CTy*fOm}il;6wNu-&#UZ2aQkST2C*OYf0uT% zQnNm`LJJpE8Do1xgErt z{uwh>vRPz19s0|(dVOJtGIP>+${>G5N8e`hkYKtWH^oktZKSk{AD?Fg8D+!yhqIYv zF>cXwI~qwiw`ND*D?HaC5jnjZvX$8>^v0Lp@1!mcX+I*k?JBl14wZ%dk$5*L%?7RP z(VhB*QqE=AW7j%#kV`AB>qZ4da?XL$aY&R+D1 zGnVd!dEc8W7fY<~-)A*$8nCic&c$pZ#o-la0V6!lcWY@{+?N84tLO(RbFKV^;9jVl zK7!QRnUqO{^v6x`Cgq%Z{Rv#5Qbx$u8LnujX+Eq5JoJo9uC0t6vYS;4s&USBg8$;g z2lPR!N-g#>CcA$;a@Q%qAz^_O>_}R=^>JxPbKWsZdM#$i1KN=8XyD8egWH1Y)xH(Y zpJUHCdwQOE>tl@VM6yL+A;At1@5;?|4B@&mmj$`v6}wyqV$|Oc+ux~;5= zf%AkcgsaA0GohT7B?IqH>sjktMXpb3j(M;q#>nuytQpczOTr0@Fd&0Af@DR-xbB)N z%Hjs&7a+AZfGN>M76u)GRJWDMWC~_iu3B?cHT_~e*OZR)0IhTZhYji@MwI|5mXgV) z()y=GRrQ#h9~@%UZN6ZxH~xf>i(&;gEg#%@!$o+I1SW&FdppjNuW@cIm}}^mSgts0 zKYS}54SQomy`Uh}BCsknAyDlyaft}n5Lnmhp6tP{;jBqY%DzT3F9Mu&KA6y z1@{Lb(B})}h~bR{7$UC>wMJ*giC1{kc<@O6Jlp)l)!N7n?{;K5Etw{H0LzU#?4f1{ z-&)UDCdevr5J~#o{s?t~b{7KC|&hhnQ>>QK#>s0wB4GQiT`G#i8y9OXTi10Y@kAars zjd631<>u%I&r47%wiu83rEzXl55KE~#t{Q3xnLWbTMqwtE;^QPn^X{%%i7fwZqomm zta2+Onh^VxQ$0@>Nvqd``j>9=t-|p|69!zDIL-!9+gj)^@aM&SLlej41QHxLwv+>h za1ZM56q6(PA=`1xoxgLX^)ZI~`YTW8M@JiW_0N<(?I5 zYF#V-n}8eRuk^MsAS|+Vee?OugSL6I+e$MdXG|{SkE|zpBB7;~F2>6j=-X4Yz^vW< z+Z@TugP+4yTKtB(NGe$JyBATIV`Pp=F1u*Ow{qy zVk!bvmDC=L0nykBlR=o<-@FUac3KW;Tm{PQ!%-LUA|VcS+R5XHLZFl z55j82dFYpFL5qk)zIxcPq!8%5_maZJCT_;X`1|Dp>{pR2tW<1o9^*HQnui;avU;YWT|ZaoBh9zD#@{=km_ z?pzv@%a-B)r0Vs+O+fyu<0rU%gTsTci{Js7oYxDtsy^5Bem}kv=;v6WB2w*>|H3<{ zC(otra!MtMIcbKEhn;B9=D4-`L1z~Idr1o=V8Xwl`}chW``H=~sPMjY?2 zk-bQsc%)`UC`pV1br(DzT!6SbsZ;X8Af)n*ppZ%{hldD~XRZ;{O4CNp1Lb&P2Y%%O z-A(Z;8Jl-tQe>P1Q-053LScl#a5%C&w6HM6KhJ%v3t*mbSB^*qyt+P67QbP>WZMQ9WB3ST2P_eq7+($_aW5voOkGx|yq6)8{ij{p&Adg4dH=S(?xLi2U1Ls&%|7{e4f}=7r*@pb5I6dxKI?*te z1kGNEGn8^NXHSKPJ~HOZ&Cj2rmuC~o0|}P)-e+-zdlF@TCx9q=LinEGr&Gt(wpfOO zuTm({qOY-8vR1&Y0`X| zXB$-B1KU22X!@scNy`huVOZ!dlz?6U)mlIjCO|8pa+h(w5QlZFD#t_$6(;Eoq=Ddu zE1e{dM0|pqFy4M~aVGP-mz(>sZ_2Pr0_EN=qTf6ePaK%Mmfu36_k|+^P^z{@uxU4l zRCRjDo(5!MbzZgrO1JxYlCLLwx5FZl%5#6axfEvW>8%<{6Pr(wCNn<)1_VR{NY@PP zd327n$ZHXEqI+{|R)OC}u>#?HFj~15+}B+%Gh^4|WNL)9MrvPI@b(bzzlCM{M87=%tdF-Dv|M3=wr7dZQWM7MYlQ7w%;+M7ny;GHEdieIZ{W zMD`g{1-908?38|D&rDnN7UpnM3(f5BuAwkxAXGeC?N{7HEiy5@HEipEGkDcoT=Pa6 ztm|H!cz=tA!`+A_BJP=CQGlANF{ovM#^{75A136!$NS!+3Rik@;^G>Zn1kp^zxx&TM|llv%KRCExxq5Br1L} zdG;c0daFIpREv-6lBlI6d;5fWZH4LSgQSWc1i5&^xCU1s{6b=o#i{+1pVI7!8uCL; zWNq+sH;g#MY}%yEb`U1Kc@9SYN4`FvQcA@F=Kbq0$)IiLPEp=6#?R%J?l3U~XasWF-|*hx+*GL{c+E^mxXYL?|}KYLj`DZBFXA*>V?SXE0b zv~PccfH&v$l6=WMy+;;2tR`p*ei9MV0tCX$rVmLU`Ct(ozyf$U8w6=^Tl)1Sf)jX_2V~d_Nu9is_h~R*VH7 zbr^Va3R^F3A#CIAHZb(ktIfv^nhYi7Jg5*I6)zdH9@6ZqLUo3{T$h^=Z&&v*kSdn3 znOKCS#`-vEpD05_G*5dRBxF_z>kC%j$26Z)&R0r^0F2z+xsF5$)uB ze4egcehXb26=@^*iP5jCjh6?FYM@)=fo|gvis}y41^SLBkgdg{jA24h1-&H7LX`;7 z&!qo%f5rL?tAk~Hcz;DrcWca(^Tl{-KpwdS0z@sK${;J=SGtbs(F50o$Z;GKhB{h6 zBHkNW)IGKp>Pj?OoByWjHv55br>5mcT}>kDE5sgeNg-C~$m4UP-_ZuvggTapn;lX*(Sp8H;lzb8(Mi-JQ}?C)DLYqq7F&{= zS4rsNZ3})8h(8x(1~d8TsM}gi6aTq+cotcv#W)#Q#Z)!z|LyvVZA3Um)U@zt>Zv-) zL;QwVt>`VZqQ^JtN>U9aSgNv8D-zmVSTzlg)Y2tIwbHU>Y3c^jzzJ8 zR_g}|(ir?<&%BRRwBk#!s=mIxuwnK{VCY{Xvu42cAwHz|rI z--baPc9m^!*iKze-|8`44ww-tM_(d^I`Hjn-JLGEMXsG;3+-74QG+WmMU&~ zJZwSJtjgj9_taf_mB6(prVh~P_&Y?b?rb30+8M=y7>4xf8iWBrCp?6jdIzOPqA>_< z{@tt#{}h8>{8fZ}F>Y+Nc(iGog5u`mJ?qbpw^|M4j*m`<;i9CO?Ncg)jDnN|n)n)D zTaMk+lQcoWe+kn}sEEKmzYgq`-plADWx36M&Q`BuhqhuUN^>*~>cSoJS&b0zBCdyA z+iJy8HhoAnG+IuMCcT;%KY>X1m>HJn_3cnO*u{>OxHj*vm9@BK&c`hlaF!78_7rb@ zG}Ab;xV81TbgmrY>CL)xMmRmqHb7m3e3G%;d#LNt-1tiKY}_%y2dL*lC4rGMD}Dv1 zr}D9Qd6#HfJYNsL^XWX{;)hY=Z1eY(IqK8WpgjU6$G^O?5V7zU3~u6~$mi0tyP>Y{ z0sut&0(tb7KGfYs7i)tbGQVC@(jm^zq}L|_zx2#(7bt&2fa*J53zMq}dV%@IY>Q@i z7Rpu~xlIpJ#Z6@Quv3Z9%Pu=7h%~B0<2D3Yy(lgo3v&yZc}60a{hZi|!+s`HbOVTW ziyCRoUe(^M-58=;LkdU9n$vz6iz#(-;;3_{7KbliHt^oH;KEqG3raJldAFz;Y9KVe zJ3I+z!d(iIUTOU(kuAZqgRfUwA*TjujUFQG>4culJTM!j1DoQ(DT2MJIgw*ble(L0 z$sGN|!o*HG_^xR4b9<2U*%arYeVT*4B0 z$GB(AW^DzHZCSvSgmJ%!WdJXnpfjuX=>>!mXax0ocX-aLfCOUtI9;KheI9L8TIJ4ZkBu2)A&c@lZe_^7RN@1kUP09SOr0RG0-IW?2(` zBD<`Fzd@ShtFNg6VoF#F0I89!;gZk$726&51A!_E$mol}&1+!+?A-?_n$!e>PtBg~ zw13P)AkSjFT>f z2^uU&FO;~mI-x@}#sTXB;Rjt&o>;P?#pg|CY9a`4x1-^`P%*7-*L9Az-3wynP_u#KqQ%DY!TFYqe=%D0G=e=+|#(r3opr<`Xi0|DxAp>{# zONP)2&QqjmLQhrX^4pJbwqCLW;ZqmyVmqlKO^p?O4=nUUX<{qQ1bx7zXL9*)%T*$5 z-sFGrul(144w1(stOz=XL|B#L4J3qMEZvT36skskL*Nu$uN3zb5rZMYwQr-2QoDd| z{jqz&&5guuQx0}FgFhhfh+EVGJU6i}pWZz30i(TVG($kpG--FUQo_RiFfd23%t7sj zR(#m4XJnyIaJe-N-b4i&b+u07`}?+^{}qFZ@@`Zq)XRJ@)~`%U!#>>WZ?zqJVi70v zMVkt_Vn->LI1v^sX?sxD+e~W$XSpNdYFz))5zk1VDsVQV+-D^ud2s@DURs~B@u0aK z(haV-a{5Z^$r-PN7~>q1iX0JocUWKzf3mUURDxv|2TbD+w0S%$xTC@)p)TL(t#_c( zNWZl!k7xkM$~B_s`u71Z!#bz=t|o~0g(%>-2SJ0!SK^CXa4BV`DE-dH%V!7PrQ^bZ zBxIOtX5mu)D7cgZqj{gxF9@0IMC(--K!poDxjXo_#jJ)o%nLCT8J)+wix4UtJvGm) zE9P;Pn^pT!q5K~d6Qe@Y@B>{v#rWyd)G;4V-KS|Gy(=DG>%)FfjxDcU7NhGhCK`Uw3-Q-% zI8WN&9`ysd2kS(ah!j)c!9gOMLG-C*+mFca`pvEX5=o+0`SS=7B*rS0DZ$cfYn-ZKqDKH(Wqi88Rjwh_5SlH=!ckgT+KOpSK5F>tH z0<~NY72ax^n?u9Heyo=V^~I1QHb!uZCO1s5&*m3l(AD<&lO8wldE0-qs_kISg^r9+ zl!|Evm2x&(-+EY;@fk3)2;M?*54S*s)V(NMVVX2i{ppX}=d*8AfLJjz$5g&KCJeQo zmvUxmd#R#k$#sxnbdSCrohszEh{6-kSoX|i*^j0$hAjVgeC%RqC>?VTTz)uE-p^3F zH#;k@-dX7=!0j9$0*8$i^js-8y$oJgolsyewDQk(A=|y3(F6CIUI+sOUg0c+I2#-C zi0rq$qZ{%jc1}veo}+<#Z}-$Od~_ZT{bSu<#CQZ%Qnw05Edq`kho)GXq?WP!p!n;a z>j%4YG1N)#bU(~b=TMGwc+D1VTYA70V1R>jA76i*D~KC_iaZwVvwJD<@V)@yA!W&k zp5_UMMwe}Sx6n%w!8(t&P*HHGR34#ltYigc;-eiOc1rzLuLhOjY_reT6FBK-{bB&= zX#1s%`o>z<$lS%oU(%(CNi*Aro`D9^5pY)6!+(0)3%sI(l7EdA3DopG4-aIeV`sT{ z4c|mU4;L>!JL1lS_pdFrhSm$-NzreaKz~+dTR-`M=$&Y7Ora*?VfTN=3!3pF8X~ zr+f$DdGNIk?89Av@c1R)TU{e+J&8Bg^t=|GeTU%eL~5#kSGsnb#&{S>*LRM50VB}*L0N`xo%i3oN1Hy+Q7pcoo~1efd@tI zN(pe~d2VFz8+^RHzE|SMk7ktJqu&A?-|76aR4UAihkJ#{5;Q=(+;uI8Jp}wyvn)Vk ztxzp(?WVVjr>?1$x>jH4w6_h?PZrumr*#74S-HNWxq^mhAXUyPtA$qfzv&Vdnw`GD zs)zH#17XN?mFKrk9}4_fF<7&umh!;)S1&yR?qzrUX0@q{c1}wa#F{jx-vXE=Vr`2R zp`!3J>1>|fSEz_16~R+c0K{(}lZ=evRaZdmzfPROy+K*Ff-ylz4q;P)y@}2^ds&oy zN@_sX!J~nn15BsR>fvArIf8b8Llxa_X7Z6%v%zdGj!7Fc9Jww}V81;f-| zmOLlofx(}`3eOE|ca#1+BU_Yi;S!J+SoV+lL0l1H zvzpfq)~z4V5aOg}pxYX=y+Z2hX+cxoE~Aoz+2thxt-6!+nR4zCQG4P(M0|L8k%Fc; z20Uh68d$^6W9aGJ6zUeV7Op{64=QH)tY^!t-3({Pb%)U_o)r)Ktg8Fxhq4hhIYzs2 z-O&-MQ5Y-S2E~;u6qe|_0Sl`WV*~T9L0aM(*aqBlqcIQ+Oei$bcGB_Y!b+{(H-f7D z;9UgR$qi&;eYLI`Q*1OEGmxke8M1|FjhRTFi{{+C32gwu;hkaWRBazxQ3`K}u+R^? z(lt!d1SC-KV6wLupwKR@4q+qbaHH7`YfWn<8^(^tn!z(6SE%9B#%&Bo&Kx2*2G!Ed zY*uzsmr&v>9Y|sM{ z9@qbP>HYzl2$>7tl7GJ!<=lMm9wF|H+0781>-Vr~rU)r;faBlUsDUm)z<8O<+k!3t zf$c+S1o}h=yc8o}p7>b-CVV?g53brBd=gt>FLk>s7>&B-h3^8ra{9&TIgg(^oG-2g zljF;vx7L#tBH=RlV8vNdhL9v$51Zu*7@jaG`Pc^tW$@wd3pA_eT7dGrw(pcd_LGjn@&Tpq zmw+_wx&5EVj>jTbp&VD?htB;tS{z!i!T&~6XJgnkL}kf$ohpv_xB`Tmq2m7v&j2w0 zlS43rxy9Q}?f?J)u1Q2eRF1b1rTRaAs+D$K0@z1P7;Qs^Sc>Nm=)0v3?0000 export const BUILTIN_OCR_PROVIDERS: BuiltinOcrProvider[] = Object.values(BUILTIN_OCR_PROVIDERS_MAP) diff --git a/src/renderer/src/hooks/useOcrProvider.tsx b/src/renderer/src/hooks/useOcrProvider.tsx index b0e23c20e3..38afaf81b0 100644 --- a/src/renderer/src/hooks/useOcrProvider.tsx +++ b/src/renderer/src/hooks/useOcrProvider.tsx @@ -1,4 +1,5 @@ import { loggerService } from '@logger' +import PaddleocrLogo from '@renderer/assets/images/providers/paddleocr.png' import TesseractLogo from '@renderer/assets/images/providers/Tesseract.js.png' import { BUILTIN_OCR_PROVIDERS_MAP, DEFAULT_OCR_PROVIDER } from '@renderer/config/ocr' import { getBuiltinOcrProviderLabel } from '@renderer/i18n/label' @@ -80,6 +81,8 @@ export const useOcrProviders = () => { return case 'system': return + case 'paddleocr': + return } } return diff --git a/src/renderer/src/i18n/label.ts b/src/renderer/src/i18n/label.ts index bc389dc82a..bdac2f7230 100644 --- a/src/renderer/src/i18n/label.ts +++ b/src/renderer/src/i18n/label.ts @@ -327,10 +327,12 @@ export const getBuiltInMcpServerDescriptionLabel = (key: string): string => { const builtinOcrProviderKeyMap = { system: 'ocr.builtin.system', - tesseract: '' + tesseract: '', + paddleocr: '' } as const satisfies Record export const getBuiltinOcrProviderLabel = (key: BuiltinOcrProviderId) => { if (key === 'tesseract') return 'Tesseract' + else if (key == 'paddleocr') return 'PaddleOCR' else return getLabel(builtinOcrProviderKeyMap, key) } diff --git a/src/renderer/src/i18n/locales/en-us.json b/src/renderer/src/i18n/locales/en-us.json index 41882c82ea..fc9245cf8e 100644 --- a/src/renderer/src/i18n/locales/en-us.json +++ b/src/renderer/src/i18n/locales/en-us.json @@ -3884,6 +3884,13 @@ "title": "Image" }, "image_provider": "OCR service provider", + "paddleocr": { + "aistudio_access_token": "Access token of AI Studio Community", + "aistudio_url_label": "AI Studio Community", + "api_url": "API URL", + "serving_doc_url_label": "PaddleOCR Serving Documentation", + "tip": "You can refer to the official PaddleOCR documentation to deploy a local service, or deploy a cloud service on the PaddlePaddle AI Studio Community. For the latter case, please provide the access token of the AI Studio Community." + }, "system": { "win": { "langs_tooltip": "Dependent on Windows to provide services, you need to download language packs in the system to support the relevant languages." diff --git a/src/renderer/src/i18n/locales/ja-jp.json b/src/renderer/src/i18n/locales/ja-jp.json index 90b2eb32b9..4ee4518fbd 100644 --- a/src/renderer/src/i18n/locales/ja-jp.json +++ b/src/renderer/src/i18n/locales/ja-jp.json @@ -3884,6 +3884,13 @@ "title": "画像" }, "image_provider": "OCRサービスプロバイダー", + "paddleocr": { + "aistudio_access_token": "AI Studio Community のアクセス・トークン", + "aistudio_url_label": "AI Studio Community", + "api_url": "API URL", + "serving_doc_url_label": "PaddleOCR サービング ドキュメント", + "tip": "ローカルサービスをデプロイするには、公式の PaddleOCR ドキュメントを参照するか、PaddlePaddle AI Studio コミュニティ上でクラウドサービスをデプロイすることができます。後者の場合は、AI Studio コミュニティのアクセストークンを提供してください。" + }, "system": { "win": { "langs_tooltip": "Windows が提供するサービスに依存しており、関連する言語をサポートするには、システムで言語パックをダウンロードする必要があります。" diff --git a/src/renderer/src/i18n/locales/ru-ru.json b/src/renderer/src/i18n/locales/ru-ru.json index 6371fd9efe..428b3c4028 100644 --- a/src/renderer/src/i18n/locales/ru-ru.json +++ b/src/renderer/src/i18n/locales/ru-ru.json @@ -3884,6 +3884,13 @@ "title": "Изображение" }, "image_provider": "Поставщик услуг OCR", + "paddleocr": { + "aistudio_access_token": "Токен доступа сообщества AI Studio", + "aistudio_url_label": "Сообщество AI Studio", + "api_url": "URL API", + "serving_doc_url_label": "Документация по PaddleOCR Serving", + "tip": "Вы можете обратиться к официальной документации PaddleOCR, чтобы развернуть локальный сервис, либо развернуть облачный сервис в сообществе PaddlePaddle AI Studio. В последнем случае, пожалуйста, предоставьте токен доступа сообщества AI Studio." + }, "system": { "win": { "langs_tooltip": "Для предоставления служб Windows необходимо загрузить языковой пакет в системе для поддержки соответствующего языка." diff --git a/src/renderer/src/i18n/locales/zh-cn.json b/src/renderer/src/i18n/locales/zh-cn.json index 71c734ce75..539972ddfc 100644 --- a/src/renderer/src/i18n/locales/zh-cn.json +++ b/src/renderer/src/i18n/locales/zh-cn.json @@ -3884,6 +3884,13 @@ "title": "图片" }, "image_provider": "OCR 服务提供商", + "paddleocr": { + "aistudio_access_token": "星河社区访问令牌", + "aistudio_url_label": "星河社区", + "api_url": "API URL", + "serving_doc_url_label": "PaddleOCR 服务化部署文档", + "tip": "您可以参考 PaddleOCR 官方文档部署本地服务,或者在飞桨星河社区部署云服务。对于后一种情况,请填写星河社区访问令牌。" + }, "system": { "win": { "langs_tooltip": "依赖 Windows 提供服务,您需要在系统中下载语言包来支持相关语言。" diff --git a/src/renderer/src/i18n/locales/zh-tw.json b/src/renderer/src/i18n/locales/zh-tw.json index 4ab0fa3dae..2a60499310 100644 --- a/src/renderer/src/i18n/locales/zh-tw.json +++ b/src/renderer/src/i18n/locales/zh-tw.json @@ -3884,6 +3884,13 @@ "title": "圖片" }, "image_provider": "OCR 服務提供商", + "paddleocr": { + "aistudio_access_token": "星河社群存取權杖", + "aistudio_url_label": "星河社群", + "api_url": "API 網址", + "serving_doc_url_label": "PaddleOCR 服務化部署文件", + "tip": "您可以參考 PaddleOCR 官方文件來部署本機服務,或是在飛槳星河社群部署雲端服務。對於後者,請提供星河社群的存取權杖。" + }, "system": { "win": { "langs_tooltip": "依賴 Windows 提供服務,您需要在系統中下載語言包來支援相關語言。" diff --git a/src/renderer/src/i18n/translate/el-gr.json b/src/renderer/src/i18n/translate/el-gr.json index 187110e47d..52f911f148 100644 --- a/src/renderer/src/i18n/translate/el-gr.json +++ b/src/renderer/src/i18n/translate/el-gr.json @@ -3884,6 +3884,13 @@ "title": "Εικόνα" }, "image_provider": "Πάροχοι υπηρεσιών OCR", + "paddleocr": { + "aistudio_access_token": "Διακριτικό πρόσβασης της κοινότητας AI Studio", + "aistudio_url_label": "Κοινότητα AI Studio", + "api_url": "Διεύθυνση URL API", + "serving_doc_url_label": "Τεκμηρίωση PaddleOCR Serving", + "tip": "Μπορείτε να ανατρέξετε στην επίσημη τεκμηρίωση του PaddleOCR για να αναπτύξετε μια τοπική υπηρεσία, ή να αναπτύξετε μια υπηρεσία στο cloud στην Κοινότητα PaddlePaddle AI Studio. Στη δεύτερη περίπτωση, παρακαλώ παρέχετε το διακριτικό πρόσβασης (access token) της Κοινότητας AI Studio." + }, "system": { "win": { "langs_tooltip": "Εξαρτάται από τα Windows για την παροχή υπηρεσιών, πρέπει να κατεβάσετε το πακέτο γλώσσας στο σύστημα για να υποστηρίξετε τις σχετικές γλώσσες." diff --git a/src/renderer/src/i18n/translate/es-es.json b/src/renderer/src/i18n/translate/es-es.json index 029b5c5813..d9eb5fb5c4 100644 --- a/src/renderer/src/i18n/translate/es-es.json +++ b/src/renderer/src/i18n/translate/es-es.json @@ -3884,6 +3884,13 @@ "title": "Imagen" }, "image_provider": "Proveedor de servicios OCR", + "paddleocr": { + "aistudio_access_token": "Token de acceso de la comunidad de AI Studio", + "aistudio_url_label": "Comunidad de AI Studio", + "api_url": "URL de la API", + "serving_doc_url_label": "Documentación de PaddleOCR Serving", + "tip": "Puede consultar la documentación oficial de PaddleOCR para implementar un servicio local, o implementar un servicio en la nube en la Comunidad de PaddlePaddle AI Studio. En este último caso, proporcione el token de acceso de la Comunidad de AI Studio." + }, "system": { "win": { "langs_tooltip": "Dependiendo de Windows para proporcionar servicios, necesita descargar el paquete de idioma en el sistema para admitir los idiomas correspondientes." diff --git a/src/renderer/src/i18n/translate/fr-fr.json b/src/renderer/src/i18n/translate/fr-fr.json index 08af4d6e30..383f53bec3 100644 --- a/src/renderer/src/i18n/translate/fr-fr.json +++ b/src/renderer/src/i18n/translate/fr-fr.json @@ -3884,6 +3884,13 @@ "title": "Image" }, "image_provider": "Fournisseur de service OCR", + "paddleocr": { + "aistudio_access_token": "Jeton d’accès de la communauté AI Studio", + "aistudio_url_label": "Communauté AI Studio", + "api_url": "URL de l’API", + "serving_doc_url_label": "Documentation de PaddleOCR Serving", + "tip": "Vous pouvez consulter la documentation officielle de PaddleOCR pour déployer un service local, ou déployer un service cloud sur la Communauté PaddlePaddle AI Studio. Dans ce dernier cas, veuillez fournir le jeton d’accès de la Communauté AI Studio." + }, "system": { "win": { "langs_tooltip": "Dépendre de Windows pour fournir des services, vous devez télécharger des packs linguistiques dans le système afin de prendre en charge les langues concernées." diff --git a/src/renderer/src/i18n/translate/pt-pt.json b/src/renderer/src/i18n/translate/pt-pt.json index eff87d6902..3da7c91e18 100644 --- a/src/renderer/src/i18n/translate/pt-pt.json +++ b/src/renderer/src/i18n/translate/pt-pt.json @@ -3884,6 +3884,13 @@ "title": "Imagem" }, "image_provider": "Provedor de serviços OCR", + "paddleocr": { + "aistudio_access_token": "Token de acesso da comunidade AI Studio", + "aistudio_url_label": "Comunidade AI Studio", + "api_url": "URL da API", + "serving_doc_url_label": "Documentação do PaddleOCR Serving", + "tip": "Você pode consultar a documentação oficial do PaddleOCR para implantar um serviço local ou implantar um serviço na nuvem na Comunidade PaddlePaddle AI Studio. No último caso, forneça o token de acesso da Comunidade AI Studio." + }, "system": { "win": { "langs_tooltip": "Dependendo do Windows para fornecer serviços, você precisa baixar pacotes de idiomas no sistema para dar suporte aos idiomas relevantes." diff --git a/src/renderer/src/pages/settings/DocProcessSettings/OcrPpocrSettings.tsx b/src/renderer/src/pages/settings/DocProcessSettings/OcrPpocrSettings.tsx new file mode 100644 index 0000000000..634e63b2d3 --- /dev/null +++ b/src/renderer/src/pages/settings/DocProcessSettings/OcrPpocrSettings.tsx @@ -0,0 +1,83 @@ +import { ErrorBoundary } from '@renderer/components/ErrorBoundary' +import { useOcrProvider } from '@renderer/hooks/useOcrProvider' +import { BuiltinOcrProviderIds, isOcrPpocrProvider } from '@renderer/types' +import { Input } from 'antd' +import { startTransition, useCallback, useState } from 'react' +import { useTranslation } from 'react-i18next' + +import { SettingHelpLink, SettingHelpText, SettingHelpTextRow, SettingRow, SettingRowTitle } from '..' + +export const OcrPpocrSettings = () => { + // Hack: Hard-coded for now + const SERVING_DOC_URL = 'https://www.paddleocr.ai/latest/version3.x/deployment/serving.html' + const AISTUDIO_URL = 'https://aistudio.baidu.com/pipeline/mine' + + const { t } = useTranslation() + const { provider, updateConfig } = useOcrProvider(BuiltinOcrProviderIds.paddleocr) + + if (!isOcrPpocrProvider(provider)) { + throw new Error('Not PaddleOCR provider.') + } + + const [apiUrl, setApiUrl] = useState(provider.config.apiUrl || '') + const [accessToken, setAccessToken] = useState(provider.config.accessToken || '') + + const onApiUrlChange = useCallback((e: React.ChangeEvent) => { + const value = e.target.value + startTransition(() => { + setApiUrl(value) + }) + }, []) + const onAccessTokenChange = useCallback((e: React.ChangeEvent) => { + const value = e.target.value + startTransition(() => { + setAccessToken(value) + }) + }, []) + + const onBlur = useCallback(() => { + updateConfig({ + apiUrl, + accessToken + }) + }, [apiUrl, accessToken, updateConfig]) + + return ( + + + {t('settings.tool.ocr.paddleocr.api_url')} + + + + + + {t('settings.tool.ocr.paddleocr.aistudio_access_token')} + + + + + + {t('settings.tool.ocr.paddleocr.tip')} +
+ + {t('settings.tool.ocr.paddleocr.serving_doc_url_label')} + + + {t('settings.tool.ocr.paddleocr.aistudio_url_label')} + +
+
+
+ ) +} diff --git a/src/renderer/src/pages/settings/DocProcessSettings/OcrProviderSettings.tsx b/src/renderer/src/pages/settings/DocProcessSettings/OcrProviderSettings.tsx index ac069a3b3b..120e5a9e48 100644 --- a/src/renderer/src/pages/settings/DocProcessSettings/OcrProviderSettings.tsx +++ b/src/renderer/src/pages/settings/DocProcessSettings/OcrProviderSettings.tsx @@ -8,6 +8,7 @@ import { Divider, Flex } from 'antd' import styled from 'styled-components' import { SettingGroup, SettingTitle } from '..' +import { OcrPpocrSettings } from './OcrPpocrSettings' import { OcrSystemSettings } from './OcrSystemSettings' import { OcrTesseractSettings } from './OcrTesseractSettings' @@ -32,6 +33,8 @@ const OcrProviderSettings = ({ provider }: Props) => { return case 'system': return + case 'paddleocr': + return default: return null } diff --git a/src/renderer/src/store/index.ts b/src/renderer/src/store/index.ts index 5a70c202f8..6f432babf2 100644 --- a/src/renderer/src/store/index.ts +++ b/src/renderer/src/store/index.ts @@ -67,7 +67,7 @@ const persistedReducer = persistReducer( { key: 'cherry-studio', storage, - version: 147, + version: 148, blacklist: ['runtime', 'messages', 'messageBlocks', 'tabs'], migrate }, diff --git a/src/renderer/src/store/migrate.ts b/src/renderer/src/store/migrate.ts index 0ba27d931e..5107b584e1 100644 --- a/src/renderer/src/store/migrate.ts +++ b/src/renderer/src/store/migrate.ts @@ -2383,6 +2383,15 @@ const migrateConfig = { logger.error('migrate 147 error', error as Error) return state } + }, + '148': (state: RootState) => { + try { + addOcrProvider(state, BUILTIN_OCR_PROVIDERS_MAP.paddleocr) + return state + } catch (error) { + logger.error('migrate 148 error', error as Error) + return state + } } } diff --git a/src/renderer/src/types/ocr.ts b/src/renderer/src/types/ocr.ts index 692ae7283d..d67cba958d 100644 --- a/src/renderer/src/types/ocr.ts +++ b/src/renderer/src/types/ocr.ts @@ -4,7 +4,8 @@ import { FileMetadata, ImageFileMetadata, isImageFileMetadata, TranslateLanguage export const BuiltinOcrProviderIds = { tesseract: 'tesseract', - system: 'system' + system: 'system', + paddleocr: 'paddleocr' } as const export type BuiltinOcrProviderId = keyof typeof BuiltinOcrProviderIds @@ -74,7 +75,7 @@ export type OcrProviderBaseConfig = { enabled?: boolean } -export type OcrProviderConfig = OcrApiProviderConfig | OcrTesseractConfig | OcrSystemConfig +export type OcrProviderConfig = OcrApiProviderConfig | OcrTesseractConfig | OcrSystemConfig | OcrPpocrConfig export type OcrProvider = { id: string @@ -170,3 +171,20 @@ export type OcrSystemProvider = { export const isOcrSystemProvider = (p: OcrProvider): p is OcrSystemProvider => { return p.id === BuiltinOcrProviderIds.system } + +// PaddleOCR Types +export type OcrPpocrConfig = OcrProviderBaseConfig & { + apiUrl?: string + accessToken?: string +} + +export type OcrPpocrProvider = { + id: 'paddleocr' + config: OcrPpocrConfig +} & ImageOcrProvider & + // PdfOcrProvider & + BuiltinOcrProvider + +export const isOcrPpocrProvider = (p: OcrProvider): p is OcrPpocrProvider => { + return p.id === BuiltinOcrProviderIds.paddleocr +}