From f9591689a3d55c5aa32b07eca28cfd6aa85f0b86 Mon Sep 17 00:00:00 2001 From: Wang Jiyuan <59059173+EurFelux@users.noreply.github.com> Date: Wed, 11 Jun 2025 11:52:15 +0800 Subject: [PATCH] Feat: Allows setting the vector dimension of the knowledge base embedding model (#7025) --- src/main/embeddings/EmbeddingsFactory.ts | 22 +++-- src/main/embeddings/VoyageEmbeddings.ts | 12 ++- src/renderer/src/i18n/locales/en-us.json | 8 +- src/renderer/src/i18n/locales/ja-jp.json | 8 +- src/renderer/src/i18n/locales/ru-ru.json | 8 +- src/renderer/src/i18n/locales/zh-cn.json | 6 +- src/renderer/src/i18n/locales/zh-tw.json | 8 +- src/renderer/src/i18n/translate/el-gr.json | 8 +- src/renderer/src/i18n/translate/es-es.json | 8 +- src/renderer/src/i18n/translate/fr-fr.json | 8 +- src/renderer/src/i18n/translate/pt-pt.json | 8 +- .../components/AddKnowledgePopup.tsx | 89 +++++++++++++++---- src/renderer/src/services/KnowledgeService.ts | 3 +- 13 files changed, 149 insertions(+), 47 deletions(-) diff --git a/src/main/embeddings/EmbeddingsFactory.ts b/src/main/embeddings/EmbeddingsFactory.ts index 6dfc049293..808db05794 100644 --- a/src/main/embeddings/EmbeddingsFactory.ts +++ b/src/main/embeddings/EmbeddingsFactory.ts @@ -5,18 +5,26 @@ import { AzureOpenAiEmbeddings } from '@cherrystudio/embedjs-openai/src/azure-op import { getInstanceName } from '@main/utils' import { KnowledgeBaseParams } from '@types' -import VoyageEmbeddings from './VoyageEmbeddings' +import { SUPPORTED_DIM_MODELS as VOYAGE_SUPPORTED_DIM_MODELS, VoyageEmbeddings } from './VoyageEmbeddings' export default class EmbeddingsFactory { static create({ model, provider, apiKey, apiVersion, baseURL, dimensions }: KnowledgeBaseParams): BaseEmbeddings { const batchSize = 10 if (provider === 'voyageai') { - return new VoyageEmbeddings({ - modelName: model, - apiKey, - outputDimension: dimensions, - batchSize: 8 - }) + if (VOYAGE_SUPPORTED_DIM_MODELS.includes(model)) { + return new VoyageEmbeddings({ + modelName: model, + apiKey, + outputDimension: dimensions, + batchSize: 8 + }) + } else { + return new VoyageEmbeddings({ + modelName: model, + apiKey, + batchSize: 8 + }) + } } if (provider === 'ollama') { if (baseURL.includes('v1/')) { diff --git a/src/main/embeddings/VoyageEmbeddings.ts b/src/main/embeddings/VoyageEmbeddings.ts index ce21afe580..edec32dc51 100644 --- a/src/main/embeddings/VoyageEmbeddings.ts +++ b/src/main/embeddings/VoyageEmbeddings.ts @@ -1,16 +1,20 @@ import { BaseEmbeddings } from '@cherrystudio/embedjs-interfaces' import { VoyageEmbeddings as _VoyageEmbeddings } from '@langchain/community/embeddings/voyage' -export default class VoyageEmbeddings extends BaseEmbeddings { +/** + * 支持设置嵌入维度的模型 + */ +export const SUPPORTED_DIM_MODELS = ['voyage-3-large', 'voyage-3.5', 'voyage-3.5-lite', 'voyage-code-3'] +export class VoyageEmbeddings extends BaseEmbeddings { private model: _VoyageEmbeddings constructor(private readonly configuration?: ConstructorParameters[0]) { super() if (!this.configuration) this.configuration = {} if (!this.configuration.modelName) this.configuration.modelName = 'voyage-3' - - if (!this.configuration.outputDimension) { - throw new Error('You need to pass in the optional dimensions parameter for this model') + if (!SUPPORTED_DIM_MODELS.includes(this.configuration.modelName) && this.configuration.outputDimension) { + throw new Error(`VoyageEmbeddings only supports ${SUPPORTED_DIM_MODELS.join(', ')}`) } + this.model = new _VoyageEmbeddings(this.configuration) } override async getDimensions(): Promise { diff --git a/src/renderer/src/i18n/locales/en-us.json b/src/renderer/src/i18n/locales/en-us.json index 3a497def5b..fc20b5ab7d 100644 --- a/src/renderer/src/i18n/locales/en-us.json +++ b/src/renderer/src/i18n/locales/en-us.json @@ -565,8 +565,12 @@ "urls": "URLs", "dimensions": "Embedding dimension", "dimensions_size_tooltip": "The size of the embedding dimension; the larger the value, the larger the embedding dimension, but it also consumes more tokens.", - "dimensions_size_placeholder": "Default value (modification not recommended)", - "dimensions_size_too_large": "The embedding dimension cannot exceed the model's context limit ({{max_context}})." + "dimensions_size_placeholder": " Embedding dimension size, e.g. 1024", + "dimensions_auto_set": "Auto-set embedding dimensions", + "dimensions_error_invalid": "Please enter embedding dimension size", + "dimensions_size_too_large": "The embedding dimension cannot exceed the model's context limit ({{max_context}}).", + "dimensions_set_right": "⚠️ Please ensure the model supports the set embedding dimension size", + "dimensions_default": "The model will use default embedding dimensions" }, "languages": { "arabic": "Arabic", diff --git a/src/renderer/src/i18n/locales/ja-jp.json b/src/renderer/src/i18n/locales/ja-jp.json index c632cca10a..576205e028 100644 --- a/src/renderer/src/i18n/locales/ja-jp.json +++ b/src/renderer/src/i18n/locales/ja-jp.json @@ -565,8 +565,12 @@ "urls": "URL", "dimensions": "埋め込み次元", "dimensions_size_tooltip": "埋め込み次元のサイズは、数値が大きいほど埋め込み次元も大きくなりますが、消費するトークンも増えます。", - "dimensions_size_placeholder": "デフォルト値(変更はお勧めしません)", - "dimensions_size_too_large": "埋め込み次元はモデルのコンテキスト制限({{max_context}})を超えてはなりません。" + "dimensions_size_placeholder": " 埋め込み次元のサイズ(例:1024)", + "dimensions_auto_set": "埋め込み次元を自動設定", + "dimensions_error_invalid": "埋め込み次元のサイズを入力してください", + "dimensions_size_too_large": "埋め込み次元はモデルのコンテキスト制限({{max_context}})を超えてはなりません。", + "dimensions_set_right": "⚠️ モデルが設定した埋め込み次元のサイズをサポートしていることを確認してください", + "dimensions_default": "モデルはデフォルトの埋め込み次元を使用します" }, "languages": { "arabic": "アラビア語", diff --git a/src/renderer/src/i18n/locales/ru-ru.json b/src/renderer/src/i18n/locales/ru-ru.json index f2e834349c..024718b008 100644 --- a/src/renderer/src/i18n/locales/ru-ru.json +++ b/src/renderer/src/i18n/locales/ru-ru.json @@ -565,8 +565,12 @@ "urls": "URL-адреса", "dimensions": "векторное пространство", "dimensions_size_tooltip": "Размерность вложения, чем больше значение, тем больше размерность вложения, но и потребляемых токенов также становится больше.", - "dimensions_size_placeholder": "Значение по умолчанию (не рекомендуется изменять)", - "dimensions_size_too_large": "Размерность вложения не может превышать ограничение контекста модели ({{max_context}})" + "dimensions_size_placeholder": " Размерность эмбеддинга, например 1024", + "dimensions_auto_set": "Автоматическая установка размерности эмбеддинга", + "dimensions_error_invalid": "Пожалуйста, введите размерность эмбеддинга", + "dimensions_size_too_large": "Размерность вложения не может превышать ограничение контекста модели ({{max_context}})", + "dimensions_set_right": "⚠️ Убедитесь, что модель поддерживает заданный размер эмбеддинга", + "dimensions_default": "Модель будет использовать размер эмбеддинга по умолчанию" }, "languages": { "arabic": "Арабский", diff --git a/src/renderer/src/i18n/locales/zh-cn.json b/src/renderer/src/i18n/locales/zh-cn.json index d5b2849519..4edad4311c 100644 --- a/src/renderer/src/i18n/locales/zh-cn.json +++ b/src/renderer/src/i18n/locales/zh-cn.json @@ -518,7 +518,11 @@ "delete_confirm": "确定要删除此知识库吗?", "dimensions": "嵌入维度", "dimensions_size_tooltip": "嵌入维度大小,数值越大,嵌入维度越大,但消耗的 Token 也越多", - "dimensions_size_placeholder": " 默认值(不建议修改)", + "dimensions_set_right": "⚠️ 请确保模型支持所设置的嵌入维度大小", + "dimensions_default": "模型将使用默认嵌入维度", + "dimensions_size_placeholder": " 嵌入维度大小,如 1024", + "dimensions_auto_set": "自动设置嵌入维度", + "dimensions_error_invalid": "请输入嵌入维度大小", "dimensions_size_too_large": "嵌入维度不能超过模型上下文限制({{max_context}})", "directories": "目录", "directory_placeholder": "请输入目录路径", diff --git a/src/renderer/src/i18n/locales/zh-tw.json b/src/renderer/src/i18n/locales/zh-tw.json index eed89e5af7..ac7334089b 100644 --- a/src/renderer/src/i18n/locales/zh-tw.json +++ b/src/renderer/src/i18n/locales/zh-tw.json @@ -565,8 +565,12 @@ "urls": "網址", "dimensions": "嵌入維度", "dimensions_size_tooltip": "嵌入維度大小,數值越大,嵌入維度越大,但消耗的 Token 也越多", - "dimensions_size_placeholder": "預設值(不建議修改)", - "dimensions_size_too_large": "嵌入維度不能超過模型上下文限制({{max_context}})" + "dimensions_size_placeholder": " 嵌入維度大小,例如 1024", + "dimensions_auto_set": "自動設定嵌入維度", + "dimensions_error_invalid": "請輸入嵌入維度大小", + "dimensions_size_too_large": "嵌入維度不能超過模型上下文限制({{max_context}})", + "dimensions_set_right": "⚠️ 請確保模型支援所設置的嵌入維度大小", + "dimensions_default": "模型將使用預設嵌入維度" }, "languages": { "arabic": "阿拉伯文", diff --git a/src/renderer/src/i18n/translate/el-gr.json b/src/renderer/src/i18n/translate/el-gr.json index 678b59708c..d9dfcc740d 100644 --- a/src/renderer/src/i18n/translate/el-gr.json +++ b/src/renderer/src/i18n/translate/el-gr.json @@ -490,8 +490,12 @@ "urls": "Διευθύνσεις", "dimensions": "Διαστάσεις ενσωμάτωσης", "dimensions_size_tooltip": "Το μέγεθος των διαστάσεων ενσωμάτωσης. Όσο μεγαλύτερη η τιμή, τόσο περισσότερες οι διαστάσεις ενσωμάτωσης, αλλά και οι απαιτούμενες μονάδες (Tokens).", - "dimensions_size_placeholder": "Προεπιλεγμένη τιμή (δεν συνιστάται να τροποποιηθεί)", - "dimensions_size_too_large": "Οι διαστάσεις ενσωμάτωσης δεν μπορούν να υπερβούν το όριο περιεχομένου του μοντέλου ({{max_context}})" + "dimensions_size_placeholder": " Μέγεθος διαστάσεων ενσωμάτωσης, π.χ. 1024", + "dimensions_auto_set": "Αυτόματη ρύθμιση διαστάσεων ενσωμάτωσης", + "dimensions_error_invalid": "Παρακαλώ εισάγετε μέγεθος διαστάσεων ενσωμάτωσης", + "dimensions_size_too_large": "Οι διαστάσεις ενσωμάτωσης δεν μπορούν να υπερβούν το όριο περιεχομένου του μοντέλου ({{max_context}})", + "dimensions_set_right": "⚠️ Βεβαιωθείτε ότι το μοντέλο υποστηρίζει το καθορισμένο μέγεθος διαστάσεων ενσωμάτωσης", + "dimensions_default": "Το μοντέλο θα χρησιμοποιήσει τις προεπιλεγμένες διαστάσεις ενσωμάτωσης" }, "languages": { "arabic": "Αραβικά", diff --git a/src/renderer/src/i18n/translate/es-es.json b/src/renderer/src/i18n/translate/es-es.json index a3551058fc..9876e89f02 100644 --- a/src/renderer/src/i18n/translate/es-es.json +++ b/src/renderer/src/i18n/translate/es-es.json @@ -491,8 +491,12 @@ "urls": "URLs", "dimensions": "Dimensión de incrustación", "dimensions_size_tooltip": "Tamaño de la dimensión de incrustación, cuanto mayor sea el valor, mayor será la dimensión de incrustación, pero también consumirá más Tokens", - "dimensions_size_placeholder": "Valor predeterminado (no recomendado modificar)", - "dimensions_size_too_large": "La dimensión de incrustación no puede exceder el límite del contexto del modelo ({{max_context}})" + "dimensions_size_placeholder": " Tamaño de dimensión de incrustación, ej. 1024", + "dimensions_auto_set": "Configuración automática de dimensiones de incrustación", + "dimensions_error_invalid": "Por favor ingrese el tamaño de dimensión de incrustación", + "dimensions_size_too_large": "La dimensión de incrustación no puede exceder el límite del contexto del modelo ({{max_context}})", + "dimensions_set_right": "⚠️ Asegúrese de que el modelo admita el tamaño de dimensión de incrustación establecido", + "dimensions_default": "El modelo utilizará las dimensiones de incrustación predeterminadas" }, "languages": { "arabic": "Árabe", diff --git a/src/renderer/src/i18n/translate/fr-fr.json b/src/renderer/src/i18n/translate/fr-fr.json index 1fe3ee1751..5a2e83374e 100644 --- a/src/renderer/src/i18n/translate/fr-fr.json +++ b/src/renderer/src/i18n/translate/fr-fr.json @@ -490,8 +490,12 @@ "urls": "URLs", "dimensions": "Размерность встраивания", "dimensions_size_tooltip": "Размерность встраивания. Чем больше значение, тем выше размерность, но тем больше токенов требуется", - "dimensions_size_placeholder": "Значение по умолчанию (не рекомендуется изменять)", - "dimensions_size_too_large": "Размерность встраивания не может превышать ограничение контекста модели ({{max_context}})" + "dimensions_size_placeholder": " Taille de dimension d'incorporation, ex. 1024", + "dimensions_auto_set": "Réglage automatique des dimensions d'incorporation", + "dimensions_error_invalid": "Veuillez saisir la taille de dimension d'incorporation", + "dimensions_size_too_large": "Размерность встраивания не может превышать ограничение контекста модели ({{max_context}})", + "dimensions_set_right": "⚠️ Assurez-vous que le modèle prend en charge la taille de dimension d'incorporation définie", + "dimensions_default": "Le modèle utilisera les dimensions d'incorporation par défaut" }, "languages": { "arabic": "Arabe", diff --git a/src/renderer/src/i18n/translate/pt-pt.json b/src/renderer/src/i18n/translate/pt-pt.json index d0e4c6b6a3..d13541bee4 100644 --- a/src/renderer/src/i18n/translate/pt-pt.json +++ b/src/renderer/src/i18n/translate/pt-pt.json @@ -492,8 +492,12 @@ "urls": "URLs", "dimensions": "Dimensão de incorporação", "dimensions_size_tooltip": "Tamanho da dimensão de incorporação, quanto maior o valor, maior a dimensão de incorporação, mas também maior o consumo de tokens", - "dimensions_size_placeholder": "Valor padrão (não recomendado alterar)", - "dimensions_size_too_large": "A dimensão de incorporação não pode exceder o limite do contexto do modelo ({{max_context}})" + "dimensions_size_placeholder": " Tamanho da dimensão de incorporação, ex. 1024", + "dimensions_auto_set": "Definição automática de dimensões de incorporação", + "dimensions_error_invalid": "Por favor insira o tamanho da dimensão de incorporação", + "dimensions_size_too_large": "A dimensão de incorporação não pode exceder o limite do contexto do modelo ({{max_context}})", + "dimensions_set_right": "⚠️ Certifique-se de que o modelo suporta o tamanho da dimensão de incorporação definido", + "dimensions_default": "O modelo utilizará as dimensões de incorporação padrão" }, "languages": { "arabic": "Árabe", diff --git a/src/renderer/src/pages/knowledge/components/AddKnowledgePopup.tsx b/src/renderer/src/pages/knowledge/components/AddKnowledgePopup.tsx index e1c9d29580..70f976f4b8 100644 --- a/src/renderer/src/pages/knowledge/components/AddKnowledgePopup.tsx +++ b/src/renderer/src/pages/knowledge/components/AddKnowledgePopup.tsx @@ -9,9 +9,9 @@ import { SettingHelpText } from '@renderer/pages/settings' import AiProvider from '@renderer/providers/AiProvider' import { getKnowledgeBaseParams } from '@renderer/services/KnowledgeService' import { getModelUniqId } from '@renderer/services/ModelService' -import { Model } from '@renderer/types' +import { KnowledgeBase, Model } from '@renderer/types' import { getErrorMessage } from '@renderer/utils/error' -import { Form, Input, Modal, Select, Slider } from 'antd' +import { Flex, Form, Input, InputNumber, Modal, Select, Slider, Switch } from 'antd' import { find, sortBy } from 'lodash' import { nanoid } from 'nanoid' import { useMemo, useRef, useState } from 'react' @@ -24,6 +24,8 @@ interface ShowParams { interface FormData { name: string model: string + autoDims: boolean | undefined + dimensions: number | undefined rerankModel: string | undefined documentCount: number | undefined } @@ -35,6 +37,7 @@ interface Props extends ShowParams { const PopupContainer: React.FC = ({ title, resolve }) => { const [open, setOpen] = useState(true) const [loading, setLoading] = useState(false) + const [autoDims, setAutoDims] = useState(true) const [form] = Form.useForm() const { t } = useTranslation() const { providers } = useProviders() @@ -67,7 +70,8 @@ const PopupContainer: React.FC = ({ title, resolve }) => { .map((m) => ({ label: m.name, value: getModelUniqId(m), - key: `${p.id}-${m.id}` + providerId: p.id, + modelId: m.id })) })) .filter((group) => group.options.length > 0) @@ -107,24 +111,27 @@ const PopupContainer: React.FC = ({ title, resolve }) => { return } - const aiProvider = new AiProvider(provider) - let dimensions = 0 - - try { - dimensions = await aiProvider.getEmbeddingDimensions(selectedEmbeddingModel) - } catch (error) { - console.error('Error getting embedding dimensions:', error) - window.message.error(t('message.error.get_embedding_dimensions') + '\n' + getErrorMessage(error)) - setLoading(false) - return + if (autoDims || typeof values.dimensions === 'undefined') { + try { + const aiProvider = new AiProvider(provider) + values.dimensions = await aiProvider.getEmbeddingDimensions(selectedEmbeddingModel) + } catch (error) { + console.error('Error getting embedding dimensions:', error) + window.message.error(t('message.error.get_embedding_dimensions') + '\n' + getErrorMessage(error)) + setLoading(false) + return + } + } else if (typeof values.dimensions === 'string') { + // 按理来说不应该是string的,但是确实是string + values.dimensions = parseInt(values.dimensions) } - const newBase = { + const newBase: KnowledgeBase = { id: nanoid(), name: values.name, model: selectedEmbeddingModel, rerankModel: selectedRerankModel, - dimensions, + dimensions: values.dimensions, documentCount: values.documentCount || DEFAULT_KNOWLEDGE_DOCUMENT_COUNT, items: [], created_at: Date.now(), @@ -134,7 +141,7 @@ const PopupContainer: React.FC = ({ title, resolve }) => { await window.api.knowledgeBase.create(getKnowledgeBaseParams(newBase)) - addKnowledgeBase(newBase as any) + addKnowledgeBase(newBase) setOpen(false) resolve(newBase) } @@ -203,11 +210,59 @@ const PopupContainer: React.FC = ({ title, resolve }) => { marks={{ 1: '1', 6: t('knowledge.document_count_default'), 30: '30' }} /> + + + { + form.setFieldValue('autoDims', !autoDims) + if (!autoDims) { + form.validateFields(['dimensions']) + } + setAutoDims(!autoDims) + }}> + + + + ({ + validator(_, value) { + if (getFieldValue('autoDims') || value > 0) { + return Promise.resolve() + } else { + return Promise.reject(t('knowledge.dimensions_error_invalid')) + } + } + }) + ]}> + + + + {!autoDims && ( + + {t('knowledge.dimensions_set_right')} + + )} ) } - export default class AddKnowledgePopup { static hide() { TopView.hide('AddKnowledgePopup') diff --git a/src/renderer/src/services/KnowledgeService.ts b/src/renderer/src/services/KnowledgeService.ts index af44b8e8de..8a28732a02 100644 --- a/src/renderer/src/services/KnowledgeService.ts +++ b/src/renderer/src/services/KnowledgeService.ts @@ -2,7 +2,6 @@ import type { ExtractChunkData } from '@cherrystudio/embedjs-interfaces' import { DEFAULT_KNOWLEDGE_DOCUMENT_COUNT, DEFAULT_KNOWLEDGE_THRESHOLD } from '@renderer/config/constant' import { getEmbeddingMaxContext } from '@renderer/config/embedings' import Logger from '@renderer/config/logger' -import { ONLY_SUPPORTED_DIMENSION_PROVIDERS } from '@renderer/config/providers' import AiProvider from '@renderer/providers/AiProvider' import store from '@renderer/store' import { FileType, KnowledgeBase, KnowledgeBaseParams, KnowledgeReference } from '@renderer/types' @@ -40,7 +39,7 @@ export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams id: base.id, model: base.model.id, provider: base.model.provider, - dimensions: ONLY_SUPPORTED_DIMENSION_PROVIDERS.includes(base.model.provider) ? base.dimensions : undefined, + dimensions: base.dimensions, apiKey: aiProvider.getApiKey() || 'secret', apiVersion: provider.apiVersion, baseURL: host,