Feat: Allows setting the vector dimension of the knowledge base embedding model (#7025)

This commit is contained in:
Wang Jiyuan 2025-06-11 11:52:15 +08:00 committed by GitHub
parent 79e504fc31
commit f9591689a3
13 changed files with 149 additions and 47 deletions

View File

@ -5,18 +5,26 @@ import { AzureOpenAiEmbeddings } from '@cherrystudio/embedjs-openai/src/azure-op
import { getInstanceName } from '@main/utils'
import { KnowledgeBaseParams } from '@types'
import VoyageEmbeddings from './VoyageEmbeddings'
import { SUPPORTED_DIM_MODELS as VOYAGE_SUPPORTED_DIM_MODELS, VoyageEmbeddings } from './VoyageEmbeddings'
export default class EmbeddingsFactory {
static create({ model, provider, apiKey, apiVersion, baseURL, dimensions }: KnowledgeBaseParams): BaseEmbeddings {
const batchSize = 10
if (provider === 'voyageai') {
return new VoyageEmbeddings({
modelName: model,
apiKey,
outputDimension: dimensions,
batchSize: 8
})
if (VOYAGE_SUPPORTED_DIM_MODELS.includes(model)) {
return new VoyageEmbeddings({
modelName: model,
apiKey,
outputDimension: dimensions,
batchSize: 8
})
} else {
return new VoyageEmbeddings({
modelName: model,
apiKey,
batchSize: 8
})
}
}
if (provider === 'ollama') {
if (baseURL.includes('v1/')) {

View File

@ -1,16 +1,20 @@
import { BaseEmbeddings } from '@cherrystudio/embedjs-interfaces'
import { VoyageEmbeddings as _VoyageEmbeddings } from '@langchain/community/embeddings/voyage'
export default class VoyageEmbeddings extends BaseEmbeddings {
/**
 * Voyage model names that accept a custom `outputDimension`.
 *
 * `VoyageEmbeddings` rejects an explicit output dimension for any model
 * that is not listed here.
 */
export const SUPPORTED_DIM_MODELS: string[] = [
  'voyage-3-large',
  'voyage-3.5',
  'voyage-3.5-lite',
  'voyage-code-3'
]
export class VoyageEmbeddings extends BaseEmbeddings {
private model: _VoyageEmbeddings
constructor(private readonly configuration?: ConstructorParameters<typeof _VoyageEmbeddings>[0]) {
super()
if (!this.configuration) this.configuration = {}
if (!this.configuration.modelName) this.configuration.modelName = 'voyage-3'
if (!this.configuration.outputDimension) {
throw new Error('You need to pass in the optional dimensions parameter for this model')
if (!SUPPORTED_DIM_MODELS.includes(this.configuration.modelName) && this.configuration.outputDimension) {
throw new Error(`VoyageEmbeddings only supports ${SUPPORTED_DIM_MODELS.join(', ')}`)
}
this.model = new _VoyageEmbeddings(this.configuration)
}
override async getDimensions(): Promise<number> {

View File

@ -565,8 +565,12 @@
"urls": "URLs",
"dimensions": "Embedding dimension",
"dimensions_size_tooltip": "The size of the embedding dimension; the larger the value, the larger the embedding dimension, but it also consumes more tokens.",
"dimensions_size_placeholder": "Default value (modification not recommended)",
"dimensions_size_too_large": "The embedding dimension cannot exceed the model's context limit ({{max_context}})."
"dimensions_size_placeholder": " Embedding dimension size, e.g. 1024",
"dimensions_auto_set": "Auto-set embedding dimensions",
"dimensions_error_invalid": "Please enter embedding dimension size",
"dimensions_size_too_large": "The embedding dimension cannot exceed the model's context limit ({{max_context}}).",
"dimensions_set_right": "⚠️ Please ensure the model supports the set embedding dimension size",
"dimensions_default": "The model will use default embedding dimensions"
},
"languages": {
"arabic": "Arabic",

View File

@ -565,8 +565,12 @@
"urls": "URL",
"dimensions": "埋め込み次元",
"dimensions_size_tooltip": "埋め込み次元のサイズは、数値が大きいほど埋め込み次元も大きくなりますが、消費するトークンも増えます。",
"dimensions_size_placeholder": "デフォルト値(変更はお勧めしません)",
"dimensions_size_too_large": "埋め込み次元はモデルのコンテキスト制限({{max_context}})を超えてはなりません。"
"dimensions_size_placeholder": " 埋め込み次元のサイズ1024",
"dimensions_auto_set": "埋め込み次元を自動設定",
"dimensions_error_invalid": "埋め込み次元のサイズを入力してください",
"dimensions_size_too_large": "埋め込み次元はモデルのコンテキスト制限({{max_context}})を超えてはなりません。",
"dimensions_set_right": "⚠️ モデルが設定した埋め込み次元のサイズをサポートしていることを確認してください",
"dimensions_default": "モデルはデフォルトの埋め込み次元を使用します"
},
"languages": {
"arabic": "アラビア語",

View File

@ -565,8 +565,12 @@
"urls": "URL-адреса",
"dimensions": "векторное пространство",
"dimensions_size_tooltip": "Размерность вложения, чем больше значение, тем больше размерность вложения, но и потребляемых токенов также становится больше.",
"dimensions_size_placeholder": "Значение по умолчанию (не рекомендуется изменять)",
"dimensions_size_too_large": "Размерность вложения не может превышать ограничение контекста модели ({{max_context}})"
"dimensions_size_placeholder": " Размерность эмбеддинга, например 1024",
"dimensions_auto_set": "Автоматическая установка размерности эмбеддинга",
"dimensions_error_invalid": "Пожалуйста, введите размерность эмбеддинга",
"dimensions_size_too_large": "Размерность вложения не может превышать ограничение контекста модели ({{max_context}})",
"dimensions_set_right": "⚠️ Убедитесь, что модель поддерживает заданный размер эмбеддинга",
"dimensions_default": "Модель будет использовать размер эмбеддинга по умолчанию"
},
"languages": {
"arabic": "Арабский",

View File

@ -518,7 +518,11 @@
"delete_confirm": "确定要删除此知识库吗?",
"dimensions": "嵌入维度",
"dimensions_size_tooltip": "嵌入维度大小,数值越大,嵌入维度越大,但消耗的 Token 也越多",
"dimensions_size_placeholder": " 默认值(不建议修改)",
"dimensions_set_right": "⚠️ 请确保模型支持所设置的嵌入维度大小",
"dimensions_default": "模型将使用默认嵌入维度",
"dimensions_size_placeholder": " 嵌入维度大小,如 1024",
"dimensions_auto_set": "自动设置嵌入维度",
"dimensions_error_invalid": "请输入嵌入维度大小",
"dimensions_size_too_large": "嵌入维度不能超过模型上下文限制({{max_context}}",
"directories": "目录",
"directory_placeholder": "请输入目录路径",

View File

@ -565,8 +565,12 @@
"urls": "網址",
"dimensions": "嵌入維度",
"dimensions_size_tooltip": "嵌入維度大小,數值越大,嵌入維度越大,但消耗的 Token 也越多",
"dimensions_size_placeholder": "預設值(不建議修改)",
"dimensions_size_too_large": "嵌入維度不能超過模型上下文限制({{max_context}}"
"dimensions_size_placeholder": " 嵌入維度大小,例如 1024",
"dimensions_auto_set": "自動設定嵌入維度",
"dimensions_error_invalid": "請輸入嵌入維度大小",
"dimensions_size_too_large": "嵌入維度不能超過模型上下文限制({{max_context}}",
"dimensions_set_right": "⚠️ 請確保模型支援所設置的嵌入維度大小",
"dimensions_default": "模型將使用預設嵌入維度"
},
"languages": {
"arabic": "阿拉伯文",

View File

@ -490,8 +490,12 @@
"urls": "Διευθύνσεις",
"dimensions": "Διαστάσεις ενσωμάτωσης",
"dimensions_size_tooltip": "Το μέγεθος των διαστάσεων ενσωμάτωσης. Όσο μεγαλύτερη η τιμή, τόσο περισσότερες οι διαστάσεις ενσωμάτωσης, αλλά και οι απαιτούμενες μονάδες (Tokens).",
"dimensions_size_placeholder": "Προεπιλεγμένη τιμή (δεν συνιστάται να τροποποιηθεί)",
"dimensions_size_too_large": "Οι διαστάσεις ενσωμάτωσης δεν μπορούν να υπερβούν το όριο περιεχομένου του μοντέλου ({{max_context}})"
"dimensions_size_placeholder": " Μέγεθος διαστάσεων ενσωμάτωσης, π.χ. 1024",
"dimensions_auto_set": "Αυτόματη ρύθμιση διαστάσεων ενσωμάτωσης",
"dimensions_error_invalid": "Παρακαλώ εισάγετε μέγεθος διαστάσεων ενσωμάτωσης",
"dimensions_size_too_large": "Οι διαστάσεις ενσωμάτωσης δεν μπορούν να υπερβούν το όριο περιεχομένου του μοντέλου ({{max_context}})",
"dimensions_set_right": "⚠️ Βεβαιωθείτε ότι το μοντέλο υποστηρίζει το καθορισμένο μέγεθος διαστάσεων ενσωμάτωσης",
"dimensions_default": "Το μοντέλο θα χρησιμοποιήσει τις προεπιλεγμένες διαστάσεις ενσωμάτωσης"
},
"languages": {
"arabic": "Αραβικά",

View File

@ -491,8 +491,12 @@
"urls": "URLs",
"dimensions": "Dimensión de incrustación",
"dimensions_size_tooltip": "Tamaño de la dimensión de incrustación, cuanto mayor sea el valor, mayor será la dimensión de incrustación, pero también consumirá más Tokens",
"dimensions_size_placeholder": "Valor predeterminado (no recomendado modificar)",
"dimensions_size_too_large": "La dimensión de incrustación no puede exceder el límite del contexto del modelo ({{max_context}})"
"dimensions_size_placeholder": " Tamaño de dimensión de incrustación, ej. 1024",
"dimensions_auto_set": "Configuración automática de dimensiones de incrustación",
"dimensions_error_invalid": "Por favor ingrese el tamaño de dimensión de incrustación",
"dimensions_size_too_large": "La dimensión de incrustación no puede exceder el límite del contexto del modelo ({{max_context}})",
"dimensions_set_right": "⚠️ Asegúrese de que el modelo admita el tamaño de dimensión de incrustación establecido",
"dimensions_default": "El modelo utilizará las dimensiones de incrustación predeterminadas"
},
"languages": {
"arabic": "Árabe",

View File

@ -490,8 +490,12 @@
"urls": "URLs",
"dimensions": "Размерность встраивания",
"dimensions_size_tooltip": "Размерность встраивания. Чем больше значение, тем выше размерность, но тем больше токенов требуется",
"dimensions_size_placeholder": "Значение по умолчанию (не рекомендуется изменять)",
"dimensions_size_too_large": "Размерность встраивания не может превышать ограничение контекста модели ({{max_context}})"
"dimensions_size_placeholder": " Taille de dimension d'incorporation, ex. 1024",
"dimensions_auto_set": "Réglage automatique des dimensions d'incorporation",
"dimensions_error_invalid": "Veuillez saisir la taille de dimension d'incorporation",
"dimensions_size_too_large": "Размерность встраивания не может превышать ограничение контекста модели ({{max_context}})",
"dimensions_set_right": "⚠️ Assurez-vous que le modèle prend en charge la taille de dimension d'incorporation définie",
"dimensions_default": "Le modèle utilisera les dimensions d'incorporation par défaut"
},
"languages": {
"arabic": "Arabe",

View File

@ -492,8 +492,12 @@
"urls": "URLs",
"dimensions": "Dimensão de incorporação",
"dimensions_size_tooltip": "Tamanho da dimensão de incorporação, quanto maior o valor, maior a dimensão de incorporação, mas também maior o consumo de tokens",
"dimensions_size_placeholder": "Valor padrão (não recomendado alterar)",
"dimensions_size_too_large": "A dimensão de incorporação não pode exceder o limite do contexto do modelo ({{max_context}})"
"dimensions_size_placeholder": " Tamanho da dimensão de incorporação, ex. 1024",
"dimensions_auto_set": "Definição automática de dimensões de incorporação",
"dimensions_error_invalid": "Por favor insira o tamanho da dimensão de incorporação",
"dimensions_size_too_large": "A dimensão de incorporação não pode exceder o limite do contexto do modelo ({{max_context}})",
"dimensions_set_right": "⚠️ Certifique-se de que o modelo suporta o tamanho da dimensão de incorporação definido",
"dimensions_default": "O modelo utilizará as dimensões de incorporação padrão"
},
"languages": {
"arabic": "Árabe",

View File

@ -9,9 +9,9 @@ import { SettingHelpText } from '@renderer/pages/settings'
import AiProvider from '@renderer/providers/AiProvider'
import { getKnowledgeBaseParams } from '@renderer/services/KnowledgeService'
import { getModelUniqId } from '@renderer/services/ModelService'
import { Model } from '@renderer/types'
import { KnowledgeBase, Model } from '@renderer/types'
import { getErrorMessage } from '@renderer/utils/error'
import { Form, Input, Modal, Select, Slider } from 'antd'
import { Flex, Form, Input, InputNumber, Modal, Select, Slider, Switch } from 'antd'
import { find, sortBy } from 'lodash'
import { nanoid } from 'nanoid'
import { useMemo, useRef, useState } from 'react'
@ -24,6 +24,8 @@ interface ShowParams {
interface FormData {
name: string
model: string
autoDims: boolean | undefined
dimensions: number | undefined
rerankModel: string | undefined
documentCount: number | undefined
}
@ -35,6 +37,7 @@ interface Props extends ShowParams {
const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
const [open, setOpen] = useState(true)
const [loading, setLoading] = useState(false)
const [autoDims, setAutoDims] = useState(true)
const [form] = Form.useForm<FormData>()
const { t } = useTranslation()
const { providers } = useProviders()
@ -67,7 +70,8 @@ const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
.map((m) => ({
label: m.name,
value: getModelUniqId(m),
key: `${p.id}-${m.id}`
providerId: p.id,
modelId: m.id
}))
}))
.filter((group) => group.options.length > 0)
@ -107,24 +111,27 @@ const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
return
}
const aiProvider = new AiProvider(provider)
let dimensions = 0
try {
dimensions = await aiProvider.getEmbeddingDimensions(selectedEmbeddingModel)
} catch (error) {
console.error('Error getting embedding dimensions:', error)
window.message.error(t('message.error.get_embedding_dimensions') + '\n' + getErrorMessage(error))
setLoading(false)
return
if (autoDims || typeof values.dimensions === 'undefined') {
try {
const aiProvider = new AiProvider(provider)
values.dimensions = await aiProvider.getEmbeddingDimensions(selectedEmbeddingModel)
} catch (error) {
console.error('Error getting embedding dimensions:', error)
window.message.error(t('message.error.get_embedding_dimensions') + '\n' + getErrorMessage(error))
setLoading(false)
return
}
} else if (typeof values.dimensions === 'string') {
// In theory this should never be a string, but in practice it does arrive as a string
values.dimensions = parseInt(values.dimensions)
}
const newBase = {
const newBase: KnowledgeBase = {
id: nanoid(),
name: values.name,
model: selectedEmbeddingModel,
rerankModel: selectedRerankModel,
dimensions,
dimensions: values.dimensions,
documentCount: values.documentCount || DEFAULT_KNOWLEDGE_DOCUMENT_COUNT,
items: [],
created_at: Date.now(),
@ -134,7 +141,7 @@ const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
await window.api.knowledgeBase.create(getKnowledgeBaseParams(newBase))
addKnowledgeBase(newBase as any)
addKnowledgeBase(newBase)
setOpen(false)
resolve(newBase)
}
@ -203,11 +210,59 @@ const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
marks={{ 1: '1', 6: t('knowledge.document_count_default'), 30: '30' }}
/>
</Form.Item>
<Form.Item
name="autoDims"
colon={false}
initialValue={true}
layout="horizontal"
label={t('knowledge.dimensions_auto_set')}
tooltip={t('knowledge.dimensions_default')}
style={{ marginBottom: 0, justifyContent: 'space-between' }}>
<Flex justify="flex-end" style={{ marginBottom: '1rem' }}>
<Switch
checked={autoDims}
onClick={() => {
form.setFieldValue('autoDims', !autoDims)
if (!autoDims) {
form.validateFields(['dimensions'])
}
setAutoDims(!autoDims)
}}></Switch>
</Flex>
</Form.Item>
<Form.Item
name="dimensions"
colon={false}
layout="horizontal"
initialValue={undefined}
label={t('knowledge.dimensions')}
tooltip={{ title: t('knowledge.dimensions_size_tooltip') }}
dependencies={['model']}
style={{ display: autoDims ? 'none' : 'block' }}
rules={[
({ getFieldValue }) => ({
validator(_, value) {
if (getFieldValue('autoDims') || value > 0) {
return Promise.resolve()
} else {
return Promise.reject(t('knowledge.dimensions_error_invalid'))
}
}
})
]}>
<InputNumber min={1} style={{ width: '100%' }} placeholder={t('knowledge.dimensions_size_placeholder')} />
</Form.Item>
{!autoDims && (
<SettingHelpText style={{ marginTop: -15, marginBottom: 20 }}>
{t('knowledge.dimensions_set_right')}
</SettingHelpText>
)}
</Form>
</Modal>
)
}
export default class AddKnowledgePopup {
static hide() {
TopView.hide('AddKnowledgePopup')

View File

@ -2,7 +2,6 @@ import type { ExtractChunkData } from '@cherrystudio/embedjs-interfaces'
import { DEFAULT_KNOWLEDGE_DOCUMENT_COUNT, DEFAULT_KNOWLEDGE_THRESHOLD } from '@renderer/config/constant'
import { getEmbeddingMaxContext } from '@renderer/config/embedings'
import Logger from '@renderer/config/logger'
import { ONLY_SUPPORTED_DIMENSION_PROVIDERS } from '@renderer/config/providers'
import AiProvider from '@renderer/providers/AiProvider'
import store from '@renderer/store'
import { FileType, KnowledgeBase, KnowledgeBaseParams, KnowledgeReference } from '@renderer/types'
@ -40,7 +39,7 @@ export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams
id: base.id,
model: base.model.id,
provider: base.model.provider,
dimensions: ONLY_SUPPORTED_DIMENSION_PROVIDERS.includes(base.model.provider) ? base.dimensions : undefined,
dimensions: base.dimensions,
apiKey: aiProvider.getApiKey() || 'secret',
apiVersion: provider.apiVersion,
baseURL: host,