mirror of
https://github.com/CherryHQ/cherry-studio.git
synced 2025-12-24 18:50:56 +08:00
Feat: Allows setting the vector dimension of the knowledge base embedding model (#7025)
This commit is contained in:
parent
79e504fc31
commit
f9591689a3
@ -5,18 +5,26 @@ import { AzureOpenAiEmbeddings } from '@cherrystudio/embedjs-openai/src/azure-op
|
||||
import { getInstanceName } from '@main/utils'
|
||||
import { KnowledgeBaseParams } from '@types'
|
||||
|
||||
import VoyageEmbeddings from './VoyageEmbeddings'
|
||||
import { SUPPORTED_DIM_MODELS as VOYAGE_SUPPORTED_DIM_MODELS, VoyageEmbeddings } from './VoyageEmbeddings'
|
||||
|
||||
export default class EmbeddingsFactory {
|
||||
static create({ model, provider, apiKey, apiVersion, baseURL, dimensions }: KnowledgeBaseParams): BaseEmbeddings {
|
||||
const batchSize = 10
|
||||
if (provider === 'voyageai') {
|
||||
return new VoyageEmbeddings({
|
||||
modelName: model,
|
||||
apiKey,
|
||||
outputDimension: dimensions,
|
||||
batchSize: 8
|
||||
})
|
||||
if (VOYAGE_SUPPORTED_DIM_MODELS.includes(model)) {
|
||||
return new VoyageEmbeddings({
|
||||
modelName: model,
|
||||
apiKey,
|
||||
outputDimension: dimensions,
|
||||
batchSize: 8
|
||||
})
|
||||
} else {
|
||||
return new VoyageEmbeddings({
|
||||
modelName: model,
|
||||
apiKey,
|
||||
batchSize: 8
|
||||
})
|
||||
}
|
||||
}
|
||||
if (provider === 'ollama') {
|
||||
if (baseURL.includes('v1/')) {
|
||||
|
||||
@ -1,16 +1,20 @@
|
||||
import { BaseEmbeddings } from '@cherrystudio/embedjs-interfaces'
|
||||
import { VoyageEmbeddings as _VoyageEmbeddings } from '@langchain/community/embeddings/voyage'
|
||||
|
||||
export default class VoyageEmbeddings extends BaseEmbeddings {
|
||||
/**
|
||||
* Models that support setting a custom embedding (output) dimension
|
||||
*/
|
||||
export const SUPPORTED_DIM_MODELS = ['voyage-3-large', 'voyage-3.5', 'voyage-3.5-lite', 'voyage-code-3']
|
||||
export class VoyageEmbeddings extends BaseEmbeddings {
|
||||
private model: _VoyageEmbeddings
|
||||
constructor(private readonly configuration?: ConstructorParameters<typeof _VoyageEmbeddings>[0]) {
|
||||
super()
|
||||
if (!this.configuration) this.configuration = {}
|
||||
if (!this.configuration.modelName) this.configuration.modelName = 'voyage-3'
|
||||
|
||||
if (!this.configuration.outputDimension) {
|
||||
throw new Error('You need to pass in the optional dimensions parameter for this model')
|
||||
if (!SUPPORTED_DIM_MODELS.includes(this.configuration.modelName) && this.configuration.outputDimension) {
|
||||
throw new Error(`VoyageEmbeddings only supports ${SUPPORTED_DIM_MODELS.join(', ')}`)
|
||||
}
|
||||
|
||||
this.model = new _VoyageEmbeddings(this.configuration)
|
||||
}
|
||||
override async getDimensions(): Promise<number> {
|
||||
|
||||
@ -565,8 +565,12 @@
|
||||
"urls": "URLs",
|
||||
"dimensions": "Embedding dimension",
|
||||
"dimensions_size_tooltip": "The size of the embedding dimension; the larger the value, the larger the embedding dimension, but it also consumes more tokens.",
|
||||
"dimensions_size_placeholder": "Default value (modification not recommended)",
|
||||
"dimensions_size_too_large": "The embedding dimension cannot exceed the model's context limit ({{max_context}})."
|
||||
"dimensions_size_placeholder": " Embedding dimension size, e.g. 1024",
|
||||
"dimensions_auto_set": "Auto-set embedding dimensions",
|
||||
"dimensions_error_invalid": "Please enter embedding dimension size",
|
||||
"dimensions_size_too_large": "The embedding dimension cannot exceed the model's context limit ({{max_context}}).",
|
||||
"dimensions_set_right": "⚠️ Please ensure the model supports the set embedding dimension size",
|
||||
"dimensions_default": "The model will use default embedding dimensions"
|
||||
},
|
||||
"languages": {
|
||||
"arabic": "Arabic",
|
||||
|
||||
@ -565,8 +565,12 @@
|
||||
"urls": "URL",
|
||||
"dimensions": "埋め込み次元",
|
||||
"dimensions_size_tooltip": "埋め込み次元のサイズは、数値が大きいほど埋め込み次元も大きくなりますが、消費するトークンも増えます。",
|
||||
"dimensions_size_placeholder": "デフォルト値(変更はお勧めしません)",
|
||||
"dimensions_size_too_large": "埋め込み次元はモデルのコンテキスト制限({{max_context}})を超えてはなりません。"
|
||||
"dimensions_size_placeholder": " 埋め込み次元のサイズ(例:1024)",
|
||||
"dimensions_auto_set": "埋め込み次元を自動設定",
|
||||
"dimensions_error_invalid": "埋め込み次元のサイズを入力してください",
|
||||
"dimensions_size_too_large": "埋め込み次元はモデルのコンテキスト制限({{max_context}})を超えてはなりません。",
|
||||
"dimensions_set_right": "⚠️ モデルが設定した埋め込み次元のサイズをサポートしていることを確認してください",
|
||||
"dimensions_default": "モデルはデフォルトの埋め込み次元を使用します"
|
||||
},
|
||||
"languages": {
|
||||
"arabic": "アラビア語",
|
||||
|
||||
@ -565,8 +565,12 @@
|
||||
"urls": "URL-адреса",
|
||||
"dimensions": "векторное пространство",
|
||||
"dimensions_size_tooltip": "Размерность вложения, чем больше значение, тем больше размерность вложения, но и потребляемых токенов также становится больше.",
|
||||
"dimensions_size_placeholder": "Значение по умолчанию (не рекомендуется изменять)",
|
||||
"dimensions_size_too_large": "Размерность вложения не может превышать ограничение контекста модели ({{max_context}})"
|
||||
"dimensions_size_placeholder": " Размерность эмбеддинга, например 1024",
|
||||
"dimensions_auto_set": "Автоматическая установка размерности эмбеддинга",
|
||||
"dimensions_error_invalid": "Пожалуйста, введите размерность эмбеддинга",
|
||||
"dimensions_size_too_large": "Размерность вложения не может превышать ограничение контекста модели ({{max_context}})",
|
||||
"dimensions_set_right": "⚠️ Убедитесь, что модель поддерживает заданный размер эмбеддинга",
|
||||
"dimensions_default": "Модель будет использовать размер эмбеддинга по умолчанию"
|
||||
},
|
||||
"languages": {
|
||||
"arabic": "Арабский",
|
||||
|
||||
@ -518,7 +518,11 @@
|
||||
"delete_confirm": "确定要删除此知识库吗?",
|
||||
"dimensions": "嵌入维度",
|
||||
"dimensions_size_tooltip": "嵌入维度大小,数值越大,嵌入维度越大,但消耗的 Token 也越多",
|
||||
"dimensions_size_placeholder": " 默认值(不建议修改)",
|
||||
"dimensions_set_right": "⚠️ 请确保模型支持所设置的嵌入维度大小",
|
||||
"dimensions_default": "模型将使用默认嵌入维度",
|
||||
"dimensions_size_placeholder": " 嵌入维度大小,如 1024",
|
||||
"dimensions_auto_set": "自动设置嵌入维度",
|
||||
"dimensions_error_invalid": "请输入嵌入维度大小",
|
||||
"dimensions_size_too_large": "嵌入维度不能超过模型上下文限制({{max_context}})",
|
||||
"directories": "目录",
|
||||
"directory_placeholder": "请输入目录路径",
|
||||
|
||||
@ -565,8 +565,12 @@
|
||||
"urls": "網址",
|
||||
"dimensions": "嵌入維度",
|
||||
"dimensions_size_tooltip": "嵌入維度大小,數值越大,嵌入維度越大,但消耗的 Token 也越多",
|
||||
"dimensions_size_placeholder": "預設值(不建議修改)",
|
||||
"dimensions_size_too_large": "嵌入維度不能超過模型上下文限制({{max_context}})"
|
||||
"dimensions_size_placeholder": " 嵌入維度大小,例如 1024",
|
||||
"dimensions_auto_set": "自動設定嵌入維度",
|
||||
"dimensions_error_invalid": "請輸入嵌入維度大小",
|
||||
"dimensions_size_too_large": "嵌入維度不能超過模型上下文限制({{max_context}})",
|
||||
"dimensions_set_right": "⚠️ 請確保模型支援所設置的嵌入維度大小",
|
||||
"dimensions_default": "模型將使用預設嵌入維度"
|
||||
},
|
||||
"languages": {
|
||||
"arabic": "阿拉伯文",
|
||||
|
||||
@ -490,8 +490,12 @@
|
||||
"urls": "Διευθύνσεις",
|
||||
"dimensions": "Διαστάσεις ενσωμάτωσης",
|
||||
"dimensions_size_tooltip": "Το μέγεθος των διαστάσεων ενσωμάτωσης. Όσο μεγαλύτερη η τιμή, τόσο περισσότερες οι διαστάσεις ενσωμάτωσης, αλλά και οι απαιτούμενες μονάδες (Tokens).",
|
||||
"dimensions_size_placeholder": "Προεπιλεγμένη τιμή (δεν συνιστάται να τροποποιηθεί)",
|
||||
"dimensions_size_too_large": "Οι διαστάσεις ενσωμάτωσης δεν μπορούν να υπερβούν το όριο περιεχομένου του μοντέλου ({{max_context}})"
|
||||
"dimensions_size_placeholder": " Μέγεθος διαστάσεων ενσωμάτωσης, π.χ. 1024",
|
||||
"dimensions_auto_set": "Αυτόματη ρύθμιση διαστάσεων ενσωμάτωσης",
|
||||
"dimensions_error_invalid": "Παρακαλώ εισάγετε μέγεθος διαστάσεων ενσωμάτωσης",
|
||||
"dimensions_size_too_large": "Οι διαστάσεις ενσωμάτωσης δεν μπορούν να υπερβούν το όριο περιεχομένου του μοντέλου ({{max_context}})",
|
||||
"dimensions_set_right": "⚠️ Βεβαιωθείτε ότι το μοντέλο υποστηρίζει το καθορισμένο μέγεθος διαστάσεων ενσωμάτωσης",
|
||||
"dimensions_default": "Το μοντέλο θα χρησιμοποιήσει τις προεπιλεγμένες διαστάσεις ενσωμάτωσης"
|
||||
},
|
||||
"languages": {
|
||||
"arabic": "Αραβικά",
|
||||
|
||||
@ -491,8 +491,12 @@
|
||||
"urls": "URLs",
|
||||
"dimensions": "Dimensión de incrustación",
|
||||
"dimensions_size_tooltip": "Tamaño de la dimensión de incrustación, cuanto mayor sea el valor, mayor será la dimensión de incrustación, pero también consumirá más Tokens",
|
||||
"dimensions_size_placeholder": "Valor predeterminado (no recomendado modificar)",
|
||||
"dimensions_size_too_large": "La dimensión de incrustación no puede exceder el límite del contexto del modelo ({{max_context}})"
|
||||
"dimensions_size_placeholder": " Tamaño de dimensión de incrustación, ej. 1024",
|
||||
"dimensions_auto_set": "Configuración automática de dimensiones de incrustación",
|
||||
"dimensions_error_invalid": "Por favor ingrese el tamaño de dimensión de incrustación",
|
||||
"dimensions_size_too_large": "La dimensión de incrustación no puede exceder el límite del contexto del modelo ({{max_context}})",
|
||||
"dimensions_set_right": "⚠️ Asegúrese de que el modelo admita el tamaño de dimensión de incrustación establecido",
|
||||
"dimensions_default": "El modelo utilizará las dimensiones de incrustación predeterminadas"
|
||||
},
|
||||
"languages": {
|
||||
"arabic": "Árabe",
|
||||
|
||||
@ -490,8 +490,12 @@
|
||||
"urls": "URLs",
|
||||
"dimensions": "Dimension d'incorporation",
|
||||
"dimensions_size_tooltip": "Taille de la dimension d'incorporation. Plus la valeur est élevée, plus la dimension est grande, mais plus la consommation de tokens augmente",
|
||||
"dimensions_size_placeholder": "Значение по умолчанию (не рекомендуется изменять)",
|
||||
"dimensions_size_too_large": "Размерность встраивания не может превышать ограничение контекста модели ({{max_context}})"
|
||||
"dimensions_size_placeholder": " Taille de dimension d'incorporation, ex. 1024",
|
||||
"dimensions_auto_set": "Réglage automatique des dimensions d'incorporation",
|
||||
"dimensions_error_invalid": "Veuillez saisir la taille de dimension d'incorporation",
|
||||
"dimensions_size_too_large": "La dimension d'incorporation ne peut pas dépasser la limite de contexte du modèle ({{max_context}})",
|
||||
"dimensions_set_right": "⚠️ Assurez-vous que le modèle prend en charge la taille de dimension d'incorporation définie",
|
||||
"dimensions_default": "Le modèle utilisera les dimensions d'incorporation par défaut"
|
||||
},
|
||||
"languages": {
|
||||
"arabic": "Arabe",
|
||||
|
||||
@ -492,8 +492,12 @@
|
||||
"urls": "URLs",
|
||||
"dimensions": "Dimensão de incorporação",
|
||||
"dimensions_size_tooltip": "Tamanho da dimensão de incorporação, quanto maior o valor, maior a dimensão de incorporação, mas também maior o consumo de tokens",
|
||||
"dimensions_size_placeholder": "Valor padrão (não recomendado alterar)",
|
||||
"dimensions_size_too_large": "A dimensão de incorporação não pode exceder o limite do contexto do modelo ({{max_context}})"
|
||||
"dimensions_size_placeholder": " Tamanho da dimensão de incorporação, ex. 1024",
|
||||
"dimensions_auto_set": "Definição automática de dimensões de incorporação",
|
||||
"dimensions_error_invalid": "Por favor insira o tamanho da dimensão de incorporação",
|
||||
"dimensions_size_too_large": "A dimensão de incorporação não pode exceder o limite do contexto do modelo ({{max_context}})",
|
||||
"dimensions_set_right": "⚠️ Certifique-se de que o modelo suporta o tamanho da dimensão de incorporação definido",
|
||||
"dimensions_default": "O modelo utilizará as dimensões de incorporação padrão"
|
||||
},
|
||||
"languages": {
|
||||
"arabic": "Árabe",
|
||||
|
||||
@ -9,9 +9,9 @@ import { SettingHelpText } from '@renderer/pages/settings'
|
||||
import AiProvider from '@renderer/providers/AiProvider'
|
||||
import { getKnowledgeBaseParams } from '@renderer/services/KnowledgeService'
|
||||
import { getModelUniqId } from '@renderer/services/ModelService'
|
||||
import { Model } from '@renderer/types'
|
||||
import { KnowledgeBase, Model } from '@renderer/types'
|
||||
import { getErrorMessage } from '@renderer/utils/error'
|
||||
import { Form, Input, Modal, Select, Slider } from 'antd'
|
||||
import { Flex, Form, Input, InputNumber, Modal, Select, Slider, Switch } from 'antd'
|
||||
import { find, sortBy } from 'lodash'
|
||||
import { nanoid } from 'nanoid'
|
||||
import { useMemo, useRef, useState } from 'react'
|
||||
@ -24,6 +24,8 @@ interface ShowParams {
|
||||
interface FormData {
|
||||
name: string
|
||||
model: string
|
||||
autoDims: boolean | undefined
|
||||
dimensions: number | undefined
|
||||
rerankModel: string | undefined
|
||||
documentCount: number | undefined
|
||||
}
|
||||
@ -35,6 +37,7 @@ interface Props extends ShowParams {
|
||||
const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
|
||||
const [open, setOpen] = useState(true)
|
||||
const [loading, setLoading] = useState(false)
|
||||
const [autoDims, setAutoDims] = useState(true)
|
||||
const [form] = Form.useForm<FormData>()
|
||||
const { t } = useTranslation()
|
||||
const { providers } = useProviders()
|
||||
@ -67,7 +70,8 @@ const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
|
||||
.map((m) => ({
|
||||
label: m.name,
|
||||
value: getModelUniqId(m),
|
||||
key: `${p.id}-${m.id}`
|
||||
providerId: p.id,
|
||||
modelId: m.id
|
||||
}))
|
||||
}))
|
||||
.filter((group) => group.options.length > 0)
|
||||
@ -107,24 +111,27 @@ const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
|
||||
return
|
||||
}
|
||||
|
||||
const aiProvider = new AiProvider(provider)
|
||||
let dimensions = 0
|
||||
|
||||
try {
|
||||
dimensions = await aiProvider.getEmbeddingDimensions(selectedEmbeddingModel)
|
||||
} catch (error) {
|
||||
console.error('Error getting embedding dimensions:', error)
|
||||
window.message.error(t('message.error.get_embedding_dimensions') + '\n' + getErrorMessage(error))
|
||||
setLoading(false)
|
||||
return
|
||||
if (autoDims || typeof values.dimensions === 'undefined') {
|
||||
try {
|
||||
const aiProvider = new AiProvider(provider)
|
||||
values.dimensions = await aiProvider.getEmbeddingDimensions(selectedEmbeddingModel)
|
||||
} catch (error) {
|
||||
console.error('Error getting embedding dimensions:', error)
|
||||
window.message.error(t('message.error.get_embedding_dimensions') + '\n' + getErrorMessage(error))
|
||||
setLoading(false)
|
||||
return
|
||||
}
|
||||
} else if (typeof values.dimensions === 'string') {
|
||||
// In theory this should never be a string, but in practice it does arrive as a string
|
||||
values.dimensions = parseInt(values.dimensions)
|
||||
}
|
||||
|
||||
const newBase = {
|
||||
const newBase: KnowledgeBase = {
|
||||
id: nanoid(),
|
||||
name: values.name,
|
||||
model: selectedEmbeddingModel,
|
||||
rerankModel: selectedRerankModel,
|
||||
dimensions,
|
||||
dimensions: values.dimensions,
|
||||
documentCount: values.documentCount || DEFAULT_KNOWLEDGE_DOCUMENT_COUNT,
|
||||
items: [],
|
||||
created_at: Date.now(),
|
||||
@ -134,7 +141,7 @@ const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
|
||||
|
||||
await window.api.knowledgeBase.create(getKnowledgeBaseParams(newBase))
|
||||
|
||||
addKnowledgeBase(newBase as any)
|
||||
addKnowledgeBase(newBase)
|
||||
setOpen(false)
|
||||
resolve(newBase)
|
||||
}
|
||||
@ -203,11 +210,59 @@ const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
|
||||
marks={{ 1: '1', 6: t('knowledge.document_count_default'), 30: '30' }}
|
||||
/>
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
name="autoDims"
|
||||
colon={false}
|
||||
initialValue={true}
|
||||
layout="horizontal"
|
||||
label={t('knowledge.dimensions_auto_set')}
|
||||
tooltip={t('knowledge.dimensions_default')}
|
||||
style={{ marginBottom: 0, justifyContent: 'space-between' }}>
|
||||
<Flex justify="flex-end" style={{ marginBottom: '1rem' }}>
|
||||
<Switch
|
||||
checked={autoDims}
|
||||
onClick={() => {
|
||||
form.setFieldValue('autoDims', !autoDims)
|
||||
if (!autoDims) {
|
||||
form.validateFields(['dimensions'])
|
||||
}
|
||||
setAutoDims(!autoDims)
|
||||
}}></Switch>
|
||||
</Flex>
|
||||
</Form.Item>
|
||||
|
||||
<Form.Item
|
||||
name="dimensions"
|
||||
colon={false}
|
||||
layout="horizontal"
|
||||
initialValue={undefined}
|
||||
label={t('knowledge.dimensions')}
|
||||
tooltip={{ title: t('knowledge.dimensions_size_tooltip') }}
|
||||
dependencies={['model']}
|
||||
style={{ display: autoDims ? 'none' : 'block' }}
|
||||
rules={[
|
||||
({ getFieldValue }) => ({
|
||||
validator(_, value) {
|
||||
if (getFieldValue('autoDims') || value > 0) {
|
||||
return Promise.resolve()
|
||||
} else {
|
||||
return Promise.reject(t('knowledge.dimensions_error_invalid'))
|
||||
}
|
||||
}
|
||||
})
|
||||
]}>
|
||||
<InputNumber min={1} style={{ width: '100%' }} placeholder={t('knowledge.dimensions_size_placeholder')} />
|
||||
</Form.Item>
|
||||
|
||||
{!autoDims && (
|
||||
<SettingHelpText style={{ marginTop: -15, marginBottom: 20 }}>
|
||||
{t('knowledge.dimensions_set_right')}
|
||||
</SettingHelpText>
|
||||
)}
|
||||
</Form>
|
||||
</Modal>
|
||||
)
|
||||
}
|
||||
|
||||
export default class AddKnowledgePopup {
|
||||
static hide() {
|
||||
TopView.hide('AddKnowledgePopup')
|
||||
|
||||
@ -2,7 +2,6 @@ import type { ExtractChunkData } from '@cherrystudio/embedjs-interfaces'
|
||||
import { DEFAULT_KNOWLEDGE_DOCUMENT_COUNT, DEFAULT_KNOWLEDGE_THRESHOLD } from '@renderer/config/constant'
|
||||
import { getEmbeddingMaxContext } from '@renderer/config/embedings'
|
||||
import Logger from '@renderer/config/logger'
|
||||
import { ONLY_SUPPORTED_DIMENSION_PROVIDERS } from '@renderer/config/providers'
|
||||
import AiProvider from '@renderer/providers/AiProvider'
|
||||
import store from '@renderer/store'
|
||||
import { FileType, KnowledgeBase, KnowledgeBaseParams, KnowledgeReference } from '@renderer/types'
|
||||
@ -40,7 +39,7 @@ export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams
|
||||
id: base.id,
|
||||
model: base.model.id,
|
||||
provider: base.model.provider,
|
||||
dimensions: ONLY_SUPPORTED_DIMENSION_PROVIDERS.includes(base.model.provider) ? base.dimensions : undefined,
|
||||
dimensions: base.dimensions,
|
||||
apiKey: aiProvider.getApiKey() || 'secret',
|
||||
apiVersion: provider.apiVersion,
|
||||
baseURL: host,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user