refactor(GeminiAPIClient): separate model and user message handling to adapt to Vertex AI (#7511)

- Introduced a new modelParts array to manage model-related messages separately from user messages.
- Updated the logic to push model messages to currentReqMessages only if they exist, improving clarity and structure.
- Adjusted the return order of messages in buildSdkMessages so that the history comes first and the current message is appended after it.
- Enhanced McpToolChunkMiddleware to clear the tool processing state's output, when present, after it has been used to build the new request messages.
This commit is contained in:
SuYao 2025-06-25 22:16:27 +08:00 committed by GitHub
parent 17a8f0a724
commit 9362304db0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 25 additions and 6 deletions

View File

@ -686,16 +686,19 @@ export class GeminiAPIClient extends BaseApiClient<
toolCalls: FunctionCall[]
): Content[] {
const parts: Part[] = []
const modelParts: Part[] = []
if (output) {
parts.push({
modelParts.push({
text: output
})
}
toolCalls.forEach((toolCall) => {
parts.push({
modelParts.push({
functionCall: toolCall
})
})
parts.push(
...toolResults
.map((ts) => ts.parts)
@ -703,10 +706,22 @@ export class GeminiAPIClient extends BaseApiClient<
.filter((p) => p !== undefined)
)
const lastMessage = currentReqMessages[currentReqMessages.length - 1]
if (lastMessage) {
lastMessage.parts?.push(...parts)
const userMessage: Content = {
role: 'user',
parts: []
}
if (modelParts.length > 0) {
currentReqMessages.push({
role: 'model',
parts: modelParts
})
}
if (parts.length > 0) {
userMessage.parts?.push(...parts)
currentReqMessages.push(userMessage)
}
return currentReqMessages
}
@ -747,7 +762,7 @@ export class GeminiAPIClient extends BaseApiClient<
}
})
}
return [messageParam, ...(sdkPayload.history || [])]
return [...(sdkPayload.history || []), messageParam]
}
private async uploadFile(file: FileType): Promise<File> {

View File

@ -255,6 +255,10 @@ function buildParamsWithToolResults(
// 从回复中构建助手消息
const newReqMessages = apiClient.buildSdkMessages(currentReqMessages, output, toolResults, toolCalls)
if (output && ctx._internal.toolProcessingState) {
ctx._internal.toolProcessingState.output = undefined
}
// 估算新增消息的 token 消耗并累加到 usage 中
if (ctx._internal.observer?.usage && newReqMessages.length > currentReqMessages.length) {
try {