Commit ea7e732

✨ feat: add custom stream handle support for LobeOpenAICompatibleFactory (#5039)

* ♻️ refactor: add function call support for Spark
* ♻️ refactor: add non-stream mode support
* ⚡️ perf: use stream mode for tool calls
* ✨ feat: add `handleStream` & `handleStreamResponse` to LobeOpenAICompatibleFactory for custom stream handling
* ✨ feat: add `handleTransformResponseToStream` for custom non-stream transform handling
* ♻️ refactor: refactor Qwen to LobeOpenAICompatibleFactory, enable `enable_search` for Qwen LLM
* 🔨 chore: add unit tests for LobeOpenAICompatibleFactory
* 🔨 chore: add unit tests for SparkAIStream
* 🔨 chore: add unit tests for Qwen & Spark
* 🐛 fix: fix Qwen param range error
* 🔨 chore: add `QwenLegacyModels` array, limit `presence_penalty`
* 🐛 fix: fix typo

1 parent cf0e8d8 commit ea7e732
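
For context, here is a minimal sketch of how a provider can opt into the new hooks. The option names (`handleStream`, `handleTransformResponseToStream`) come from the commit message; the import paths and the Spark helper names are assumptions for illustration, not the committed code.

```ts
import { ModelProvider } from '../types';
import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';
// Hypothetical helpers: a Spark-specific SSE parser and a non-stream-to-stream
// transformer (names assumed for illustration).
import { SparkAIStream, transformSparkResponseToStream } from './stream';

export const LobeSparkAI = LobeOpenAICompatibleFactory({
  baseURL: 'https://spark-api-open.xf-yun.com/v1',
  chatCompletion: {
    // Custom handling of Spark's streaming chunks (tool calls included).
    handleStream: SparkAIStream,
    // Used when the upstream call runs in non-stream mode: convert the full
    // completion into a stream so the rest of the pipeline stays uniform.
    handleTransformResponseToStream: transformSparkResponseToStream,
  },
  provider: ModelProvider.Spark,
});
```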

File tree: 10 files changed (+570 -351 lines)

src/config/modelProviders/spark.ts

Lines changed: 3 additions & 6 deletions
```diff
@@ -10,7 +10,6 @@ const Spark: ModelProviderCard = {
         'Spark Lite 是一款轻量级大语言模型,具备极低的延迟与高效的处理能力,完全免费开放,支持实时在线搜索功能。其快速响应的特性使其在低算力设备上的推理应用和模型微调中表现出色,为用户带来出色的成本效益和智能体验,尤其在知识问答、内容生成及搜索场景下表现不俗。',
       displayName: 'Spark Lite',
       enabled: true,
-      functionCall: false,
       id: 'lite',
       maxOutput: 4096,
     },
@@ -20,7 +19,6 @@ const Spark: ModelProviderCard = {
         'Spark Pro 是一款为专业领域优化的高性能大语言模型,专注数学、编程、医疗、教育等多个领域,并支持联网搜索及内置天气、日期等插件。其优化后模型在复杂知识问答、语言理解及高层次文本创作中展现出色表现和高效性能,是适合专业应用场景的理想选择。',
       displayName: 'Spark Pro',
       enabled: true,
-      functionCall: false,
       id: 'generalv3',
       maxOutput: 8192,
     },
@@ -30,7 +28,6 @@ const Spark: ModelProviderCard = {
         'Spark Pro 128K 配置了特大上下文处理能力,能够处理多达128K的上下文信息,特别适合需通篇分析和长期逻辑关联处理的长文内容,可在复杂文本沟通中提供流畅一致的逻辑与多样的引用支持。',
       displayName: 'Spark Pro 128K',
       enabled: true,
-      functionCall: false,
       id: 'pro-128k',
       maxOutput: 4096,
     },
@@ -40,7 +37,7 @@ const Spark: ModelProviderCard = {
         'Spark Max 为功能最为全面的版本,支持联网搜索及众多内置插件。其全面优化的核心能力以及系统角色设定和函数调用功能,使其在各种复杂应用场景中的表现极为优异和出色。',
       displayName: 'Spark Max',
       enabled: true,
-      functionCall: false,
+      functionCall: true,
       id: 'generalv3.5',
       maxOutput: 8192,
     },
@@ -50,7 +47,7 @@ const Spark: ModelProviderCard = {
         'Spark Max 32K 配置了大上下文处理能力,更强的上下文理解和逻辑推理能力,支持32K tokens的文本输入,适用于长文档阅读、私有知识问答等场景',
       displayName: 'Spark Max 32K',
       enabled: true,
-      functionCall: false,
+      functionCall: true,
       id: 'max-32k',
       maxOutput: 8192,
     },
@@ -60,7 +57,7 @@ const Spark: ModelProviderCard = {
         'Spark Ultra 是星火大模型系列中最为强大的版本,在升级联网搜索链路同时,提升对文本内容的理解和总结能力。它是用于提升办公生产力和准确响应需求的全方位解决方案,是引领行业的智能产品。',
       displayName: 'Spark 4.0 Ultra',
       enabled: true,
-      functionCall: false,
+      functionCall: true,
       id: '4.0Ultra',
       maxOutput: 8192,
     },
```
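
The flag changes above only advertise the capability; per the commit message, tool calls run in stream mode, and non-stream responses are converted into a stream. Below is a minimal sketch of what a `handleTransformResponseToStream` implementation can look like, written against the OpenAI SDK types. It is an illustration of the technique, not the committed helper.

```ts
import OpenAI from 'openai';

// Sketch: replay a non-streaming ChatCompletion as a ReadableStream of
// chunks, the job the commit assigns to `handleTransformResponseToStream`.
export const transformResponseToStream = (response: OpenAI.ChatCompletion) =>
  new ReadableStream<OpenAI.ChatCompletionChunk>({
    start(controller) {
      const choice = response.choices[0];

      // First chunk: emit the whole message (text and tool calls) as one delta.
      controller.enqueue({
        choices: [
          {
            delta: {
              content: choice.message.content,
              role: choice.message.role,
              tool_calls: choice.message.tool_calls?.map((tool, index) => ({
                function: tool.function,
                id: tool.id,
                index,
                type: tool.type,
              })),
            },
            finish_reason: null,
            index: choice.index,
          },
        ],
        created: response.created,
        id: response.id,
        model: response.model,
        object: 'chat.completion.chunk',
      });

      // Second chunk: carry the finish reason so downstream consumers stop.
      controller.enqueue({
        choices: [
          {
            delta: { content: null, role: choice.message.role },
            finish_reason: choice.finish_reason,
            index: choice.index,
          },
        ],
        created: response.created,
        id: response.id,
        model: response.model,
        object: 'chat.completion.chunk',
      });

      controller.close();
    },
  });
```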

src/libs/agent-runtime/qwen/index.test.ts

Lines changed: 13 additions & 188 deletions
```diff
@@ -2,8 +2,9 @@
 import OpenAI from 'openai';
 import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 
-import Qwen from '@/config/modelProviders/qwen';
-import { AgentRuntimeErrorType, ModelProvider } from '@/libs/agent-runtime';
+import { LobeOpenAICompatibleRuntime } from '@/libs/agent-runtime';
+import { ModelProvider } from '@/libs/agent-runtime';
+import { AgentRuntimeErrorType } from '@/libs/agent-runtime';
 
 import * as debugStreamModule from '../utils/debugStream';
 import { LobeQwenAI } from './index';
@@ -16,7 +17,7 @@ const invalidErrorType = AgentRuntimeErrorType.InvalidProviderAPIKey;
 // Mock the console.error to avoid polluting test output
 vi.spyOn(console, 'error').mockImplementation(() => {});
 
-let instance: LobeQwenAI;
+let instance: LobeOpenAICompatibleRuntime;
 
 beforeEach(() => {
   instance = new LobeQwenAI({ apiKey: 'test' });
@@ -40,183 +41,7 @@ describe('LobeQwenAI', () => {
     });
   });
 
-  describe('models', () => {
-    it('should correctly list available models', async () => {
-      const instance = new LobeQwenAI({ apiKey: 'test_api_key' });
-      vi.spyOn(instance, 'models').mockResolvedValue(Qwen.chatModels);
-
-      const models = await instance.models();
-      expect(models).toEqual(Qwen.chatModels);
-    });
-  });
-
   describe('chat', () => {
-    describe('Params', () => {
-      it('should call llms with proper options', async () => {
-        const mockStream = new ReadableStream();
-        const mockResponse = Promise.resolve(mockStream);
-
-        (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse);
-
-        const result = await instance.chat({
-          messages: [{ content: 'Hello', role: 'user' }],
-          model: 'qwen-turbo',
-          temperature: 0.6,
-          top_p: 0.7,
-        });
-
-        // Assert
-        expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
-          {
-            messages: [{ content: 'Hello', role: 'user' }],
-            model: 'qwen-turbo',
-            temperature: 0.6,
-            stream: true,
-            top_p: 0.7,
-            result_format: 'message',
-          },
-          { headers: { Accept: '*/*' } },
-        );
-        expect(result).toBeInstanceOf(Response);
-      });
-
-      it('should call vlms with proper options', async () => {
-        const mockStream = new ReadableStream();
-        const mockResponse = Promise.resolve(mockStream);
-
-        (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse);
-
-        const result = await instance.chat({
-          messages: [{ content: 'Hello', role: 'user' }],
-          model: 'qwen-vl-plus',
-          temperature: 0.6,
-          top_p: 0.7,
-        });
-
-        // Assert
-        expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
-          {
-            messages: [{ content: 'Hello', role: 'user' }],
-            model: 'qwen-vl-plus',
-            stream: true,
-          },
-          { headers: { Accept: '*/*' } },
-        );
-        expect(result).toBeInstanceOf(Response);
-      });
-
-      it('should transform non-streaming response to stream correctly', async () => {
-        const mockResponse = {
-          id: 'chatcmpl-fc539f49-51a8-94be-8061',
-          object: 'chat.completion',
-          created: 1719901794,
-          model: 'qwen-turbo',
-          choices: [
-            {
-              index: 0,
-              message: { role: 'assistant', content: 'Hello' },
-              finish_reason: 'stop',
-              logprobs: null,
-            },
-          ],
-        } as OpenAI.ChatCompletion;
-        vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
-          mockResponse as any,
-        );
-
-        const result = await instance.chat({
-          messages: [{ content: 'Hello', role: 'user' }],
-          model: 'qwen-turbo',
-          temperature: 0.6,
-          stream: false,
-        });
-
-        const decoder = new TextDecoder();
-        const reader = result.body!.getReader();
-        const stream: string[] = [];
-
-        while (true) {
-          const { value, done } = await reader.read();
-          if (done) break;
-          stream.push(decoder.decode(value));
-        }
-
-        expect(stream).toEqual([
-          'id: chatcmpl-fc539f49-51a8-94be-8061\n',
-          'event: text\n',
-          'data: "Hello"\n\n',
-          'id: chatcmpl-fc539f49-51a8-94be-8061\n',
-          'event: stop\n',
-          'data: "stop"\n\n',
-        ]);
-
-        expect((await reader.read()).done).toBe(true);
-      });
-
-      it('should set temperature to undefined if temperature is 0 or >= 2', async () => {
-        const temperatures = [0, 2, 3];
-        const expectedTemperature = undefined;
-
-        for (const temp of temperatures) {
-          vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
-            new ReadableStream() as any,
-          );
-          await instance.chat({
-            messages: [{ content: 'Hello', role: 'user' }],
-            model: 'qwen-turbo',
-            temperature: temp,
-          });
-          expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
-            expect.objectContaining({
-              messages: expect.any(Array),
-              model: 'qwen-turbo',
-              temperature: expectedTemperature,
-            }),
-            expect.any(Object),
-          );
-        }
-      });
-
-      it('should set temperature to original temperature', async () => {
-        vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
-          new ReadableStream() as any,
-        );
-        await instance.chat({
-          messages: [{ content: 'Hello', role: 'user' }],
-          model: 'qwen-turbo',
-          temperature: 1.5,
-        });
-        expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
-          expect.objectContaining({
-            messages: expect.any(Array),
-            model: 'qwen-turbo',
-            temperature: 1.5,
-          }),
-          expect.any(Object),
-        );
-      });
-
-      it('should set temperature to Float', async () => {
-        const createMock = vi.fn().mockResolvedValue(new ReadableStream() as any);
-        vi.spyOn(instance['client'].chat.completions, 'create').mockImplementation(createMock);
-        await instance.chat({
-          messages: [{ content: 'Hello', role: 'user' }],
-          model: 'qwen-turbo',
-          temperature: 1,
-        });
-        expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
-          expect.objectContaining({
-            messages: expect.any(Array),
-            model: 'qwen-turbo',
-            temperature: expect.any(Number),
-          }),
-          expect.any(Object),
-        );
-        const callArgs = createMock.mock.calls[0][0];
-        expect(Number.isInteger(callArgs.temperature)).toBe(false); // Temperature is always not an integer
-      });
-    });
-
     describe('Error', () => {
@@ -238,7 +63,7 @@ describe('LobeQwenAI', () => {
       try {
         await instance.chat({
           messages: [{ content: 'Hello', role: 'user' }],
-          model: 'qwen-turbo',
+          model: 'qwen-turbo-latest',
           temperature: 0.999,
         });
       } catch (e) {
@@ -278,7 +103,7 @@ describe('LobeQwenAI', () => {
      try {
         await instance.chat({
           messages: [{ content: 'Hello', role: 'user' }],
-          model: 'qwen-turbo',
+          model: 'qwen-turbo-latest',
           temperature: 0.999,
         });
       } catch (e) {
@@ -304,7 +129,8 @@ describe('LobeQwenAI', () => {
 
       instance = new LobeQwenAI({
         apiKey: 'test',
-        baseURL: defaultBaseURL,
+
+        baseURL: 'https://api.abc.com/v1',
       });
 
       vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError);
@@ -313,13 +139,12 @@ describe('LobeQwenAI', () => {
       try {
         await instance.chat({
           messages: [{ content: 'Hello', role: 'user' }],
-          model: 'qwen-turbo',
+          model: 'qwen-turbo-latest',
           temperature: 0.999,
         });
       } catch (e) {
         expect(e).toEqual({
-          /* Desensitizing is unnecessary for a public-accessible gateway endpoint. */
-          endpoint: defaultBaseURL,
+          endpoint: 'https://api.***.com/v1',
           error: {
             cause: { message: 'api is undefined' },
             stack: 'abc',
@@ -339,7 +164,7 @@ describe('LobeQwenAI', () => {
       try {
         await instance.chat({
           messages: [{ content: 'Hello', role: 'user' }],
-          model: 'qwen-turbo',
+          model: 'qwen-turbo-latest',
           temperature: 0.999,
         });
       } catch (e) {
@@ -362,7 +187,7 @@ describe('LobeQwenAI', () => {
       try {
         await instance.chat({
           messages: [{ content: 'Hello', role: 'user' }],
-          model: 'qwen-turbo',
+          model: 'qwen-turbo-latest',
           temperature: 0.999,
         });
       } catch (e) {
@@ -410,7 +235,7 @@ describe('LobeQwenAI', () => {
       // Hypothetical test call; adjust it to the actual implementation as needed
       await instance.chat({
         messages: [{ content: 'Hello', role: 'user' }],
-        model: 'qwen-turbo',
+        model: 'qwen-turbo-latest',
         stream: true,
         temperature: 0.999,
       });
```
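
The removed `Params` tests above pinned the old runtime's behavior: temperature is dropped at 0 or >= 2 and passed through when in range. After the refactor to LobeOpenAICompatibleFactory, this mapping would live in the provider's `handlePayload`. Here is a rough sketch of such a mapping based on the commit bullets (`enable_search`, `QwenLegacyModels` limiting `presence_penalty`); the exact ranges, the legacy model list, and the `ChatStreamPayload` import path are assumptions, not the committed code.

```ts
import { ChatStreamPayload } from '../types';

// Older DashScope-native models with narrower parameter ranges
// (hypothetical membership; the committed list may differ).
const QwenLegacyModels = new Set(['qwen-turbo', 'qwen-plus', 'qwen-max']);

const handlePayload = (payload: ChatStreamPayload) => {
  const { model, presence_penalty, temperature, top_p, ...rest } = payload;

  return {
    ...rest,
    // Qwen's provider-specific web search toggle named in the commit message.
    enable_search: true,
    model,
    // The commit limits presence_penalty for legacy models; the rule here
    // ("drop it unless it sits inside (-2, 2)") is an assumption.
    presence_penalty:
      QwenLegacyModels.has(model) &&
      presence_penalty !== undefined &&
      (presence_penalty <= -2 || presence_penalty >= 2)
        ? undefined
        : presence_penalty,
    // The removed tests expected temperature to be dropped at 0 or >= 2.
    temperature:
      temperature !== undefined && temperature > 0 && temperature < 2
        ? temperature
        : undefined,
    // Assume top_p must sit strictly inside (0, 1); drop anything else.
    top_p: top_p !== undefined && top_p > 0 && top_p < 1 ? top_p : undefined,
  };
};
```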
