Skip to content

Commit e307210

Browse files
committed
🧪 stress test dynamic loras
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
1 parent 9335d4d commit e307210

File tree

1 file changed

+33
-0
lines changed

1 file changed

+33
-0
lines changed

tests/entrypoints/openai/test_lora_adapters.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,39 @@ async def test_dynamic_lora_invalid_lora_rank(client: openai.AsyncOpenAI,
167167
})
168168

169169

170+
@pytest.mark.asyncio
async def test_multiple_lora_adapters(client: openai.AsyncOpenAI, tmp_path,
                                      zephyr_lora_files):
    """Validate that many LoRA adapters can be dynamically registered and
    used for inference concurrently.

    Ten adapters are loaded in parallel, each followed by three completion
    requests against the freshly registered adapter name. The test fails if
    any load or completion request raises.
    """

    # This test file configures the server with --max-cpu-loras=2 and this test
    # will concurrently load 10 adapters, so it should flex the LRU cache
    async def load_and_run_adapter(adapter_name: str):
        """Register one adapter under `adapter_name`, then query it 3 times."""
        await client.post("load_lora_adapter",
                          cast_to=str,
                          body={
                              "lora_name": adapter_name,
                              "lora_path": str(zephyr_lora_files)
                          })
        for _ in range(3):
            await client.completions.create(
                model=adapter_name,
                prompt=["Hello there", "Foo bar bazz buzz"],
                max_tokens=5,
            )

    lora_tasks = [
        asyncio.create_task(load_and_run_adapter(f"adapter_{i}"))
        for i in range(10)
    ]

    # asyncio.wait() hands back Task objects, which are never Exception
    # instances, so checking them with isinstance() can never fail. gather()
    # with return_exceptions=True instead yields, per task, either its return
    # value or the exception it raised, which makes the check below real.
    results = await asyncio.gather(*lora_tasks, return_exceptions=True)

    for r in results:
        # BaseException also covers asyncio.CancelledError.
        assert not isinstance(r, BaseException), f"Got exception {r}"
201+
202+
170203
@pytest.mark.asyncio
171204
async def test_loading_invalid_adapters_does_not_break_others(
172205
client: openai.AsyncOpenAI, tmp_path, zephyr_lora_files):

0 commit comments

Comments
 (0)