
Commit 48918e0

Merge pull request #140 from OpenPipe/potential_fix
Fix training stability issues with new vLLM version
2 parents: 2e92cd0 + c01b133

File tree

4 files changed: +6 -4 lines

pyproject.toml
src/art/dev/model.py
src/art/dev/openai_server.py
src/art/local/vllm.py

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ dependencies = [
     "torchao>=0.9.0",
     "unsloth==2025.5.1 ; sys_platform == 'linux'",
     "unsloth-zoo==2025.5.1 ; sys_platform == 'linux'",
-    "vllm==0.7.3",
+    "vllm>=0.8.5",
     "wandb>=0.19.8",
     "peft>=0.14.0",
     "typer>=0.15.2",

src/art/dev/model.py

Lines changed: 1 addition & 0 deletions
@@ -43,6 +43,7 @@ def get_model_config(
         # which is the fallback for devices with compute capability < 8.0
         num_scheduler_steps=16 if torch.cuda.get_device_capability()[0] >= 8 else 1,
         enable_sleep_mode=enable_sleep_mode,
+        generation_config="vllm",
     )
     engine_args.update(config.get("engine_args", {}))
     init_args.update(config.get("init_args", {}))
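
The added generation_config="vllm" engine argument is presumably what restores stable behavior on the newer vLLM: recent releases default this setting to "auto", which loads sampling defaults such as temperature from the model's generation_config.json, while "vllm" keeps vLLM's own defaults. A minimal standalone sketch of the same setting passed to EngineArgs (the model name is a placeholder, not taken from this repo):

from vllm.engine.arg_utils import EngineArgs

# "vllm" ignores the model repo's generation_config.json so sampling defaults stay
# fixed; "auto" (the newer default) would pull them from the model itself.
engine_args = EngineArgs(
    model="Qwen/Qwen2.5-7B-Instruct",  # placeholder model, for illustration only
    enable_sleep_mode=True,
    generation_config="vllm",
)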

src/art/dev/openai_server.py

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@ def get_openai_server_config(
         num_scheduler_steps=16,
         served_model_name=base_model,
         disable_log_requests=True,
+        generation_config="vllm",
     )
     engine_args.update(config.get("engine_args", {}))
     return OpenAIServerConfig(
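
The server path gets the same addition, and because both helpers still call engine_args.update(config.get("engine_args", {})) afterwards, a caller-supplied config can override the new default. A small sketch of that merge order (the dictionary contents are illustrative, not values from the repo):

# Defaults set by the helper, including the new key.
engine_args = {"disable_log_requests": True, "generation_config": "vllm"}

# A user config merged afterwards wins, e.g. to restore the model's own sampling defaults.
config = {"engine_args": {"generation_config": "auto"}}
engine_args.update(config.get("engine_args", {}))

assert engine_args["generation_config"] == "auto"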

src/art/local/vllm.py

Lines changed: 3 additions & 3 deletions
@@ -265,7 +265,7 @@ def patch_get_lora_tokenizer_async() -> None:
     Specifically, Unsloth patches get_lora_tokenizer_async with a non-async function, which causes issues.
     """
     import vllm.transformers_utils.tokenizer
-    import vllm.transformers_utils.tokenizer_group.tokenizer_group
+    import vllm.transformers_utils.tokenizer_group
 
     async def _return_nothing(*_, **__) -> None:
         return None
@@ -274,10 +274,10 @@ async def get_self_lora_tokenizer_async(self, *args, **kwargs):
         return self.tokenizer
 
     vllm.transformers_utils.tokenizer.get_lora_tokenizer_async = _return_nothing  # type: ignore
-    vllm.transformers_utils.tokenizer_group.tokenizer_group.get_lora_tokenizer_async = (
+    vllm.transformers_utils.tokenizer_group.get_lora_tokenizer_async = (
         _return_nothing  # type: ignore
     )
-    vllm.transformers_utils.tokenizer_group.tokenizer_group.TokenizerGroup.get_lora_tokenizer_async = get_self_lora_tokenizer_async  # type: ignore
+    vllm.transformers_utils.tokenizer_group.TokenizerGroup.get_lora_tokenizer_async = get_self_lora_tokenizer_async  # type: ignore
 
 
 def patch_listen_for_disconnect() -> None:
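
These hunks only track a module move: in the version range now pinned, get_lora_tokenizer_async and TokenizerGroup are exposed on vllm.transformers_utils.tokenizer_group directly rather than on its nested tokenizer_group submodule. A hedged sketch of a variant that tolerates both layouts, in case the dependency range ever has to span old and new paths (the fallback branch is an assumption about the 0.7.x layout, and is not part of this commit):

import vllm.transformers_utils.tokenizer_group as tokenizer_group_module


async def get_self_lora_tokenizer_async(self, *args, **kwargs):
    # Same shape as the coroutine patched in above: hand back the group's own tokenizer.
    return self.tokenizer


if not hasattr(tokenizer_group_module, "TokenizerGroup"):
    # Older layout (vllm 0.7.x): the class lives one module deeper.
    from vllm.transformers_utils.tokenizer_group import (  # type: ignore[attr-defined]
        tokenizer_group as tokenizer_group_module,
    )

tokenizer_group_module.TokenizerGroup.get_lora_tokenizer_async = get_self_lora_tokenizer_async  # type: ignore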
