Commit a99ccb1

mark invariant normalizer factor as non-persistent to prevent it from being scrambled by the dummy weight loader
1 parent d4629dc commit a99ccb1
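
For context: a dummy-style load path skips reading checkpoint files and instead fills every tensor it finds in the model's state dict with placeholder values. That is fast, but it also overwrites constants that live in the state dict as persistent buffers, such as Gemma's normalizer (hidden_size**0.5). The sketch below illustrates the failure mode in plain PyTorch; randomize_state_dict is a hypothetical stand-in, not vLLM's actual dummy loader.

# Illustrative sketch only (plain PyTorch, not vLLM): a dummy-style
# initializer that fills everything in state_dict() with random data.
import torch
import torch.nn as nn


class TinyGemmaLike(nn.Module):
    def __init__(self, hidden_size: int = 16, persistent: bool = True):
        super().__init__()
        self.proj = nn.Linear(hidden_size, hidden_size)
        # Invariant scale factor sqrt(hidden_size), stored as a buffer.
        self.register_buffer("normalizer",
                             torch.tensor(hidden_size**0.5),
                             persistent=persistent)


def randomize_state_dict(model: nn.Module) -> None:
    # Hypothetical stand-in for a dummy weight loader: overwrite every
    # tensor that appears in the state dict, weights and buffers alike.
    for tensor in model.state_dict().values():
        tensor.uniform_(-1.0, 1.0)


scrambled = TinyGemmaLike(persistent=True)
randomize_state_dict(scrambled)
print(scrambled.normalizer)      # random value, no longer 4.0

intact = TinyGemmaLike(persistent=False)
randomize_state_dict(intact)
print(intact.normalizer)         # still tensor(4.) = sqrt(16)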

File tree

4 files changed: +36 -3 lines changed
Lines changed: 27 additions & 0 deletions

@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
+import numpy as np
+
+
+MODELS = [
+    "google/gemma-2b",
+    "google/gemma-2-2b",
+    "google/gemma-3-4b-it"
+]
+
+
+@pytest.mark.parametrize("model", MODELS)
+def test_dummy_loader(vllm_runner, model: str) -> None:
+    with vllm_runner(
+        model,
+        load_format="dummy",
+    ) as llm:
+        normalizers = llm.collective_rpc(
+            lambda self: self.worker.model_runner.model.model.normalizer.cpu().item()
+        )
+        assert np.allclose(
+            normalizers,
+            llm.llm_engine.model_config.hf_config.hidden_size**0.5,
+            rtol=1e-3
+        )
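
The new test loads each model with load_format="dummy", gathers the normalizer value from every worker via collective_rpc, and checks that it still equals hidden_size**0.5 read from the HF config. As a rough standalone illustration of that assertion (assuming a hidden_size of 2048 for concreteness; the test reads the real value from the config rather than hard-coding it):

# Standalone illustration of the assertion; hidden_size = 2048 is an
# assumption here, while the test reads it from the model's HF config.
import numpy as np

hidden_size = 2048
expected = hidden_size ** 0.5           # ~45.2548
normalizers = [45.25, 45.25]            # e.g. one value reported per worker
assert np.allclose(normalizers, expected, rtol=1e-3)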

vllm/model_executor/models/gemma.py

Lines changed: 3 additions & 1 deletion

@@ -281,7 +281,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         # data type such as bfloat16, not float32.
         # See https://github.com/huggingface/transformers/pull/29402
         normalizer = self.config.hidden_size**0.5
-        self.register_buffer("normalizer", torch.tensor(normalizer))
+        self.register_buffer(
+            "normalizer", torch.tensor(normalizer), persistent=False
+        )
         self.make_empty_intermediate_tensors = (
             make_empty_intermediate_tensors_factory(
                 ["hidden_states", "residual"], config.hidden_size))

vllm/model_executor/models/gemma2.py

Lines changed: 3 additions & 1 deletion

@@ -267,7 +267,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         # data type such as bfloat16, not float32.
         # See https://github.com/huggingface/transformers/pull/29402
         normalizer = self.config.hidden_size**0.5
-        self.register_buffer("normalizer", torch.tensor(normalizer))
+        self.register_buffer(
+            "normalizer", torch.tensor(normalizer), persistent=False
+        )
         self.make_empty_intermediate_tensors = (
             make_empty_intermediate_tensors_factory(
                 ["hidden_states", "residual"], config.hidden_size))

vllm/model_executor/models/gemma3.py

Lines changed: 3 additions & 1 deletion

@@ -371,7 +371,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         # data type such as bfloat16, not float32.
         # See https://github.com/huggingface/transformers/pull/29402
         normalizer = self.config.hidden_size**0.5
-        self.register_buffer("normalizer", torch.tensor(normalizer))
+        self.register_buffer(
+            "normalizer", torch.tensor(normalizer), persistent=False
+        )
         self.make_empty_intermediate_tensors = (
             make_empty_intermediate_tensors_factory(
                 ["hidden_states", "residual"], config.hidden_size))
