Skip to content

Commit c40169c

Browse files
njhillsumitd2
authored and committed
[Misc] Don't dump contents of kvcache tensors on errors (vllm-project#8527)
Signed-off-by: Sumit Dubey <sumit.dubey2@ibm.com>
1 parent b86de26 commit c40169c

File tree

1 file changed

+20
-2
lines changed

1 file changed

+20
-2
lines changed

vllm/worker/model_runner_base.py

+20-2
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,13 @@
33
from abc import ABC, abstractmethod
44
from datetime import datetime
55
from functools import wraps
6-
from typing import (TYPE_CHECKING, Any, Dict, Generic, List, Optional, Type,
7-
TypeVar)
6+
from typing import (TYPE_CHECKING, Any, Dict, Generic, Iterable, List,
7+
Optional, Type, TypeVar)
88

99
import torch
10+
from torch import is_tensor
1011

12+
from vllm.logger import init_logger
1113
from vllm.model_executor.layers.sampler import SamplerOutput
1214
from vllm.platforms import current_platform
1315
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
@@ -17,6 +19,8 @@
1719
from vllm.attention.backends.abstract import AttentionBackend
1820
from vllm.model_executor import SamplingMetadata
1921

22+
logger = init_logger(__name__)
23+
2024
T = TypeVar('T', bound="BroadcastableModelInput")
2125

2226

@@ -113,6 +117,8 @@ def _wrapper(*args, **kwargs):
113117
except Exception as err:
114118
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
115119
filename = f"/tmp/err_{func.__name__}_input_{timestamp}.pkl"
120+
logger.info("Writing input of failed execution to %s...",
121+
filename)
116122
with open(filename, "wb") as filep:
117123
dumped_inputs = {
118124
k: v
@@ -122,7 +128,19 @@ def _wrapper(*args, **kwargs):
122128
for i, arg in enumerate(args):
123129
if i not in (exclude_args or []):
124130
dumped_inputs[f"arg_{i}"] = arg
131+
132+
# Only persist dtype and shape for kvcache tensors
133+
# (can be way too big otherwise)
134+
if (kv_caches := dumped_inputs.get("kv_caches")) \
135+
and isinstance(kv_caches, Iterable):
136+
dumped_inputs["kv_caches"] = [(t.dtype, t.shape)
137+
for t in kv_caches
138+
if is_tensor(t)]
139+
125140
pickle.dump(dumped_inputs, filep)
141+
logger.info(
142+
"Completed writing input of failed execution to %s.",
143+
filename)
126144
raise type(err)(
127145
f"Error in model execution (input dumped to {filename}): "
128146
f"{str(err)}") from err

0 commit comments

Comments
 (0)