Skip to content

Commit

Permalink
[Core] Fix circular reference which leaked llm instance in local dev …
Browse files Browse the repository at this point in the history
…env (vllm-project#4737)

Storing an exception frame is extremely prone to circular references because it contains references to objects.

When tensorizer is not installed, it leaks the llm instance because the error frame holds references to various modules, which causes a circular reference problem.

I also found spec decoding has a circular reference issue, and I solved it using weakref.proxy.
  • Loading branch information
rkooo567 authored May 10, 2024
1 parent dac6a3f commit 6a0f617
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 7 deletions.
13 changes: 13 additions & 0 deletions tests/basic_correctness/test_basic_correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,29 @@
Run `pytest tests/basic_correctness/test_basic_correctness.py`.
"""
import os
import weakref

import pytest

from vllm import LLM

MODELS = [
"facebook/opt-125m",
"meta-llama/Llama-2-7b-hf",
]
VLLM_ATTENTION_BACKEND = "VLLM_ATTENTION_BACKEND"


def test_vllm_gc_ed():
    """Check that a deleted LLM instance is actually garbage-collected."""
    engine = LLM("facebook/opt-125m")
    tracker = weakref.ref(engine)
    del engine
    # A live weakref target after deletion means some circular reference
    # kept the engine alive, so the instance was never GC'ed.
    assert tracker() is None


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("max_tokens", [5])
Expand Down
10 changes: 5 additions & 5 deletions vllm/model_executor/model_loader/tensorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from vllm.model_executor.layers.vocab_parallel_embedding import (
VocabParallelEmbedding)

tensorizer_load_fail = None
tensorizer_error_msg = None

try:
from tensorizer import (DecryptionParams, EncryptionParams,
Expand All @@ -28,7 +28,7 @@
from tensorizer.utils import (convert_bytes, get_mem_usage,
no_init_or_tensor)
except ImportError as e:
tensorizer_load_fail = e
tensorizer_error_msg = str(e)

__all__ = [
'EncryptionParams', 'DecryptionParams', 'TensorDeserializer',
Expand Down Expand Up @@ -254,11 +254,11 @@ class TensorizerAgent:

def __init__(self, tensorizer_config: TensorizerConfig,
quant_config: QuantizationConfig, **extra_kwargs):
if tensorizer_load_fail is not None:
if tensorizer_error_msg is not None:
raise ImportError(
"Tensorizer is not installed. Please install tensorizer "
"to use this feature with `pip install vllm[tensorizer]`."
) from tensorizer_load_fail
"to use this feature with `pip install vllm[tensorizer]`. "
"Error message: {}".format(tensorizer_error_msg))

self.tensorizer_config = tensorizer_config
self.tensorizer_args = (
Expand Down
3 changes: 2 additions & 1 deletion vllm/spec_decode/multi_step_worker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import copy
import weakref
from typing import List, Tuple

import torch
Expand Down Expand Up @@ -32,7 +33,7 @@ def init_device(self):
super().init_device()

self._proposer = Top1Proposer(
self,
weakref.proxy(self),
self.device,
self.vocab_size,
max_proposal_len=self.max_model_len,
Expand Down
3 changes: 2 additions & 1 deletion vllm/spec_decode/ngram_worker.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import weakref
from typing import List, Optional, Tuple

import torch
Expand Down Expand Up @@ -37,7 +38,7 @@ def init_device(self):

# Current only support Top1Proposer
self._proposer = Top1Proposer(
self,
weakref.proxy(self),
device=self.device,
vocab_size=self.vocab_size,
)
Expand Down

0 comments on commit 6a0f617

Please sign in to comment.