Skip to content

Commit 21adc81

Browse files
Factory method to create the offloading manager
This change refactors the KV cache offloading system to use a Factory Pattern for creating eviction policy managers, replacing hardcoded conditional logic. Signed-off-by: Alberto Perdomo <aperdomo@redhat.com>
1 parent 11c3f20 commit 21adc81

File tree

2 files changed

+56
-15
lines changed

2 files changed

+56
-15
lines changed

vllm/v1/kv_offload/cpu.py

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,8 @@
88
from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
99
from vllm.platforms import current_platform
1010
from vllm.v1.kv_offload.abstract import LoadStoreSpec, OffloadingManager
11-
from vllm.v1.kv_offload.arc_manager import ARCOffloadingManager
1211
from vllm.v1.kv_offload.backends.cpu import CPUBackend
13-
from vllm.v1.kv_offload.lru_manager import LRUOffloadingManager
12+
from vllm.v1.kv_offload.factory import OffloadingManagerFactory
1413
from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec
1514
from vllm.v1.kv_offload.spec import OffloadingSpec
1615
from vllm.v1.kv_offload.worker.cpu_gpu import CpuGpuOffloadingHandler
@@ -47,19 +46,11 @@ def get_manager(self) -> OffloadingManager:
4746
block_size=self.offloaded_block_size, num_blocks=self.num_cpu_blocks
4847
)
4948

50-
if self.eviction_policy == "arc":
51-
self._manager = ARCOffloadingManager(
52-
backend, enable_events=enable_events
53-
)
54-
elif self.eviction_policy == "lru":
55-
self._manager = LRUOffloadingManager(
56-
backend, enable_events=enable_events
57-
)
58-
else:
59-
raise ValueError(
60-
f"Unknown eviction policy: {self.eviction_policy}. "
61-
f"Supported policies: 'lru', 'arc'"
62-
)
49+
self._manager = OffloadingManagerFactory.create_manager(
50+
policy_name=self.eviction_policy,
51+
backend=backend,
52+
enable_events=enable_events,
53+
)
6354
return self._manager
6455

6556
def get_handlers(

vllm/v1/kv_offload/factory.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
if TYPE_CHECKING:
1111
from vllm.config import VllmConfig
12+
from vllm.v1.kv_offload.abstract import OffloadingManager
13+
from vllm.v1.kv_offload.backend import Backend
1214

1315
logger = init_logger(__name__)
1416

@@ -50,7 +52,55 @@ def create_spec(
5052
return spec_cls(config)
5153

5254

55+
class OffloadingManagerFactory:
    """Registry-backed factory that builds an OffloadingManager per eviction policy.

    Each policy name is registered together with the module path and class
    name of its manager. The module is imported only when a manager for
    that policy is requested through ``create_manager``.
    """

    # Policy name -> zero-argument callable that returns the manager class.
    _registry: dict[str, Callable[[], type["OffloadingManager"]]] = {}

    @classmethod
    def register_manager(
        cls, policy_name: str, module_path: str, class_name: str
    ) -> None:
        """Associate ``policy_name`` with a lazily imported manager class.

        Args:
            policy_name: Key under which the manager is registered.
            module_path: Dotted path of the module defining the manager.
            class_name: Attribute name of the manager class in that module.

        Raises:
            ValueError: If ``policy_name`` already has a registration.
        """
        if policy_name in cls._registry:
            raise ValueError(f"Policy '{policy_name}' is already registered.")

        def _resolve_class() -> type["OffloadingManager"]:
            # The import happens here, at call time, not at registration time.
            return getattr(importlib.import_module(module_path), class_name)

        cls._registry[policy_name] = _resolve_class

    @classmethod
    def create_manager(
        cls,
        policy_name: str,
        backend: "Backend",
        enable_events: bool = False,
    ) -> "OffloadingManager":
        """Instantiate the manager registered under ``policy_name``.

        Args:
            policy_name: Name of a previously registered eviction policy.
            backend: Passed to the manager constructor as ``backend``.
            enable_events: Passed to the manager constructor as
                ``enable_events``.

        Returns:
            The object produced by calling the registered manager class.

        Raises:
            ValueError: If ``policy_name`` has not been registered.
        """
        registry = cls._registry
        if policy_name not in registry:
            supported = list(registry.keys())
            raise ValueError(
                f"Unknown eviction policy: {policy_name}. "
                f"Supported policies: {supported}"
            )

        # Resolve (and thereby import) the class before logging, then build it.
        resolve_class = registry[policy_name]
        manager_cls = resolve_class()
        logger.info("Creating offloading manager with policy: %s", policy_name)
        return manager_cls(backend=backend, enable_events=enable_events)
93+
94+
5395
# Register various specs here.
5496
OffloadingSpecFactory.register_spec(
5597
"CPUOffloadingSpec", "vllm.v1.kv_offload.cpu", "CPUOffloadingSpec"
5698
)
99+
100+
# Register built-in eviction policies here.
101+
OffloadingManagerFactory.register_manager(
102+
"lru", "vllm.v1.kv_offload.lru_manager", "LRUOffloadingManager"
103+
)
104+
OffloadingManagerFactory.register_manager(
105+
"arc", "vllm.v1.kv_offload.arc_manager", "ARCOffloadingManager"
106+
)

0 commit comments

Comments
 (0)