Skip to content

Commit

Permalink
fix: add a max_containers param, which controls modal concurrency_limit
Browse files Browse the repository at this point in the history
  • Loading branch information
sambarnes authored Mar 6, 2024
1 parent 2e8ea4a commit 58828b5
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion modal/runner/containers/vllm_unified.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@


def _make_container(
name: str, num_gpus: int = 1, memory: int = 0, concurrent_inputs: int = 8
name: str,
num_gpus: int = 1,
memory: int = 0,
concurrent_inputs: int = 8,
max_containers: int = None,
):
"""Helper function to create a container with the given GPU configuration."""

Expand Down Expand Up @@ -83,6 +87,7 @@ def __init__(
container_idle_timeout=20 * 60,
timeout=10 * 60,
secrets=[*get_observability_secrets()],
concurrency_limit=max_containers,
)
return wrap(_VllmContainer)

Expand Down Expand Up @@ -112,10 +117,12 @@ def __init__(
num_gpus=2,
memory=80,
concurrent_inputs=4,
max_containers=2,
)
# Container for the JohnDurbin Bagel 34B model, built via _make_container
# with 2 GPUs and 4 concurrent inputs.
# max_containers=1 is passed through as `concurrency_limit` in the container
# definition shown earlier in this diff -- presumably capping this model at a
# single running container; confirm against Modal's documentation.
VllmContainer_JohnDurbinBagel34B = _make_container(
name="VllmContainer_JohnDurbinBagel34B",
num_gpus=2,
memory=80,
concurrent_inputs=4,
max_containers=1,
)

0 comments on commit 58828b5

Please sign in to comment.