Skip to content

Commit 9a6e6fa

Browse files
russellb and huachenheli
authored and committed
Add option to restrict media domains (vllm-project#25783)
Signed-off-by: Chenheli Hua <huachenheli@outlook.com> Signed-off-by: Russell Bryant <rbryant@redhat.com> Co-authored-by: Chenheli Hua <huachenheli@outlook.com> Signed-off-by: simon-mo <simon.mo@hey.com>
1 parent f789bda commit 9a6e6fa

File tree

11 files changed

+80
-1
lines changed

11 files changed

+80
-1
lines changed

docs/features/multimodal_inputs.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ This page teaches you how to pass multi-modal inputs to [multi-modal models][sup
66
We are actively iterating on multi-modal support. See [this RFC](gh-issue:4194) for upcoming changes,
77
and [open an issue on GitHub](https://github.com/vllm-project/vllm/issues/new/choose) if you have any feedback or feature requests.
88

9+
!!! tip
10+
When serving multi-modal models, consider setting `--allowed-media-domains` to restrict the domains that vLLM can access. Without this restriction, a request can make vLLM fetch media from arbitrary endpoints, which exposes the deployment to Server-Side Request Forgery (SSRF) attacks. You can provide a list of domains for this argument. For example: `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`
11+
This restriction is especially important if you run vLLM in a containerized environment where the vLLM pods may have unrestricted access to internal networks.
12+
913
## Offline Inference
1014

1115
To input multi-modal data, follow this schema in [vllm.inputs.PromptType][]:

docs/usage/security.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,12 @@ Key points from the PyTorch security guide:
6060
- Implement proper authentication and authorization for management interfaces
6161
- Follow the principle of least privilege for all system components
6262

63+
### 4. **Restrict Domain Access for Media URLs:**
64+
65+
Restrict domains that vLLM can access for media URLs by setting
66+
`--allowed-media-domains` to prevent Server-Side Request Forgery (SSRF) attacks.
67+
(e.g. `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`)
68+
6369
## Security and Firewalls: Protecting Exposed vLLM Systems
6470

6571
While vLLM is designed to allow unsafe network services to be isolated to

tests/entrypoints/openai/test_lora_resolvers.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class MockModelConfig:
4545
logits_processor_pattern: Optional[str] = None
4646
diff_sampling_param: Optional[dict] = None
4747
allowed_local_media_path: str = ""
48+
allowed_media_domains: Optional[list[str]] = None
4849
encoder_config = None
4950
generation_config: str = "auto"
5051
skip_tokenizer_init: bool = False

tests/entrypoints/openai/test_serving_chat.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ class MockModelConfig:
240240
logits_processor_pattern = None
241241
diff_sampling_param: Optional[dict] = None
242242
allowed_local_media_path: str = ""
243+
allowed_media_domains: Optional[list[str]] = None
243244
encoder_config = None
244245
generation_config: str = "auto"
245246
media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)

tests/multimodal/test_utils.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,12 @@ async def test_fetch_image_http(image_url: str):
6666
@pytest.mark.parametrize("suffix", get_supported_suffixes())
6767
async def test_fetch_image_base64(url_images: dict[str, Image.Image],
6868
raw_image_url: str, suffix: str):
69-
connector = MediaConnector()
69+
connector = MediaConnector(
70+
# Domain restriction should not apply to data URLs.
71+
allowed_media_domains=[
72+
"www.bogotobogo.com",
73+
"github.com",
74+
])
7075
url_image = url_images[raw_image_url]
7176

7277
try:
@@ -387,3 +392,29 @@ def test_argsort_mm_positions(case):
387392
modality_idxs = argsort_mm_positions(mm_positions)
388393

389394
assert modality_idxs == expected_modality_idxs
395+
396+
397+
@pytest.mark.asyncio
398+
@pytest.mark.parametrize("video_url", TEST_VIDEO_URLS)
399+
@pytest.mark.parametrize("num_frames", [-1, 32, 1800])
400+
async def test_allowed_media_domains(video_url: str, num_frames: int):
401+
connector = MediaConnector(
402+
media_io_kwargs={"video": {
403+
"num_frames": num_frames,
404+
}},
405+
allowed_media_domains=[
406+
"www.bogotobogo.com",
407+
"github.com",
408+
])
409+
410+
video_sync, metadata_sync = connector.fetch_video(video_url)
411+
video_async, metadata_async = await connector.fetch_video_async(video_url)
412+
assert np.array_equal(video_sync, video_async)
413+
assert metadata_sync == metadata_async
414+
415+
disallowed_url = "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"
416+
with pytest.raises(ValueError):
417+
_, _ = connector.fetch_video(disallowed_url)
418+
419+
with pytest.raises(ValueError):
420+
_, _ = await connector.fetch_video_async(disallowed_url)

vllm/config/model.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ class ModelConfig:
137137
"""Allowing API requests to read local images or videos from directories
138138
specified by the server file system. This is a security risk. Should only
139139
be enabled in trusted environments."""
140+
allowed_media_domains: Optional[list[str]] = None
141+
"""If set, only media URLs whose domain is in this list can be used for
142+
multi-modal inputs. """
140143
revision: Optional[str] = None
141144
"""The specific model version to use. It can be a branch name, a tag name,
142145
or a commit id. If unspecified, will use the default version."""

vllm/config/speculative.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,8 @@ def __post_init__(self):
281281
trust_remote_code,
282282
allowed_local_media_path=self.target_model_config.
283283
allowed_local_media_path,
284+
allowed_media_domains=self.target_model_config.
285+
allowed_media_domains,
284286
dtype=self.target_model_config.dtype,
285287
seed=self.target_model_config.seed,
286288
revision=self.revision,

vllm/engine/arg_utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,8 @@ class EngineArgs:
297297
tokenizer_mode: TokenizerMode = ModelConfig.tokenizer_mode
298298
trust_remote_code: bool = ModelConfig.trust_remote_code
299299
allowed_local_media_path: str = ModelConfig.allowed_local_media_path
300+
allowed_media_domains: Optional[
301+
list[str]] = ModelConfig.allowed_media_domains
300302
download_dir: Optional[str] = LoadConfig.download_dir
301303
safetensors_load_strategy: str = LoadConfig.safetensors_load_strategy
302304
load_format: Union[str, LoadFormats] = LoadConfig.load_format
@@ -531,6 +533,8 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
531533
**model_kwargs["hf_config_path"])
532534
model_group.add_argument("--allowed-local-media-path",
533535
**model_kwargs["allowed_local_media_path"])
536+
model_group.add_argument("--allowed-media-domains",
537+
**model_kwargs["allowed_media_domains"])
534538
model_group.add_argument("--revision", **model_kwargs["revision"])
535539
model_group.add_argument("--code-revision",
536540
**model_kwargs["code_revision"])
@@ -997,6 +1001,7 @@ def create_model_config(self) -> ModelConfig:
9971001
tokenizer_mode=self.tokenizer_mode,
9981002
trust_remote_code=self.trust_remote_code,
9991003
allowed_local_media_path=self.allowed_local_media_path,
1004+
allowed_media_domains=self.allowed_media_domains,
10001005
dtype=self.dtype,
10011006
seed=self.seed,
10021007
revision=self.revision,

vllm/entrypoints/chat_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,10 @@ def model_cls(self) -> type[SupportsMultiModal]:
637637
def allowed_local_media_path(self):
638638
return self._model_config.allowed_local_media_path
639639

640+
@property
641+
def allowed_media_domains(self):
642+
return self._model_config.allowed_media_domains
643+
640644
@property
641645
def mm_registry(self):
642646
return MULTIMODAL_REGISTRY
@@ -837,6 +841,7 @@ def __init__(self, tracker: MultiModalItemTracker) -> None:
837841
self._connector = MediaConnector(
838842
media_io_kwargs=media_io_kwargs,
839843
allowed_local_media_path=tracker.allowed_local_media_path,
844+
allowed_media_domains=tracker.allowed_media_domains,
840845
)
841846

842847
def parse_image(
@@ -921,6 +926,7 @@ def __init__(self, tracker: AsyncMultiModalItemTracker) -> None:
921926
self._connector = MediaConnector(
922927
media_io_kwargs=media_io_kwargs,
923928
allowed_local_media_path=tracker.allowed_local_media_path,
929+
allowed_media_domains=tracker.allowed_media_domains,
924930
)
925931

926932
def parse_image(

vllm/entrypoints/llm.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ class LLM:
8686
or videos from directories specified by the server file system.
8787
This is a security risk. Should only be enabled in trusted
8888
environments.
89+
allowed_media_domains: If set, only media URLs whose domain is in this
90+
list can be used for multi-modal inputs.
8991
tensor_parallel_size: The number of GPUs to use for distributed
9092
execution with tensor parallelism.
9193
dtype: The data type for the model weights and activations. Currently,
@@ -169,6 +171,7 @@ def __init__(
169171
skip_tokenizer_init: bool = False,
170172
trust_remote_code: bool = False,
171173
allowed_local_media_path: str = "",
174+
allowed_media_domains: Optional[list[str]] = None,
172175
tensor_parallel_size: int = 1,
173176
dtype: ModelDType = "auto",
174177
quantization: Optional[QuantizationMethods] = None,
@@ -264,6 +267,7 @@ def __init__(
264267
skip_tokenizer_init=skip_tokenizer_init,
265268
trust_remote_code=trust_remote_code,
266269
allowed_local_media_path=allowed_local_media_path,
270+
allowed_media_domains=allowed_media_domains,
267271
tensor_parallel_size=tensor_parallel_size,
268272
dtype=dtype,
269273
quantization=quantization,

0 commit comments

Comments
 (0)