|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# SPDX-License-Identifier: Apache-2.0 |
| 3 | +import os |
| 4 | +import sys |
| 5 | + |
| 6 | +import regex as re |
| 7 | + |
| 8 | +try: |
| 9 | + import pathspec |
| 10 | +except ImportError: |
| 11 | + print( |
| 12 | + "ERROR: The 'pathspec' library is required. " |
| 13 | + "Install it with 'pip install pathspec'.", |
| 14 | + file=sys.stderr) |
| 15 | + sys.exit(2) |
| 16 | + |
| 17 | +# List of files (relative to repo root) that are allowed to import pickle or |
| 18 | +# cloudpickle |
| 19 | +# |
| 20 | +# STOP AND READ BEFORE YOU ADD ANYTHING ELSE TO THIS LIST: |
| 21 | +# The pickle and cloudpickle modules are known to be unsafe when deserializing |
| 22 | +# data from potentially untrusted parties. They have resulted in multiple CVEs |
| 23 | +# for vLLM and numerous vulnerabilities in the Python ecosystem more broadly. |
| 24 | +# Before adding new uses of pickle/cloudpickle, please consider safer |
| 25 | +# alternatives like msgpack or pydantic that are already in use in vLLM. Only |
| 26 | +# add to this list if absolutely necessary and after careful security review. |
| 27 | +ALLOWED_FILES = set([ |
| 28 | + # pickle |
| 29 | + 'vllm/utils.py', |
| 30 | + 'vllm/v1/serial_utils.py', |
| 31 | + 'vllm/v1/executor/multiproc_executor.py', |
| 32 | + 'vllm/multimodal/hasher.py', |
| 33 | + 'vllm/transformers_utils/config.py', |
| 34 | + 'vllm/model_executor/models/registry.py', |
| 35 | + 'tests/test_utils.py', |
| 36 | + 'tests/tokenization/test_cached_tokenizer.py', |
| 37 | + 'tests/model_executor/test_guided_processors.py', |
| 38 | + 'vllm/distributed/utils.py', |
| 39 | + 'vllm/distributed/parallel_state.py', |
| 40 | + 'vllm/engine/multiprocessing/client.py', |
| 41 | + 'vllm/distributed/device_communicators/custom_all_reduce_utils.py', |
| 42 | + 'vllm/distributed/device_communicators/shm_broadcast.py', |
| 43 | + 'vllm/engine/multiprocessing/engine.py', |
| 44 | + 'benchmarks/kernels/graph_machete_bench.py', |
| 45 | + 'benchmarks/kernels/benchmark_lora.py', |
| 46 | + 'benchmarks/kernels/benchmark_machete.py', |
| 47 | + 'benchmarks/fused_kernels/layernorm_rms_benchmarks.py', |
| 48 | + 'benchmarks/cutlass_benchmarks/w8a8_benchmarks.py', |
| 49 | + 'benchmarks/cutlass_benchmarks/sparse_benchmarks.py', |
| 50 | + # cloudpickle |
| 51 | + 'vllm/worker/worker_base.py', |
| 52 | + 'vllm/executor/mp_distributed_executor.py', |
| 53 | + 'vllm/executor/ray_distributed_executor.py', |
| 54 | + 'vllm/entrypoints/llm.py', |
| 55 | + 'tests/utils.py', |
| 56 | + # pickle and cloudpickle |
| 57 | + 'vllm/utils.py', |
| 58 | + 'vllm/v1/serial_utils.py', |
| 59 | + 'vllm/v1/executor/multiproc_executor.py', |
| 60 | + 'vllm/transformers_utils/config.py', |
| 61 | + 'vllm/model_executor/models/registry.py', |
| 62 | + 'vllm/engine/multiprocessing/client.py', |
| 63 | + 'vllm/engine/multiprocessing/engine.py', |
| 64 | +]) |
| 65 | + |
| 66 | +PICKLE_RE = re.compile(r"^\s*(import\s+(pickle|cloudpickle)(\s|$|\sas)" |
| 67 | + r"|from\s+(pickle|cloudpickle)\s+import\b)") |
| 68 | + |
| 69 | + |
| 70 | +def is_python_file(path): |
| 71 | + return path.endswith('.py') |
| 72 | + |
| 73 | + |
| 74 | +def scan_file(path): |
| 75 | + with open(path, encoding='utf-8') as f: |
| 76 | + for line in f: |
| 77 | + if PICKLE_RE.match(line): |
| 78 | + return True |
| 79 | + return False |
| 80 | + |
| 81 | + |
| 82 | +def load_gitignore(repo_root): |
| 83 | + gitignore_path = os.path.join(repo_root, '.gitignore') |
| 84 | + patterns = [] |
| 85 | + if os.path.exists(gitignore_path): |
| 86 | + with open(gitignore_path, encoding='utf-8') as f: |
| 87 | + patterns = f.read().splitlines() |
| 88 | + # Always ignore .git directory |
| 89 | + patterns.append('.git/') |
| 90 | + return pathspec.PathSpec.from_lines('gitwildmatch', patterns) |
| 91 | + |
| 92 | + |
| 93 | +def main(): |
| 94 | + repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) |
| 95 | + spec = load_gitignore(repo_root) |
| 96 | + bad_files = [] |
| 97 | + for dirpath, _, filenames in os.walk(repo_root): |
| 98 | + for filename in filenames: |
| 99 | + if not is_python_file(filename): |
| 100 | + continue |
| 101 | + abs_path = os.path.join(dirpath, filename) |
| 102 | + rel_path = os.path.relpath(abs_path, repo_root) |
| 103 | + # Skip ignored files |
| 104 | + if spec.match_file(rel_path): |
| 105 | + continue |
| 106 | + if scan_file(abs_path) and rel_path not in ALLOWED_FILES: |
| 107 | + bad_files.append(rel_path) |
| 108 | + if bad_files: |
| 109 | + print("\nERROR: The following files import 'pickle' or 'cloudpickle' " |
| 110 | + "but are not in the allowed list:") |
| 111 | + for f in bad_files: |
| 112 | + print(f" {f}") |
| 113 | + print("\nIf this is intentional, update the allowed list in " |
| 114 | + "tools/check_pickle_imports.py.") |
| 115 | + sys.exit(1) |
| 116 | + sys.exit(0) |
| 117 | + |
| 118 | + |
| 119 | +def test_regex(): |
| 120 | + test_cases = [ |
| 121 | + # Should match |
| 122 | + ("import pickle", True), |
| 123 | + ("import cloudpickle", True), |
| 124 | + ("import pickle as pkl", True), |
| 125 | + ("import cloudpickle as cpkl", True), |
| 126 | + ("from pickle import *", True), |
| 127 | + ("from cloudpickle import dumps", True), |
| 128 | + ("from pickle import dumps, loads", True), |
| 129 | + ("from cloudpickle import (dumps, loads)", True), |
| 130 | + (" import pickle", True), |
| 131 | + ("\timport cloudpickle", True), |
| 132 | + ("from pickle import loads", True), |
| 133 | + # Should not match |
| 134 | + ("import somethingelse", False), |
| 135 | + ("from somethingelse import pickle", False), |
| 136 | + ("# import pickle", False), |
| 137 | + ("print('import pickle')", False), |
| 138 | + ("import pickleas as asdf", False), |
| 139 | + ] |
| 140 | + for i, (line, should_match) in enumerate(test_cases): |
| 141 | + result = bool(PICKLE_RE.match(line)) |
| 142 | + assert result == should_match, ( |
| 143 | + f"Test case {i} failed: '{line}' " |
| 144 | + f"(expected {should_match}, got {result})") |
| 145 | + print("All regex tests passed.") |
| 146 | + |
| 147 | + |
| 148 | +if __name__ == '__main__': |
| 149 | + if '--test-regex' in sys.argv: |
| 150 | + test_regex() |
| 151 | + else: |
| 152 | + main() |
0 commit comments