Commit cb240c3

Parallel test with pytest-xdist (pytorch#518)
* try pytest-xdist
* run multi_gpu tests separately
* each worker runs 1 file
* use loadscope to balance load better (for test_integration.py)
* change back to loadfile
* per-test schedule
* per-test sharding; avoid name collisions
1 parent f1c92f8 commit cb240c3

6 files changed: +14 -4 lines changed

.github/workflows/regression_test.yml

Lines changed: 2 additions & 1 deletion

@@ -68,4 +68,5 @@ jobs:
 pip install ${{ matrix.torch-spec }}
 pip install -r dev-requirements.txt
 pip install .
-pytest test --verbose -s
+pytest test --verbose -s -m "not multi_gpu" --dist load --tx popen//env:CUDA_VISIBLE_DEVICES=0 --tx popen//env:CUDA_VISIBLE_DEVICES=1 --tx popen//env:CUDA_VISIBLE_DEVICES=2 --tx popen//env:CUDA_VISIBLE_DEVICES=3
+pytest test --verbose -s -m "multi_gpu"
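
Note: the --tx popen//env:... specs above spawn one pytest-xdist worker per listed GPU, each with its own CUDA_VISIBLE_DEVICES, while --dist load hands tests out one at a time to idle workers; multi_gpu tests are excluded from this pass and run serially afterwards. A minimal sketch of a sanity check one could write under this setup (the test name and assertions are illustrative, not part of this commit):

import os

import torch


def test_worker_sees_single_gpu():
    # Each xdist worker above is spawned with a single GPU index in
    # CUDA_VISIBLE_DEVICES, so at most one device should be visible here.
    assert "," not in os.environ.get("CUDA_VISIBLE_DEVICES", "")
    if torch.cuda.is_available():
        assert torch.cuda.device_count() == 1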

dev-requirements.txt

Lines changed: 1 addition & 0 deletions

@@ -7,6 +7,7 @@ transformers
 hypothesis # Avoid test derandomization warning
 sentencepiece # for gpt-fast tokenizer
 expecttest
+pytest-xdist
 
 # For prototype features and benchmarks
 bitsandbytes #needed for testing triton quant / dequant ops for 8-bit optimizers

pytest.ini

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+[pytest]
+markers =
+    multi_gpu: marks tests as require multi GPUs (deselect with '-m "not multi_gpu"')
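
For context, a minimal sketch of how the new marker is consumed in a test file (the test body below is hypothetical; the decorator and the deselection flag mirror the marker definition above):

import pytest
import torch


@pytest.mark.multi_gpu
def test_requires_two_gpus():
    # Deselected in the parallel CI pass via `-m "not multi_gpu"` and run
    # in the separate serial pass via `-m "multi_gpu"`.
    if torch.cuda.device_count() < 2:
        pytest.skip("requires at least 2 GPUs")
    ...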

test/dtypes/test_nf4.py

Lines changed: 1 addition & 0 deletions

@@ -486,6 +486,7 @@ class TestQLoRA(FSDPTest):
     def world_size(self) -> int:
         return 2
 
+    @pytest.mark.multi_gpu
     @pytest.mark.skipif(
         version.parse(torch.__version__).base_version < "2.4.0",
         reason="torch >= 2.4 required",

test/integration/test_integration.py

Lines changed: 6 additions & 3 deletions

@@ -985,7 +985,10 @@ def forward(self, x):
         # save quantized state_dict
         api(model)
 
-        torch.save(model.state_dict(), "test.pth")
+        # unique filename to avoid collision in parallel tests
+        ckpt_name = f"{api.__name__}_{test_device}_{test_dtype}_test.pth"
+
+        torch.save(model.state_dict(), ckpt_name)
         # get quantized reference
         model_qc = torch.compile(model, mode="max-autotune")
         ref_q = model_qc(x).detach()
@@ -998,8 +1001,8 @@ def forward(self, x):
         api(model)
 
         # load quantized state_dict
-        state_dict = torch.load("test.pth", mmap=True)
-        os.remove("test.pth")
+        state_dict = torch.load(ckpt_name, mmap=True)
+        os.remove(ckpt_name)
 
         model.load_state_dict(state_dict, assign=True)
         model = model.to(device=test_device, dtype=test_dtype).eval()
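
The idea behind this change, as a standalone sketch (the helper name below is illustrative, not from the repo): deriving the checkpoint path from the test's own parameters means two xdist workers running different parametrizations never touch the same file.

import os

import torch


def _state_dict_roundtrip(model, api_name, device, dtype):
    # Filename is unique per (api, device, dtype) parametrization, so
    # concurrent xdist workers cannot clobber each other's checkpoint.
    ckpt_name = f"{api_name}_{device}_{dtype}_test.pth"
    torch.save(model.state_dict(), ckpt_name)
    try:
        return torch.load(ckpt_name, mmap=True)
    finally:
        os.remove(ckpt_name)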

test/prototype/test_low_bit_optim.py

Lines changed: 1 addition & 0 deletions

@@ -163,6 +163,7 @@ class TestFSDP2(FSDPTest):
     def world_size(self) -> int:
         return 2
 
+    @pytest.mark.multi_gpu
     @pytest.mark.skipif(not TORCH_VERSION_AFTER_2_4, reason="torch >= 2.4 required")
     @skip_if_lt_x_gpu(2)
     def test_fsdp2(self):
