pytorch · yuanjua · May 16, 2025 · May 16, 2025 · May 16, 2025 · May 16, 2025
diff --git a/test/dtypes/test_bitpacking.py b/test/dtypes/test_bitpacking.py
@@ -3,8 +3,15 @@
 #
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
-import pytest
+import unittest
+
 import torch
+from torch.testing._internal.common_utils import (
+    TestCase,
+    instantiate_parametrized_tests,
+    parametrize,
+    run_tests,
+)
 from torch.utils._triton import has_triton
 
 from torchao.dtypes.uintx.bitpacking import pack, pack_cpu, unpack, unpack_cpu
@@ -13,68 +20,77 @@
 dimensions = (0, -1, 1)
 
 
-@pytest.fixture(autouse=True)
-def run_before_and_after_tests():
-    yield
-    torch._dynamo.reset()  # reset cache between tests
-
+class TestBitpacking(TestCase):
+    def setUp(self):  # NOTE(review): super().setUp() is not called — confirm intended
+        torch._dynamo.reset()  # reset cache between tests (before each test)
 
-@pytest.mark.parametrize("bit_width", bit_widths)
-@pytest.mark.parametrize("dim", dimensions)
-def test_CPU(bit_width, dim):
-    test_tensor = torch.randint(
-        0, 2**bit_width, (32, 32, 32), dtype=torch.uint8, device="cpu"
-    )
-    packed = pack_cpu(test_tensor, bit_width, dim=dim)
-    unpacked = unpack_cpu(packed, bit_width, dim=dim)
-    assert unpacked.allclose(test_tensor)
+    def tearDown(self):  # NOTE(review): super().tearDown() is not called — confirm
+        torch._dynamo.reset()  # reset cache between tests (after each test)
 
+    @parametrize("bit_width", bit_widths)
+    @parametrize("dim", dimensions)
+    def test_CPU(self, bit_width, dim):
+        """Round-trip random values through pack_cpu/unpack_cpu along ``dim``."""
+        shape = (32, 32, 32)
+        source = torch.randint(0, 2**bit_width, shape, dtype=torch.uint8, device="cpu")
+        shards = pack_cpu(source, bit_width, dim=dim)
+        restored = unpack_cpu(shards, bit_width, dim=dim)
+        self.assertTrue(restored.allclose(source))
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
-@pytest.mark.parametrize("bit_width", bit_widths)
-@pytest.mark.parametrize("dim", dimensions)
-def test_GPU(bit_width, dim):
-    test_tensor = torch.randint(0, 2**bit_width, (32, 32, 32), dtype=torch.uint8).cuda()
-    packed = pack(test_tensor, bit_width, dim=dim)
-    unpacked = unpack(packed, bit_width, dim=dim)
-    assert unpacked.allclose(test_tensor)
+    @parametrize("bit_width", bit_widths)
+    @parametrize("dim", dimensions)
+    @unittest.skipIf(not torch.cuda.is_available(), reason="CUDA not available")
+    def test_GPU(self, bit_width, dim):
+        """Round-trip random values through pack/unpack on a CUDA tensor."""
+        shape = (32, 32, 32)
+        source = torch.randint(0, 2**bit_width, shape, dtype=torch.uint8).cuda()
+        shards = pack(source, bit_width, dim=dim)
+        restored = unpack(shards, bit_width, dim=dim)
+        self.assertTrue(restored.allclose(source))
 
+    @parametrize("bit_width", bit_widths)
+    @parametrize("dim", dimensions)
+    @unittest.skipIf(not torch.cuda.is_available(), reason="CUDA not available")
+    @unittest.skipIf(not has_triton(), reason="unsupported without triton")
+    def test_compile(self, bit_width, dim):
+        """Round-trip through the torch.compile'd (fullgraph) pack/unpack on CUDA."""
+        torch._dynamo.config.specialize_int = True
+        compiled_pack = torch.compile(pack, fullgraph=True)  # keep the wrapper:
+        compiled_unpack = torch.compile(unpack, fullgraph=True)  # it must be called
+        shape = (32, 32, 32)
+        test_tensor = torch.randint(0, 2**bit_width, shape, dtype=torch.uint8).cuda()
+        packed = compiled_pack(test_tensor, bit_width, dim=dim)
+        unpacked = compiled_unpack(packed, bit_width, dim=dim)
+        self.assertTrue(unpacked.allclose(test_tensor))
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
-@pytest.mark.skipif(not has_triton(), reason="unsupported without triton")
-@pytest.mark.parametrize("bit_width", bit_widths)
-@pytest.mark.parametrize("dim", dimensions)
-def test_compile(bit_width, dim):
-    torch._dynamo.config.specialize_int = True
-    torch.compile(pack, fullgraph=True)
-    torch.compile(unpack, fullgraph=True)
-    test_tensor = torch.randint(0, 2**bit_width, (32, 32, 32), dtype=torch.uint8).cuda()
-    packed = pack(test_tensor, bit_width, dim=dim)
-    unpacked = unpack(packed, bit_width, dim=dim)
-    assert unpacked.allclose(test_tensor)
+    # these test cases are for the example pack walk through in the bitpacking.py file
+    @unittest.skipIf(not torch.cuda.is_available(), reason="CUDA not available")
+    def test_pack_example(self):
+        """Check the 6-bit pack walk-through values and the round trip on CUDA."""
+        test_tensor = torch.tensor(
+            [0x30, 0x29, 0x17, 0x5, 0x20, 0x16, 0x9, 0x22], dtype=torch.uint8
+        ).cuda()
+        shard_4, shard_2 = pack(test_tensor, 6)
+        expected_4 = torch.tensor([0, 105, 151, 37], dtype=torch.uint8).cuda()
+        expected_2 = torch.tensor([39, 146], dtype=torch.uint8).cuda()
+        self.assertTrue(expected_4.allclose(shard_4))
+        self.assertTrue(expected_2.allclose(shard_2))
+        unpacked = unpack([shard_4, shard_2], 6)
+        self.assertTrue(unpacked.allclose(test_tensor))
 
+    def test_pack_example_CPU(self):
+        """Check the 6-bit pack walk-through values and the round trip on CPU."""
+        values = [0x30, 0x29, 0x17, 0x5, 0x20, 0x16, 0x9, 0x22]
+        test_tensor = torch.tensor(values, dtype=torch.uint8)
+        shard_4, shard_2 = pack(test_tensor, 6)
+        expected_4 = torch.tensor([0, 105, 151, 37], dtype=torch.uint8)
+        self.assertTrue(expected_4.allclose(shard_4))
+        self.assertTrue(torch.tensor([39, 146], dtype=torch.uint8).allclose(shard_2))
+        unpacked = unpack([shard_4, shard_2], 6)
+        self.assertTrue(unpacked.allclose(test_tensor))
 
-# these test cases are for the example pack walk through in the bitpacking.py file
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
-def test_pack_example():
-    test_tensor = torch.tensor(
-        [0x30, 0x29, 0x17, 0x5, 0x20, 0x16, 0x9, 0x22], dtype=torch.uint8
-    ).cuda()
-    shard_4, shard_2 = pack(test_tensor, 6)
-    print(shard_4, shard_2)
-    assert torch.tensor([0, 105, 151, 37], dtype=torch.uint8).cuda().allclose(shard_4)
-    assert torch.tensor([39, 146], dtype=torch.uint8).cuda().allclose(shard_2)
-    unpacked = unpack([shard_4, shard_2], 6)
-    assert unpacked.allclose(test_tensor)
 
+instantiate_parametrized_tests(TestBitpacking)  # materialize @parametrize variants
 
-def test_pack_example_CPU():
-    test_tensor = torch.tensor(
-        [0x30, 0x29, 0x17, 0x5, 0x20, 0x16, 0x9, 0x22], dtype=torch.uint8
-    )
-    shard_4, shard_2 = pack(test_tensor, 6)
-    print(shard_4, shard_2)
-    assert torch.tensor([0, 105, 151, 37], dtype=torch.uint8).allclose(shard_4)
-    assert torch.tensor([39, 146], dtype=torch.uint8).allclose(shard_2)
-    unpacked = unpack([shard_4, shard_2], 6)
-    assert unpacked.allclose(test_tensor)
+if __name__ == "__main__":  # entry point when this file is executed directly
+    run_tests()
diff --git a/test/dtypes/test_nf4.py b/test/dtypes/test_nf4.py
@@ -11,7 +11,6 @@
 from collections import OrderedDict
 from typing import Tuple, Union
 
-import pytest
 import torch
 import torch.nn.functional as F
 from torch import nn
@@ -623,9 +622,9 @@ class TestQLoRA(FSDPTest):
     def world_size(self) -> int:
         return 2
 
-    @pytest.mark.skipif(
+    @unittest.skipIf(
         version.parse(torch.__version__).base_version < "2.4.0",
-        reason="torch >= 2.4 required",
+        "torch >= 2.4 required",
     )
     @skip_if_lt_x_gpu(2)
     def test_qlora_fsdp2(self):