pytorch · andrewor14 · Mar 14, 2025 · Mar 13, 2025
diff --git a/test/prototype/module_swap_quantization/test_kmeans_codebook.py b/test/prototype/module_swap_quantization/test_kmeans_codebook.py
@@ -0,0 +1,56 @@
+import copy
+import unittest
+from typing import Union
+
+import torch
+import torch.nn as nn
+
+from torchao.prototype.quantization.module_swap import (
+    CodeBookQuantizer,
+    QuantizedLinear,
+)
+from torchao.prototype.quantization.module_swap.algorithms import kmeans_codebook
+
+
+class SimpleTestNetwork(nn.Module):
+    """One bias-free 16 -> 8 ``QuantizedLinear`` using a ``CodeBookQuantizer``."""
+
+    def __init__(self, weight_group_size: Union[int, str] = "per_channel") -> None:
+        super().__init__()
+        # ``weight_group_size`` is only validated here; it is not forwarded to
+        # the quantizer or to the layer built below.
+        group_size = 8 if weight_group_size == "per_channel" else weight_group_size
+        assert isinstance(group_size, int)
+
+        self.linear = QuantizedLinear(
+            in_features=16,
+            out_features=8,
+            bias=False,
+            weight_quantizer=CodeBookQuantizer(n_bits=2, features=16, codebook_dim=2),
+            activation_bits=8,
+            input_quantization=False,
+            output_quantization=False,
+            weight_quantization=True,
+            activation_quantization=False,
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply the quantized linear layer to ``x``."""
+        layer_out = self.linear(x)
+        return layer_out
+
+
+class TestKmeansCodebook(unittest.TestCase):
+    """Check that ``kmeans_codebook`` changes the weight quantizer's codebook."""
+
+    @unittest.skip("No module named 'faiss'")
+    def test_kmeans_codebook(self) -> None:
+        net = SimpleTestNetwork()
+        initial_codebook = copy.deepcopy(net.linear.weight_quantizer.codebook)
+        kmeans_codebook(net)
+        updated_codebook = net.linear.weight_quantizer.codebook
+        assert not torch.allclose(initial_codebook, updated_codebook)
+
+
+if __name__ == "__main__":  # entry point when this file is run as a script
+    unittest.main()
diff --git a/test/prototype/module_swap_quantization/test_llm_ptq_data_getter.py b/test/prototype/module_swap_quantization/test_llm_ptq_data_getter.py
@@ -0,0 +1,35 @@
+import unittest
+from typing import Tuple
+
+import torch
+from transformers.models.llama.modeling_llama import LlamaConfig, LlamaForCausalLM
+
+from torchao.prototype.quantization.module_swap.data_getters import LLMPTQDataGetter
+
+test_config = LlamaConfig(  # small Llama config used to build the test model
+    vocab_size=10,  # also the exclusive upper bound for the random token ids
+    hidden_size=32,
+    num_hidden_layers=2,
+    num_attention_heads=2,
+    intermediate_size=64,
+)
+
+
+def get_test_llama_model_data() -> Tuple[LlamaForCausalLM, torch.Tensor]:
+    """Build a Llama model from ``test_config`` and random ``(1, 10)`` input ids."""
+    llama = LlamaForCausalLM(test_config)
+    return llama, torch.randint(0, test_config.vocab_size, (1, 10))
+
+
+class TestPTQDataGetter(unittest.TestCase):
+    @unittest.skip("TypeError: cannot unpack non-iterable NoneType object")
+    def test_data_getter(self) -> None:  # smoke test: pop() once per nn.Linear
+        llama, input_ids = get_test_llama_model_data()
+        getter = LLMPTQDataGetter(llama, input_ids, 1)
+        for layer_name, layer in llama.named_modules():
+            if isinstance(layer, torch.nn.Linear):
+                _popped = getter.pop(llama, layer_name)  # value is not inspected
+
+
+if __name__ == "__main__":  # entry point when this file is run as a script
+    unittest.main()
diff --git a/test/prototype/module_swap_quantization/test_module_swap.py b/test/prototype/module_swap_quantization/test_module_swap.py
@@ -0,0 +1,35 @@
+import unittest
+
+import torch
+import torch.nn as nn
+
+from torchao.prototype.quantization.module_swap import (
+    QuantizationRecipe,
+    quantize_module_swap,
+)
+
+
+class SimpleEmbeddingTestNetwork(nn.Module):  # one nn.Embedding: 10 rows x 64 dims
+    def __init__(self) -> None:
+        super().__init__()
+        self.embedding = nn.Embedding(num_embeddings=10, embedding_dim=64)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.embedding(x)  # one 64-d row per index in ``x``
+
+
+class TestEmbeddingSwap(unittest.TestCase):
+    def test_embedding_swap(self) -> None:
+        """Apply a 4-bit embedding recipe, run a forward pass, check the quantizer."""
+        recipe = QuantizationRecipe()
+        recipe.embedding_bits = 4
+        recipe.embedding_quantization = True
+        swapped = quantize_module_swap(SimpleEmbeddingTestNetwork(), recipe)
+        swapped(torch.randint(0, 10, (10, 64)))  # forward pass on random indices
+        quantizer = swapped.embedding.weight_quantizer
+        assert quantizer.num_bits == 4
+        assert quantizer.group_size == 32
+
+
+if __name__ == "__main__":  # entry point when this file is run as a script
+    unittest.main()
diff --git a/test/prototype/module_swap_quantization/test_module_swap_quantization_utils.py b/test/prototype/module_swap_quantization/test_module_swap_quantization_utils.py
@@ -0,0 +1,65 @@
+import unittest
+
+import torch
+from transformers.models.llama.modeling_llama import LlamaConfig, LlamaForCausalLM
+
+from torchao.prototype.quantization.module_swap import QuantizedLinear
+from torchao.prototype.quantization.module_swap.module_swap import (
+    QuantizationRecipe,
+    replace_all_linear_with_quantized_linear,
+)
+from torchao.prototype.quantization.module_swap.utils import set_bit_widths_by_name
+
+test_config = LlamaConfig(  # small Llama config used to build the test model
+    vocab_size=10,  # also the exclusive upper bound for the random token ids
+    hidden_size=32,
+    num_hidden_layers=1,
+    num_attention_heads=2,
+    intermediate_size=64,
+)
+
+base_recipe = QuantizationRecipe(  # shared recipe used for the linear-layer swap below
+    weight_bits=4,  # the test later overrides the per-layer width to 7
+    weight_group_size=32,
+    weight_quantization=True,
+    dynamic_weights=False,
+    activation_bits=8,  # the test later overrides the per-layer width to 9
+    activation_group_size="per_token",
+    activation_quantization=True,
+    input_quantization=True,  # the test asserts input_quantizer is not None
+    output_quantization=True,  # the test asserts output_quantizer is not None
+    dynamic_activations=True,
+    range_learning=False,
+    exclude_layers=["lm_head"],  # presumably keeps the output head unswapped - confirm
+)
+
+
+def get_test_llama_model_data() -> tuple[LlamaForCausalLM, torch.Tensor]:
+    """Build a Llama model from ``test_config`` and random ``(1, 10)`` input ids."""
+    causal_lm = LlamaForCausalLM(test_config)
+    return causal_lm, torch.randint(0, test_config.vocab_size, (1, 10))
+
+
+class TestQuantizedModuleUtils(unittest.TestCase):
+    def test_set_bit_widths_by_name(self) -> None:
+        """Set weight/activation bits by name and verify each QuantizedLinear."""
+        model, _ = get_test_llama_model_data()
+        replace_all_linear_with_quantized_linear(model, base_recipe)
+        bit_width_dict = {
+            name: {"weight": 7, "activation": 9}
+            for name, module in model.named_modules()
+            if isinstance(module, QuantizedLinear)
+        }
+        # Guard: the checks below would pass vacuously if nothing was swapped.
+        assert bit_width_dict, "no QuantizedLinear modules found after the swap"
+        set_bit_widths_by_name(model, bit_width_dict)
+        for module in model.modules():
+            if isinstance(module, QuantizedLinear):
+                assert module.weight_quantizer.num_bits == 7
+                for quantizer in (module.input_quantizer, module.output_quantizer):
+                    assert quantizer is not None
+                    assert quantizer.num_bits == 9
+
+
+if __name__ == "__main__":  # entry point when this file is run as a script
+    unittest.main()