use torch for rms_norm benchmarking

yuanheng-zhao · yuanheng-zhao · commit 8aa5fd82b687 · 2024-01-30T13:31:07.000+08:00
diff --git a/tests/test_infer_ops/triton/test_rmsnorm_triton.py b/tests/test_infer_ops/triton/test_rmsnorm_triton.py
@@ -3,7 +3,6 @@
 import triton
 from packaging import version
 from transformers.models.llama.modeling_llama import LlamaRMSNorm
-from vllm.model_executor.layers.layernorm import RMSNorm
 
 from colossalai.kernel.triton import rms_layernorm
 from colossalai.testing.utils import parameterize
@@ -36,7 +35,8 @@ def test_layer_norm(M, N):
     y_triton = rms_layernorm(x, weight, eps=eps)
     y_llama = rms_norm.forward(x).to(dtype)
 
-    assert torch.allclose(y_triton, y_llama, atol=1e-5, rtol=1e-5)
+    assert y_triton.shape == y_llama.shape
+    assert torch.allclose(y_triton, y_llama, atol=1e-5, rtol=1e-3)
 
 
 # Triton benchmark plot attributions
@@ -45,8 +45,8 @@ def test_layer_norm(M, N):
         x_names=["SEQUENCE_TOTAL"],
         x_vals=[i for i in range(128, 1025, 128)],
         line_arg="provider",
-        line_vals=["vllm_rms_layernorm", "triton_rms_layernorm"],
-        line_names=["vllm_rms_layernorm", "triton_rms_layernorm"],
+        line_vals=["torch_rms_layernorm", "triton_rms_layernorm"],
+        line_names=["torch_rms_layernorm", "triton_rms_layernorm"],
         styles=[("red", "-"), ("blue", "-")],
         ylabel="ms",
         plot_name=f"RMSNorm benchmarking results",
@@ -69,10 +69,10 @@ def benchmark_rms_layernorm(
     x_shape = (SEQUENCE_TOTAL, HIDDEN_SIZE)
     w_shape = (x_shape[-1],)
     weight = torch.ones(w_shape, dtype=dtype, device="cuda")
-    vllm_norm = RMSNorm(hidden_size=HIDDEN_SIZE).to(dtype=dtype, device="cuda")
+    torch_norm = LlamaRMSNorm(hidden_size=HIDDEN_SIZE).to(dtype=dtype, device="cuda")
     x = -2.3 + 0.5 * torch.randn(x_shape, dtype=dtype, device="cuda")
-    if provider == "vllm_rms_layernorm":
-        fn = lambda: vllm_norm(x)
+    if provider == "torch_rms_layernorm":
+        fn = lambda: torch_norm(x)
     elif provider == "triton_rms_layernorm":
         fn = lambda: rms_layernorm(x, weight, eps=eps)
     else: