
Commit bbcba36
Updates
1 parent 6e88306

3 files changed: +51 -14 lines changed

benchmarks/microbenchmarks/test/benchmark_config.yml

Lines changed: 2 additions & 2 deletions
@@ -8,8 +8,8 @@ quantization_config_recipe_names:
   - "float8wo"
 # sparsity_config_recipe_names:
   # Will run a baseline inference for model by default, without sparsity for comparison
-  - "semi-sparse"
-  - "block"
+#   - "semi-sparse"
+#   - "block"
 output_dir: "benchmarks/microbenchmarks/results"
 model_params:
   - name: "small_bf16_linear"

test/test_model_architecture.py

Lines changed: 31 additions & 6 deletions
@@ -6,22 +6,47 @@

 import unittest

+import torch
+from parameterized import parameterized
+
 from torchao.testing.model_architectures import create_model_and_input_data
+from torchao.utils import get_available_devices


 class TestModels(unittest.TestCase):
-    def test_toy_linear_model(self):
-        model, input_data = create_model_and_input_data("linear", 10, 64, 32)
+    @parameterized.expand([(device,) for device in get_available_devices()])
+    def test_toy_linear_model(self, device):
+        # Skip if device is not available
+        if device == "cuda" and not torch.cuda.is_available():
+            self.skipTest("CUDA not available")
+
+        model, input_data = create_model_and_input_data(
+            "linear", 10, 64, 32, device=device
+        )
         output = model(input_data)
         self.assertEqual(output.shape, (10, 32))

-    def test_ln_linear_activation_model(self):
-        model, input_data = create_model_and_input_data("ln_linear_sigmoid", 10, 64, 32)
+    @parameterized.expand([(device,) for device in get_available_devices()])
+    def test_ln_linear_activation_model(self, device):
+        # Skip if device is not available
+        if device == "cuda" and not torch.cuda.is_available():
+            self.skipTest("CUDA not available")
+
+        model, input_data = create_model_and_input_data(
+            "ln_linear_sigmoid", 10, 64, 32, device=device
+        )
         output = model(input_data)
         self.assertEqual(output.shape, (10, 32))

-    def test_transformer_block(self):
-        model, input_data = create_model_and_input_data("transformer_block", 10, 64, 32)
+    @parameterized.expand([(device,) for device in get_available_devices()])
+    def test_transformer_block(self, device):
+        # Skip if device is not available
+        if device == "cuda" and not torch.cuda.is_available():
+            self.skipTest("CUDA not available")
+
+        model, input_data = create_model_and_input_data(
+            "transformer_block", 10, 64, 32, device=device
+        )
         output = model(input_data)
         self.assertEqual(output.shape, (10, 16, 64))
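A minimal sketch of the device-parameterization pattern introduced above (illustrative only, not part of the commit; it assumes get_available_devices() returns device strings such as "cpu" and, where present, "cuda"): parameterized.expand generates one test method per device, and the CUDA case is skipped on machines without a visible GPU.

    # Sketch: one generated test per available device, skipping CUDA if absent.
    import unittest

    import torch
    from parameterized import parameterized

    from torchao.utils import get_available_devices


    class DeviceParamExample(unittest.TestCase):
        @parameterized.expand([(device,) for device in get_available_devices()])
        def test_matmul_on_device(self, device):
            if device == "cuda" and not torch.cuda.is_available():
                self.skipTest("CUDA not available")
            x = torch.randn(4, 8, device=device)
            w = torch.randn(8, 2, device=device)
            self.assertEqual((x @ w).shape, (4, 2))


    if __name__ == "__main__":
        unittest.main()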

torchao/testing/model_architectures.py

Lines changed: 18 additions & 6 deletions
@@ -21,9 +21,7 @@ def forward(self, x):


 class LNLinearActivationModel(nn.Module):
-    def __init__(
-        self, fc_dim1, fc_dim2, dtype=torch.bfloat16, activation="sigmoid", device=None
-    ):
+    def __init__(self, fc_dim1, fc_dim2, dtype=torch.bfloat16, activation="sigmoid"):
         super().__init__()

         activation = activation.lower()
@@ -41,7 +39,7 @@ def __init__(
             raise ValueError(f"Unsupported activation: {activation}")

         self.ln = nn.LayerNorm(fc_dim1, elementwise_affine=False)
-        self.fc = nn.Linear(fc_dim1, fc_dim2, bias=False).to(dtype=dtype, device=device)
+        self.fc = nn.Linear(fc_dim1, fc_dim2, bias=False).to(dtype=dtype)
         self.activation = activation_map[activation]

     def forward(self, x):
@@ -50,6 +48,20 @@ def forward(self, x):
         return self.activation(x)


+class RMSNorm(nn.Module):
+    def __init__(self, dim: int, eps: float = 1e-5):
+        super().__init__()
+        self.eps = eps
+        self.weight = nn.Parameter(torch.ones(dim))
+
+    def _norm(self, x):
+        return x * torch.rsqrt(torch.mean(x * x, dim=-1, keepdim=True) + self.eps)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        output = self._norm(x.float()).type_as(x)
+        return output * self.weight
+
+
 class TransformerBlock(torch.nn.Module):
     def __init__(self, hidden_dim, num_heads=8, mlp_ratio=4, dtype=torch.bfloat16):
         super().__init__()
@@ -72,8 +84,8 @@ def __init__(self, hidden_dim, num_heads=8, mlp_ratio=4, dtype=torch.bfloat16):
         )

         # Layer norms
-        self.norm1 = nn.RMSNorm(hidden_dim, dtype=dtype)
-        self.norm2 = nn.RMSNorm(hidden_dim, dtype=dtype)
+        self.norm1 = RMSNorm(hidden_dim).to(dtype)
+        self.norm2 = RMSNorm(hidden_dim).to(dtype)

         # Activation
         self.activation = torch.nn.GELU()
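
The hand-rolled RMSNorm added above follows the standard x * rsqrt(mean(x^2) + eps) formulation, presumably so TransformerBlock no longer depends on nn.RMSNorm, which only exists in newer PyTorch releases. As a sketch (not part of the commit), where torch.nn.RMSNorm is available it can be cross-checked against the custom module for the same eps:

    # Illustrative cross-check of the custom RMSNorm against torch.nn.RMSNorm
    # (only present in newer PyTorch releases); not part of this commit.
    import torch

    from torchao.testing.model_architectures import RMSNorm

    x = torch.randn(2, 16)
    custom = RMSNorm(16)  # eps defaults to 1e-5, weight initialized to ones

    if hasattr(torch.nn, "RMSNorm"):
        builtin = torch.nn.RMSNorm(16, eps=1e-5)
        # Both start with unit weights, so the outputs should agree closely.
        torch.testing.assert_close(custom(x), builtin(x), rtol=1e-4, atol=1e-5)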
