test_transformer.py
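"""Tests for the continual Transformer Encoder.

Each test builds a regular ``torch.nn.TransformerEncoder``, converts it with
``co.TransformerEncoder.build_from``, and checks that (1) the continual
module's output matches the regular one, (2) step-wise inference via
``forward_step`` reproduces the full forward pass, and (3) the FLOPs of a
single step are bounded by those of a full forward pass.
"""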
import torch
from ptflops import get_model_complexity_info
from torch import nn

import continual as co


def test_trans_enc_b1():
    T = 10  # temporal sequence length
    E = 4  # embedding dimension
    N = 1  # batch size
    H = 2  # num heads

    encoder_layer = nn.TransformerEncoderLayer(
        d_model=E, nhead=H, dim_feedforward=E * 2, dropout=0.0, batch_first=True
    )
    regenc = nn.TransformerEncoder(
        encoder_layer,
        num_layers=1,
        norm=nn.LayerNorm(E),
    )
    enc = co.TransformerEncoder.build_from(regenc, sequence_len=T)

    # NB: regular and continual transformers expect different input formats:
    # (N, T, E) for the regular encoder (batch_first=True), (N, E, T) for the
    # continual one
    query = torch.randn((N, E, T))
    query_reg_format = query.permute(0, 2, 1)  # N, T, E

    # Baseline: the continual encoder should reproduce the regular output exactly
    oreg = regenc.forward(query_reg_format)
    o = enc.forward(query)
    assert torch.allclose(oreg.permute(0, 2, 1), o)

    # Forward step: prime the state with the first T - 1 steps, then take one step
    o_step = enc.forward_steps(query[:, :, :-1])  # init
    o_step = enc.forward_step(query[:, :, -1])
    assert torch.allclose(o[:, :, -1], o_step)

    # FLOPs
    flops, _ = get_model_complexity_info(
        enc,
        (E, T),
        as_strings=False,
    )

    enc.call_mode = "forward_step"
    step_flops, _ = get_model_complexity_info(
        enc,
        (E,),
        as_strings=False,
    )

    # A single continual step should cost at most 1/T of the full forward pass
    assert step_flops <= flops / T
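

# Illustrative usage sketch (an addition, not one of the original tests):
# once the continual encoder's state has been primed with `forward_steps`,
# newly arriving time steps can be processed one at a time via `forward_step`.
# The helper name and `stream` variable below are hypothetical; the API calls
# mirror those exercised in the tests above.
def _streaming_sketch():
    T, E, N, H = 10, 4, 1, 2
    layer = nn.TransformerEncoderLayer(
        d_model=E, nhead=H, dim_feedforward=E * 2, dropout=0.0, batch_first=True
    )
    enc = co.TransformerEncoder.build_from(
        nn.TransformerEncoder(layer, num_layers=1, norm=nn.LayerNorm(E)),
        sequence_len=T,
    )
    stream = torch.randn((N, E, T + 5))  # e.g. frames arriving over time
    enc.forward_steps(stream[:, :, : T - 1])  # prime the internal state
    # Each call consumes one (N, E) frame and returns one (N, E) output
    return [
        enc.forward_step(stream[:, :, t]) for t in range(T - 1, stream.shape[-1])
    ]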


def test_trans_enc_b2():
    T = 10  # temporal sequence length
    E = 4  # embedding dimension
    N = 1  # batch size
    H = 2  # num heads

    encoder_layer = nn.TransformerEncoderLayer(
        d_model=E, nhead=H, dim_feedforward=E * 2, dropout=0.0, batch_first=True
    )
    regenc = nn.TransformerEncoder(
        encoder_layer,
        num_layers=2,
        # norm=nn.LayerNorm(E),
    )
    enc = co.TransformerEncoder.build_from(regenc, sequence_len=T)

    # NB: regular and continual transformers expect different input formats
    query = torch.randn((N, E, T))
    query_reg_format = query.permute(0, 2, 1)  # N, T, E

    # Baseline: with two layers, the continual encoder emits only the most
    # recent step, so compare against the last step of the regular output
    oreg = regenc.forward(query_reg_format)
    o = enc.forward(query)
    assert torch.allclose(oreg.permute(0, 2, 1)[:, :, -1], o.squeeze(-1))

    # Forward step
    o_step = enc.forward_steps(query[:, :, :-1])  # init
    o_step = enc.forward_step(query[:, :, -1], update_state=False)
    assert torch.allclose(o[:, :, -1], o_step, atol=1e-7)

    # Same result with forward_steps (the state was left untouched above)
    o_step2 = enc.forward_steps(query[:, :, -1].unsqueeze(-1))
    assert torch.allclose(o_step, o_step2.squeeze(-1))

    # FLOPs
    flops, _ = get_model_complexity_info(
        enc,
        (E, T),
        as_strings=False,
    )

    enc.call_mode = "forward_step"
    step_flops, _ = get_model_complexity_info(
        enc,
        (E,),
        as_strings=False,
    )

    # A single continual step should be no more expensive than a full forward pass
    assert step_flops <= flops
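

# Minimal direct runner (a convenience addition; the tests are normally
# collected and run by pytest).
if __name__ == "__main__":
    test_trans_enc_b1()
    test_trans_enc_b2()
    print("All continual transformer encoder tests passed.")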