import einops
import torch
import torch.nn as nn

class Fastformer(nn.Module):
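    """Single-head additive attention in the style of Fastformer
    (Wu et al., 2021, "Fastformer: Additive Attention Can Be All You Need").

    Note: this simplified variant takes the softmax over the feature
    dimension rather than over the sequence dimension used in the paper.
    """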
    def __init__(self, dim=3, decode_dim=16):
        super().__init__()
        # Projections for W_query, W_key and W_value
        self.weight_q = nn.Linear(dim, decode_dim, bias=False)
        self.weight_k = nn.Linear(dim, decode_dim, bias=False)
        self.weight_v = nn.Linear(dim, decode_dim, bias=False)
        self.weight_r = nn.Linear(decode_dim, decode_dim, bias=False)
        self.scale_factor = decode_dim ** -0.5

    def forward(self, x, mask=None):
        # x: (batch, seq_len, dim); mask is accepted for API compatibility but unused
        query = self.weight_q(x)
        key = self.weight_k(x)
        value = self.weight_v(x)
        b, n, d = query.shape

        # Calculate the global query: per-feature attention weights,
        # then a weighted sum over the sequence dimension
        alpha_weight = torch.softmax(query * self.scale_factor, dim=-1)
        global_query = query * alpha_weight
        global_query = torch.einsum('b n d -> b d', global_query)

        # Model the interaction between the global query vector and each key vector,
        # then pool the interaction vectors p into a global key
        repeat_global_query = einops.repeat(global_query, 'b d -> b copy d', copy=n)
        p = repeat_global_query * key
        beta_weight = torch.softmax(p * self.scale_factor, dim=-1)
        global_key = p * beta_weight
        global_key = torch.einsum('b n d -> b d', global_key)

        # Key-value interaction, then a linear transform plus a query residual
        key_value_interaction = torch.einsum('b j, b n j -> b n j', global_key, value)
        key_value_interaction_out = self.weight_r(key_value_interaction)
        result = key_value_interaction_out + query
        return result

if __name__ == '__main__':
    model = Fastformer(dim=3, decode_dim=8)
    x = torch.randn(4, 6, 3)
    result = model(x)
    print(result.size())
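    # Prints torch.Size([4, 6, 8]): (batch, seq_len, decode_dim)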