refine SPINN arch code: generalize tensor contraction and support multiple output keys

HydrogenSulfate · HydrogenSulfate · commit bbfadc027f26 · 2024-08-04T15:18:56.000+08:00
diff --git a/examples/spinn/helmholtz3d.py b/examples/spinn/helmholtz3d.py
@@ -41,24 +41,24 @@ def _helmholtz3d_exact_u(a1, a2, a3, x, y, z):
 
 
 def _helmholtz3d_source_term(a1, a2, a3, x, y, z, lda=1.0):
-    u_gt = _helmholtz3d_exact_u(a1, a2, a3, x, y, z)
+    u_gt = _helmholtz3d_exact_u(a1, a2, a3, x, y, z)[..., None]
     uxx = -((a1 * np.pi) ** 2) * u_gt
     uyy = -((a2 * np.pi) ** 2) * u_gt
     uzz = -((a3 * np.pi) ** 2) * u_gt
     return uxx + uyy + uzz + lda * u_gt
 
 
 def generate_train_helmholtz3d(a1, a2, a3, nc):
-    xc = np.random.uniform(-1.0, 1.0, [nc]).astype(dtype)
-    yc = np.random.uniform(-1.0, 1.0, [nc]).astype(dtype)
-    zc = np.random.uniform(-1.0, 1.0, [nc]).astype(dtype)
+    xc = np.random.uniform(-1.0, 1.0, [nc, 1]).astype(dtype)
+    yc = np.random.uniform(-1.0, 1.0, [nc, 1]).astype(dtype)
+    zc = np.random.uniform(-1.0, 1.0, [nc, 1]).astype(dtype)
     # source term
     xcm, ycm, zcm = np.meshgrid(xc, yc, zc, indexing="ij")
     uc = _helmholtz3d_source_term(a1, a2, a3, xcm, ycm, zcm).astype(dtype)
     # boundary (hard-coded)
     xb = [
-        np.asarray([1.0], dtype=dtype),
-        np.asarray([-1.0], dtype=dtype),
+        np.asarray([[1.0]], dtype=dtype),
+        np.asarray([[-1.0]], dtype=dtype),
         xc,
         xc,
         xc,
@@ -67,8 +67,8 @@ def generate_train_helmholtz3d(a1, a2, a3, nc):
     yb = [
         yc,
         yc,
-        np.asarray([1.0], dtype=dtype),
-        np.asarray([-1.0], dtype=dtype),
+        np.asarray([[1.0]], dtype=dtype),
+        np.asarray([[-1.0]], dtype=dtype),
         yc,
         yc,
     ]
@@ -77,8 +77,8 @@ def generate_train_helmholtz3d(a1, a2, a3, nc):
         zc,
         zc,
         zc,
-        np.asarray([1.0], dtype=dtype),
-        np.asarray([-1.0], dtype=dtype),
+        np.asarray([[1.0]], dtype=dtype),
+        np.asarray([[-1.0]], dtype=dtype),
     ]
     return xc, yc, zc, uc, xb, yb, zb
 
@@ -88,7 +88,7 @@ def generate_test_helmholtz3d(a1, a2, a3, nc_test):
     y = np.linspace(-1.0, 1.0, nc_test, dtype=dtype)
     z = np.linspace(-1.0, 1.0, nc_test, dtype=dtype)
     xm, ym, zm = np.meshgrid(x, y, z, indexing="ij")
-    u_gt = _helmholtz3d_exact_u(a1, a2, a3, xm, ym, zm).astype(dtype)
+    u_gt = _helmholtz3d_exact_u(a1, a2, a3, xm, ym, zm).astype(dtype)[..., None]
     x = x.reshape(-1, 1)
     y = y.reshape(-1, 1)
     z = z.reshape(-1, 1)
@@ -120,7 +120,7 @@ def _gen(self):
             self.xc = xc
             self.yc = yc
             self.zc = zc
-            self.uc = uc[..., np.newaxis]
+            self.uc = uc
 
         def __call__(self):
             self.iter += 1
diff --git a/ppsci/arch/spinn.py b/ppsci/arch/spinn.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,10 +15,12 @@
 from __future__ import annotations
 
 from typing import Dict
+from typing import List
 from typing import Optional
 from typing import Tuple
 from typing import Union
 
+import paddle
 import paddle.nn as nn
 
 from ppsci.arch import base
@@ -27,12 +29,12 @@
 
 
 class SPINN(base.Arch):
-    """
-    SPINN: Sparse Interaction Neural Network
+    """Separable Physics-Informed Neural Networks.
+
     Args:
         input_keys (Tuple[str, ...]): Keys of input variables.
         output_keys (Tuple[str, ...]): Keys of output variables.
-        r (int): Number of features for each output.
+        r (int): Number of features for each output dimension.
         num_layers (int): Number of layers.
         hidden_size (Union[int, Tuple[int, ...]]): Size of hidden layer.
         activation (str, optional): Name of activation function.
@@ -46,17 +48,19 @@ class SPINN(base.Arch):
         >>> from ppsci.arch import SPINN
         >>> model = SPINN(
         ...     input_keys=('x', 'y', 'z'),
-        ...     output_keys=('u',),
-        ...     r=316,
+        ...     output_keys=('u', 'v'),
+        ...     r=32,
         ...     num_layers=4,
         ...     hidden_size=32,
         ... )
-        >>> input_dict = {"x": paddle.rand([10]),
-        ...               "y": paddle.rand([10]),
-        ...               "z": paddle.rand([10])}
+        >>> input_dict = {"x": paddle.rand([3, 1]),
+        ...               "y": paddle.rand([4, 1]),
+        ...               "z": paddle.rand([5, 1])}
         >>> output_dict = model(input_dict)
         >>> print(output_dict["u"].shape)
-        [10, 10, 10, 1]
+        [3, 4, 5, 1]
+        >>> print(output_dict["v"].shape)
+        [3, 4, 5, 1]
     """
 
     def __init__(
@@ -106,18 +110,45 @@ def _init_weights(self):
                 initializer.glorot_normal_(m.weight)
                 initializer.zeros_(m.bias)
 
-    def forward_tensor(self, x, y, z):
+    def _tensor_contraction(self, x: paddle.Tensor, y: paddle.Tensor) -> paddle.Tensor:
+        """Broadcast-multiply two tensors over their leading dims, keeping the last channel.
+
+        Args:
+            x (Tensor): Input tensor with shape [*N, C].
+            y (Tensor): Input tensor with shape [*M, C].
+
+        Returns:
+            Tensor: Output tensor with shape [*N, *M, C].
+        """
+        x_ndim = x.ndim
+        y_ndim = y.ndim
+        out_dim = x_ndim + y_ndim - 1
+
+        # Align the dimensions of x and y to out_dim
+        if x_ndim < out_dim:
+            # Insert singleton dimensions into x just before its last (channel) dimension
+            x = x.unsqueeze([-2] * (out_dim - x_ndim))
+        if y_ndim < out_dim:
+            # Add singleton dimensions to y at the beginning of its dimensions
+            y = y.unsqueeze([0] * (out_dim - y_ndim))
+
+        # Multiply x and y with implicit broadcasting
+        out = x * y
+
+        return out
+
+    def forward_tensor(self, x, y, z) -> List[paddle.Tensor]:
         # forward each dim branch
         feature_f = []
         for i, input_var in enumerate((x, y, z)):
-            input_i = {self.input_keys[i]: input_var.unsqueeze(1)}
+            input_i = {self.input_keys[i]: input_var}
             output_f_i = self.branch_nets[i](input_i)
             feature_f.append(output_f_i["f"])  # [B, r*output_dim]
 
-        # dot product and sum over all branch outputs and
         output = []
         for i, key in enumerate(self.output_keys):
             st, ed = i * self.r, (i + 1) * self.r
+            # do tensor contraction and sum over all branch outputs
             if ed - st == self.r:
                 output_i = feature_f[0]
             else:
@@ -128,20 +159,18 @@ def forward_tensor(self, x, y, z):
                     output_ii = feature_f[j]
                 else:
                     output_ii = feature_f[j][:, st:ed]
-                if j != len(self.input_keys) - 1:
-                    output_i = output_i.unsqueeze(1) * output_ii.unsqueeze(0)
-                else:
-                    output_i = (
-                        output_i.unsqueeze(2) * output_ii.unsqueeze(0).unsqueeze(0)
-                    ).sum(axis=-1, keepdim=True)
+                output_i = self._tensor_contraction(output_i, output_ii)
+
+            output_i = output_i.sum(-1, keepdim=True)
             output.append(output_i)
 
-        return output[-1]
+        return output
 
     def forward(self, x):
         if self._input_transform is not None:
             x = self._input_transform(x)
-        output = [self.forward_tensor(x["x"], x["y"], x["z"])]
+
+        output = self.forward_tensor(*[x[key] for key in self.input_keys])
 
         output = {key: output[i] for i, key in enumerate(self.output_keys)}
 
diff --git a/ppsci/equation/pde/helmholtz.py b/ppsci/equation/pde/helmholtz.py
@@ -38,7 +38,7 @@ def hvp_revrev(f: Callable, primals: Tuple[paddle.Tensor, ...]) -> paddle.Tensor
     # TODO: Merge this option into ppsci.autodiff.ad
     g = lambda primals: paddle.incubate.autograd.jvp(f, primals)[1]
     tangents_out = paddle.incubate.autograd.jvp(g, primals)[1]
-    return tangents_out
+    return tangents_out[0]
 
 
 class Helmholtz(base.PDE):