bayesflow-org · stefanradev93 · May 8, 2025 · May 8, 2025 · May 8, 2025 · May 8, 2025
diff --git a/bayesflow/links/positive_definite.py b/bayesflow/links/positive_definite.py
@@ -1,7 +1,7 @@
 import keras
 
 from bayesflow.types import Tensor
-from bayesflow.utils import layer_kwargs, fill_triangular_matrix
+from bayesflow.utils import layer_kwargs, fill_triangular_matrix, positive_diag
 from bayesflow.utils.serialization import serializable
 
 
@@ -11,16 +11,21 @@ class PositiveDefinite(keras.Layer):
 
     def __init__(self, **kwargs):
         super().__init__(**layer_kwargs(kwargs))
-        self.built = True
+
+        self.layer_norm = keras.layers.LayerNormalization()
 
     def call(self, inputs: Tensor) -> Tensor:
-        # Build cholesky factor from inputs
-        L = fill_triangular_matrix(inputs, positive_diag=True)
+        # normalize activations so that, at initialization, they have mean = 0.0 and std = 0.1
+        inputs = self.layer_norm(inputs) / 10
+
+        # form a cholesky factor
+        L = fill_triangular_matrix(inputs)
+        L = positive_diag(L)
 
-        # calculate positive definite matrix from cholesky factors
+        # calculate positive definite matrix from cholesky factors:
         psd = keras.ops.matmul(
             L,
-            keras.ops.moveaxis(L, -2, -1),  # L transposed
+            keras.ops.swapaxes(L, -2, -1),  # L transposed
         )
         return psd
 
@@ -31,13 +36,14 @@ def compute_output_shape(self, input_shape):
 
     def compute_input_shape(self, output_shape):
         """
-        Returns the shape of parameterization of a cholesky factor triangular matrix.
+        Returns the shape of parameterization of a Cholesky factor triangular matrix.
 
-        There are m nonzero elements of a lower triangular nxn matrix with m = n * (n + 1) / 2.
+        There are :math:`m` nonzero elements of a lower triangular :math:`n \\times n` matrix with
+        :math:`m = n (n + 1) / 2`, so for output shape (..., n, n) the returned shape is (..., m).
 
-        Example
-        -------
-        >>> PositiveDefinite().compute_output_shape((None, 3, 3))
+        Examples
+        --------
+        >>> PositiveDefinite().compute_input_shape((None, 3, 3))
         6
         """
         n = output_shape[-1]

diff --git a/bayesflow/utils/__init__.py b/bayesflow/utils/__init__.py
@@ -88,14 +88,15 @@
     expand_right_as,
     expand_right_to,
     expand_tile,
+    fill_triangular_matrix,
     pad,
+    positive_diag,
     searchsorted,
     size_of,
     stack_valid,
     tile_axis,
     tree_concatenate,
     tree_stack,
-    fill_triangular_matrix,
     weighted_mean,
 )
 

diff --git a/bayesflow/utils/tensor_utils.py b/bayesflow/utils/tensor_utils.py
@@ -310,8 +310,6 @@
         Batch of flattened nonzero matrix elements for triangular matrix.
     upper : bool
         Return upper triangular matrix if True, else lower triangular matrix. Default is False.
-    positive_diag : bool
-        Whether to apply a softplus operation to diagonal elements. Default is False.
 
     Returns
     -------
@@ -327,47 +325,70 @@
     batch_shape = x.shape[:-1]
     m = x.shape[-1]
 
-    if m == 1:
-        y = keras.ops.reshape(x, (-1, 1, 1))
-        if positive_diag:
-            y = keras.activations.softplus(y)
-        return y
-
-    # Calculate matrix shape
-    n = (0.25 + 2 * m) ** 0.5 - 0.5
-    if not np.isclose(np.floor(n), n):
-        raise ValueError(f"Input right-most shape ({m}) does not correspond to a triangular matrix.")
-    else:
-        n = int(n)
-
-    # Trick: Create triangular matrix by concatenating with a flipped version of its tail, then reshape.
-    x_tail = keras.ops.take(x, indices=list(range((m - (n**2 - m)), x.shape[-1])), axis=-1)
-    if not upper:
-        y = keras.ops.concatenate([x_tail, keras.ops.flip(x, axis=-1)], axis=len(batch_shape))
-        y = keras.ops.reshape(y, (-1, n, n))
-        y = keras.ops.tril(y)
-
-        if positive_diag:
-            y_offdiag = keras.ops.tril(y, k=-1)
-            # carve out diagonal, by setting upper and lower offdiagonals to zero
-            y_diag = keras.ops.tril(
-                keras.ops.triu(keras.activations.softplus(y)),  # apply softplus to enforce positivity
+    if m > 1:  # Matrix is larger than 1x1
+        # Calculate matrix shape
+        n = (0.25 + 2 * m) ** 0.5 - 0.5
+        if not np.isclose(np.floor(n), n):
+            raise ValueError(f"Input right-most shape ({m}) does not correspond to a triangular matrix.")
+        else:
+            n = int(n)
+
+        # Trick: Create triangular matrix by concatenating with a flipped version of itself, then reshape.
+        if not upper:
+            x_list = [x, keras.ops.flip(x[..., n:], axis=-1)]
+
+            y = keras.ops.concatenate(x_list, axis=len(batch_shape))
+            y = keras.ops.reshape(y, (-1, n, n))
+            y = keras.ops.tril(y)
+
+        else:
+            x_list = [x[..., n:], keras.ops.flip(x, axis=-1)]
+
+            y = keras.ops.concatenate(x_list, axis=len(batch_shape))
+            y = keras.ops.reshape(y, (-1, n, n))
+            y = keras.ops.triu(
+                y,
             )
-            y = y_diag + y_offdiag
 
-    else:
-        y = keras.ops.concatenate([x, keras.ops.flip(x_tail, axis=-1)], axis=len(batch_shape))
-        y = keras.ops.reshape(y, (-1, n, n))
-        y = keras.ops.triu(
-            y,
-        )
-
-        if positive_diag:
-            y_offdiag = keras.ops.triu(y, k=1)
-            # carve out diagonal, by setting upper and lower offdiagonals to zero
-            y_diag = keras.ops.tril(
-                keras.ops.triu(keras.activations.softplus(y)),  # apply softplus to enforce positivity
-            )
-            y = y_diag + y_offdiag
+    else:  # Matrix is 1x1
+        y = keras.ops.reshape(x, (-1, 1, 1))
 
     return y
+
+
+def positive_diag(x: Tensor, method="default") -> Tensor:
+    """
+    Ensures that matrix elements on diagonal are positive, leaving off-diagonal elements unchanged.
+
+    Parameters
+    ----------
+    x : Tensor of shape (batch_size, n, n)
+        Batch of matrices.
+    method : str, optional
+        Method by which to ensure positivity of diagonal entries. Choose from
+        - "shifted_softplus": softplus(x + 0.5413), where 0.5413 is approximately log(e - 1)
+        - "exp": exp(x)
+        Both methods map a matrix filled with zeros to the unit matrix.
+        Default is "default", which is an alias for "shifted_softplus".
+
+    Returns
+    -------
+    Tensor of shape (batch_size, n, n)
+
+    Raises
+    ------
+    ValueError
+        If ``method`` is not one of "default", "shifted_softplus" or "exp".
+    """
+    match method:
+        case "default" | "shifted_softplus":
+            x_positive = keras.activations.softplus(x + 0.5413)
+        case "exp":
+            x_positive = keras.ops.exp(x)
+        case _:
+            raise ValueError(f"Unknown method {method!r}; choose 'default', 'shifted_softplus' or 'exp'.")
+
+    # transformed values on the diagonal only, untransformed values strictly above and below it
+    x_diag_positive = keras.ops.tril(keras.ops.triu(x_positive))
+    x_offdiag = keras.ops.triu(x, k=1) + keras.ops.tril(x, k=-1)
+    return x_diag_positive + x_offdiag