Skip to content

Commit e329f4b

Browse files
authored
fix trainable parameters in distributions (#520)
1 parent 057f3fd commit e329f4b

File tree

3 files changed

+21
-19
lines changed

3 files changed

+21
-19
lines changed

bayesflow/distributions/diagonal_normal.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ def __init__(
         self.seed_generator = seed_generator or keras.random.SeedGenerator()

         self.dim = None
-        self.log_normalization_constant = None
         self._mean = None
         self._std = None

@@ -71,17 +70,18 @@ def build(self, input_shape: Shape) -> None:
         self.mean = ops.cast(ops.broadcast_to(self.mean, (self.dim,)), "float32")
         self.std = ops.cast(ops.broadcast_to(self.std, (self.dim,)), "float32")

-        self.log_normalization_constant = -0.5 * self.dim * math.log(2.0 * math.pi) - ops.sum(ops.log(self.std))
-
         if self.trainable_parameters:
             self._mean = self.add_weight(
                 shape=ops.shape(self.mean),
-                initializer=keras.initializers.get(self.mean),
+                initializer=keras.initializers.get(keras.ops.copy(self.mean)),
                 dtype="float32",
                 trainable=True,
             )
             self._std = self.add_weight(
-                shape=ops.shape(self.std), initializer=keras.initializers.get(self.std), dtype="float32", trainable=True
+                shape=ops.shape(self.std),
+                initializer=keras.initializers.get(keras.ops.copy(self.std)),
+                dtype="float32",
+                trainable=True,
             )
         else:
             self._mean = self.mean

@@ -91,7 +91,8 @@ def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
         result = -0.5 * ops.sum((samples - self._mean) ** 2 / self._std**2, axis=-1)

         if normalize:
-            result += self.log_normalization_constant
+            log_normalization_constant = -0.5 * self.dim * math.log(2.0 * math.pi) - ops.sum(ops.log(self._std))
+            result += log_normalization_constant

         return result

bayesflow/distributions/diagonal_student_t.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ def __init__(

         self.seed_generator = seed_generator or keras.random.SeedGenerator()

-        self.log_normalization_constant = None
         self.dim = None
         self._loc = None
         self._scale = None

@@ -78,21 +77,16 @@ def build(self, input_shape: Shape) -> None:
         self.loc = ops.cast(ops.broadcast_to(self.loc, (self.dim,)), "float32")
         self.scale = ops.cast(ops.broadcast_to(self.scale, (self.dim,)), "float32")

-        self.log_normalization_constant = (
-            -0.5 * self.dim * math.log(self.df)
-            - 0.5 * self.dim * math.log(math.pi)
-            - math.lgamma(0.5 * self.df)
-            + math.lgamma(0.5 * (self.df + self.dim))
-            - ops.sum(keras.ops.log(self.scale))
-        )
-
         if self.trainable_parameters:
             self._loc = self.add_weight(
-                shape=ops.shape(self.loc), initializer=keras.initializers.get(self.loc), dtype="float32", trainable=True
+                shape=ops.shape(self.loc),
+                initializer=keras.initializers.get(keras.ops.copy(self.loc)),
+                dtype="float32",
+                trainable=True,
             )
             self._scale = self.add_weight(
                 shape=ops.shape(self.scale),
-                initializer=keras.initializers.get(self.scale),
+                initializer=keras.initializers.get(keras.ops.copy(self.scale)),
                 dtype="float32",
                 trainable=True,
             )

@@ -105,7 +99,14 @@ def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
         result = -0.5 * (self.df + self.dim) * ops.log1p(mahalanobis_term / self.df)

         if normalize:
-            result += self.log_normalization_constant
+            log_normalization_constant = (
+                -0.5 * self.dim * math.log(self.df)
+                - 0.5 * self.dim * math.log(math.pi)
+                - math.lgamma(0.5 * self.df)
+                + math.lgamma(0.5 * (self.df + self.dim))
+                - ops.sum(keras.ops.log(self._scale))
+            )
+            result += log_normalization_constant

         return result

bayesflow/distributions/mixture.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def build(self, input_shape: Shape) -> None:

         self._mixture_logits = self.add_weight(
             shape=(len(self.distributions),),
-            initializer=keras.initializers.get(self.mixture_logits),
+            initializer=keras.initializers.get(keras.ops.copy(self.mixture_logits)),
             dtype="float32",
             trainable=self.trainable_mixture,
         )

0 commit comments

Comments
 (0)