bayesflow-org · stefanradev93 · Aug 27, 2025 · Jun 22, 2025 · Jun 22, 2025 · Jun 23, 2025
diff --git a/README.md b/README.md
@@ -76,7 +76,7 @@ Note that BayesFlow **will not run** without a backend.
 
 If you don't know which backend to use, we recommend JAX as it is currently the fastest backend.
 
-Once installed, [set the backend environment variable as required by keras](https://keras.io/getting_started/#configuring-your-backend).
+As of version ``2.0.7``, BayesFlow sets the backend automatically. If you have multiple backends installed, you can override the automatic choice by manually [setting the backend environment variable as described by Keras](https://keras.io/getting_started/#configuring-your-backend).
 For example, inside your Python script write:
 
 ```python
@@ -97,8 +97,6 @@ Or just plainly set the environment variable in your shell:
 export KERAS_BACKEND=jax
 ```
 
-This way, you also don't have to manually set the backend every time you are starting Python to use BayesFlow.
-
 ## Getting Started
 
 Using the high-level interface is easy, as demonstrated by the minimal working example below:

diff --git a/bayesflow/__init__.py b/bayesflow/__init__.py
@@ -1,29 +1,12 @@
-from . import (
-    approximators,
-    adapters,
-    augmentations,
-    datasets,
-    diagnostics,
-    distributions,
-    experimental,
-    networks,
-    simulators,
-    utils,
-    workflows,
-    wrappers,
-)
-
-from .adapters import Adapter
-from .approximators import ContinuousApproximator, PointApproximator
-from .datasets import OfflineDataset, OnlineDataset, DiskDataset
-from .simulators import make_simulator
-from .workflows import BasicWorkflow
+# ruff: noqa: E402
+# disable E402 to allow for setup code before importing any internals (which could import keras)
 
 
 def setup():
     # perform any necessary setup without polluting the namespace
-    import keras
+    import os
     import logging
+    from importlib.util import find_spec
 
     # set the basic logging level if the user hasn't already
     logging.basicConfig(level=logging.INFO)
@@ -32,8 +15,63 @@ def setup():
     logger = logging.getLogger(__name__)
     logger.setLevel(logging.INFO)
 
+    issue_url = "https://github.com/bayesflow-org/bayesflow/issues/new?template=bug_report.md"
+
+    if "KERAS_BACKEND" not in os.environ:
+        # no backend requested by the user: pick an installed one automatically or raise an ImportError
+        class Backend:  # plain record describing one candidate Keras backend
+            def __init__(self, display_name, package_name, env_name, install_url, priority):
+                self.display_name = display_name  # human-readable name used in messages
+                self.package_name = package_name  # module name probed with find_spec
+                self.env_name = env_name  # value written to KERAS_BACKEND
+                self.install_url = install_url  # listed in the "no backend found" error
+                self.priority = priority  # lower value wins when several are installed
+
+        backends = [
+            Backend("JAX", "jax", "jax", "https://docs.jax.dev/en/latest/quickstart.html#installation", 0),
+            Backend("PyTorch", "torch", "torch", "https://pytorch.org/get-started/locally/", 1),
+            Backend("TensorFlow", "tensorflow", "tensorflow", "https://www.tensorflow.org/install", 2),
+        ]
+
+        # find_spec only locates a top-level package; it does not import it
+        found_backends = [backend for backend in backends if find_spec(backend.package_name) is not None]
+
+        # nothing usable is installed: list what can be installed together with the installation guides
+        if not found_backends:
+            message = "No suitable backend found. Please install one of the following:\n"
+            for backend in backends:
+                message += f"{backend.display_name}: {backend.install_url}\n"
+            message += "\n"
+
+            message += f"If you continue to see this error, please file a bug report at {issue_url}.\n"
+            message += (
+                "You can manually select a backend by setting the KERAS_BACKEND environment variable as shown below:\n"
+            )
+            message += "https://keras.io/getting_started/#configuring-your-backend"
+
+            raise ImportError(message)
+
+        # several candidates: the lowest priority value is chosen and the user is told how to override it
+        if len(found_backends) > 1:
+            found_backends.sort(key=lambda b: b.priority)
+            chosen_backend = found_backends[0]
+            os.environ["KERAS_BACKEND"] = chosen_backend.env_name
+
+            logging.warning(
+                f"Multiple Keras-compatible backends detected ({', '.join(b.display_name for b in found_backends)}).\n"
+                f"Defaulting to {chosen_backend.display_name}.\n"
+                "To override, set the KERAS_BACKEND environment variable before importing bayesflow.\n"
+                "See: https://keras.io/getting_started/#configuring-your-backend"
+            )
+        else:
+            os.environ["KERAS_BACKEND"] = found_backends[0].env_name
+
+    import keras
     from bayesflow.utils import logging
 
+    if keras.backend.backend().lower() != os.environ["KERAS_BACKEND"].lower():
+        logging.warning("Automatic backend selection failed, most likely because Keras was imported before BayesFlow.")
+
     logging.info(f"Using backend {keras.backend.backend()!r}")
 
     if keras.backend.backend() == "torch":
@@ -60,3 +98,24 @@ def setup():
 # call and clean up namespace
 setup()
 del setup
+
+from . import (
+    approximators,
+    adapters,
+    augmentations,
+    datasets,
+    diagnostics,
+    distributions,
+    experimental,
+    networks,
+    simulators,
+    utils,
+    workflows,
+    wrappers,
+)
+
+from .adapters import Adapter
+from .approximators import ContinuousApproximator, PointApproximator
+from .datasets import OfflineDataset, OnlineDataset, DiskDataset
+from .simulators import make_simulator
+from .workflows import BasicWorkflow
diff --git a/bayesflow/adapters/transforms/nan_to_num.py b/bayesflow/adapters/transforms/nan_to_num.py
@@ -80,6 +80,8 @@ def inverse(self, data: dict[str, any], **kwargs) -> dict[str, any]:
         data = data.copy()
 
         # Retrieve mask and values to reconstruct NaNs
+        if self.key not in data.keys():
+            return data
         values = data[self.key]
 
         if not self.return_mask:

diff --git a/bayesflow/approximators/continuous_approximator.py b/bayesflow/approximators/continuous_approximator.py
@@ -537,7 +537,7 @@ def _sample(
             )
             batch_shape = keras.ops.shape(inference_conditions)[:-1]
         else:
-            batch_shape = keras.ops.shape(inference_conditions)[1:-1]
+            batch_shape = (num_samples,)
 
         return self.inference_network.sample(
             batch_shape, conditions=inference_conditions, **filter_kwargs(kwargs, self.inference_network.sample)

diff --git a/bayesflow/approximators/point_approximator.py b/bayesflow/approximators/point_approximator.py
@@ -143,12 +143,7 @@ def sample(
 
         return samples
 
-    def log_prob(
-        self,
-        *,
-        data: Mapping[str, np.ndarray],
-        **kwargs,
-    ) -> np.ndarray | dict[str, np.ndarray]:
+    def log_prob(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray | dict[str, np.ndarray]:
         """
         Computes the log-probability of given data under the parametric distribution(s) for given input conditions.
 

diff --git a/bayesflow/diagnostics/__init__.py b/bayesflow/diagnostics/__init__.py
@@ -5,6 +5,7 @@
 from .metrics import (
     bootstrap_comparison,
     calibration_error,
+    calibration_log_gamma,
     posterior_contraction,
     summary_space_comparison,
 )
@@ -18,7 +19,9 @@
     mc_confusion_matrix,
     mmd_hypothesis_test,
     pairs_posterior,
+    pairs_quantity,
     pairs_samples,
+    plot_quantity,
     recovery,
     recovery_from_estimates,
     z_score_contraction,

diff --git a/bayesflow/diagnostics/metrics/posterior_contraction.py b/bayesflow/diagnostics/metrics/posterior_contraction.py
@@ -10,7 +10,7 @@ def posterior_contraction(
     targets: Mapping[str, np.ndarray] | np.ndarray,
     variable_keys: Sequence[str] = None,
     variable_names: Sequence[str] = None,
-    aggregation: Callable = np.median,
+    aggregation: Callable | None = np.median,
 ) -> dict[str, any]:
     """
     Computes the posterior contraction (PC) from prior to posterior for the given samples.
@@ -27,16 +27,17 @@ def posterior_contraction(
        By default, select all keys.
     variable_names : Sequence[str], optional (default = None)
         Optional variable names to show in the output.
-    aggregation    : callable, optional (default = np.median)
+    aggregation    : callable or None, optional (default = np.median)
         Function to aggregate the PC across draws. Typically `np.mean` or `np.median`.
+        If None is provided, the individual values are returned.
 
     Returns
     -------
     result : dict
         Dictionary containing:
 
         - "values" : float or np.ndarray
-            The aggregated posterior contraction per variable
+            The (optionally aggregated) posterior contraction per variable
         - "metric_name" : str
             The name of the metric ("Posterior Contraction").
         - "variable_names" : str
@@ -59,6 +60,7 @@ def posterior_contraction(
     post_vars = samples["estimates"].var(axis=1, ddof=1)
     prior_vars = samples["targets"].var(axis=0, keepdims=True, ddof=1)
     contraction = np.clip(1 - (post_vars / prior_vars), 0, 1)
-    contraction = aggregation(contraction, axis=0)
+    if aggregation is not None:
+        contraction = aggregation(contraction, axis=0)
     variable_names = samples["estimates"].variable_names
     return {"values": contraction, "metric_name": "Posterior Contraction", "variable_names": variable_names}
diff --git a/bayesflow/diagnostics/plots/__init__.py b/bayesflow/diagnostics/plots/__init__.py
@@ -6,6 +6,8 @@
 from .mc_confusion_matrix import mc_confusion_matrix
 from .mmd_hypothesis_test import mmd_hypothesis_test
 from .pairs_posterior import pairs_posterior
+from .pairs_quantity import pairs_quantity
+from .plot_quantity import plot_quantity
 from .pairs_samples import pairs_samples
 from .recovery import recovery
 from .recovery_from_estimates import recovery_from_estimates

diff --git a/bayesflow/diagnostics/plots/calibration_ecdf.py b/bayesflow/diagnostics/plots/calibration_ecdf.py
@@ -1,9 +1,9 @@
 from collections.abc import Callable, Mapping, Sequence
 
 import numpy as np
-import keras
 import matplotlib.pyplot as plt
 
+from ...utils.dict_utils import compute_test_quantities
 from ...utils.plot_utils import prepare_plot_data, add_titles_and_labels, prettify_subplots
 from ...utils.ecdf import simultaneous_ecdf_bands
 from ...utils.ecdf.ranks import fractional_ranks, distance_ranks
@@ -136,33 +136,17 @@ def calibration_ecdf(
 
     # Optionally, compute and prepend test quantities from draws
     if test_quantities is not None:
-        test_quantities_estimates = {}
-        test_quantities_targets = {}
-
-        for key, test_quantity_fn in test_quantities.items():
-            # Apply test_quantity_func to ground-truths
-            tq_targets = test_quantity_fn(data=targets)
-            test_quantities_targets[key] = np.expand_dims(tq_targets, axis=1)
-
-            # # Flatten estimates for batch processing in test_quantity_fn, apply function, and restore shape
-            num_conditions, num_samples = next(iter(estimates.values())).shape[:2]
-            flattened_estimates = keras.tree.map_structure(lambda t: np.reshape(t, (-1, *t.shape[2:])), estimates)
-            flat_tq_estimates = test_quantity_fn(data=flattened_estimates)
-            test_quantities_estimates[key] = np.reshape(flat_tq_estimates, (num_conditions, num_samples, 1))
-
-        # Add custom test quantities to variable keys and names for plotting
-        # keys and names are set to the test_quantities dict keys
-        test_quantities_names = list(test_quantities.keys())
-
-        if variable_keys is None:
-            variable_keys = list(estimates.keys())
-
-        if isinstance(variable_names, list):
-            variable_names = test_quantities_names + variable_names
-
-        variable_keys = test_quantities_names + variable_keys
-        estimates = test_quantities_estimates | estimates
-        targets = test_quantities_targets | targets
+        updated_data = compute_test_quantities(
+            targets=targets,
+            estimates=estimates,
+            variable_keys=variable_keys,
+            variable_names=variable_names,
+            test_quantities=test_quantities,
+        )
+        variable_names = updated_data["variable_names"]
+        variable_keys = updated_data["variable_keys"]
+        estimates = updated_data["estimates"]
+        targets = updated_data["targets"]
 
     plot_data = prepare_plot_data(
         estimates=estimates,

diff --git a/bayesflow/diagnostics/plots/calibration_ecdf_from_quantiles.py b/bayesflow/diagnostics/plots/calibration_ecdf_from_quantiles.py
@@ -26,6 +26,7 @@ def calibration_ecdf_from_quantiles(
     fill_color: str = "grey",
     num_row: int = None,
     num_col: int = None,
+    markersize: float = None,
     **kwargs,
 ) -> plt.Figure:
     """
@@ -97,6 +98,8 @@ def calibration_ecdf_from_quantiles(
     num_col           : int, optional, default: None
         The number of columns for the subplots.
         Dynamically determined if None.
+    markersize        : float, optional, default: None
+        The marker size in points.
     **kwargs          : dict, optional, default: {}
         Keyword arguments can be passed to control the behavior of
         ECDF simultaneous band computation through the ``ecdf_bands_kwargs``
@@ -142,11 +145,15 @@ def calibration_ecdf_from_quantiles(
 
         if stacked:
             if j == 0:
-                plot_data["axes"][0].plot(xx, yy, marker="o", color=rank_ecdf_color, alpha=0.95, label="Rank ECDFs")
+                plot_data["axes"][0].plot(
+                    xx, yy, marker="o", color=rank_ecdf_color, markersize=markersize, alpha=0.95, label="Rank ECDFs"
+                )
             else:
-                plot_data["axes"][0].plot(xx, yy, marker="o", color=rank_ecdf_color, alpha=0.95)
+                plot_data["axes"][0].plot(xx, yy, marker="o", color=rank_ecdf_color, markersize=markersize, alpha=0.95)
         else:
-            plot_data["axes"].flat[j].plot(xx, yy, marker="o", color=rank_ecdf_color, alpha=0.95, label="Rank ECDF")
+            plot_data["axes"].flat[j].plot(
+                xx, yy, marker="o", color=rank_ecdf_color, markersize=markersize, alpha=0.95, label="Rank ECDF"
+            )
 
     # Compute uniform ECDF and bands
     alpha, z, L, U = pointwise_ecdf_bands(estimates.shape[0], **kwargs.pop("ecdf_bands_kwargs", {}))

diff --git a/bayesflow/diagnostics/plots/mc_calibration.py b/bayesflow/diagnostics/plots/mc_calibration.py
@@ -27,6 +27,7 @@ def mc_calibration(
     color: str = "#132a70",
     num_col: int = None,
     num_row: int = None,
+    markersize: float = None,
 ) -> plt.Figure:
     """Plots the calibration curves, the ECEs and the marginal histograms of predicted posterior model probabilities
     for a model comparison problem. The marginal histograms inform about the fraction of predictions in each bin.
@@ -60,6 +61,8 @@ def mc_calibration(
         The number of rows for the subplots. Dynamically determined if None.
     num_col             : int, optional, default: None
         The number of columns for the subplots. Dynamically determined if None.
+    markersize          : float, optional, default: None
+        The marker size in points.
 
     Returns
     -------
@@ -88,7 +91,7 @@ def mc_calibration(
 
     for j, ax in enumerate(plot_data["axes"].flat):
         # Plot calibration curve
-        ax.plot(ece["probs_pred"][j], ece["probs_true"][j], "o-", color=color)
+        ax.plot(ece["probs_pred"][j], ece["probs_true"][j], "o-", color=color, markersize=markersize)
 
         # Plot PMP distribution over bins
         uniform_bins = np.linspace(0.0, 1.0, num_bins + 1)