1 change: 1 addition & 0 deletions causalpy/__init__.py
@@ -41,4 +41,5 @@
"RegressionKink",
"skl_models",
"SyntheticControl",
"variable_selection_priors",
]
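
This export makes the new module importable from the package namespace, e.g. (a minimal sketch):

```python
from causalpy import variable_selection_priors
```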
19 changes: 18 additions & 1 deletion causalpy/experiments/instrumental_variable.py
@@ -51,6 +51,12 @@ class InstrumentalVariable(BaseExperiment):
If priors are not specified we will substitute MLE estimates for
the beta coefficients. Example: ``priors = {"mus": [0, 0],
"sigmas": [1, 1], "eta": 2, "lkj_sd": 2}``.
:param vs_prior_type: str or None, default=None
Type of variable selection prior: 'spike_and_slab', 'horseshoe', or None.
If None, uses standard normal priors.
:param vs_hyperparams: dict, optional
Hyperparameters for variable selection priors. Only used if vs_prior_type
is not None.

Review thread on this docstring:

Contributor: This is sphinx format and not numpy

Collaborator: We should add that into AGENTS.md if it's not already there.

Contributor Author (quoting the first comment): You got me. The doc strings were AI generated. Will fix.
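
For reference, the equivalent in the NumPy docstring format the reviewer is asking for would look roughly like this (same content, restyled):

```python
Parameters
----------
vs_prior_type : str or None, default=None
    Type of variable selection prior: 'spike_and_slab', 'horseshoe', or None.
    If None, uses standard normal priors.
vs_hyperparams : dict, optional
    Hyperparameters for variable selection priors. Only used if vs_prior_type
    is not None.
```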

Example
-------
@@ -98,6 +104,8 @@ def __init__(
formula: str,
model: BaseExperiment | None = None,
priors: dict | None = None,
vs_prior_type: str | None = None,
vs_hyperparams: dict | None = None,
**kwargs: dict,
) -> None:
super().__init__(model=model)
@@ -107,6 +115,8 @@ def __init__(
self.formula = formula
self.instruments_formula = instruments_formula
self.model = model
self.vs_prior_type = vs_prior_type
self.vs_hyperparams = vs_hyperparams or {}
self.input_validation()

y, X = dmatrices(formula, self.data)
@@ -138,7 +148,14 @@ def __init__(
}
self.priors = priors
self.model.fit( # type: ignore[call-arg,union-attr]
X=self.X,
Z=self.Z,
y=self.y,
t=self.t,
coords=COORDS,
priors=self.priors,
vs_prior_type=vs_prior_type,
vs_hyperparams=vs_hyperparams,
)

def input_validation(self) -> None:
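For illustration, a hedged sketch of how the new arguments flow through the experiment class, mirroring the integration test added later in this diff (the `risk` dataset, formulas, and the `pi_alpha` hyperparameter are taken from that test):

```python
import causalpy as cp

df = cp.load_data("risk")

# Fit the IV model with a spike-and-slab prior on the coefficients;
# pi_alpha tightens the Beta prior on the inclusion probability.
# sample_kwargs omitted here for brevity.
result = cp.InstrumentalVariable(
    instruments_data=df[["risk", "logmort0"]],
    data=df[["loggdp", "risk"]],
    instruments_formula="risk ~ 1 + logmort0",
    formula="loggdp ~ 1 + risk",
    model=cp.pymc_models.InstrumentalVariableRegression(),
    vs_prior_type="spike_and_slab",
    vs_hyperparams={"pi_alpha": 5},
)
```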
109 changes: 59 additions & 50 deletions causalpy/pymc_models.py
@@ -26,6 +26,7 @@
from pymc_extras.prior import Prior

from causalpy.utils import round_num
from causalpy.variable_selection_priors import VariableSelectionPrior


class PyMCModel(pm.Model):
@@ -657,7 +658,9 @@ def build_model( # type: ignore
y: np.ndarray,
t: np.ndarray,
coords: Dict[str, Any],
priors: Dict[str, Any],
vs_prior_type: str | None = None,
vs_hyperparams: dict | None = None,
) -> None:
"""Specify model with treatment regression and focal regression
data and priors.
@@ -680,23 +683,47 @@
Dictionary of priors for the mus and sigmas of both
regressions. Example: ``priors = {"mus": [0, 0],
"sigmas": [1, 1], "eta": 2, "lkj_sd": 2}``.
:param vs_prior_type: An optional string. Can be "spike_and_slab",
"horseshoe", or "normal".
:param vs_hyperparams: An optional dictionary of hyperparameters for
the variable selection priors.

"""

# --- Priors ---
with self:
self.add_coords(coords)

# Create coefficient priors
if vs_prior_type:
# Use variable selection priors
vs_prior_treatment = VariableSelectionPrior(
vs_prior_type, vs_hyperparams
)
vs_prior_outcome = VariableSelectionPrior(vs_prior_type, vs_hyperparams)

beta_t = vs_prior_treatment.create_prior(
name="beta_t", n_params=Z.shape[1], dims="instruments", X=Z
)

beta_z = vs_prior_outcome.create_prior(
name="beta_z", n_params=X.shape[1], dims="covariates", X=X
)
else:
# Use standard normal priors
beta_t = pm.Normal(
name="beta_t",
mu=priors["mus"][0],
sigma=priors["sigmas"][0],
dims="instruments",
)
beta_z = pm.Normal(
name="beta_z",
mu=priors["mus"][1],
sigma=priors["sigmas"][1],
dims="covariates",
)

sd_dist = pm.Exponential.dist(priors["lkj_sd"], shape=2)
chol, corr, sigmas = pm.LKJCholeskyCov(
name="chol_cov",
@@ -755,50 +782,32 @@ def sample_predictive_distribution(self, ppc_sampler: str | None = "jax") -> None
)
)

def fit(  # type: ignore
self,
X: np.ndarray,
Z: np.ndarray,
y: np.ndarray,
t: np.ndarray,
coords: Dict[str, Any],
priors: Dict[str, Any],
ppc_sampler: str | None = None,
vs_prior_type: str | None = None,
vs_hyperparams: dict | None = None,
) -> az.InferenceData:
"""Draw samples from posterior distribution and potentially from
the prior and posterior predictive distributions.

Parameters
----------
X : np.ndarray
Array used to predict our outcome y.
Z : np.ndarray
Array used to predict our treatment variable t.
y : np.ndarray
Array of values representing our focal outcome y.
t : np.ndarray
Array representing the treatment variable.
coords : dict
Dictionary with coordinate names for named dimensions.
priors : dict
Dictionary of priors for the model.
ppc_sampler : str, optional
Sampler for posterior predictive distribution. Can be 'jax',
'pymc', or None. Defaults to None, so the user can determine
if they wish to spend time sampling the posterior predictive
distribution independently.
vs_prior_type : str, optional
Type of variable selection prior: 'spike_and_slab', 'horseshoe',
or None. If None, uses standard normal priors.
vs_hyperparams : dict, optional
Hyperparameters for the variable selection priors. Only used if
vs_prior_type is not None.

Returns
-------
az.InferenceData
InferenceData object containing the samples.
"""

# Ensure random_seed is used in sample_prior_predictive() and
# sample_posterior_predictive() if provided in sample_kwargs.
# Use JAX for ppc sampling of multivariate likelihood

self.build_model(X, Z, y, t, coords, priors, vs_prior_type, vs_hyperparams)
with self:
self.idata = pm.sample(**self.sample_kwargs)
self.sample_predictive_distribution(ppc_sampler=ppc_sampler)
32 changes: 32 additions & 0 deletions causalpy/tests/test_integration_pymc_examples.py
@@ -676,6 +676,38 @@ def test_iv_reg(mock_pymc_sample):
result.get_plot_data()


@pytest.mark.integration
def test_iv_reg_vs_prior(mock_pymc_sample):
df = cp.load_data("risk")
instruments_formula = "risk ~ 1 + logmort0"
formula = "loggdp ~ 1 + risk"
instruments_data = df[["risk", "logmort0"]]
data = df[["loggdp", "risk"]]

result = cp.InstrumentalVariable(
instruments_data=instruments_data,
data=data,
instruments_formula=instruments_formula,
formula=formula,
model=cp.pymc_models.InstrumentalVariableRegression(
sample_kwargs=sample_kwargs
),
vs_prior_type="spike_and_slab",
vs_hyperparams={"pi_alpha": 5},
)
result.model.sample_predictive_distribution(ppc_sampler="pymc")
assert isinstance(df, pd.DataFrame)
assert isinstance(data, pd.DataFrame)
assert isinstance(instruments_data, pd.DataFrame)
assert isinstance(result, cp.InstrumentalVariable)
assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
with pytest.raises(NotImplementedError):
result.get_plot_data()
assert "gamma_beta_t" in result.model.named_vars
assert "pi_beta_t" in result.model.named_vars


@pytest.mark.integration
def test_inverse_prop(mock_pymc_sample):
"""Test the InversePropensityWeighting class."""