From 358b825004998ec281db5899baab458391094a42 Mon Sep 17 00:00:00 2001 From: Rob Zinkov Date: Sat, 8 Feb 2025 21:24:12 +0100 Subject: [PATCH 01/18] Relax pm.observe to allow observing already observed variables (#7679) --- pymc/model/transform/conditioning.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pymc/model/transform/conditioning.py b/pymc/model/transform/conditioning.py index 4d9f0553a1..7bb8abbf2e 100644 --- a/pymc/model/transform/conditioning.py +++ b/pymc/model/transform/conditioning.py @@ -25,6 +25,7 @@ from pymc.model.fgraph import ( ModelDeterministic, ModelFreeRV, + ModelValuedVar, extract_dims, fgraph_from_model, model_deterministic, @@ -74,7 +75,9 @@ def observe( m_new = pm.observe(m, {y: 0.5}) - Deterministic variables can also be observed. + Deterministic variables can also be observed. If the variable has already + been observed, its old value is replaced with the one provided. + This relies on PyMC ability to infer the logp of the underlying expression .. code-block:: python @@ -95,9 +98,9 @@ def observe( for var, obs in vars_to_observations.items() } - valid_model_vars = set(model.free_RVs + model.deterministics) + valid_model_vars = set(model.basic_RVs + model.deterministics) if any(var not in valid_model_vars for var in vars_to_observations): - raise ValueError("At least one var is not a free variable or deterministic in the model") + raise ValueError("At least one var is not a random variable or deterministic in the model") fgraph, memo = fgraph_from_model(model) @@ -106,7 +109,7 @@ def observe( model_var = memo[var] # Just a sanity check - assert isinstance(model_var.owner.op, ModelFreeRV | ModelDeterministic) + assert isinstance(model_var.owner.op, ModelValuedVar | ModelDeterministic) assert model_var in fgraph.variables var = model_var.owner.inputs[0] From 112af3e75cc065731d6449b13d8251c50f6a9cd0 Mon Sep 17 00:00:00 2001 From: Will Dean <57733339+wd60622@users.noreply.github.com> Date: Mon, 10 Feb 2025 11:48:47 +0100 Subject: [PATCH 02/18] Remove deprecated generator data (#7664) --- docs/source/api/data.rst | 1 - pymc/data.py | 51 +----------- pymc/pytensorf.py | 107 +------------------------ tests/test_data.py | 95 +--------------------- tests/test_pytensorf.py | 30 ------- tests/variational/test_minibatch_rv.py | 46 ----------- 6 files changed, 3 insertions(+), 327 deletions(-) diff --git a/docs/source/api/data.rst b/docs/source/api/data.rst index 09d3f42fdb..fe70e2d7f1 100644 --- a/docs/source/api/data.rst +++ b/docs/source/api/data.rst @@ -10,5 +10,4 @@ Data MutableData get_data Data - GeneratorAdapter Minibatch diff --git a/pymc/data.py b/pymc/data.py index 9373eb5775..c21ac3001f 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -33,18 +33,16 @@ from pytensor.scalar import Cast from pytensor.tensor.elemwise import Elemwise from pytensor.tensor.random.basic import IntegersRV -from pytensor.tensor.type import TensorType from pytensor.tensor.variable import TensorConstant, TensorVariable import pymc as pm -from pymc.pytensorf import GeneratorOp, convert_data, smarttypeX +from pymc.pytensorf import convert_data from pymc.vartypes import isgenerator __all__ = [ "ConstantData", "Data", - "GeneratorAdapter", "Minibatch", "MutableData", "get_data", @@ -86,51 +84,6 @@ def clone(self): return cp -class GeneratorAdapter: - """Class that helps infer data type of generator. - - It looks at the first item, preserving the order of the resulting generator. - """ - - def make_variable(self, gop, name=None): - var = GenTensorVariable(gop, self.tensortype, name) - var.tag.test_value = self.test_value - return var - - def __init__(self, generator): - if not pm.vartypes.isgenerator(generator): - raise TypeError("Object should be generator like") - self.test_value = smarttypeX(copy(next(generator))) - # make pickling potentially possible - self._yielded_test_value = False - self.gen = generator - self.tensortype = TensorType(self.test_value.dtype, ((False,) * self.test_value.ndim)) - - # python3 generator - def __next__(self): - """Next value in the generator.""" - if not self._yielded_test_value: - self._yielded_test_value = True - return self.test_value - else: - return smarttypeX(copy(next(self.gen))) - - # python2 generator - next = __next__ - - def __iter__(self): - """Return an iterator.""" - return self - - def __eq__(self, other): - """Return true if both objects are actually the same.""" - return id(self) == id(other) - - def __hash__(self): - """Return a hash of the object.""" - return hash(id(self)) - - class MinibatchIndexRV(IntegersRV): _print_name = ("minibatch_index", r"\operatorname{minibatch\_index}") @@ -170,8 +123,6 @@ def is_valid_observed(v) -> bool: isinstance(v.owner.op, MinibatchOp) and all(is_valid_observed(inp) for inp in v.owner.inputs) ) - # Or Generator - or isinstance(v.owner.op, GeneratorOp) ) diff --git a/pymc/pytensorf.py b/pymc/pytensorf.py index eda2064821..cf5700b95e 100644 --- a/pymc/pytensorf.py +++ b/pymc/pytensorf.py @@ -36,7 +36,6 @@ walk, ) from pytensor.graph.fg import FunctionGraph, Output -from pytensor.graph.op import Op from pytensor.scalar.basic import Cast from pytensor.scan.op import Scan from pytensor.tensor.basic import _as_tensor_variable @@ -63,10 +62,8 @@ "compile_pymc", "cont_inputs", "convert_data", - "convert_generator_data", "convert_observed_data", "floatX", - "generator", "gradient", "hessian", "hessian_diag", @@ -81,20 +78,10 @@ def convert_observed_data(data) -> np.ndarray | Variable: """Convert user provided dataset to accepted formats.""" if isgenerator(data): - return convert_generator_data(data) + raise TypeError("Data passed to `observed` cannot be a generator.") return convert_data(data) -def convert_generator_data(data) -> TensorVariable: - warnings.warn( - "Generator data is deprecated and we intend to remove it." - " If you disagree and need this, please get in touch via https://github.com/pymc-devs/pymc/issues.", - DeprecationWarning, - stacklevel=2, - ) - return generator(data) - - def convert_data(data) -> np.ndarray | Variable: ret: np.ndarray | Variable if hasattr(data, "to_numpy") and hasattr(data, "isnull"): @@ -625,98 +612,6 @@ def __call__(self, input): return pytensor.clone_replace(self.tensor, {oldinput: input}, rebuild_strict=False) -class GeneratorOp(Op): - """ - Generator Op is designed for storing python generators inside pytensor graph. - - __call__ creates TensorVariable - It has 2 new methods - - var.set_gen(gen): sets new generator - - var.set_default(value): sets new default value (None erases default value) - - If generator is exhausted, variable will produce default value if it is not None, - else raises `StopIteration` exception that can be caught on runtime. - - Parameters - ---------- - gen: generator that implements __next__ (py3) or next (py2) method - and yields np.arrays with same types - default: np.array with the same type as generator produces - """ - - __props__ = ("generator",) - - def __init__(self, gen, default=None): - warnings.warn( - "generator data is deprecated and will be removed in a future release", FutureWarning - ) - from pymc.data import GeneratorAdapter - - super().__init__() - if not isinstance(gen, GeneratorAdapter): - gen = GeneratorAdapter(gen) - self.generator = gen - self.set_default(default) - - def make_node(self, *inputs): - gen_var = self.generator.make_variable(self) - return Apply(self, [], [gen_var]) - - def perform(self, node, inputs, output_storage, params=None): - if self.default is not None: - output_storage[0][0] = next(self.generator, self.default) - else: - output_storage[0][0] = next(self.generator) - - def do_constant_folding(self, fgraph, node): - return False - - __call__ = pytensor.config.change_flags(compute_test_value="off")(Op.__call__) - - def set_gen(self, gen): - from pymc.data import GeneratorAdapter - - if not isinstance(gen, GeneratorAdapter): - gen = GeneratorAdapter(gen) - if not gen.tensortype == self.generator.tensortype: - raise ValueError("New generator should yield the same type") - self.generator = gen - - def set_default(self, value): - if value is None: - self.default = None - else: - value = np.asarray(value, self.generator.tensortype.dtype) - t1 = (False,) * value.ndim - t2 = self.generator.tensortype.broadcastable - if not t1 == t2: - raise ValueError("Default value should have the same type as generator") - self.default = value - - -def generator(gen, default=None): - """ - Create a generator variable with possibility to set default value and new generator. - - If generator is exhausted variable will produce default value if it is not None, - else raises `StopIteration` exception that can be caught on runtime. - - Parameters - ---------- - gen: generator that implements __next__ (py3) or next (py2) method - and yields np.arrays with same types - default: np.array with the same type as generator produces - - Returns - ------- - TensorVariable - It has 2 new methods - - var.set_gen(gen): sets new generator - - var.set_default(value): sets new default value (None erases default value) - """ - return GeneratorOp(gen, default)() - - def ix_(*args): """ PyTensor np.ix_ analog. diff --git a/tests/test_data.py b/tests/test_data.py index 0906ab8434..154737b637 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -13,11 +13,9 @@ # limitations under the License. import io -import itertools as it from os import path -import cloudpickle import numpy as np import pytensor import pytensor.tensor as pt @@ -29,7 +27,7 @@ import pymc as pm from pymc.data import MinibatchOp -from pymc.pytensorf import GeneratorOp, floatX +from pymc.pytensorf import floatX class TestData: @@ -495,97 +493,6 @@ def integers_ndim(ndim): i += 1 -@pytest.mark.usefixtures("strict_float32") -class TestGenerator: - def test_basic(self): - generator = pm.GeneratorAdapter(integers()) - gop = GeneratorOp(generator)() - assert gop.tag.test_value == np.float32(0) - f = pytensor.function([], gop) - assert f() == np.float32(0) - assert f() == np.float32(1) - for _ in range(2, 100): - f() - assert f() == np.float32(100) - - def test_ndim(self): - for ndim in range(10): - res = list(it.islice(integers_ndim(ndim), 0, 2)) - generator = pm.GeneratorAdapter(integers_ndim(ndim)) - gop = GeneratorOp(generator)() - f = pytensor.function([], gop) - assert ndim == res[0].ndim - np.testing.assert_equal(f(), res[0]) - np.testing.assert_equal(f(), res[1]) - - def test_cloning_available(self): - gop = pm.generator(integers()) - res = gop**2 - shared = pytensor.shared(pm.floatX(10)) - res1 = pytensor.clone_replace(res, {gop: shared}) - f = pytensor.function([], res1) - assert f() == np.float32(100) - - def test_default_value(self): - def gen(): - for i in range(2): - yield pm.floatX(np.ones((10, 10)) * i) - - gop = pm.generator(gen(), np.ones((10, 10)) * 10) - f = pytensor.function([], gop) - np.testing.assert_equal(np.ones((10, 10)) * 0, f()) - np.testing.assert_equal(np.ones((10, 10)) * 1, f()) - np.testing.assert_equal(np.ones((10, 10)) * 10, f()) - with pytest.raises(ValueError): - gop.set_default(1) - - def test_set_gen_and_exc(self): - def gen(): - for i in range(2): - yield pm.floatX(np.ones((10, 10)) * i) - - gop = pm.generator(gen()) - f = pytensor.function([], gop) - np.testing.assert_equal(np.ones((10, 10)) * 0, f()) - np.testing.assert_equal(np.ones((10, 10)) * 1, f()) - with pytest.raises(StopIteration): - f() - gop.set_gen(gen()) - np.testing.assert_equal(np.ones((10, 10)) * 0, f()) - np.testing.assert_equal(np.ones((10, 10)) * 1, f()) - - def test_pickling(self, datagen): - gen = pm.generator(datagen) - cloudpickle.loads(cloudpickle.dumps(gen)) - bad_gen = pm.generator(integers()) - with pytest.raises(TypeError): - cloudpickle.dumps(bad_gen) - - def test_gen_cloning_with_shape_change(self, datagen): - gen = pm.generator(datagen) - gen_r = pt.random.normal(size=gen.shape).T - X = gen.dot(gen_r) - res, _ = pytensor.scan(lambda x: x.sum(), X, n_steps=X.shape[0]) - assert res.eval().shape == (50,) - shared = pytensor.shared(datagen.data.astype(gen.dtype)) - res2 = pytensor.clone_replace(res, {gen: shared**2}) - assert res2.eval().shape == (1000,) - - -def gen1(): - i = 0 - while True: - yield np.ones((10, 100)) * i - i += 1 - - -def gen2(): - i = 0 - while True: - yield np.ones((20, 100)) * i - i += 1 - - @pytest.mark.usefixtures("strict_float32") class TestMinibatch: data = np.random.rand(30, 10) diff --git a/tests/test_pytensorf.py b/tests/test_pytensorf.py index c434f1a9c7..b8c82886b9 100644 --- a/tests/test_pytensorf.py +++ b/tests/test_pytensorf.py @@ -27,7 +27,6 @@ from pytensor.graph.basic import Variable, equal_computations from pytensor.tensor.random.basic import normal, uniform from pytensor.tensor.subtensor import AdvancedIncSubtensor -from pytensor.tensor.variable import TensorVariable import pymc as pm @@ -37,19 +36,16 @@ from pymc.exceptions import NotConstantValueError from pymc.logprob.utils import ParameterValueError from pymc.pytensorf import ( - GeneratorOp, collect_default_updates, compile, constant_fold, convert_data, - convert_generator_data, extract_obs_data, hessian, hessian_diag, replace_rng_nodes, replace_vars_in_graphs, reseed_rngs, - smarttypeX, walk_model, ) from pymc.vartypes import int_types @@ -265,32 +261,6 @@ def test_convert_data(input_dtype): assert pytensor_output.dtype == intX -@pytest.mark.parametrize("input_dtype", ["int32", "int64", "float32", "float64"]) -def test_convert_generator_data(input_dtype): - # Create a generator object producing NumPy arrays with the intended dtype. - # This is required to infer the correct dtype. - square_generator = (np.array([i**2], dtype=input_dtype) for i in range(100)) - - # Output is NOT wrapped with `pm.floatX`/`intX`, - # but produced from calling a special Op. - with pytest.warns(DeprecationWarning, match="get in touch"): - result = convert_generator_data(square_generator) - apply = result.owner - op = apply.op - # Make sure the returned object is a PyTensor TensorVariable - assert isinstance(result, TensorVariable) - assert isinstance(op, GeneratorOp), f"It's a {type(apply)}" - # There are no inputs - because it generates... - assert apply.inputs == [] - - # Evaluation results should have the correct* dtype! - # (*intX/floatX will be enforced!) - evaled = result.eval() - expected_dtype = smarttypeX(np.array(1, dtype=input_dtype)).dtype - assert result.type.dtype == expected_dtype - assert evaled.dtype == np.dtype(expected_dtype) - - def test_pandas_to_array_pandas_index(): data = pd.Index([1, 2, 3]) result = convert_data(data) diff --git a/tests/variational/test_minibatch_rv.py b/tests/variational/test_minibatch_rv.py index 84d118c581..33229e0bb7 100644 --- a/tests/variational/test_minibatch_rv.py +++ b/tests/variational/test_minibatch_rv.py @@ -22,9 +22,7 @@ from pymc import Normal, draw from pymc.data import Minibatch -from pymc.testing import select_by_precision from pymc.variational.minibatch_rv import create_minibatch_rv -from tests.test_data import gen1, gen2 class TestMinibatchRandomVariable: @@ -42,50 +40,6 @@ def test_density_scaling(self): p2 = pytensor.function([], model2.logp()) assert p1() * 2 == p2() - def test_density_scaling_with_generator(self): - # We have different size generators - - def true_dens(): - g = gen1() - for i, point in enumerate(g): - yield st.norm.logpdf(point).sum() * 10 - - t = true_dens() - # We have same size models - with pm.Model() as model1: - pm.Normal("n", observed=gen1(), total_size=100) - p1 = pytensor.function([], model1.logp()) - - with pm.Model() as model2: - gen_var = pm.generator(gen2()) - pm.Normal("n", observed=gen_var, total_size=100) - p2 = pytensor.function([], model2.logp()) - - for i in range(10): - _1, _2, _t = p1(), p2(), next(t) - decimals = select_by_precision(float64=7, float32=1) - np.testing.assert_almost_equal(_1, _t, decimal=decimals) # Value O(-50,000) - np.testing.assert_almost_equal(_1, _2) - # Done - - def test_gradient_with_scaling(self): - with pm.Model() as model1: - genvar = pm.generator(gen1()) - m = pm.Normal("m") - pm.Normal("n", observed=genvar, total_size=1000) - grad1 = model1.compile_fn(model1.dlogp(vars=m), point_fn=False) - with pm.Model() as model2: - m = pm.Normal("m") - shavar = pytensor.shared(np.ones((1000, 100))) - pm.Normal("n", observed=shavar) - grad2 = model2.compile_fn(model2.dlogp(vars=m), point_fn=False) - - for i in range(10): - shavar.set_value(np.ones((100, 100)) * i) - g1 = grad1(1) - g2 = grad2(1) - np.testing.assert_almost_equal(g1, g2) - def test_multidim_scaling(self): with pm.Model() as model0: pm.Normal("n", observed=[[1, 1], [1, 1]], total_size=[]) From ce5f2a27170a9f422f974fdf9fc415dfe53c35cb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Feb 2025 22:00:13 +0100 Subject: [PATCH 03/18] [pre-commit.ci] pre-commit autoupdate (#7661) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.9.2 → v0.9.6](https://github.com/astral-sh/ruff-pre-commit/compare/v0.9.2...v0.9.6) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1100f0e2fd..07d2a7c670 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -49,7 +49,7 @@ repos: - --exclude=versioneer.py - --last-year-present - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.2 + rev: v0.9.6 hooks: - id: ruff args: [--fix, --show-fixes] From 0772383430639348499e37968004842ac5e8224d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Feb 2025 02:58:13 +0000 Subject: [PATCH 04/18] Bump docker/build-push-action from 6.8.0 to 6.14.0 Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 6.8.0 to 6.14.0. - [Release notes](https://github.com/docker/build-push-action/releases) - [Commits](https://github.com/docker/build-push-action/compare/32945a339266b759abcbdc89316275140b0fc960...0adf9959216b96bec444f325f1e493d4aa344497) --- updated-dependencies: - dependency-name: docker/build-push-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/devcontainer-docker-image.yml | 2 +- .github/workflows/docker-image.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/devcontainer-docker-image.yml b/.github/workflows/devcontainer-docker-image.yml index c9dc6bd937..96b48c2479 100644 --- a/.github/workflows/devcontainer-docker-image.yml +++ b/.github/workflows/devcontainer-docker-image.yml @@ -48,7 +48,7 @@ jobs: - name: Build and push Docker image id: docker_build - uses: docker/build-push-action@32945a339266b759abcbdc89316275140b0fc960 + uses: docker/build-push-action@0adf9959216b96bec444f325f1e493d4aa344497 with: context: . file: scripts/dev.Dockerfile diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 5e66fe6f2d..8f898e63ff 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -36,7 +36,7 @@ jobs: type=semver,pattern={{major}}.{{minor}} - name: Build and load image - uses: docker/build-push-action@32945a339266b759abcbdc89316275140b0fc960 + uses: docker/build-push-action@0adf9959216b96bec444f325f1e493d4aa344497 with: context: . file: scripts/Dockerfile @@ -48,7 +48,7 @@ jobs: docker run --rm ${{ env.CONTAINER_NAME }} conda run -n pymc-dev python -c 'import pymc;print(pymc.__version__)' - name: Build and push - uses: docker/build-push-action@32945a339266b759abcbdc89316275140b0fc960 + uses: docker/build-push-action@0adf9959216b96bec444f325f1e493d4aa344497 with: context: . push: true From 710c38b0438f2fe22f3981110c1cfb4ca6124f21 Mon Sep 17 00:00:00 2001 From: Will Dean <57733339+wd60622@users.noreply.github.com> Date: Mon, 24 Feb 2025 12:57:54 +0100 Subject: [PATCH 05/18] Point to correct tests action badge (#7682) * point to correct tests action * change the target --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 7f230d9694..4af51b652b 100644 --- a/README.rst +++ b/README.rst @@ -296,8 +296,8 @@ Thanks to our contributors .. |Binder| image:: https://mybinder.org/badge_logo.svg :target: https://mybinder.org/v2/gh/pymc-devs/pymc/main?filepath=%2Fdocs%2Fsource%2Fnotebooks -.. |Build Status| image:: https://github.com/pymc-devs/pymc/workflows/pytest/badge.svg - :target: https://github.com/pymc-devs/pymc/actions +.. |Build Status| image:: https://github.com/pymc-devs/pymc/workflows/tests/badge.svg + :target: https://github.com/pymc-devs/pymc/actions?query=workflow%3Atests+branch%3Amain .. |Coverage| image:: https://codecov.io/gh/pymc-devs/pymc/branch/main/graph/badge.svg :target: https://codecov.io/gh/pymc-devs/pymc .. |Dockerhub| image:: https://img.shields.io/docker/automated/pymc/pymc.svg From 53e0774685a054fed7760c6bc4c2bbee009508e4 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Mon, 24 Feb 2025 12:38:12 +0100 Subject: [PATCH 06/18] Fix bug with chained CustomSymbolicDists --- pymc/distributions/distribution.py | 6 ++- tests/distributions/test_custom.py | 73 +++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index b2ec6fb79b..577d9245a6 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -27,7 +27,7 @@ from pytensor import tensor as pt from pytensor.compile.builders import OpFromGraph -from pytensor.graph import FunctionGraph, clone_replace, node_rewriter +from pytensor.graph import FunctionGraph, graph_replace, node_rewriter from pytensor.graph.basic import Apply, Variable from pytensor.graph.rewriting.basic import in2out from pytensor.graph.utils import MetaType @@ -588,7 +588,9 @@ def inline_symbolic_random_variable(fgraph, node): """Expand a SymbolicRV when obtaining the logp graph if `inline_logprob` is True.""" op = node.op if op.inline_logprob: - return clone_replace(op.inner_outputs, dict(zip(op.inner_inputs, node.inputs))) + return graph_replace( + op.inner_outputs, dict(zip(op.inner_inputs, node.inputs)), strict=False + ) # Registered before pre-canonicalization which happens at position=-10 diff --git a/tests/distributions/test_custom.py b/tests/distributions/test_custom.py index d3de7cf4f7..a076eef7b6 100644 --- a/tests/distributions/test_custom.py +++ b/tests/distributions/test_custom.py @@ -21,6 +21,7 @@ from numpy import random as npr from pytensor import scan from pytensor import tensor as pt +from pytensor.graph import FunctionGraph from scipy import stats as st from pymc.distributions import ( @@ -42,11 +43,11 @@ Uniform, ) from pymc.distributions.custom import CustomDist, CustomDistRV, CustomSymbolicDistRV -from pymc.distributions.distribution import support_point +from pymc.distributions.distribution import inline_symbolic_random_variable, support_point from pymc.distributions.shape_utils import change_dist_size, rv_size_is_none, to_tuple from pymc.distributions.transforms import log from pymc.exceptions import BlockModelAccessError -from pymc.logprob import logcdf, logp +from pymc.logprob import conditional_logp, logcdf, logp from pymc.model import Deterministic, Model from pymc.pytensorf import collect_default_updates from pymc.sampling import draw, sample, sample_posterior_predictive @@ -648,3 +649,71 @@ def dist(p, size): assert out.owner.op.extended_signature == "[size],(),[rng]->(),[rng]" assert out.owner.op.ndim_supp == 0 assert out.owner.op.ndims_params == [0] + + def test_inline_does_not_duplicate_graph(self): + mu = Normal.dist() + x = CustomDist.dist(mu, dist=lambda mu, size: Normal.dist(mu, size=size)) + + fgraph = FunctionGraph(outputs=[x], clone=False) + [inner_x, inner_rng_update] = inline_symbolic_random_variable.transform(fgraph, x.owner) + assert inner_rng_update.owner.inputs[-2] is mu + assert inner_x.owner.inputs[-2] is mu + + def test_chained_custom_dist_bug(self): + """Regression test for issue reported in https://discourse.pymc.io/t/error-with-custom-distribution-after-using-scan/16255 + + This bug was caused by a duplication of a Scan-based CustomSymbolicDist when inlining another CustomSymbolicDist that used it as an input. + PyTensor failed to merge the two Scan graphs, causing a failure in the logp extraction. + """ + + rng = np.random.default_rng(123) + steps = 4 + batch = 2 + + def scan_dist(seq, n_steps, size): + def step(s): + innov = Normal.dist() + traffic = s + innov + return traffic, {innov.owner.inputs[0]: innov.owner.outputs[0]} + + rv_seq, _ = pytensor.scan( + fn=step, + sequences=[seq], + outputs_info=[None], + n_steps=n_steps, + strict=True, + ) + return rv_seq + + def normal_shifted(mu, size): + return Normal.dist(mu=mu, size=size) - 1 + + seq = pt.matrix("seq", shape=(batch, steps)) + latent_rv = CustomDist.dist( + seq.T, + steps, + dist=scan_dist, + shape=(steps, batch), + ) + latent_rv.name = "latent" + + observed_rv = CustomDist.dist( + latent_rv, + dist=normal_shifted, + shape=(steps, batch), + ) + observed_rv.name = "observed" + + latent_vv = latent_rv.type() + observed_vv = observed_rv.type() + + observed_logp = conditional_logp({latent_rv: latent_vv, observed_rv: observed_vv})[ + observed_vv + ] + latent_vv_test = rng.standard_normal(size=(steps, batch)) + observed_vv_test = rng.standard_normal(size=(steps, batch)) + expected_logp = st.norm.logpdf(observed_vv_test + 1, loc=latent_vv_test) + np.testing.assert_allclose( + observed_logp.eval({latent_vv: latent_vv_test, observed_vv: observed_vv_test}), + expected_logp, + ) From a9c5a8597e2053bff161eaecd1a6c2e9f131f078 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Feb 2025 17:09:34 +0000 Subject: [PATCH 07/18] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.9.6 → v0.9.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.9.6...v0.9.7) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 07d2a7c670..5a77cfc3f7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -49,7 +49,7 @@ repos: - --exclude=versioneer.py - --last-year-present - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.6 + rev: v0.9.7 hooks: - id: ruff args: [--fix, --show-fixes] From 2fbc8a927fce87d0dca7f23296fafd3ef290f77a Mon Sep 17 00:00:00 2001 From: nataziel Date: Fri, 14 Feb 2025 22:10:37 +1000 Subject: [PATCH 08/18] avoid jaxifying logp multiple times --- pymc/sampling/jax.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pymc/sampling/jax.py b/pymc/sampling/jax.py index b2cbff9b68..1759eb85b8 100644 --- a/pymc/sampling/jax.py +++ b/pymc/sampling/jax.py @@ -669,6 +669,7 @@ def sample_jax_nuts( random_seed=random_seed, initial_points=initial_points, nuts_kwargs=nuts_kwargs, + logp_fn=logp_fn, ) tic2 = datetime.now() From 4170386b602a2ff14861c798c804744c9be2b5b0 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Mon, 24 Feb 2025 13:27:56 +0100 Subject: [PATCH 09/18] Remove h5py conda dependency --- conda-envs/environment-dev.yml | 1 - conda-envs/environment-jax.yml | 1 - conda-envs/environment-test.yml | 1 - conda-envs/windows-environment-dev.yml | 1 - conda-envs/windows-environment-test.yml | 1 - requirements-dev.txt | 1 - 6 files changed, 6 deletions(-) diff --git a/conda-envs/environment-dev.yml b/conda-envs/environment-dev.yml index 546e1277d4..465a74f584 100644 --- a/conda-envs/environment-dev.yml +++ b/conda-envs/environment-dev.yml @@ -9,7 +9,6 @@ dependencies: - blas - cachetools>=4.2.1 - cloudpickle -- h5py>=2.7 - numpy>=1.25.0 - pandas>=0.24.0 - pip diff --git a/conda-envs/environment-jax.yml b/conda-envs/environment-jax.yml index 6bde602133..5d9c98a720 100644 --- a/conda-envs/environment-jax.yml +++ b/conda-envs/environment-jax.yml @@ -9,7 +9,6 @@ dependencies: - blas - cachetools>=4.2.1 - cloudpickle -- h5py>=2.7 - zarr>=2.5.0,<3 # Jaxlib version must not be greater than jax version! - blackjax>=1.2.2 diff --git a/conda-envs/environment-test.yml b/conda-envs/environment-test.yml index 20f0478998..92afc6a021 100644 --- a/conda-envs/environment-test.yml +++ b/conda-envs/environment-test.yml @@ -9,7 +9,6 @@ dependencies: - blas - cachetools>=4.2.1 - cloudpickle -- h5py>=2.7 - jax - numpy>=1.25.0 - pandas>=0.24.0 diff --git a/conda-envs/windows-environment-dev.yml b/conda-envs/windows-environment-dev.yml index 503cf125b2..337c48a827 100644 --- a/conda-envs/windows-environment-dev.yml +++ b/conda-envs/windows-environment-dev.yml @@ -9,7 +9,6 @@ dependencies: - blas - cachetools>=4.2.1 - cloudpickle -- h5py>=2.7 - numpy>=1.25.0 - pandas>=0.24.0 - pip diff --git a/conda-envs/windows-environment-test.yml b/conda-envs/windows-environment-test.yml index 5136d997a3..baa3a75f45 100644 --- a/conda-envs/windows-environment-test.yml +++ b/conda-envs/windows-environment-test.yml @@ -9,7 +9,6 @@ dependencies: - blas - cachetools>=4.2.1 - cloudpickle -- h5py>=2.7 - libpython - mkl-service>=2.3.0 - m2w64-toolchain diff --git a/requirements-dev.txt b/requirements-dev.txt index b0ef92f505..2ae0a3f0b7 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,7 +5,6 @@ arviz>=0.13.0 cachetools>=4.2.1 cloudpickle git+https://github.com/pymc-devs/pymc-sphinx-theme -h5py>=2.7 ipython>=7.16 jupyter-sphinx mcbackend>=0.4.0 From b7b969a1d14d976dbc97c5eeb35073810ac838e5 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Mon, 24 Feb 2025 14:19:40 +0100 Subject: [PATCH 10/18] Remove m2w64-toolchain windows conda dependency --- conda-envs/windows-environment-test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/conda-envs/windows-environment-test.yml b/conda-envs/windows-environment-test.yml index baa3a75f45..48eef03e51 100644 --- a/conda-envs/windows-environment-test.yml +++ b/conda-envs/windows-environment-test.yml @@ -11,7 +11,6 @@ dependencies: - cloudpickle - libpython - mkl-service>=2.3.0 -- m2w64-toolchain - numpy>=1.25.0 - pandas>=0.24.0 - pip From 9343436afead1f38343b7e7610c372d803f04f88 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Mon, 24 Feb 2025 13:47:20 +0100 Subject: [PATCH 11/18] Remove PolyaGamma pin and add to conda env --- .github/workflows/mypy.yml | 1 - .github/workflows/tests.yml | 4 ---- conda-envs/environment-dev.yml | 2 +- conda-envs/environment-test.yml | 1 + conda-envs/windows-environment-dev.yml | 1 + conda-envs/windows-environment-test.yml | 1 + tests/distributions/test_continuous.py | 4 +++- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index e6ea6826f8..4a2d072b9e 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -26,7 +26,6 @@ jobs: - name: Install-pymc and mypy dependencies run: | pip install -e . - pip install --pre -U polyagamma python --version - name: Run mypy run: | diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 268656f68b..814ef329aa 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -158,8 +158,6 @@ jobs: - name: Install-pymc run: | pip install -e . - # TODO: https://github.com/pymc-devs/pymc/issues/7417 - pip install --pre -U 'polyagamma<1.3.7' python --version micromamba list - name: Run tests @@ -210,7 +208,6 @@ jobs: - name: Install-pymc run: | pip install -e . - pip install --pre -U 'polyagamma<1.3.7' python --version micromamba list - name: Run tests @@ -363,7 +360,6 @@ jobs: - name: Install-pymc run: | pip install -e . - pip install --pre -U 'polyagamma<1.3.7' python --version micromamba list - name: Run tests diff --git a/conda-envs/environment-dev.yml b/conda-envs/environment-dev.yml index 465a74f584..081b0a9a33 100644 --- a/conda-envs/environment-dev.yml +++ b/conda-envs/environment-dev.yml @@ -26,6 +26,7 @@ dependencies: - myst-nb<=1.0.0 - numpydoc - pre-commit>=2.8.0 +- polyagamma - pytest-cov>=2.5 - pytest>=3.0 - rich>=13.7.1 @@ -35,7 +36,6 @@ dependencies: - sphinx>=1.5 - sphinxext-rediraffe - watermark -- polyagamma - sphinx-remove-toctrees - mypy=1.5.1 - types-cachetools diff --git a/conda-envs/environment-test.yml b/conda-envs/environment-test.yml index 92afc6a021..d0abd12016 100644 --- a/conda-envs/environment-test.yml +++ b/conda-envs/environment-test.yml @@ -13,6 +13,7 @@ dependencies: - numpy>=1.25.0 - pandas>=0.24.0 - pip +- polyagamma - pytensor>=2.26.2,<2.28 - python-graphviz - networkx diff --git a/conda-envs/windows-environment-dev.yml b/conda-envs/windows-environment-dev.yml index 337c48a827..26cbad150d 100644 --- a/conda-envs/windows-environment-dev.yml +++ b/conda-envs/windows-environment-dev.yml @@ -24,6 +24,7 @@ dependencies: - ipython>=7.16 - myst-nb<=1.0.0 - numpydoc +- polyagamma - pre-commit>=2.8.0 - pytest-cov>=2.5 - pytest>=3.0 diff --git a/conda-envs/windows-environment-test.yml b/conda-envs/windows-environment-test.yml index 48eef03e51..c749c08162 100644 --- a/conda-envs/windows-environment-test.yml +++ b/conda-envs/windows-environment-test.yml @@ -14,6 +14,7 @@ dependencies: - numpy>=1.25.0 - pandas>=0.24.0 - pip +- polyagamma - pytensor>=2.26.2,<2.28 - python-graphviz - networkx diff --git a/tests/distributions/test_continuous.py b/tests/distributions/test_continuous.py index cfdd0b3d60..b9ef2476da 100644 --- a/tests/distributions/test_continuous.py +++ b/tests/distributions/test_continuous.py @@ -2440,7 +2440,9 @@ def test_rng_different_shapes(self): ) class TestPolyaGamma(BaseTestDistributionRandom): def polyagamma_rng_fn(self, size, h, z, rng): - return random_polyagamma(h, z, size=size, random_state=rng._bit_generator) + # Polyagamma returns different values if inputs have explicit broadcasted dims + # Which PyTensor RVs always do when size is not None. + return random_polyagamma(np.atleast_1d(h), np.atleast_1d(z), size=size, random_state=rng) pymc_dist = pm.PolyaGamma pymc_dist_params = {"h": 1.0, "z": 0.0} From 0035ab720ca7a1672fc7dde482fd06db13ace0d4 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Mon, 24 Feb 2025 15:30:24 +0100 Subject: [PATCH 12/18] Replace arange by range for iteration --- pymc/backends/mcbackend.py | 2 +- pymc/distributions/shape_utils.py | 8 +++----- pymc/sampling/forward.py | 2 +- tests/distributions/test_multivariate.py | 8 ++++---- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/pymc/backends/mcbackend.py b/pymc/backends/mcbackend.py index 9bc8ff1043..d02a6dbebb 100644 --- a/pymc/backends/mcbackend.py +++ b/pymc/backends/mcbackend.py @@ -181,7 +181,7 @@ def get_sampler_stats( def _slice(self, idx: slice) -> "IBaseTrace": # Get the integer indices start, stop, step = idx.indices(len(self)) - indices = np.arange(start, stop, step) + indices = tuple(range(start, stop, step)) # Create a NumPyChain for the sliced data nchain = mcb.backends.numpy.NumPyChain( diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index 7dd0d94414..5d83ded6cb 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -378,9 +378,7 @@ def get_support_shape( raise ValueError( f"Number of shape dimensions is too small for ndim_supp of {ndim_supp}" ) - inferred_support_shape = [ - shape[i] - support_shape_offset[i] for i in np.arange(-ndim_supp, 0) - ] + inferred_support_shape = [shape[i] - support_shape_offset[i] for i in range(-ndim_supp, 0)] if inferred_support_shape is None and dims is not None: dims = convert_dims(dims) @@ -389,7 +387,7 @@ def get_support_shape( raise ValueError(f"Number of dims is too small for ndim_supp of {ndim_supp}") model = modelcontext(None) inferred_support_shape = [ - model.dim_lengths[dims[i]] - support_shape_offset[i] for i in np.arange(-ndim_supp, 0) + model.dim_lengths[dims[i]] - support_shape_offset[i] for i in range(-ndim_supp, 0) ] if inferred_support_shape is None and observed is not None: @@ -399,7 +397,7 @@ def get_support_shape( f"Number of observed dimensions is too small for ndim_supp of {ndim_supp}" ) inferred_support_shape = [ - observed.shape[i] - support_shape_offset[i] for i in np.arange(-ndim_supp, 0) + observed.shape[i] - support_shape_offset[i] for i in range(-ndim_supp, 0) ] if inferred_support_shape is None: diff --git a/pymc/sampling/forward.py b/pymc/sampling/forward.py index 19957e0540..5f3c983b58 100644 --- a/pymc/sampling/forward.py +++ b/pymc/sampling/forward.py @@ -876,7 +876,7 @@ def sample_posterior_predictive( try: with progress: task = progress.add_task("Sampling ...", completed=0, total=samples) - for idx in np.arange(samples): + for idx in range(samples): if nchain > 1: # the trace object will either be a MultiTrace (and have _straces)... if hasattr(_trace, "_straces"): diff --git a/tests/distributions/test_multivariate.py b/tests/distributions/test_multivariate.py index 39b6c562e1..8cc2af2659 100644 --- a/tests/distributions/test_multivariate.py +++ b/tests/distributions/test_multivariate.py @@ -1564,8 +1564,8 @@ def test_zsn_dims(self, dims, n_zerosum_axes): ) ndim_supp = v.owner.op.ndim_supp - n_zerosum_axes = np.arange(-ndim_supp, 0) - nonzero_axes = np.arange(v.ndim - ndim_supp) + n_zerosum_axes = tuple(range(-ndim_supp, 0)) + nonzero_axes = tuple(range(v.ndim - ndim_supp)) for samples in [ s.posterior.v, random_samples, @@ -1595,8 +1595,8 @@ def test_zsn_shape(self, n_zerosum_axes): ) ndim_supp = v.owner.op.ndim_supp - n_zerosum_axes = np.arange(-ndim_supp, 0) - nonzero_axes = np.arange(v.ndim - ndim_supp) + n_zerosum_axes = tuple(range(-ndim_supp, 0)) + nonzero_axes = tuple(range(v.ndim - ndim_supp)) for samples in [ s.posterior.v, random_samples, From f374411d015ae60d443bce0f129f6e8537bc21b2 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Mon, 24 Feb 2025 15:32:38 +0100 Subject: [PATCH 13/18] Fix __bool__ usages with PyTensor variables --- pymc/distributions/multivariate.py | 5 ++--- pymc/logprob/transforms.py | 4 +++- pymc/logprob/utils.py | 5 ++--- tests/distributions/test_custom.py | 2 +- tests/distributions/test_multivariate.py | 2 +- tests/logprob/test_basic.py | 2 +- tests/logprob/test_order.py | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 32f9e30f06..9ff56027a8 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -2057,14 +2057,12 @@ class KroneckerNormal(Continuous): rv_op = KroneckerNormalRV.rv_op @classmethod - def dist(cls, mu, covs=None, chols=None, evds=None, sigma=None, *args, **kwargs): + def dist(cls, mu, covs=None, chols=None, evds=None, sigma=0.0, *args, **kwargs): if len([i for i in [covs, chols, evds] if i is not None]) != 1: raise ValueError( "Incompatible parameterization. Specify exactly one of covs, chols, or evds." ) - sigma = sigma if sigma else 0 - if chols is not None: covs = [chol.dot(chol.T) for chol in chols] elif evds is not None: @@ -2076,6 +2074,7 @@ def dist(cls, mu, covs=None, chols=None, evds=None, sigma=None, *args, **kwargs) covs.append(cov_i) mu = pt.as_tensor_variable(mu) + sigma = pt.as_tensor_variable(sigma) return super().dist([mu, sigma, *covs], **kwargs) diff --git a/pymc/logprob/transforms.py b/pymc/logprob/transforms.py index 8446874996..8b5eac7b16 100644 --- a/pymc/logprob/transforms.py +++ b/pymc/logprob/transforms.py @@ -470,7 +470,9 @@ def find_measurable_transforms(fgraph: FunctionGraph, node: Apply) -> list[Varia # Do not apply rewrite to discrete variables except for their addition and negation if measurable_input.type.dtype.startswith("int"): - if not (find_negated_var(measurable_output) or isinstance(node.op.scalar_op, Add)): + if not ( + find_negated_var(measurable_output) is not None or isinstance(node.op.scalar_op, Add) + ): return None # Do not allow rewrite if output is cast to a float, because we don't have meta-info on the type of the MeasurableVariable if not measurable_output.type.dtype.startswith("int"): diff --git a/pymc/logprob/utils.py b/pymc/logprob/utils.py index e1fdc903ee..a028341032 100644 --- a/pymc/logprob/utils.py +++ b/pymc/logprob/utils.py @@ -186,13 +186,12 @@ def expand_fn(var): return [] if any( - ancestor_var - for ancestor_var in walk(inputs, expand=expand_fn, bfs=False) - if ( + ( ancestor_var.owner and isinstance(ancestor_var.owner.op, MeasurableOp) and not isinstance(ancestor_var.owner.op, ValuedRV) ) + for ancestor_var in walk(inputs, expand=expand_fn, bfs=False) ): return True return False diff --git a/tests/distributions/test_custom.py b/tests/distributions/test_custom.py index a076eef7b6..dba68c26e6 100644 --- a/tests/distributions/test_custom.py +++ b/tests/distributions/test_custom.py @@ -604,7 +604,7 @@ def test_symbolic_dist(self): def dist(size): return Truncated.dist(Beta.dist(1, 1, size=size), lower=0.1, upper=0.9) - assert CustomDist.dist(dist=dist) + CustomDist.dist(dist=dist) def test_nested_custom_dist(self): """Test we can create CustomDist that creates another CustomDist""" diff --git a/tests/distributions/test_multivariate.py b/tests/distributions/test_multivariate.py index 8cc2af2659..45798ecbde 100644 --- a/tests/distributions/test_multivariate.py +++ b/tests/distributions/test_multivariate.py @@ -421,7 +421,7 @@ def test_matrixnormal(self, n): @pytest.mark.parametrize("n", [2, 3]) @pytest.mark.parametrize("m", [3]) - @pytest.mark.parametrize("sigma", [None, 1]) + @pytest.mark.parametrize("sigma", [0, 1]) def test_kroneckernormal(self, n, m, sigma): np.random.seed(5) N = n * m diff --git a/tests/logprob/test_basic.py b/tests/logprob/test_basic.py index 64cbf63b3e..f6014d78a3 100644 --- a/tests/logprob/test_basic.py +++ b/tests/logprob/test_basic.py @@ -382,7 +382,7 @@ def test_warn_random_found_probability_inference(func, scipy_func, test_value): with pytest.warns( UserWarning, match="RandomVariables {input} were found in the derived graph" ): - assert func(rv, 0.0) + func(rv, 0.0) res = func(rv, 0.0, warn_rvs=False) # This is the problem we are warning about, as now we can no longer identify the original rv in the graph diff --git a/tests/logprob/test_order.py b/tests/logprob/test_order.py index 1b1fbac636..32c5d3c001 100644 --- a/tests/logprob/test_order.py +++ b/tests/logprob/test_order.py @@ -291,4 +291,4 @@ def test_non_measurable_max_grad(): joint_logp = pt.sum([term.sum() for term in logp_terms]) # Test that calling gradient does not raise a NotImplementedError - assert pt.grad(joint_logp, x_vv) + pt.grad(joint_logp, x_vv) From f9e0f8c7eb20bd10f108eb96368cb0a2eb5251f8 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Mon, 24 Feb 2025 15:34:01 +0100 Subject: [PATCH 14/18] Chaining classmethod and property no longer supported in Python 3.13 --- pymc/distributions/distribution.py | 44 ++++++++++-------------------- 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index 577d9245a6..5ec5df4671 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -192,20 +192,19 @@ def support_point(op, rv, *dist_params): return new_cls -class _class_or_instancemethod(classmethod): - """Allow a method to be called both as a classmethod and an instancemethod. +class _class_or_instance_property(property): + """Allow a property to be accessed from a class or an instance. - Priority is given to the instancemethod. + Priority is given to the instance. This is used to allow extracting information from the signature of a SymbolicRandomVariable - which may be provided either as a class attribute or as an instance attribute. + which may be available early as a class attribute or only later as an instance attribute. - Adapted from https://stackoverflow.com/a/28238047 + Adapted from https://stackoverflow.com/a/13624858 """ - def __get__(self, instance, type_): - descr_get = super().__get__ if instance is None else self.__func__.__get__ - return descr_get(instance, type_) + def __get__(self, owner_self, owner_cls): + return self.fget(owner_self if owner_self is not None else owner_cls) class SymbolicRandomVariable(MeasurableOp, OpFromGraph): @@ -241,8 +240,7 @@ class SymbolicRandomVariable(MeasurableOp, OpFromGraph): _print_name: tuple[str, str] = ("Unknown", "\\operatorname{Unknown}") """Tuple of (name, latex name) used for for pretty-printing variables of this type""" - @_class_or_instancemethod - @property + @_class_or_instance_property def signature(cls_or_self) -> None | str: # Convert "expanded" signature into "vanilla" signature that has no rng and size tokens extended_signature = cls_or_self.extended_signature @@ -257,40 +255,28 @@ def signature(cls_or_self) -> None | str: return signature - @_class_or_instancemethod - @property + @_class_or_instance_property def ndims_params(cls_or_self) -> Sequence[int] | None: - """Number of core dimensions of the distribution's parameters.""" + """Return number of core dimensions of the distribution's parameters.""" signature = cls_or_self.signature if signature is None: return None inputs_signature, _ = _parse_gufunc_signature(signature) return [len(sig) for sig in inputs_signature] - @_class_or_instancemethod - @property + @_class_or_instance_property def ndim_supp(cls_or_self) -> int | None: - """Number of support dimensions of the RandomVariable. + """Return number of support dimensions of the RandomVariable. (0 for scalar, 1 for vector, ...) """ signature = cls_or_self.signature if signature is None: - return None + return getattr(cls_or_self, "_ndim_supp", None) _, outputs_params_signature = _parse_gufunc_signature(signature) return max(len(out_sig) for out_sig in outputs_params_signature) - @_class_or_instancemethod - def _parse_extended_signature(cls_or_self) -> tuple[tuple[str, ...], tuple[str, ...]] | None: - extended_signature = cls_or_self.extended_signature - if extended_signature is None: - return None - - fake_signature = extended_signature.replace("[rng]", "(rng)").replace("[size]", "(size)") - return _parse_gufunc_signature(fake_signature) - - @_class_or_instancemethod - @property + @_class_or_instance_property def default_output(cls_or_self) -> int | None: extended_signature = cls_or_self.extended_signature if extended_signature is None: @@ -374,7 +360,7 @@ def __init__( if "ndim_supp" in kwargs: # For backwards compatibility we allow passing ndim_supp without signature # This is the only variable that PyMC absolutely needs to work with SymbolicRandomVariables - self.ndim_supp = kwargs.pop("ndim_supp") + self._ndim_supp = kwargs.pop("ndim_supp") if self.ndim_supp is None: raise ValueError("ndim_supp or signature must be provided") From 71630320eb0bbe1577f5e26d0d292be12fbeaec9 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Mon, 24 Feb 2025 15:36:25 +0100 Subject: [PATCH 15/18] More precise type-hints --- pymc/distributions/shape_utils.py | 4 ++-- pymc/gp/cov.py | 1 + pymc/model/core.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index 5d83ded6cb..98c743b70e 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -369,7 +369,7 @@ def get_support_shape( support_shape_offset = [0] * ndim_supp elif isinstance(support_shape_offset, int): support_shape_offset = [support_shape_offset] * ndim_supp - inferred_support_shape: Sequence[int | np.ndarray | Variable] | None = None + inferred_support_shape: Sequence[int | np.ndarray | TensorVariable] | None = None if shape is not None: shape = to_tuple(shape) @@ -411,7 +411,7 @@ def get_support_shape( # There were two sources of support_shape, make sure they are consistent inferred_support_shape = [ cast( - Variable, + TensorVariable, Assert(msg="support_shape does not match respective shape dimension")( inferred, pt.eq(inferred, explicit) ), diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 16e51cdde9..787a1ad925 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -164,6 +164,7 @@ class Covariance(BaseCovariance): def __init__(self, input_dim: int, active_dims: IntSequence | None = None): self.input_dim = input_dim + self.active_dims: np.ndarray[Any, np.dtype[np.int_]] if active_dims is None: self.active_dims = np.arange(input_dim) else: diff --git a/pymc/model/core.py b/pymc/model/core.py index cef6cd6d1b..5a7b2651cf 100644 --- a/pymc/model/core.py +++ b/pymc/model/core.py @@ -895,7 +895,7 @@ def coords(self) -> dict[str, tuple | None]: return self._coords @property - def dim_lengths(self) -> dict[str, Variable]: + def dim_lengths(self) -> dict[str, TensorVariable]: """The symbolic lengths of dimensions in the model. The values are typically instances of ``TensorVariable`` or ``ScalarSharedVariable``. From 2832e982e4f84fd6f21e13ef30f37c88a46f36f3 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Mon, 24 Feb 2025 15:37:46 +0100 Subject: [PATCH 16/18] Mypy doesn't believe you can call max on numpy arrays --- pymc/gp/cov.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 787a1ad925..e275e8de52 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -170,7 +170,7 @@ def __init__(self, input_dim: int, active_dims: IntSequence | None = None): else: self.active_dims = np.asarray(active_dims, int) - if max(self.active_dims) > self.input_dim: + if self.active_dims.max() > self.input_dim: raise ValueError("Values in `active_dims` can't be larger than `input_dim`.") @property From 355b475b0273df4251bb11d029155845f44b446f Mon Sep 17 00:00:00 2001 From: ricardoV94 Date: Sun, 23 Feb 2025 21:18:43 +0100 Subject: [PATCH 17/18] Bump PyTensor and support Numpy>2.0 and Python=3.13 --- .github/workflows/tests.yml | 8 ++++---- conda-envs/environment-dev.yml | 4 ++-- conda-envs/environment-docs.yml | 2 +- conda-envs/environment-jax.yml | 4 ++-- conda-envs/environment-test.yml | 4 ++-- conda-envs/windows-environment-dev.yml | 4 ++-- conda-envs/windows-environment-test.yml | 4 ++-- requirements-dev.txt | 4 ++-- requirements.txt | 2 +- setup.py | 1 + tests/distributions/test_continuous.py | 2 -- tests/distributions/test_multivariate.py | 23 +++++++++++++++++------ 12 files changed, 36 insertions(+), 26 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 814ef329aa..97e50bef2a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -56,7 +56,7 @@ jobs: matrix: os: [ubuntu-20.04] floatx: [float64] - python-version: ["3.12"] + python-version: ["3.13"] test-subset: - | tests/test_util.py @@ -230,7 +230,7 @@ jobs: matrix: os: [macos-latest] floatx: [float64] - python-version: ["3.12"] + python-version: ["3.13"] test-subset: - | tests/sampling/test_parallel.py @@ -288,7 +288,7 @@ jobs: matrix: os: [ubuntu-20.04] floatx: [float64] - python-version: ["3.12"] + python-version: ["3.13"] test-subset: - tests/sampling/test_jax.py tests/sampling/test_mcmc_external.py fail-fast: false @@ -334,7 +334,7 @@ jobs: matrix: os: [windows-latest] floatx: [float32] - python-version: ["3.12"] + python-version: ["3.13"] test-subset: - tests/sampling/test_mcmc.py tests/ode/test_ode.py tests/ode/test_utils.py tests/distributions/test_transform.py fail-fast: false diff --git a/conda-envs/environment-dev.yml b/conda-envs/environment-dev.yml index 081b0a9a33..b2e8f17849 100644 --- a/conda-envs/environment-dev.yml +++ b/conda-envs/environment-dev.yml @@ -12,7 +12,7 @@ dependencies: - numpy>=1.25.0 - pandas>=0.24.0 - pip -- pytensor>=2.26.2,<2.28 +- pytensor>=2.28.1,<2.29 - python-graphviz - networkx - scipy>=1.4.1 @@ -37,7 +37,7 @@ dependencies: - sphinxext-rediraffe - watermark - sphinx-remove-toctrees -- mypy=1.5.1 +- mypy=1.15.0 - types-cachetools - pip: - git+https://github.com/pymc-devs/pymc-sphinx-theme diff --git a/conda-envs/environment-docs.yml b/conda-envs/environment-docs.yml index fd035676c3..823f6563f6 100644 --- a/conda-envs/environment-docs.yml +++ b/conda-envs/environment-docs.yml @@ -11,7 +11,7 @@ dependencies: - numpy>=1.25.0 - pandas>=0.24.0 - pip -- pytensor>=2.26.2,<2.28 +- pytensor>=2.28.1,<2.29 - python-graphviz - rich>=13.7.1 - scipy>=1.4.1 diff --git a/conda-envs/environment-jax.yml b/conda-envs/environment-jax.yml index 5d9c98a720..ea630e806f 100644 --- a/conda-envs/environment-jax.yml +++ b/conda-envs/environment-jax.yml @@ -20,7 +20,7 @@ dependencies: - numpyro>=0.8.0 - pandas>=0.24.0 - pip -- pytensor>=2.26.2,<2.28 +- pytensor>=2.28.1,<2.29 - python-graphviz - networkx - rich>=13.7.1 @@ -33,7 +33,7 @@ dependencies: - pre-commit>=2.8.0 - pytest-cov>=2.5 - pytest>=3.0 -- mypy=1.5.1 +- mypy=1.15.0 - types-cachetools - pip: - numdifftools>=0.9.40 diff --git a/conda-envs/environment-test.yml b/conda-envs/environment-test.yml index d0abd12016..37729d4d1a 100644 --- a/conda-envs/environment-test.yml +++ b/conda-envs/environment-test.yml @@ -14,7 +14,7 @@ dependencies: - pandas>=0.24.0 - pip - polyagamma -- pytensor>=2.26.2,<2.28 +- pytensor>=2.28.1,<2.29 - python-graphviz - networkx - rich>=13.7.1 @@ -27,7 +27,7 @@ dependencies: - pre-commit>=2.8.0 - pytest-cov>=2.5 - pytest>=3.0 -- mypy=1.5.1 +- mypy=1.15.0 - types-cachetools - pip: - numdifftools>=0.9.40 diff --git a/conda-envs/windows-environment-dev.yml b/conda-envs/windows-environment-dev.yml index 26cbad150d..1c4d8f04d5 100644 --- a/conda-envs/windows-environment-dev.yml +++ b/conda-envs/windows-environment-dev.yml @@ -12,7 +12,7 @@ dependencies: - numpy>=1.25.0 - pandas>=0.24.0 - pip -- pytensor>=2.26.2,<2.28 +- pytensor>=2.28.1,<2.29 - python-graphviz - networkx - rich>=13.7.1 @@ -35,7 +35,7 @@ dependencies: - sphinx>=1.5 - watermark - sphinx-remove-toctrees -- mypy=1.5.1 +- mypy=1.15.0 - types-cachetools - pip: - git+https://github.com/pymc-devs/pymc-sphinx-theme diff --git a/conda-envs/windows-environment-test.yml b/conda-envs/windows-environment-test.yml index c749c08162..63f5f8ee4e 100644 --- a/conda-envs/windows-environment-test.yml +++ b/conda-envs/windows-environment-test.yml @@ -15,7 +15,7 @@ dependencies: - pandas>=0.24.0 - pip - polyagamma -- pytensor>=2.26.2,<2.28 +- pytensor>=2.28.1,<2.29 - python-graphviz - networkx - rich>=13.7.1 @@ -28,7 +28,7 @@ dependencies: - pre-commit>=2.8.0 - pytest-cov>=2.5 - pytest>=3.0 -- mypy=1.5.1 +- mypy=1.15.0 - types-cachetools - pip: - numdifftools>=0.9.40 diff --git a/requirements-dev.txt b/requirements-dev.txt index 2ae0a3f0b7..c868dbd52e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,7 +8,7 @@ git+https://github.com/pymc-devs/pymc-sphinx-theme ipython>=7.16 jupyter-sphinx mcbackend>=0.4.0 -mypy==1.5.1 +mypy==1.15.0 myst-nb<=1.0.0 numdifftools>=0.9.40 numpy>=1.25.0 @@ -16,7 +16,7 @@ numpydoc pandas>=0.24.0 polyagamma pre-commit>=2.8.0 -pytensor>=2.26.2,<2.28 +pytensor>=2.28.1,<2.29 pytest-cov>=2.5 pytest>=3.0 rich>=13.7.1 diff --git a/requirements.txt b/requirements.txt index c1f82bb4dd..fb0c131ce7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ cachetools>=4.2.1 cloudpickle numpy>=1.25.0 pandas>=0.24.0 -pytensor>=2.26.1,<2.28 +pytensor>=2.28.1,<2.29 rich>=13.7.1 scipy>=1.4.1 threadpoolctl>=3.1.0,<4.0.0 diff --git a/setup.py b/setup.py index 99bcadd86a..30e50981c3 100755 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "License :: OSI Approved :: Apache Software License", "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering", diff --git a/tests/distributions/test_continuous.py b/tests/distributions/test_continuous.py index b9ef2476da..9c108d2035 100644 --- a/tests/distributions/test_continuous.py +++ b/tests/distributions/test_continuous.py @@ -2338,10 +2338,8 @@ class TestInverseGamma(BaseTestDistributionRandom): pymc_dist_params = {"alpha": 2.0, "beta": 5.0} expected_rv_op_params = {"alpha": 2.0, "beta": 5.0} reference_dist_params = {"a": 2.0, "scale": 5.0} - reference_dist = seeded_scipy_distribution_builder("invgamma") checks_to_run = [ "check_pymc_params_match_rv_op", - "check_pymc_draws_match_reference", ] diff --git a/tests/distributions/test_multivariate.py b/tests/distributions/test_multivariate.py index 45798ecbde..2694230c32 100644 --- a/tests/distributions/test_multivariate.py +++ b/tests/distributions/test_multivariate.py @@ -27,6 +27,7 @@ from pytensor.tensor import TensorVariable from pytensor.tensor.blockwise import Blockwise from pytensor.tensor.nlinalg import MatrixInverse +from pytensor.tensor.random.basic import multivariate_normal from pytensor.tensor.random.utils import broadcast_params from pytensor.tensor.slinalg import Cholesky @@ -1392,6 +1393,11 @@ def test_dirichlet_multinomial_support_point(self, a, n, size, expected): class TestMvNormalCov(BaseTestDistributionRandom): + def mvnormal_rng_fn(self, size, mean, cov, rng): + if isinstance(size, int): + size = (size,) + return multivariate_normal.rng_fn(rng, mean, cov, size=size) + pymc_dist = pm.MvNormal pymc_dist_params = { "mu": np.array([1.0, 2.0]), @@ -1407,7 +1413,8 @@ class TestMvNormalCov(BaseTestDistributionRandom): "mean": np.array([1.0, 2.0]), "cov": np.array([[2.0, 0.0], [0.0, 3.5]]), } - reference_dist = seeded_numpy_distribution_builder("multivariate_normal") + reference_dist = lambda self: ft.partial(self.mvnormal_rng_fn, rng=self.get_random_state()) # noqa: E731 + checks_to_run = [ "check_pymc_params_match_rv_op", "check_pymc_draws_match_reference", @@ -1531,12 +1538,12 @@ class TestZeroSumNormal: def assert_zerosum_axes(self, random_samples, axes_to_check, check_zerosum_axes=True): if check_zerosum_axes: for ax in axes_to_check: - assert np.isclose(random_samples.mean(axis=ax), 0).all(), ( + assert np.allclose(random_samples.mean(axis=ax), 0), ( f"{ax} is a zerosum_axis but is not summing to 0 across all samples." ) else: for ax in axes_to_check: - assert not np.isclose(random_samples.mean(axis=ax), 0).all(), ( + assert not np.allclose(random_samples.mean(axis=ax), 0), ( f"{ax} is not a zerosum_axis, but is nonetheless summing to 0 across all samples." ) @@ -1775,7 +1782,9 @@ def test_batched_sigma(self): class TestMvStudentTCov(BaseTestDistributionRandom): def mvstudentt_rng_fn(self, size, nu, mu, scale, rng): - mv_samples = rng.multivariate_normal(np.zeros_like(mu), scale, size=size) + if isinstance(size, int): + size = (size,) + mv_samples = multivariate_normal.rng_fn(rng, np.zeros_like(mu), scale, size=size) chi2_samples = rng.chisquare(nu, size=size) return (mv_samples / np.sqrt(chi2_samples[:, None] / nu)) + mu @@ -2111,9 +2120,11 @@ def check_random_variable_prior(self): class TestKroneckerNormal(BaseTestDistributionRandom): def kronecker_rng_fn(self, size, mu, covs=None, sigma=None, rng=None): - cov = pm.math.kronecker(covs[0], covs[1]).eval() + if isinstance(size, int): + size = (size,) + cov = np.kron(covs[0], covs[1]) cov += sigma**2 * np.identity(cov.shape[0]) - return st.multivariate_normal.rvs(mean=mu, cov=cov, size=size, random_state=rng) + return multivariate_normal.rng_fn(rng, mean=mu, cov=cov, size=size) pymc_dist = pm.KroneckerNormal From 62335ac8501606107f410edff5221298ace3356b Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Wed, 26 Feb 2025 13:17:59 +0100 Subject: [PATCH 18/18] Fix bug when reusing jax logp for initial point generation --- pymc/sampling/jax.py | 5 ++++- tests/sampling/test_jax.py | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/pymc/sampling/jax.py b/pymc/sampling/jax.py index 1759eb85b8..193554380c 100644 --- a/pymc/sampling/jax.py +++ b/pymc/sampling/jax.py @@ -240,7 +240,10 @@ def eval_logp_initial_point(point: dict[str, np.ndarray]) -> jax.Array: Wraps jaxified logp function to accept a dict of {model_variable: np.array} key:value pairs. """ - return logp_fn(point.values()) + # Because logp_fn is not jitted, we need to convert inputs to jax arrays, + # or some methods that are only available for jax arrays will fail + # such as x.at[indices].set(y) + return logp_fn([jax.numpy.asarray(v) for v in point.values()]) initial_points = _init_jitter( model, diff --git a/tests/sampling/test_jax.py b/tests/sampling/test_jax.py index ddec60e539..68092d9b92 100644 --- a/tests/sampling/test_jax.py +++ b/tests/sampling/test_jax.py @@ -352,6 +352,27 @@ def test_get_batched_jittered_initial_points(): assert np.all(ips[0][0] != ips[0][1]) +def test_get_batched_jittered_initial_points_set_subtensor(): + """Regression bug for issue described in + https://discourse.pymc.io/t/attributeerror-numpy-ndarray-object-has-no-attribute-at-when-sampling-lkj-cholesky-covariance-priors-for-multivariate-normal-models-example-with-numpyro-or-blackjax/16598/3 + + Which was caused by passing numpy arrays to a non-jitted logp function + """ + with pm.Model() as model: + # Set operation will use `x.at[1].set(100)` which is only available in JAX + x = pm.Normal("x", mu=[-100, -100]) + mu_y = x[1].set(100) + y = pm.Normal("y", mu=mu_y) + + logp_fn = get_jaxified_logp(model) + [x_ips, y_ips] = _get_batched_jittered_initial_points( + model, chains=3, initvals=None, logp_fn=logp_fn, jitter=True, random_seed=0 + ) + assert np.all(x_ips < -10) + assert np.all(y_ips[..., 0] < -10) + assert np.all(y_ips[..., 1] > 10) + + @pytest.mark.parametrize( "sampler", [