Commit a303b9b

Merge branch 'master' into sunxd/move_ad

2 parents 67860e6 + cf647b1
File tree

8 files changed (+70, -85 lines)

Project.toml
Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 name = "Turing"
 uuid = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
-version = "0.30.2"
+version = "0.30.4"

 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"

src/mcmc/Inference.jl
Lines changed: 17 additions & 30 deletions

@@ -127,6 +127,23 @@ DynamicPPL.unflatten(vi::SimpleVarInfo, θ::NamedTuple) = SimpleVarInfo(θ, vi.l
 # Algorithm for sampling from the prior
 struct Prior <: InferenceAlgorithm end

+function AbstractMCMC.step(
+    rng::Random.AbstractRNG,
+    model::DynamicPPL.Model,
+    sampler::DynamicPPL.Sampler{<:Prior},
+    state=nothing;
+    kwargs...,
+)
+    vi = last(DynamicPPL.evaluate!!(
+        model,
+        VarInfo(),
+        SamplingContext(
+            rng, DynamicPPL.SampleFromPrior(), DynamicPPL.PriorContext()
+        )
+    ))
+    return vi, nothing
+end
+
 """
     mh_accept(logp_current::Real, logp_proposal::Real, log_proposal_ratio::Real)

@@ -242,36 +259,6 @@ function AbstractMCMC.sample(
         chain_type=chain_type, progress=progress, kwargs...)
 end

-function AbstractMCMC.sample(
-    rng::AbstractRNG,
-    model::AbstractModel,
-    alg::Prior,
-    ensemble::AbstractMCMC.AbstractMCMCEnsemble,
-    N::Integer,
-    n_chains::Integer;
-    chain_type=DynamicPPL.default_chain_type(alg),
-    progress=PROGRESS[],
-    kwargs...
-)
-    return AbstractMCMC.sample(rng, model, SampleFromPrior(), ensemble, N, n_chains;
-                               chain_type, progress, kwargs...)
-end
-
-function AbstractMCMC.sample(
-    rng::AbstractRNG,
-    model::AbstractModel,
-    alg::Prior,
-    N::Integer;
-    chain_type=DynamicPPL.default_chain_type(alg),
-    resume_from=nothing,
-    initial_state=DynamicPPL.loadstate(resume_from),
-    progress=PROGRESS[],
-    kwargs...
-)
-    return AbstractMCMC.mcmcsample(rng, model, SampleFromPrior(), N;
-                                   chain_type, initial_state, progress, kwargs...)
-end
-
 ##########################
 # Chain making utilities #
 ##########################
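
Note: with `Prior` now implementing the generic `AbstractMCMC.step` interface, the two deleted `sample` overloads become redundant; the standard AbstractMCMC sampling loop handles ensembles, progress, and chain construction. A minimal usage sketch (the toy model is illustrative, not taken from the diff):

using Turing

@model function demo()
    s ~ InverseGamma(2, 3)
    m ~ Normal(0, sqrt(s))
end

# Each call to the new `AbstractMCMC.step` method for `Sampler{<:Prior}`
# evaluates the model under `PriorContext` and returns an independent draw.
chain = sample(demo(), Prior(), 100)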

src/mcmc/emcee.jl
Lines changed: 1 addition & 1 deletion

@@ -58,7 +58,7 @@ function AbstractMCMC.step(
         vis[1],
         map(vis) do vi
             vi = DynamicPPL.link!!(vi, spl, model)
-            AMH.Transition(vi[spl], getlogp(vi))
+            AMH.Transition(vi[spl], getlogp(vi), false)
         end
     )

src/mcmc/mh.jl
Lines changed: 2 additions & 2 deletions

@@ -386,7 +386,7 @@ function propose!!(

     # Create a sampler and the previous transition.
     mh_sampler = AMH.MetropolisHastings(dt)
-    prev_trans = AMH.Transition(vt, getlogp(vi))
+    prev_trans = AMH.Transition(vt, getlogp(vi), false)

     # Make a new transition.
     densitymodel = AMH.DensityModel(

@@ -421,7 +421,7 @@ function propose!!(

     # Create a sampler and the previous transition.
     mh_sampler = AMH.MetropolisHastings(spl.alg.proposals)
-    prev_trans = AMH.Transition(vals, getlogp(vi))
+    prev_trans = AMH.Transition(vals, getlogp(vi), false)

     # Make a new transition.
     densitymodel = AMH.DensityModel(
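
Note: both this file and src/mcmc/emcee.jl now pass a third positional argument to `AMH.Transition`. A hedged sketch of what that argument presumably is, assuming AdvancedMH's `Transition` gained an acceptance flag as its third field (the diff itself does not show the struct definition):

using AdvancedMH
const AMH = AdvancedMH

# Assumption: the three fields are the parameter values, the log density,
# and an acceptance flag; `false` here just seeds the sampler with the
# current state rather than marking it as a freshly accepted proposal.
prev = AMH.Transition([0.0, 1.0], -1.23, false)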

src/optimisation/Optimisation.jl
Lines changed: 7 additions & 7 deletions

@@ -283,17 +283,17 @@ function optim_function(
     model::Model,
     estimator::Union{MLE, MAP};
     constrained::Bool=true,
-    autoad::Union{Nothing, AbstractADType}=NoAD(),
+    adtype::Union{Nothing, AbstractADType}=NoAD(),
 )
-    if autoad === nothing
-        Base.depwarn("the use of `autoad=nothing` is deprecated, please use `autoad=SciMLBase.NoAD()`", :optim_function)
+    if adtype === nothing
+        Base.depwarn("the use of `adtype=nothing` is deprecated, please use `adtype=SciMLBase.NoAD()`", :optim_function)
     end

     obj, init, t = optim_objective(model, estimator; constrained=constrained)

     l(x, _) = obj(x)
-    f = if autoad isa AbstractADType && autoad !== NoAD()
-        OptimizationFunction(l, autoad)
+    f = if adtype isa AbstractADType && adtype !== NoAD()
+        OptimizationFunction(l, adtype)
     else
         OptimizationFunction(
             l;

@@ -310,10 +310,10 @@ function optim_problem(
     estimator::Union{MAP, MLE};
     constrained::Bool=true,
     init_theta=nothing,
-    autoad::Union{Nothing, AbstractADType}=NoAD(),
+    adtype::Union{Nothing, AbstractADType}=NoAD(),
     kwargs...,
 )
-    f, init, transform = optim_function(model, estimator; constrained=constrained, autoad=autoad)
+    f, init, transform = optim_function(model, estimator; constrained=constrained, adtype=adtype)

     u0 = init_theta === nothing ? init() : init(init_theta)
     prob = OptimizationProblem(f, u0; kwargs...)
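
Note: callers of these helpers now select the AD backend with `adtype` instead of `autoad`. A hedged usage sketch; the `Turing.Optimisation` module path is inferred from the file location, `AutoForwardDiff` from ADTypes.jl is only an example backend, and whether Optimization.jl must be loaded separately is not shown in the diff:

using Turing, Optimization, ADTypes

@model function gdemo(x)
    s ~ InverseGamma(2, 3)
    m ~ Normal(0, sqrt(s))
    x .~ Normal(m, sqrt(s))
end

model = gdemo([1.5, 2.0])

# The keyword is now `adtype` (previously `autoad`); passing an
# `AbstractADType` builds an `OptimizationFunction` with that backend.
f, init, transform = Turing.Optimisation.optim_function(
    model, Turing.Optimisation.MLE();
    adtype=ADTypes.AutoForwardDiff(),
)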

src/variational/advi.jl
Lines changed: 22 additions & 44 deletions

@@ -1,45 +1,17 @@
-# TODO(torfjelde): Find a better solution.
-struct Vec{N,B} <: Bijectors.Bijector
-    b::B
-    size::NTuple{N, Int}
-end
-
-Bijectors.inverse(f::Vec) = Vec(Bijectors.inverse(f.b), f.size)
-
-Bijectors.output_length(f::Vec, sz) = Bijectors.output_length(f.b, sz)
-Bijectors.output_length(f::Vec, n::Int) = Bijectors.output_length(f.b, n)
-
-function Bijectors.with_logabsdet_jacobian(f::Vec, x)
-    return Bijectors.transform(f, x), Bijectors.logabsdetjac(f, x)
-end
-
-function Bijectors.transform(f::Vec, x::AbstractVector)
-    # Reshape into shape compatible with wrapped bijector and then `vec` again.
-    return vec(f.b(reshape(x, f.size)))
-end
-
-function Bijectors.transform(f::Vec{N,<:Bijectors.Inverse}, x::AbstractVector) where N
-    # Reshape into shape compatible with original (forward) bijector and then `vec` again.
-    return vec(f.b(reshape(x, Bijectors.output_length(f.b.orig, prod(f.size)))))
-end
-
-function Bijectors.transform(f::Vec, x::AbstractMatrix)
-    # At the moment we do batching for higher-than-1-dim spaces by simply using
-    # lists of inputs rather than `AbstractArray` with `N + 1` dimension.
-    cols = Iterators.Stateful(eachcol(x))
-    # Make `init` a matrix to ensure type-stability
-    init = reshape(f(first(cols)), :, 1)
-    return mapreduce(f, hcat, cols; init = init)
-end
-
-function Bijectors.logabsdetjac(f::Vec, x::AbstractVector)
-    return Bijectors.logabsdetjac(f.b, reshape(x, f.size))
-end
+# TODO: Move to Bijectors.jl if we find further use for this.
+"""
+    wrap_in_vec_reshape(f, in_size)

-function Bijectors.logabsdetjac(f::Vec, x::AbstractMatrix)
-    return map(eachcol(x)) do x_
-        Bijectors.logabsdetjac(f, x_)
-    end
+Wraps a bijector `f` such that it operates on vectors of length `prod(in_size)` and produces
+a vector of length `prod(Bijectors.output(f, in_size))`.
+"""
+function wrap_in_vec_reshape(f, in_size)
+    vec_in_length = prod(in_size)
+    reshape_inner = Bijectors.Reshape((vec_in_length,), in_size)
+    out_size = Bijectors.output_size(f, in_size)
+    vec_out_length = prod(out_size)
+    reshape_outer = Bijectors.Reshape(out_size, (vec_out_length,))
+    return reshape_outer ∘ f ∘ reshape_inner
 end

@@ -83,7 +55,7 @@ function Bijectors.bijector(
         if d isa Distributions.UnivariateDistribution
             b
         else
-            Vec(b, size(d))
+            wrap_in_vec_reshape(b, size(d))
         end
     end

@@ -106,7 +78,10 @@ meanfield(model::DynamicPPL.Model) = meanfield(Random.default_rng(), model)
 function meanfield(rng::Random.AbstractRNG, model::DynamicPPL.Model)
     # Setup.
     varinfo = DynamicPPL.VarInfo(model)
-    num_params = length(varinfo[DynamicPPL.SampleFromPrior()])
+    # Use linked `varinfo` to determine the correct number of parameters.
+    # TODO: Replace with `length` once this is implemented for `VarInfo`.
+    varinfo_linked = DynamicPPL.link(varinfo, model)
+    num_params = length(varinfo_linked[:])

     # initial params
     μ = randn(rng, num_params)

@@ -134,7 +109,10 @@ function AdvancedVI.update(
     td::Bijectors.TransformedDistribution{<:DistributionsAD.TuringDiagMvNormal},
     θ::AbstractArray,
 )
-    μ, ω = θ[1:length(td)], θ[length(td) + 1:end]
+    # `length(td.dist) != length(td)` if `td.transform` changes the dimensionality,
+    # so we need to use the length of the underlying distribution `td.dist` here.
+    # TODO: Check if we can get away with `view` instead of `getindex` for all AD backends.
+    μ, ω = θ[begin:(begin + length(td.dist) - 1)], θ[(begin + length(td.dist)):end]
     return AdvancedVI.update(td, μ, StatsFuns.softplus.(ω))
 end
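
Note: these changes target dimension-changing bijectors. `wrap_in_vec_reshape` turns a shape-changing bijector into a vector-to-vector one, and `meanfield` now counts parameters in linked (unconstrained) space, whose dimension can differ from the constrained one. A hedged sketch of that distinction, built only from calls that appear in the diff (the model is illustrative):

using Turing, DynamicPPL

@model function simplex_demo()
    p ~ Dirichlet(ones(3))   # constrained to the probability simplex
end

model = simplex_demo()
varinfo = DynamicPPL.VarInfo(model)

# Linking maps the parameters to unconstrained space; with a
# dimension-changing bijector the linked length is the one the
# mean-field normal approximation must have.
varinfo_linked = DynamicPPL.link(varinfo, model)
num_params = length(varinfo_linked[:])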

test/mcmc/Inference.jl
Lines changed: 15 additions & 0 deletions

@@ -140,6 +140,21 @@
         @test all(haskey(x, :lp) for x in chains)
         @test mean(x[:s][1] for x in chains) ≈ 3 atol=0.1
         @test mean(x[:m][1] for x in chains) ≈ 0 atol=0.1
+
+        @testset "#2169" begin
+            # Not exactly the same as the issue, but similar.
+            @model function issue2169_model()
+                if DynamicPPL.leafcontext(__context__) isa DynamicPPL.PriorContext
+                    x ~ Normal(0, 1)
+                else
+                    x ~ Normal(1000, 1)
+                end
+            end
+
+            model = issue2169_model()
+            chain = sample(model, Prior(), 10)
+            @test all(mean(chain[:x]) .< 5)
+        end
     end

     @testset "chain ordering" begin

test/variational/advi.jl
Lines changed: 5 additions & 0 deletions

@@ -64,5 +64,10 @@
         x0_inv = inverse(b)(z0)
         @test size(x0_inv) == size(x0)
         @test all(x0 .≈ x0_inv)
+
+        # And regression for https://github.com/TuringLang/Turing.jl/issues/2160.
+        q = vi(m, ADVI(10, 1000))
+        x = rand(q, 1000)
+        @test mean(eachcol(x)) ≈ [0.5, 0.5] atol=0.1
     end
 end
