From 00d43d04faff256378af9aa2b3aa5881ee7d3782 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Mon, 30 Sep 2024 10:10:56 -0700 Subject: [PATCH 01/14] fix make `ScoreGradELBO` immutable --- src/objectives/elbo/scoregradelbo.jl | 16 ++++++++++----- test/interface/scoregradelbo.jl | 29 ---------------------------- 2 files changed, 11 insertions(+), 34 deletions(-) diff --git a/src/objectives/elbo/scoregradelbo.jl b/src/objectives/elbo/scoregradelbo.jl index 053c6b3f..495a7326 100644 --- a/src/objectives/elbo/scoregradelbo.jl +++ b/src/objectives/elbo/scoregradelbo.jl @@ -1,3 +1,4 @@ + """ ScoreGradELBO(n_samples; kwargs...) @@ -41,16 +42,20 @@ struct ScoreGradELBO{EntropyEst<:AbstractEntropyEstimator} <: entropy::EntropyEst n_samples::Int baseline_window_size::Int - baseline_history::Vector{Float64} end function ScoreGradELBO( n_samples::Int; entropy::AbstractEntropyEstimator=ClosedFormEntropy(), baseline_window_size::Int=10, - baseline_history::Vector{Float64}=Float64[], ) - return ScoreGradELBO(entropy, n_samples, baseline_window_size, baseline_history) + return ScoreGradELBO(entropy, n_samples, baseline_window_size) +end + +function init( + ::Random.AbstractRNG, ::ScoreGradELBO, prob, params::AbstractVector{T}, restructure +) where {T<:Real} + return T[] end function Base.show(io::IO, obj::ScoreGradELBO) @@ -120,6 +125,7 @@ function AdvancedVI.estimate_gradient!( restructure, state, ) + baseline_history = state q_stop = restructure(params) aux = ( rng=rng, @@ -134,6 +140,6 @@ function AdvancedVI.estimate_gradient!( ) nelbo = DiffResults.value(out) stat = (elbo=-nelbo,) - push!(obj.baseline_history, -nelbo) - return out, nothing, stat + push!(baseline_history, -nelbo) + return out, baseline_history, stat end diff --git a/test/interface/scoregradelbo.jl b/test/interface/scoregradelbo.jl index a800f744..8a6ebb14 100644 --- a/test/interface/scoregradelbo.jl +++ b/test/interface/scoregradelbo.jl @@ -26,32 +26,3 @@ using Test @test elbo ≈ elbo_ref rtol = 0.2 end end - -@testset "interface ScoreGradELBO STL variance reduction" begin - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = normal_meanfield(rng, Float64) - @unpack model, μ_true, L_true, n_dims, is_meanfield = modelstats - - @testset for ad in [ - ADTypes.AutoForwardDiff(), ADTypes.AutoReverseDiff(), ADTypes.AutoZygote() - ] - q_true = MeanFieldGaussian( - Vector{eltype(μ_true)}(μ_true), Diagonal(Vector{eltype(L_true)}(diag(L_true))) - ) - params, re = Optimisers.destructure(q_true) - obj = ScoreGradELBO( - 1000; entropy=StickingTheLandingEntropy(), baseline_history=[0.0] - ) - out = DiffResults.DiffResult(zero(eltype(params)), similar(params)) - - aux = (rng=rng, obj=obj, problem=model, restructure=re, q_stop=q_true, adtype=ad) - AdvancedVI.value_and_gradient!( - ad, AdvancedVI.estimate_scoregradelbo_ad_forward, params, aux, out - ) - value = DiffResults.value(out) - grad = DiffResults.gradient(out) - @test norm(grad) ≈ 0 atol = 10 # high tolerance required. - end -end From af7a5a685a3122b3378013ef93ec37be2eb45cff Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Mon, 30 Sep 2024 11:13:33 -0700 Subject: [PATCH 02/14] fix error in `ScoreGradELBO` --- src/objectives/elbo/scoregradelbo.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/objectives/elbo/scoregradelbo.jl b/src/objectives/elbo/scoregradelbo.jl index 495a7326..e3471d1e 100644 --- a/src/objectives/elbo/scoregradelbo.jl +++ b/src/objectives/elbo/scoregradelbo.jl @@ -99,9 +99,9 @@ function estimate_objective(obj::ScoreGradELBO, q, prob; n_samples::Int=obj.n_sa end function estimate_scoregradelbo_ad_forward(params′, aux) - @unpack rng, obj, problem, adtype, restructure, q_stop = aux + @unpack rng, obj, problem, adtype, restructure, q_stop, baseline_history = aux baseline = compute_control_variate_baseline( - obj.baseline_history, obj.baseline_window_size + baseline_history, obj.baseline_window_size ) q = restructure_ad_forward(adtype, restructure, params′) samples_stop = rand(rng, q_stop, obj.n_samples) @@ -133,6 +133,7 @@ function AdvancedVI.estimate_gradient!( obj=obj, problem=prob, restructure=restructure, + baseline_history=baseline_history, q_stop=q_stop, ) AdvancedVI.value_and_gradient!( From 7370248d4d5e4ca7182ba512da0bda96756d7705 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Tue, 1 Oct 2024 14:00:25 -0700 Subject: [PATCH 03/14] fix type instability, use OnlineStats for baseline window --- Project.toml | 2 ++ src/AdvancedVI.jl | 1 + src/objectives/elbo/scoregradelbo.jl | 27 +++++++------------ test/inference/repgradelbo_locationscale.jl | 2 +- .../scoregradelbo_distributionsad.jl | 4 +-- 5 files changed, 15 insertions(+), 21 deletions(-) diff --git a/Project.toml b/Project.toml index 572ea144..136fb45a 100644 --- a/Project.toml +++ b/Project.toml @@ -14,6 +14,7 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" +OnlineStats = "a15396b6-48d5-5d58-9928-6d29437db91e" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -49,6 +50,7 @@ Functors = "0.4" LinearAlgebra = "1" LogDensityProblems = "2" Mooncake = "0.4" +OnlineStats = "1" Optimisers = "0.2.16, 0.3" ProgressMeter = "1.6" Random = "1" diff --git a/src/AdvancedVI.jl b/src/AdvancedVI.jl index aebe765e..178f52aa 100644 --- a/src/AdvancedVI.jl +++ b/src/AdvancedVI.jl @@ -6,6 +6,7 @@ using Accessors using Random using Distributions +using OnlineStats using Functors using Optimisers diff --git a/src/objectives/elbo/scoregradelbo.jl b/src/objectives/elbo/scoregradelbo.jl index e3471d1e..b3dbb6c0 100644 --- a/src/objectives/elbo/scoregradelbo.jl +++ b/src/objectives/elbo/scoregradelbo.jl @@ -53,9 +53,10 @@ function ScoreGradELBO( end function init( - ::Random.AbstractRNG, ::ScoreGradELBO, prob, params::AbstractVector{T}, restructure + ::Random.AbstractRNG, obj::ScoreGradELBO, prob, params::AbstractVector{T}, restructure ) where {T<:Real} - return T[] + buf = MovingWindow(T, obj.baseline_window_size) + return fit!(buf, one(T)) end function Base.show(io::IO, obj::ScoreGradELBO) @@ -68,14 +69,6 @@ function Base.show(io::IO, obj::ScoreGradELBO) return print(io, ")") end -function compute_control_variate_baseline(history, window_size) - if length(history) == 0 - return 1.0 - end - min_index = max(1, length(history) - window_size) - return mean(history[min_index:end]) -end - function estimate_energy_with_samples( prob, samples_stop, samples_logprob, samples_logprob_stop, baseline ) @@ -99,10 +92,7 @@ function estimate_objective(obj::ScoreGradELBO, q, prob; n_samples::Int=obj.n_sa end function estimate_scoregradelbo_ad_forward(params′, aux) - @unpack rng, obj, problem, adtype, restructure, q_stop, baseline_history = aux - baseline = compute_control_variate_baseline( - baseline_history, obj.baseline_window_size - ) + @unpack rng, obj, problem, adtype, restructure, q_stop, baseline = aux q = restructure_ad_forward(adtype, restructure, params′) samples_stop = rand(rng, q_stop, obj.n_samples) entropy = estimate_entropy_maybe_stl(obj.entropy, samples_stop, q, q_stop) @@ -125,7 +115,8 @@ function AdvancedVI.estimate_gradient!( restructure, state, ) - baseline_history = state + baseline_buf = state + baseline = mean(OnlineStats.value(baseline_buf)) q_stop = restructure(params) aux = ( rng=rng, @@ -133,7 +124,7 @@ function AdvancedVI.estimate_gradient!( obj=obj, problem=prob, restructure=restructure, - baseline_history=baseline_history, + baseline=baseline, q_stop=q_stop, ) AdvancedVI.value_and_gradient!( @@ -141,6 +132,6 @@ function AdvancedVI.estimate_gradient!( ) nelbo = DiffResults.value(out) stat = (elbo=-nelbo,) - push!(baseline_history, -nelbo) - return out, baseline_history, stat + fit!(baseline_buf, -nelbo) + return out, baseline_buf, stat end diff --git a/test/inference/repgradelbo_locationscale.jl b/test/inference/repgradelbo_locationscale.jl index 1ca31885..87c626f8 100644 --- a/test/inference/repgradelbo_locationscale.jl +++ b/test/inference/repgradelbo_locationscale.jl @@ -15,7 +15,7 @@ if @isdefined(Enzyme) ) end -@testset "inference ScoreGradELBO VILocationScale" begin +@testset "inference RepGradELBO VILocationScale" begin @testset "$(modelname) $(objname) $(realtype) $(adbackname)" for realtype in [Float64, Float32], (modelname, modelconstr) in diff --git a/test/inference/scoregradelbo_distributionsad.jl b/test/inference/scoregradelbo_distributionsad.jl index 1de7af1d..5445972f 100644 --- a/test/inference/scoregradelbo_distributionsad.jl +++ b/test/inference/scoregradelbo_distributionsad.jl @@ -5,8 +5,8 @@ AD_scoregradelbo_distributionsad = Dict( :Zygote => AutoZygote(), ) -if @isdefined(Tapir) - AD_scoregradelbo_distributionsad[:Tapir] = AutoTapir(; safe_mode=false) +if @isdefined(Mooncake) + AD_scoregradelbo_distributionsad[:Moonscake] = AutoMooncake(; config=nothing) end #if @isdefined(Enzyme) From 1ab02c5e77d3b20675779d7a45055bdcbe21c21d Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Tue, 1 Oct 2024 21:07:04 -0700 Subject: [PATCH 04/14] fix default options of `ScoreGradientELBO`, enable more tests --- src/objectives/elbo/scoregradelbo.jl | 19 ++++++++++++------- .../scoregradelbo_distributionsad.jl | 12 ++++-------- test/inference/scoregradelbo_locationscale.jl | 6 +----- .../scoregradelbo_locationscale_bijectors.jl | 12 ++++-------- test/interface/scoregradelbo.jl | 17 +++++++++++++++++ 5 files changed, 38 insertions(+), 28 deletions(-) diff --git a/src/objectives/elbo/scoregradelbo.jl b/src/objectives/elbo/scoregradelbo.jl index b3dbb6c0..6be1e50d 100644 --- a/src/objectives/elbo/scoregradelbo.jl +++ b/src/objectives/elbo/scoregradelbo.jl @@ -26,9 +26,8 @@ To reduce the variance of the gradient estimator, we use a baseline computed fro - `n_samples::Int`: Number of Monte Carlo samples used to estimate the ELBO. # Keyword Arguments -- `entropy`: The estimator for the entropy term. (Type `<: AbstractEntropyEstimator`; Default: `ClosedFormEntropy()`) +- `entropy`: The estimator for the entropy term. (Type `<: AbstractEntropyEstimator`; Default: `FullMonteCarloEntropy()`) - `baseline_window_size::Int`: The window size to use to compute the baseline. (Default: `10`) -- `baseline_history::Vector{Float64}`: The history of the baseline. (Default: `Float64[]`) # Requirements - The variational approximation ``q_{\\lambda}`` implements `rand` and `logpdf`. @@ -46,7 +45,7 @@ end function ScoreGradELBO( n_samples::Int; - entropy::AbstractEntropyEstimator=ClosedFormEntropy(), + entropy::AbstractEntropyEstimator=MonteCarloEntropy(), baseline_window_size::Int=10, ) return ScoreGradELBO(entropy, n_samples, baseline_window_size) @@ -55,8 +54,7 @@ end function init( ::Random.AbstractRNG, obj::ScoreGradELBO, prob, params::AbstractVector{T}, restructure ) where {T<:Real} - buf = MovingWindow(T, obj.baseline_window_size) - return fit!(buf, one(T)) + return MovingWindow(T, obj.baseline_window_size) end function Base.show(io::IO, obj::ScoreGradELBO) @@ -116,7 +114,12 @@ function AdvancedVI.estimate_gradient!( state, ) baseline_buf = state - baseline = mean(OnlineStats.value(baseline_buf)) + baseline_history = OnlineStats.value(baseline_buf) + baseline = if isempty(baseline_history) + one(eltype(params)) + else + mean(baseline_history) + end q_stop = restructure(params) aux = ( rng=rng, @@ -132,6 +135,8 @@ function AdvancedVI.estimate_gradient!( ) nelbo = DiffResults.value(out) stat = (elbo=-nelbo,) - fit!(baseline_buf, -nelbo) + if obj.baseline_window_size > 0 + fit!(baseline_buf, -nelbo) + end return out, baseline_buf, stat end diff --git a/test/inference/scoregradelbo_distributionsad.jl b/test/inference/scoregradelbo_distributionsad.jl index 5445972f..2207bf3f 100644 --- a/test/inference/scoregradelbo_distributionsad.jl +++ b/test/inference/scoregradelbo_distributionsad.jl @@ -9,20 +9,16 @@ if @isdefined(Mooncake) AD_scoregradelbo_distributionsad[:Moonscake] = AutoMooncake(; config=nothing) end -#if @isdefined(Enzyme) -# AD_scoregradelbo_distributionsad[:Enzyme] = AutoEnzyme() -#end +if @isdefined(Enzyme) + AD_scoregradelbo_distributionsad[:Enzyme] = AutoEnzyme() +end @testset "inference ScoreGradELBO DistributionsAD" begin @testset "$(modelname) $(objname) $(realtype) $(adbackname)" for realtype in [Float64, Float32], (modelname, modelconstr) in Dict(:Normal => normal_meanfield), n_montecarlo in [1, 10], - (objname, objective) in Dict( - :ScoreGradELBOClosedFormEntropy => ScoreGradELBO(n_montecarlo), - :ScoreGradELBOStickingTheLanding => - ScoreGradELBO(n_montecarlo; entropy=StickingTheLandingEntropy()), - ), + (objname, objective) in Dict(:ScoreGradELBO => ScoreGradELBO(n_montecarlo)), (adbackname, adtype) in AD_scoregradelbo_distributionsad seed = (0x38bef07cf9cc549d) diff --git a/test/inference/scoregradelbo_locationscale.jl b/test/inference/scoregradelbo_locationscale.jl index f0073d7c..f862d59d 100644 --- a/test/inference/scoregradelbo_locationscale.jl +++ b/test/inference/scoregradelbo_locationscale.jl @@ -21,11 +21,7 @@ end (modelname, modelconstr) in Dict(:Normal => normal_meanfield, :Normal => normal_fullrank), n_montecarlo in [1, 10], - (objname, objective) in Dict( - :ScoreGradELBOClosedFormEntropy => ScoreGradELBO(n_montecarlo), - :ScoreGradELBOStickingTheLanding => - ScoreGradELBO(n_montecarlo; entropy=StickingTheLandingEntropy()), - ), + (objname, objective) in Dict(:ScoreGradELBO => ScoreGradELBO(n_montecarlo)), (adbackname, adtype) in AD_locationscale seed = (0x38bef07cf9cc549d) diff --git a/test/inference/scoregradelbo_locationscale_bijectors.jl b/test/inference/scoregradelbo_locationscale_bijectors.jl index bee8234a..b6ad769a 100644 --- a/test/inference/scoregradelbo_locationscale_bijectors.jl +++ b/test/inference/scoregradelbo_locationscale_bijectors.jl @@ -5,9 +5,9 @@ AD_scoregradelbo_locationscale_bijectors = Dict( #:Zygote => AutoZygote(), ) -#if @isdefined(Tapir) -# AD_scoregradelbo_locationscale_bijectors[:Tapir] = AutoTapir(; safe_mode=false) -#end +if @isdefined(Mooncake) + AD_scoregradelbo_locationscale_bijectors[:Mooncake] = AutoMooncake(; config=nothing) +end if @isdefined(Enzyme) AD_scoregradelbo_locationscale_bijectors[:Enzyme] = AutoEnzyme() @@ -19,11 +19,7 @@ end (modelname, modelconstr) in Dict(:NormalLogNormalMeanField => normallognormal_meanfield), n_montecarlo in [1, 10], - (objname, objective) in Dict( - #:ScoreGradELBOClosedFormEntropy => ScoreGradELBO(n_montecarlo), # not supported yet. - :ScoreGradELBOStickingTheLanding => - ScoreGradELBO(n_montecarlo; entropy=StickingTheLandingEntropy()), - ), + (objname, objective) in Dict(:ScoreGradELBO => ScoreGradELBO(n_montecarlo)), (adbackname, adtype) in AD_scoregradelbo_locationscale_bijectors seed = (0x38bef07cf9cc549d) diff --git a/test/interface/scoregradelbo.jl b/test/interface/scoregradelbo.jl index 8a6ebb14..63e8c645 100644 --- a/test/interface/scoregradelbo.jl +++ b/test/interface/scoregradelbo.jl @@ -25,4 +25,21 @@ using Test elbo = estimate_objective(obj, q0, model; n_samples=10^4) @test elbo ≈ elbo_ref rtol = 0.2 end + + @testset "baseline_window" begin + T = 100 + adtype = AutoForwardDiff() + + obj = ScoreGradELBO(10) + _, _, stats, _ = optimize(rng, model, obj, q0, T; show_progress=false, adtype) + @test isfinite(last(stats).elbo) + + obj = ScoreGradELBO(10; baseline_window_size=0) + _, _, stats, _ = optimize(rng, model, obj, q0, T; show_progress=false, adtype) + @test isfinite(last(stats).elbo) + + obj = ScoreGradELBO(10; baseline_window_size=1) + _, _, stats, _ = optimize(rng, model, obj, q0, T; show_progress=false, adtype) + @test isfinite(last(stats).elbo) + end end From d110ec712085cb70f0e83d76e996c70ec12fc014 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Thu, 3 Oct 2024 20:34:50 -0700 Subject: [PATCH 05/14] refactor `ScoreGradELBO` Sampling is now done out of the AD path. --- src/objectives/elbo/entropy.jl | 6 ----- src/objectives/elbo/repgradelbo.jl | 7 +++++ src/objectives/elbo/scoregradelbo.jl | 38 +++++++++++++--------------- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/src/objectives/elbo/entropy.jl b/src/objectives/elbo/entropy.jl index fa34022a..2b607132 100644 --- a/src/objectives/elbo/entropy.jl +++ b/src/objectives/elbo/entropy.jl @@ -38,9 +38,3 @@ function estimate_entropy( end end -function estimate_entropy_maybe_stl( - entropy_estimator::AbstractEntropyEstimator, samples, q, q_stop -) - q_maybe_stop = maybe_stop_entropy_score(entropy_estimator, q, q_stop) - return estimate_entropy(entropy_estimator, samples, q_maybe_stop) -end diff --git a/src/objectives/elbo/repgradelbo.jl b/src/objectives/elbo/repgradelbo.jl index b8bf63fa..e6f04ae8 100644 --- a/src/objectives/elbo/repgradelbo.jl +++ b/src/objectives/elbo/repgradelbo.jl @@ -45,6 +45,13 @@ function Base.show(io::IO, obj::RepGradELBO) return print(io, ")") end +function estimate_entropy_maybe_stl( + entropy_estimator::AbstractEntropyEstimator, samples, q, q_stop +) + q_maybe_stop = maybe_stop_entropy_score(entropy_estimator, q, q_stop) + return estimate_entropy(entropy_estimator, samples, q_maybe_stop) +end + function estimate_energy_with_samples(prob, samples) return mean(Base.Fix1(LogDensityProblems.logdensity, prob), eachsample(samples)) end diff --git a/src/objectives/elbo/scoregradelbo.jl b/src/objectives/elbo/scoregradelbo.jl index 6be1e50d..388507de 100644 --- a/src/objectives/elbo/scoregradelbo.jl +++ b/src/objectives/elbo/scoregradelbo.jl @@ -36,8 +36,7 @@ To reduce the variance of the gradient estimator, we use a baseline computed fro Depending on the options, additional requirements on ``q_{\\lambda}`` may apply. """ -struct ScoreGradELBO{EntropyEst<:AbstractEntropyEstimator} <: - AdvancedVI.AbstractVariationalObjective +struct ScoreGradELBO{EntropyEst<:AbstractEntropyEstimator} <: AbstractVariationalObjective entropy::EntropyEst n_samples::Int baseline_window_size::Int @@ -67,20 +66,11 @@ function Base.show(io::IO, obj::ScoreGradELBO) return print(io, ")") end -function estimate_energy_with_samples( - prob, samples_stop, samples_logprob, samples_logprob_stop, baseline -) - fv = Base.Fix1(LogDensityProblems.logdensity, prob).(eachsample(samples_stop)) - fv_mean = mean(fv) - score_grad = mean(@. samples_logprob * (fv - baseline)) - score_grad_stop = mean(@. samples_logprob_stop * (fv - baseline)) - return fv_mean + (score_grad - score_grad_stop) -end - function estimate_objective( rng::Random.AbstractRNG, obj::ScoreGradELBO, q, prob; n_samples::Int=obj.n_samples ) - samples, entropy = reparam_with_entropy(rng, q, q, obj.n_samples, obj.entropy) + samples = rand(rng, q, n_samples) + entropy = estimate_entropy(obj.entropy, samples, q) energy = map(Base.Fix1(LogDensityProblems.logdensity, prob), eachsample(samples)) return mean(energy) + entropy end @@ -90,15 +80,19 @@ function estimate_objective(obj::ScoreGradELBO, q, prob; n_samples::Int=obj.n_sa end function estimate_scoregradelbo_ad_forward(params′, aux) - @unpack rng, obj, problem, adtype, restructure, q_stop, baseline = aux + @unpack rng, obj, problem, adtype, restructure, samples, q_stop, baseline = aux q = restructure_ad_forward(adtype, restructure, params′) - samples_stop = rand(rng, q_stop, obj.n_samples) - entropy = estimate_entropy_maybe_stl(obj.entropy, samples_stop, q, q_stop) - samples_logprob = logpdf.(Ref(q), AdvancedVI.eachsample(samples_stop)) - samples_logprob_stop = logpdf.(Ref(q_stop), AdvancedVI.eachsample(samples_stop)) - energy = estimate_energy_with_samples( - problem, samples_stop, samples_logprob, samples_logprob_stop, baseline - ) + + ℓq = logpdf.(Ref(q), AdvancedVI.eachsample(samples)) + ℓq_stop = logpdf.(Ref(q_stop), AdvancedVI.eachsample(samples)) + ℓπ = map(Base.Fix1(LogDensityProblems.logdensity, problem), eachsample(samples)) + ℓπ_mean = mean(ℓπ) + score_grad = mean(@. ℓq * (ℓπ - baseline)) + score_grad_stop = mean(@. ℓq_stop * (ℓπ - baseline)) + + energy = ℓπ_mean + (score_grad - score_grad_stop) + entropy = estimate_entropy(obj.entropy, samples, q) + elbo = energy + entropy return -elbo end @@ -121,6 +115,7 @@ function AdvancedVI.estimate_gradient!( mean(baseline_history) end q_stop = restructure(params) + samples = rand(rng, q_stop, obj.n_samples) aux = ( rng=rng, adtype=adtype, @@ -128,6 +123,7 @@ function AdvancedVI.estimate_gradient!( problem=prob, restructure=restructure, baseline=baseline, + samples=samples, q_stop=q_stop, ) AdvancedVI.value_and_gradient!( From 42c1034d8006aaebe91a04947772bc8189717078 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Thu, 3 Oct 2024 23:39:44 -0400 Subject: [PATCH 06/14] run formatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/objectives/elbo/entropy.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/objectives/elbo/entropy.jl b/src/objectives/elbo/entropy.jl index 2b607132..210b49ca 100644 --- a/src/objectives/elbo/entropy.jl +++ b/src/objectives/elbo/entropy.jl @@ -37,4 +37,3 @@ function estimate_entropy( -logpdf(q, mc_sample) end end - From feb3200799af5f7022ecadccfe182a732c8196b0 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Fri, 4 Oct 2024 10:52:25 -0700 Subject: [PATCH 07/14] fix default value for baseline control variate --- src/objectives/elbo/scoregradelbo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/objectives/elbo/scoregradelbo.jl b/src/objectives/elbo/scoregradelbo.jl index 388507de..231a91c9 100644 --- a/src/objectives/elbo/scoregradelbo.jl +++ b/src/objectives/elbo/scoregradelbo.jl @@ -110,7 +110,7 @@ function AdvancedVI.estimate_gradient!( baseline_buf = state baseline_history = OnlineStats.value(baseline_buf) baseline = if isempty(baseline_history) - one(eltype(params)) + zero(eltype(params)) else mean(baseline_history) end From 45b37c111ce05f2fb01f195b8f58237bdb3a66c5 Mon Sep 17 00:00:00 2001 From: Hong Ge <3279477+yebai@users.noreply.github.com> Date: Tue, 22 Oct 2024 13:17:06 +0100 Subject: [PATCH 08/14] Update CI.yml --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 224f81d4..c1a5fdc3 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -19,8 +19,8 @@ jobs: fail-fast: false matrix: version: - - '1.7' - '1.10' + - '1' os: - ubuntu-latest - macOS-latest From 86eccf775572179d94205b04b0b625c723727f7e Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Mon, 4 Nov 2024 22:24:01 -0800 Subject: [PATCH 09/14] fix move log density computation out of the AD path --- src/objectives/elbo/scoregradelbo.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/objectives/elbo/scoregradelbo.jl b/src/objectives/elbo/scoregradelbo.jl index 231a91c9..dd5a0ac3 100644 --- a/src/objectives/elbo/scoregradelbo.jl +++ b/src/objectives/elbo/scoregradelbo.jl @@ -80,15 +80,14 @@ function estimate_objective(obj::ScoreGradELBO, q, prob; n_samples::Int=obj.n_sa end function estimate_scoregradelbo_ad_forward(params′, aux) - @unpack rng, obj, problem, adtype, restructure, samples, q_stop, baseline = aux + @unpack rng, obj, logprobs, adtype, restructure, samples, q_stop, baseline = aux q = restructure_ad_forward(adtype, restructure, params′) ℓq = logpdf.(Ref(q), AdvancedVI.eachsample(samples)) ℓq_stop = logpdf.(Ref(q_stop), AdvancedVI.eachsample(samples)) - ℓπ = map(Base.Fix1(LogDensityProblems.logdensity, problem), eachsample(samples)) - ℓπ_mean = mean(ℓπ) - score_grad = mean(@. ℓq * (ℓπ - baseline)) - score_grad_stop = mean(@. ℓq_stop * (ℓπ - baseline)) + ℓπ_mean = mean(logprobs) + score_grad = mean(@. ℓq * (logprobs - baseline)) + score_grad_stop = mean(@. ℓq_stop * (logprobs - baseline)) energy = ℓπ_mean + (score_grad - score_grad_stop) entropy = estimate_entropy(obj.entropy, samples, q) @@ -116,11 +115,12 @@ function AdvancedVI.estimate_gradient!( end q_stop = restructure(params) samples = rand(rng, q_stop, obj.n_samples) + ℓprobs = map(Base.Fix1(LogDensityProblems.logdensity, prob), eachsample(samples)) aux = ( rng=rng, adtype=adtype, obj=obj, - problem=prob, + logprobs=ℓprobs, restructure=restructure, baseline=baseline, samples=samples, From dc23a02ab472f83d96adcabc21c465ccaffe4571 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Mon, 4 Nov 2024 22:42:54 -0800 Subject: [PATCH 10/14] update change the `ScoreGradELBO` objective to be VarGrad underneath --- Project.toml | 2 - src/objectives/elbo/scoregradelbo.jl | 71 +++++++--------------------- 2 files changed, 16 insertions(+), 57 deletions(-) diff --git a/Project.toml b/Project.toml index 136fb45a..572ea144 100644 --- a/Project.toml +++ b/Project.toml @@ -14,7 +14,6 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" -OnlineStats = "a15396b6-48d5-5d58-9928-6d29437db91e" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -50,7 +49,6 @@ Functors = "0.4" LinearAlgebra = "1" LogDensityProblems = "2" Mooncake = "0.4" -OnlineStats = "1" Optimisers = "0.2.16, 0.3" ProgressMeter = "1.6" Random = "1" diff --git a/src/objectives/elbo/scoregradelbo.jl b/src/objectives/elbo/scoregradelbo.jl index dd5a0ac3..7c358515 100644 --- a/src/objectives/elbo/scoregradelbo.jl +++ b/src/objectives/elbo/scoregradelbo.jl @@ -37,32 +37,12 @@ To reduce the variance of the gradient estimator, we use a baseline computed fro Depending on the options, additional requirements on ``q_{\\lambda}`` may apply. """ struct ScoreGradELBO{EntropyEst<:AbstractEntropyEstimator} <: AbstractVariationalObjective - entropy::EntropyEst n_samples::Int - baseline_window_size::Int -end - -function ScoreGradELBO( - n_samples::Int; - entropy::AbstractEntropyEstimator=MonteCarloEntropy(), - baseline_window_size::Int=10, -) - return ScoreGradELBO(entropy, n_samples, baseline_window_size) -end - -function init( - ::Random.AbstractRNG, obj::ScoreGradELBO, prob, params::AbstractVector{T}, restructure -) where {T<:Real} - return MovingWindow(T, obj.baseline_window_size) end function Base.show(io::IO, obj::ScoreGradELBO) - print(io, "ScoreGradELBO(entropy=") - print(io, obj.entropy) - print(io, ", n_samples=") + print(io, "ScoreGradELBO(n_samples=") print(io, obj.n_samples) - print(io, ", baseline_window_size=") - print(io, obj.baseline_window_size) return print(io, ")") end @@ -70,9 +50,9 @@ function estimate_objective( rng::Random.AbstractRNG, obj::ScoreGradELBO, q, prob; n_samples::Int=obj.n_samples ) samples = rand(rng, q, n_samples) - entropy = estimate_entropy(obj.entropy, samples, q) - energy = map(Base.Fix1(LogDensityProblems.logdensity, prob), eachsample(samples)) - return mean(energy) + entropy + ℓπ = map(Base.Fix1(LogDensityProblems.logdensity, prob), eachsample(samples)) + ℓq = logpdf.(Ref(q), AdvancedVI.eachsample(samples)) + return mean(ℓπ - ℓq) end function estimate_objective(obj::ScoreGradELBO, q, prob; n_samples::Int=obj.n_samples) @@ -80,20 +60,12 @@ function estimate_objective(obj::ScoreGradELBO, q, prob; n_samples::Int=obj.n_sa end function estimate_scoregradelbo_ad_forward(params′, aux) - @unpack rng, obj, logprobs, adtype, restructure, samples, q_stop, baseline = aux + @unpack rng, obj, logprob, adtype, restructure, samples = aux q = restructure_ad_forward(adtype, restructure, params′) - + ℓπ = logprob ℓq = logpdf.(Ref(q), AdvancedVI.eachsample(samples)) - ℓq_stop = logpdf.(Ref(q_stop), AdvancedVI.eachsample(samples)) - ℓπ_mean = mean(logprobs) - score_grad = mean(@. ℓq * (logprobs - baseline)) - score_grad_stop = mean(@. ℓq_stop * (logprobs - baseline)) - - energy = ℓπ_mean + (score_grad - score_grad_stop) - entropy = estimate_entropy(obj.entropy, samples, q) - - elbo = energy + entropy - return -elbo + f = ℓq - ℓπ + return var(f) / 2 end function AdvancedVI.estimate_gradient!( @@ -106,33 +78,22 @@ function AdvancedVI.estimate_gradient!( restructure, state, ) - baseline_buf = state - baseline_history = OnlineStats.value(baseline_buf) - baseline = if isempty(baseline_history) - zero(eltype(params)) - else - mean(baseline_history) - end - q_stop = restructure(params) - samples = rand(rng, q_stop, obj.n_samples) - ℓprobs = map(Base.Fix1(LogDensityProblems.logdensity, prob), eachsample(samples)) + q = restructure(params) + samples = rand(rng, q, obj.n_samples) + ℓπ = map(Base.Fix1(LogDensityProblems.logdensity, prob), eachsample(samples)) aux = ( rng=rng, adtype=adtype, obj=obj, - logprobs=ℓprobs, + logprob=ℓπ, restructure=restructure, - baseline=baseline, samples=samples, - q_stop=q_stop, ) AdvancedVI.value_and_gradient!( adtype, estimate_scoregradelbo_ad_forward, params, aux, out ) - nelbo = DiffResults.value(out) - stat = (elbo=-nelbo,) - if obj.baseline_window_size > 0 - fit!(baseline_buf, -nelbo) - end - return out, baseline_buf, stat + ℓq = logpdf.(Ref(q), AdvancedVI.eachsample(samples)) + elbo = mean(ℓπ - ℓq) + stat = (elbo=elbo,) + return out, nothing, stat end From f030d1469dc350c5cbd5f2065f8270a70085e234 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Mon, 4 Nov 2024 22:44:29 -0800 Subject: [PATCH 11/14] fix remove unnecessary import --- src/AdvancedVI.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/AdvancedVI.jl b/src/AdvancedVI.jl index 453e560b..1d0c4f50 100644 --- a/src/AdvancedVI.jl +++ b/src/AdvancedVI.jl @@ -6,7 +6,6 @@ using Accessors using Random using Distributions -using OnlineStats using Functors using Optimisers From 2577dce7a0de413a8cde367d5325d70b6056eab1 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Mon, 4 Nov 2024 22:47:44 -0800 Subject: [PATCH 12/14] fix ScoreGradELBO outdated docs and removed unused parametric type --- src/objectives/elbo/scoregradelbo.jl | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/objectives/elbo/scoregradelbo.jl b/src/objectives/elbo/scoregradelbo.jl index 7c358515..f95d4cd4 100644 --- a/src/objectives/elbo/scoregradelbo.jl +++ b/src/objectives/elbo/scoregradelbo.jl @@ -14,29 +14,15 @@ Evidence lower-bound objective computed with score function gradients. \\end{aligned} ``` -To reduce the variance of the gradient estimator, we use a baseline computed from a running average of the previous ELBO values and subtract it from the objective. - -```math -\\mathbb{E}_{z \\sim q_{\\lambda}}\\left[ - \\nabla_{\\lambda} \\log q_{\\lambda}(z) \\left(\\pi\\left(z\\right) - \\beta\\right) -\\right] -``` - # Arguments - `n_samples::Int`: Number of Monte Carlo samples used to estimate the ELBO. -# Keyword Arguments -- `entropy`: The estimator for the entropy term. (Type `<: AbstractEntropyEstimator`; Default: `FullMonteCarloEntropy()`) -- `baseline_window_size::Int`: The window size to use to compute the baseline. (Default: `10`) - # Requirements - The variational approximation ``q_{\\lambda}`` implements `rand` and `logpdf`. - `logpdf(q, x)` must be differentiable with respect to `q` by the selected AD backend. - The target distribution and the variational approximation have the same support. - -Depending on the options, additional requirements on ``q_{\\lambda}`` may apply. """ -struct ScoreGradELBO{EntropyEst<:AbstractEntropyEstimator} <: AbstractVariationalObjective +struct ScoreGradELBO <: AbstractVariationalObjective n_samples::Int end From f0bbc1bac8b6b07ec1e939262e0c9f1d32558715 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Mon, 4 Nov 2024 22:56:13 -0800 Subject: [PATCH 13/14] update docs for `ScoreGradELBO` --- src/objectives/elbo/scoregradelbo.jl | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/objectives/elbo/scoregradelbo.jl b/src/objectives/elbo/scoregradelbo.jl index f95d4cd4..b724f5df 100644 --- a/src/objectives/elbo/scoregradelbo.jl +++ b/src/objectives/elbo/scoregradelbo.jl @@ -2,17 +2,7 @@ """ ScoreGradELBO(n_samples; kwargs...) -Evidence lower-bound objective computed with score function gradients. -```math -\\begin{aligned} -\\nabla_{\\lambda} \\mathrm{ELBO}\\left(\\lambda\\right) -&\\= -\\mathbb{E}_{z \\sim q_{\\lambda}}\\left[ - \\log \\pi\\left(z\\right) \\nabla_{\\lambda} \\log q_{\\lambda}(z) -\\right] -+ \\mathbb{H}\\left(q_{\\lambda}\\right), -\\end{aligned} -``` +Evidence lower-bound objective computed with score function gradient with the VarGrad objective, also known as the leave-one-out control variate. # Arguments - `n_samples::Int`: Number of Monte Carlo samples used to estimate the ELBO. From 43e858132d5a68dadf733f67629e3cbcf54e52e9 Mon Sep 17 00:00:00 2001 From: Kyurae Kim Date: Mon, 4 Nov 2024 22:56:44 -0800 Subject: [PATCH 14/14] update docs for `ScoreGradELBO` --- src/objectives/elbo/scoregradelbo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/objectives/elbo/scoregradelbo.jl b/src/objectives/elbo/scoregradelbo.jl index b724f5df..143b9423 100644 --- a/src/objectives/elbo/scoregradelbo.jl +++ b/src/objectives/elbo/scoregradelbo.jl @@ -5,7 +5,7 @@ Evidence lower-bound objective computed with score function gradient with the VarGrad objective, also known as the leave-one-out control variate. # Arguments -- `n_samples::Int`: Number of Monte Carlo samples used to estimate the ELBO. +- `n_samples::Int`: Number of Monte Carlo samples used to estimate the VarGrad objective. # Requirements - The variational approximation ``q_{\\lambda}`` implements `rand` and `logpdf`.