Drop support for pre-1.10 #129

Open · wants to merge 27 commits into master
Commits (27)
98dfa14  drop pre 1.10 in CI (Red-Portal, Oct 8, 2024)
82246ad  remove SimpleUnPack (Red-Portal, Oct 8, 2024)
0ee74f4  fix conditional testing on Enzyme and Mooncake (Red-Portal, Oct 8, 2024)
410f221  add missing import for Mooncake and Enzyme (Red-Portal, Oct 8, 2024)
32fc814  Merge branch 'master' of github.com:TuringLang/AdvancedVI.jl into dro… (Red-Portal, Oct 8, 2024)
006c7f1  fix remove unpack syntax left behind (Red-Portal, Oct 8, 2024)
1d292e5  fix remove remaining @unpack (Red-Portal, Oct 9, 2024)
3b61f1c  fix formatting (Red-Portal, Oct 9, 2024)
640e64c  fix wrong merge (Red-Portal, Oct 9, 2024)
1b9e29b  fix formatting (Red-Portal, Oct 9, 2024)
71cbd21  bump Documenter compat (Red-Portal, Oct 9, 2024)
008977f  disable Zygote for scoregradelbo bijectors (Red-Portal, Oct 9, 2024)
3f164dc  fix remove uses of `SimpleUnPack` in docs (Red-Portal, Oct 21, 2024)
dfdb59d  fix move footnote references from docstrings to the actual docs (Red-Portal, Oct 21, 2024)
5d7a258  remove redundant references in docstring (Red-Portal, Oct 21, 2024)
ee36b54  fix stop treating missing docs as error (Red-Portal, Oct 21, 2024)
8922a3d  Merge branch 'master' into drop_support_1.6_lts (yebai, Oct 21, 2024)
7859082  refactor testing on Enzyme is now its own workflow (Red-Portal, Oct 21, 2024)
0602f4e  add Enzyme workflow (Red-Portal, Oct 21, 2024)
3adb923  fix error in repgradelbo interface test (Red-Portal, Oct 21, 2024)
b92d382  fix name of Enzyme workflow (Red-Portal, Oct 21, 2024)
09b81ee  refactor test group organizations (Red-Portal, Oct 21, 2024)
f206f7b  fix error in inference tests (Red-Portal, Oct 21, 2024)
52bbbed  bump compat bound for `Optimisers` and `Functors` (Red-Portal, Nov 9, 2024)
466808d  fix relax exactness check for `rand!(LocationScale)` (Red-Portal, Nov 9, 2024)
9aa3621  fix weaken equality tests, update tolerance for `LocationScale` test (Red-Portal, Nov 13, 2024)
f754796  fix formatting (Red-Portal, Nov 13, 2024)
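Most of the diff below replaces SimpleUnPack's `@unpack` macro with the property-destructuring syntax built into Julia since 1.7, which dropping pre-1.10 support makes safe to rely on. A minimal sketch of the before/after (the `Model` type here is hypothetical, for illustration only):

```julia
# Hypothetical container type, for illustration only.
struct Model
    μ::Float64
    σ::Float64
end

m = Model(0.0, 1.0)

# Before this PR (required the SimpleUnPack dependency):
#     using SimpleUnPack
#     @unpack μ, σ = m

# After this PR (built into Julia since 1.7, no dependency):
(; μ, σ) = m    # binds locals μ and σ from the fields of m
```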
.github/workflows/CI.yml (1 addition, 1 deletion)
@@ -19,8 +19,8 @@ jobs:
fail-fast: false
matrix:
version:
- '1.7'
- '1.10'
- '1'
os:
- ubuntu-latest
- macOS-latest
.github/workflows/Enzyme.yml (new file, 40 additions)
@@ -0,0 +1,40 @@
name: Enzyme
on:
push:
branches:
- master
tags: ['*']
pull_request:
workflow_dispatch:
concurrency:
# Skip intermediate builds: always.
# Cancel intermediate builds: only if it is a pull request build.
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
env:
TEST_GROUP: Enzyme
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
version:
- '1.10'
- '1'
os:
- ubuntu-latest
- macOS-latest
- windows-latest
arch:
- x64
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: julia-actions/cache@v1
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
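The new workflow gates Enzyme tests on a `TEST_GROUP` environment variable. The consumer of that variable is not part of this diff; a plausible sketch of the gating logic, with hypothetical file names, might look like:

```julia
# Hypothetical sketch of how test/runtests.jl might consume TEST_GROUP.
# The actual test runner is not shown in this diff.
const TEST_GROUP = get(ENV, "TEST_GROUP", "All")

if TEST_GROUP in ("All", "Enzyme")
    include("test_enzyme.jl")   # hypothetical file name
end
```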
Project.toml (2 additions, 8 deletions)
@@ -18,7 +18,6 @@ Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
SimpleUnPack = "ce78b400-467f-4804-87d8-8f486da07d0a"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[weakdeps]
@@ -45,29 +44,24 @@ DocStringExtensions = "0.8, 0.9"
Enzyme = "0.13"
FillArrays = "1.3"
ForwardDiff = "0.10"
Functors = "0.4"
Functors = "0.4, 0.5"
LinearAlgebra = "1"
LogDensityProblems = "2"
Mooncake = "0.4"
Optimisers = "0.2.16, 0.3"
Optimisers = "0.2.16, 0.3, 0.4"
ProgressMeter = "1.6"
Random = "1"
Requires = "1.0"
ReverseDiff = "1"
SimpleUnPack = "1.1.0"
StatsBase = "0.32, 0.33, 0.34"
Zygote = "0.6"
julia = "1.7"

[extras]
Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[targets]
test = ["Pkg", "Test"]
README.md (0 additions, 1 deletion)
@@ -26,7 +26,6 @@ a `LogDensityProblem` can be implemented as

```julia
using LogDensityProblems
using SimpleUnPack

struct NormalLogNormal{MX,SX,MY,SY}
μ_x::MX
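For reference, a sketch of the README's `LogDensityProblem` example as it reads after this PR, mirroring the version shown in the docs/src/examples.md diff below (the `logdensity` body is taken from there, not from the truncated hunk above):

```julia
using Distributions, LogDensityProblems

struct NormalLogNormal{MX,SX,MY,SY}
    μ_x::MX
    σ_x::SX
    μ_y::MY
    Σ_y::SY
end

function LogDensityProblems.logdensity(model::NormalLogNormal, θ)
    (; μ_x, σ_x, μ_y, Σ_y) = model
    return logpdf(LogNormal(μ_x, σ_x), θ[1]) + logpdf(MvNormal(μ_y, Σ_y), θ[2:end])
end
```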
bench/Project.toml (1 addition, 1 deletion)
@@ -30,7 +30,7 @@ ForwardDiff = "0.10"
InteractiveUtils = "1"
LogDensityProblems = "2"
Mooncake = "0.4.5"
Optimisers = "0.3"
Optimisers = "0.3, 0.4"
Random = "1"
ReverseDiff = "1"
SimpleUnPack = "1"
docs/Project.toml (2 additions, 4 deletions)
@@ -11,22 +11,20 @@ Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
SimpleUnPack = "ce78b400-467f-4804-87d8-8f486da07d0a"
StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"

[compat]
ADTypes = "1"
AdvancedVI = "0.3"
Bijectors = "0.13.6"
Distributions = "0.25"
Documenter = "0.26, 0.27, 1"
Documenter = "1"
FillArrays = "1"
ForwardDiff = "0.10"
LogDensityProblems = "2.1.1"
Optimisers = "0.3"
Optimisers = "0.3, 0.4"
Plots = "1"
QuasiMonteCarlo = "0.3"
ReverseDiff = "1"
SimpleUnPack = "1"
StatsFuns = "1"
julia = "1.10"
docs/make.jl (1 addition, 0 deletions)
@@ -20,6 +20,7 @@ makedocs(;
"Variational Families" => "families.md",
"Optimization" => "optimization.md",
],
warnonly=[:missing_docs],
)

deploydocs(; repo="github.com/TuringLang/AdvancedVI.jl", push_preview=true)
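Documenter 1 treats missing docstrings as build errors by default; the added `warnonly=[:missing_docs]` downgrades that check to a warning, which is what commit ee36b54 above relies on.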
docs/src/elbo/repgradelbo.md (4 additions, 5 deletions)
@@ -129,7 +129,6 @@ using LinearAlgebra
using LogDensityProblems
using Plots
using Random
using SimpleUnPack

using Optimisers
using ADTypes, ForwardDiff
@@ -143,7 +142,7 @@ struct NormalLogNormal{MX,SX,MY,SY}
end

function LogDensityProblems.logdensity(model::NormalLogNormal, θ)
@unpack μ_x, σ_x, μ_y, Σ_y = model
(; μ_x, σ_x, μ_y, Σ_y) = model
logpdf(LogNormal(μ_x, σ_x), θ[1]) + logpdf(MvNormal(μ_y, Σ_y), θ[2:end])
end

@@ -168,7 +167,7 @@ L = Diagonal(ones(d));
q0 = AdvancedVI.MeanFieldGaussian(μ, L)

function Bijectors.bijector(model::NormalLogNormal)
@unpack μ_x, σ_x, μ_y, Σ_y = model
(; μ_x, σ_x, μ_y, Σ_y) = model
Bijectors.Stacked(
Bijectors.bijector.([LogNormal(μ_x, σ_x), MvNormal(μ_y, Σ_y)]),
[1:1, 2:1+length(μ_y)])
@@ -295,7 +294,7 @@ qmcrng = SobolSample(; R=OwenScramble(; base=2, pad=32))
function Distributions.rand(
rng::AbstractRNG, q::MvLocationScale{<:Diagonal,D,L}, num_samples::Int
) where {L,D}
@unpack location, scale, dist = q
(; location, scale, dist) = q
n_dims = length(location)
scale_diag = diag(scale)
unif_samples = QuasiMonteCarlo.sample(num_samples, length(q), qmcrng)
@@ -337,7 +336,7 @@ savefig("advi_qmc_dist.svg")
function Distributions.rand(
rng::AbstractRNG, q::MvLocationScale{<:Diagonal, D, L}, num_samples::Int
) where {L, D}
@unpack location, scale, dist = q
(; location, scale, dist) = q
n_dims = length(location)
scale_diag = diag(scale)
scale_diag.*rand(rng, dist, n_dims, num_samples) .+ location
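The hunks above override `rand` for the mean-field `MvLocationScale` so the base draws come from a randomized Sobol sequence. A self-contained sketch of that sampling step, using the same constructor as the tutorial (the inverse-CDF mapping is an assumption, since the surrounding lines are elided):

```julia
using QuasiMonteCarlo, StatsFuns

# Owen-scrambled Sobol sequence, constructed exactly as in the tutorial above.
qmcrng = SobolSample(; R=OwenScramble(; base=2, pad=32))

num_samples, n_dims = 4, 2
unif_samples = QuasiMonteCarlo.sample(num_samples, n_dims, qmcrng)  # n_dims × num_samples, in [0, 1)
std_samples  = norminvcdf.(unif_samples)                            # standard normal draws via inverse CDF
```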
docs/src/examples.md (2 additions, 3 deletions)
@@ -15,7 +15,6 @@ Using the `LogDensityProblems` interface, the model can be defined as follows

```@example elboexample
using LogDensityProblems
using SimpleUnPack

struct NormalLogNormal{MX,SX,MY,SY}
μ_x::MX
@@ -25,7 +24,7 @@ struct NormalLogNormal{MX,SX,MY,SY}
end

function LogDensityProblems.logdensity(model::NormalLogNormal, θ)
@unpack μ_x, σ_x, μ_y, Σ_y = model
(; μ_x, σ_x, μ_y, Σ_y) = model
return logpdf(LogNormal(μ_x, σ_x), θ[1]) + logpdf(MvNormal(μ_y, Σ_y), θ[2:end])
end

@@ -59,7 +58,7 @@ Thus, we will use [Bijectors](https://github.com/TuringLang/Bijectors.jl) to mat
using Bijectors

function Bijectors.bijector(model::NormalLogNormal)
@unpack μ_x, σ_x, μ_y, Σ_y = model
(; μ_x, σ_x, μ_y, Σ_y) = model
return Bijectors.Stacked(
Bijectors.bijector.([LogNormal(μ_x, σ_x), MvNormal(μ_y, Σ_y)]),
[1:1, 2:(1 + length(μ_y))],
docs/src/optimization.md (2 additions, 0 deletions)
@@ -24,3 +24,5 @@ PolynomialAveraging
```

[^DCAMHV2020]: Dhaka, A. K., Catalina, A., Andersen, M. R., Magnusson, M., Huggins, J., & Vehtari, A. (2020). Robust, accurate stochastic optimization for variational inference. Advances in Neural Information Processing Systems, 33, 10961-10973.
[^KMJ2024]: Khaled, A., Mishchenko, K., & Jin, C. (2023). Dowg unleashed: An efficient universal parameter-free gradient descent method. Advances in Neural Information Processing Systems, 36, 6748-6769.
[^IHC2023]: Ivgi, M., Hinder, O., & Carmon, Y. (2023). Dog is sgd's best friend: A parameter-free dynamic step size schedule. In International Conference on Machine Learning (pp. 14465-14499). PMLR.
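For context, a hedged summary of the two parameter-free step-size rules referenced above, in notation adapted here rather than taken from the package docs: with iterates $\lambda_t$, stochastic gradients $g_t$, and running radius $\bar{r}_t = \max\{\max_{i \le t} \lVert \lambda_i - \lambda_0 \rVert, r_\epsilon\}$,

```math
\eta_t^{\text{DoG}} = \frac{\bar{r}_t}{\sqrt{\sum_{i \le t} \lVert g_i \rVert^2}},
\qquad
\eta_t^{\text{DoWG}} = \frac{\bar{r}_t^{\,2}}{\sqrt{\sum_{i \le t} \bar{r}_i^{\,2} \lVert g_i \rVert^2}},
```

where $r_\epsilon$ corresponds to the `repsilon` parameter in the docstrings changed further below.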
src/AdvancedVI.jl (0 additions, 1 deletion)
@@ -1,7 +1,6 @@

module AdvancedVI

using SimpleUnPack: @unpack, @pack!
using Accessors

using Random
src/families/location_scale.jl (8 additions, 8 deletions)
@@ -56,7 +56,7 @@ function (re::RestructureMeanField)(flat::AbstractVector)
end

function Optimisers.destructure(q::MvLocationScale{<:Diagonal,D,L,E}) where {D,L,E}
@unpack location, scale, dist = q
(; location, scale, dist) = q
flat = vcat(location, diag(scale))
return flat, RestructureMeanField(q)
end
@@ -69,27 +69,27 @@ Base.size(q::MvLocationScale) = size(q.location)
Base.eltype(::Type{<:MvLocationScale{S,D,L,E}}) where {S,D,L,E} = eltype(D)

function StatsBase.entropy(q::MvLocationScale)
@unpack location, scale, dist = q
(; location, scale, dist) = q
n_dims = length(location)
# `convert` is necessary because `entropy` is not type stable upstream
return n_dims * convert(eltype(location), entropy(dist)) + logdet(scale)
end

function Distributions.logpdf(q::MvLocationScale, z::AbstractVector{<:Real})
@unpack location, scale, dist = q
(; location, scale, dist) = q
return sum(Base.Fix1(logpdf, dist), scale \ (z - location)) - logdet(scale)
end

function Distributions.rand(q::MvLocationScale)
@unpack location, scale, dist = q
(; location, scale, dist) = q
n_dims = length(location)
return scale * rand(dist, n_dims) + location
end

function Distributions.rand(
rng::AbstractRNG, q::MvLocationScale{S,D,L}, num_samples::Int
) where {S,D,L}
@unpack location, scale, dist = q
(; location, scale, dist) = q
n_dims = length(location)
return scale * rand(rng, dist, n_dims, num_samples) .+ location
end
@@ -98,7 +98,7 @@ end
function Distributions.rand(
rng::AbstractRNG, q::MvLocationScale{<:Diagonal,D,L}, num_samples::Int
) where {L,D}
@unpack location, scale, dist = q
(; location, scale, dist) = q
n_dims = length(location)
scale_diag = diag(scale)
return scale_diag .* rand(rng, dist, n_dims, num_samples) .+ location
@@ -107,14 +107,14 @@ end
function Distributions._rand!(
rng::AbstractRNG, q::MvLocationScale, x::AbstractVecOrMat{<:Real}
)
@unpack location, scale, dist = q
(; location, scale, dist) = q
rand!(rng, dist, x)
x[:] = scale * x
return x .+= location
end

function Distributions.mean(q::MvLocationScale)
@unpack location, scale = q
(; location, scale) = q
return location + scale * Fill(mean(q.dist), length(location))
end

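In equation form, the `logpdf` and `entropy` methods above implement the usual location-scale identities for $z = C u + m$, where $u$ has i.i.d. coordinates from the base distribution $q_0$ (`dist`), $m$ is `location`, and $C$ is `scale`:

```math
\log q(z) = \sum_{i=1}^{n} \log q_0\!\left( \left[ C^{-1} (z - m) \right]_i \right) - \log \det C,
\qquad
\mathbb{H}(q) = n\, \mathbb{H}(q_0) + \log \det C .
```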
src/families/location_scale_low_rank.jl (8 additions, 8 deletions)
@@ -52,7 +52,7 @@ Base.size(q::MvLocationScaleLowRank) = size(q.location)
Base.eltype(::Type{<:MvLocationScaleLowRank{L,SD,SF,D,E}}) where {L,SD,SF,D,E} = eltype(L)

function StatsBase.entropy(q::MvLocationScaleLowRank)
@unpack location, scale_diag, scale_factors, dist = q
(; location, scale_diag, scale_factors, dist) = q
n_dims = length(location)
scale_diag2 = scale_diag .* scale_diag
UtDinvU = Hermitian(scale_factors' * (scale_factors ./ scale_diag2))
@@ -63,7 +63,7 @@ end
function Distributions.logpdf(
q::MvLocationScaleLowRank, z::AbstractVector{<:Real}; non_differntiable::Bool=false
)
@unpack location, scale_diag, scale_factors, dist = q
(; location, scale_diag, scale_factors, dist) = q
μ_base = mean(dist)
n_dims = length(location)

@@ -86,7 +86,7 @@ function Distributions.logpdf(
end

function Distributions.rand(q::MvLocationScaleLowRank)
@unpack location, scale_diag, scale_factors, dist = q
(; location, scale_diag, scale_factors, dist) = q
n_dims = length(location)
n_factors = size(scale_factors, 2)
u_diag = rand(dist, n_dims)
@@ -97,7 +97,7 @@ end
function Distributions.rand(
rng::AbstractRNG, q::MvLocationScaleLowRank{S,D,L}, num_samples::Int
) where {S,D,L}
@unpack location, scale_diag, scale_factors, dist = q
(; location, scale_diag, scale_factors, dist) = q
n_dims = length(location)
n_factors = size(scale_factors, 2)
u_diag = rand(rng, dist, n_dims, num_samples)
@@ -108,7 +108,7 @@ end
function Distributions._rand!(
rng::AbstractRNG, q::MvLocationScaleLowRank, x::AbstractVecOrMat{<:Real}
)
@unpack location, scale_diag, scale_factors, dist = q
(; location, scale_diag, scale_factors, dist) = q

rand!(rng, dist, x)
x[:] = scale_diag .* x
@@ -120,22 +120,22 @@ function Distributions._rand!(
end

function Distributions.mean(q::MvLocationScaleLowRank)
@unpack location, scale_diag, scale_factors = q
(; location, scale_diag, scale_factors) = q
μ = mean(q.dist)
return location +
scale_diag .* Fill(μ, length(scale_diag)) +
scale_factors * Fill(μ, size(scale_factors, 2))
end

function Distributions.var(q::MvLocationScaleLowRank)
@unpack scale_diag, scale_factors = q
(; scale_diag, scale_factors) = q
σ2 = var(q.dist)
return σ2 *
(scale_diag .* scale_diag + sum(scale_factors .* scale_factors; dims=2)[:, 1])
end

function Distributions.cov(q::MvLocationScaleLowRank)
@unpack scale_diag, scale_factors = q
(; scale_diag, scale_factors) = q
σ2 = var(q.dist)
return σ2 * (Diagonal(scale_diag .* scale_diag) + scale_factors * scale_factors')
end
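The low-rank family parameterizes the scale as diagonal plus low-rank factors. With $D = \operatorname{diag}(\texttt{scale\_diag})$, $U = \texttt{scale\_factors}$, and base variance $\sigma^2 = \operatorname{var}(\texttt{dist})$, the `cov` method above computes

```math
\operatorname{Cov}(q) = \sigma^2 \left( D^2 + U U^\top \right),
```

and the `entropy` method appears to evaluate $\log\det(D^2 + U U^\top)$ via the matrix determinant lemma, $\log\det D^2 + \log\det(I + U^\top D^{-2} U)$, so the full $n \times n$ covariance is never formed (the tail of that method is elided in this diff).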
src/objectives/elbo/repgradelbo.jl (1 addition, 1 deletion)
@@ -86,7 +86,7 @@ function estimate_objective(obj::RepGradELBO, q, prob; n_samples::Int=obj.n_samp
end

function estimate_repgradelbo_ad_forward(params′, aux)
@unpack rng, obj, problem, adtype, restructure, q_stop = aux
(; rng, obj, problem, adtype, restructure, q_stop) = aux
q = restructure_ad_forward(adtype, restructure, params′)
samples, entropy = reparam_with_entropy(rng, q, q_stop, obj.n_samples, obj.entropy)
energy = estimate_energy_with_samples(problem, samples)
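The forward function above assembles the reparameterization-gradient ELBO estimator; schematically, with $M$ = `obj.n_samples` reparameterized draws $z_m \sim q_\lambda$ (the return expression is elided in this diff, so this is a sketch of the standard estimator),

```math
\widehat{\operatorname{ELBO}}(\lambda) = \frac{1}{M} \sum_{m=1}^{M} \log \pi(z_m) + \mathbb{H}(q_\lambda),
```

and differentiating with respect to `params′` yields the pathwise (reparameterization) gradient.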
src/objectives/elbo/scoregradelbo.jl (1 addition, 1 deletion)
@@ -94,7 +94,7 @@ function estimate_objective(obj::ScoreGradELBO, q, prob; n_samples::Int=obj.n_sa
end

function estimate_scoregradelbo_ad_forward(params′, aux)
@unpack rng, obj, problem, adtype, restructure, q_stop = aux
(; rng, obj, problem, adtype, restructure, q_stop) = aux
baseline = compute_control_variate_baseline(
obj.baseline_history, obj.baseline_window_size
)
src/optimization/rules.jl (0 additions, 4 deletions)
@@ -8,8 +8,6 @@ Its only parameter is the initial guess of the Euclidean distance to the optimum.
# Parameters
- `repsilon`: Initial guess of the Euclidean distance between the initial point and
the optimum. (default value: `1e-6`)

[^KMJ2024]: Khaled, A., Mishchenko, K., & Jin, C. (2023). Dowg unleashed: An efficient universal parameter-free gradient descent method. Advances in Neural Information Processing Systems, 36, 6748-6769.
"""
Optimisers.@def struct DoWG <: Optimisers.AbstractRule
repsilon = 1e-6
@@ -37,8 +35,6 @@ The original paper recommends \$ 10^{-4} ( 1 + \\lVert \\lambda_0 \\rVert ) \$,

# Parameters
- `repsilon`: Initial guess of the Euclidean distance between the initial point and the optimum. (default value: `1e-6`)

[^IHC2023]: Ivgi, M., Hinder, O., & Carmon, Y. (2023). Dog is sgd's best friend: A parameter-free dynamic step size schedule. In International Conference on Machine Learning (pp. 14465-14499). PMLR.
"""
Optimisers.@def struct DoG <: Optimisers.AbstractRule
repsilon = 1e-6
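A hedged usage sketch of these rules through the standard Optimisers.jl interface. The import path and export of `DoG` are assumptions, and the parameter shapes are illustrative:

```julia
using Optimisers
using AdvancedVI: DoG             # assumption: the rule is accessible under this name

rule   = DoG(1e-6)                # repsilon, the initial distance guess
params = randn(5)
state  = Optimisers.setup(rule, params)

grad = randn(5)                   # stand-in for a real stochastic gradient
state, params = Optimisers.update(state, params, grad)
```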