From df95a2977da6851498ae1aa1ee0183c422d37d2c Mon Sep 17 00:00:00 2001
From: Phillip Alday
Date: Thu, 6 Jan 2022 09:52:52 -0600
Subject: [PATCH] Allow LMM to init GLMM (#588)

* simplify kwarg passing

* use lmm to init glmm

* version bump
---
 Project.toml                       |   2 +-
 src/generalizedlinearmixedmodel.jl | 104 ++++++++++++++-----------------
 test/pirls.jl                      |   4 +-
 3 files changed, 44 insertions(+), 66 deletions(-)

diff --git a/Project.toml b/Project.toml
index e98f2b1d5..2da05227c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "MixedModels"
 uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
 author = ["Phillip Alday ", "Douglas Bates ", "Jose Bayoan Santiago Calderon "]
-version = "4.5.0"
+version = "4.6.0"
 
 [deps]
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
diff --git a/src/generalizedlinearmixedmodel.jl b/src/generalizedlinearmixedmodel.jl
index 100ec2f62..fb787869d 100644
--- a/src/generalizedlinearmixedmodel.jl
+++ b/src/generalizedlinearmixedmodel.jl
@@ -167,30 +167,9 @@ function fit(
     tbl,
     d::Distribution=Normal(),
     l::Link=canonicallink(d);
-    wts=[],
-    contrasts=Dict{Symbol,Any}(),
-    offset=[],
-    verbose::Bool=false,
-    fast::Bool=false,
-    nAGQ::Integer=1,
-    progress::Bool=true,
-    thin::Int=typemax(Int),
+    kwargs...,
 )
-    return fit(
-        GeneralizedLinearMixedModel,
-        f,
-        columntable(tbl),
-        d,
-        l;
-        wts,
-        offset,
-        contrasts,
-        verbose,
-        fast,
-        nAGQ,
-        progress,
-        thin,
-    )
+    return fit(GeneralizedLinearMixedModel, f, columntable(tbl), d, l; kwargs...)
 end
 
 function fit(
@@ -202,21 +181,10 @@ function fit(
     wts=[],
     contrasts=Dict{Symbol,Any}(),
     offset=[],
-    verbose::Bool=false,
-    fast::Bool=false,
-    nAGQ::Integer=1,
-    progress::Bool=true,
-    thin::Int=typemax(Int),
+    kwargs...,
 )
     return fit!(
-        GeneralizedLinearMixedModel(
-            f, tbl, d, l; wts=wts, offset=offset, contrasts=contrasts
-        );
-        verbose,
-        fast,
-        nAGQ,
-        progress,
-        thin,
+        GeneralizedLinearMixedModel(f, tbl, d, l; wts, offset, contrasts); kwargs...
     )
 end
 
@@ -226,37 +194,16 @@ function fit(
     tbl,
     d::Distribution,
     l::Link=canonicallink(d);
-    wts=[],
-    contrasts=Dict{Symbol,Any}(),
-    offset=[],
-    verbose::Bool=false,
-    REML::Bool=false,
-    fast::Bool=false,
-    nAGQ::Integer=1,
-    progress::Bool=true,
-    thin::Int=typemax(Int),
+    kwargs...,
 )
-    return fit(
-        GeneralizedLinearMixedModel,
-        f,
-        tbl,
-        d,
-        l;
-        wts,
-        contrasts,
-        offset,
-        verbose,
-        fast,
-        nAGQ,
-        progress,
-        thin,
-    )
+    return fit(GeneralizedLinearMixedModel, f, tbl, d, l; kwargs...)
 end
 
 """
     fit!(m::GeneralizedLinearMixedModel; fast=false, nAGQ=1, verbose=false, progress=true,
-         thin::Int=1)
+         thin::Int=1,
+         init_from_lmm=Set())
 
 Optimize the objective function for `m`.
 
@@ -271,6 +218,25 @@
 and it may not be shown at all for models that are optimized quickly.
 
 If `verbose` is `true`, then the intermediate results of both the nonlinear optimization
 and PIRLS are also displayed on standard output. At every `thin`th iteration, optimization
 progress is saved in `m.optsum.fitlog`.
+
+By default, the starting values for model fitting are taken from a (non-mixed,
+i.e. marginal) GLM fit. Experience with larger datasets (many thousands of
+observations and/or hundreds of levels of the grouping variables) has suggested
+that fitting a (Gaussian) linear mixed model on the untransformed data may
+provide better starting values and thus overall faster fits, even though an
+entire LMM must be fit before the GLMM can be fit. `init_from_lmm` can be used
+to specify which starting values should be taken from an LMM fit.
+Valid options are any collection (array, set, etc.) containing one or more of
+`:β` and `:θ`; the default is the empty set.
+
+!!! note
+    Initializing from an LMM requires fitting the entire LMM first, so when
+    `progress=true`, there will be two progress bars: first for the LMM, then
+    for the GLMM.
+
+!!! warning
+    The `init_from_lmm` functionality is experimental and may change or be removed entirely
+    without being considered a breaking change.
 """
 function fit!(
     m::GeneralizedLinearMixedModel{T};
@@ -279,11 +245,16 @@ function fit!(
     verbose::Bool=false,
     fast::Bool=false,
     nAGQ::Integer=1,
     progress::Bool=true,
     thin::Int=typemax(Int),
+    init_from_lmm=Set(),
 ) where {T}
-    β = m.β
+    β = copy(m.β)
+    θ = copy(m.θ)
     lm = m.LMM
     optsum = lm.optsum
 
+    issubset(init_from_lmm, [:θ, :β]) ||
+        throw(ArgumentError("Invalid parameter selection for init_from_lmm"))
+
     if optsum.feval > 0
         throw(ArgumentError("This model has already been fitted. Use refit!() instead."))
     end
@@ -292,9 +263,16 @@ function fit!(
         throw(ArgumentError("The response is constant and thus model fitting has failed"))
     end
 
+    if !isempty(init_from_lmm)
+        fit!(lm; progress)
+        :θ in init_from_lmm && copyto!(θ, lm.θ)
+        :β in init_from_lmm && copyto!(β, lm.β)
+        unfit!(lm)
+    end
+
     if !fast
         optsum.lowerbd = vcat(fill!(similar(β), T(-Inf)), optsum.lowerbd)
-        optsum.initial = vcat(β, m.θ)
+        optsum.initial = vcat(β, lm.optsum.final)
         optsum.final = copy(optsum.initial)
     end
     setpar! = fast ? setθ! : setβθ!
diff --git a/test/pirls.jl b/test/pirls.jl
index 88b1acc63..77a70c4a6 100644
--- a/test/pirls.jl
+++ b/test/pirls.jl
@@ -90,11 +90,11 @@ end
 
 @testset "cbpp" begin
     cbpp = dataset(:cbpp)
-    gm2 = fit(MixedModel, first(gfms[:cbpp]), cbpp, Binomial(); wts=float(cbpp.hsz), progress=false)
+    gm2 = fit(MixedModel, first(gfms[:cbpp]), cbpp, Binomial(); wts=float(cbpp.hsz), progress=false, init_from_lmm=[:β, :θ])
     @test weights(gm2) == cbpp.hsz
     @test deviance(gm2,true) ≈ 100.09585619892968 atol=0.0001
     @test sum(abs2, gm2.u[1]) ≈ 9.723054788538546 atol=0.0001
-    @test logdet(gm2) ≈ 16.90105378801136 atol=0.0001
+    @test logdet(gm2) ≈ 16.90105378801136 atol=0.001
     @test isapprox(sum(gm2.resp.devresid), 73.47174762237978, atol=0.001)
     @test isapprox(loglikelihood(gm2), -92.02628186840045, atol=0.001)
     @test !dispersion_parameter(gm2)
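
Usage sketch (not part of the patch): the new keyword is simply forwarded from `fit`
through to `fit!`, so it can be passed directly when fitting a GLMM. The `cbpp` formula
and column names below are assumptions taken from the package's existing test suite and
documentation, not from this diff.

    using MixedModels

    # the contagious bovine pleuropneumonia example used in test/pirls.jl above
    cbpp = MixedModels.dataset(:cbpp)
    form = @formula((incid / hsz) ~ 1 + period + (1 | herd))

    # default behavior: starting values come from a marginal (non-mixed) GLM fit
    gm_glm_init = fit(MixedModel, form, cbpp, Binomial(); wts=float(cbpp.hsz))

    # experimental: take the β and θ starting values from an initial LMM fit instead;
    # any collection containing :β and/or :θ is accepted, and the empty set (the
    # default) disables LMM initialization entirely
    gm_lmm_init = fit(MixedModel, form, cbpp, Binomial();
                      wts=float(cbpp.hsz), init_from_lmm=[:β, :θ])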