From 5a0fd2c1f3fc0f8301aaff7dbcbe736edc8f4773 Mon Sep 17 00:00:00 2001 From: Santtu Tikka Date: Wed, 8 May 2024 12:26:21 +0300 Subject: [PATCH] more tests, fix latent projection, run-extended --- R/dynamiteformula.R | 14 ++-- R/plot.R | 4 +- R/prepare_stan_input.R | 3 - R/priors.R | 27 +++---- R/stanblocks_families.R | 27 ++++--- README.Rmd | 7 +- README.md | 12 +-- tests/testthat/test-cmdstanr.R | 137 ++++++++++++++++++++------------- tests/testthat/test-errors.R | 37 +++++++++ tests/testthat/test-lfactor.R | 13 +++- tests/testthat/test-output.R | 8 +- tests/testthat/test-predict.R | 11 +++ tests/testthat/test-warnings.R | 2 +- 13 files changed, 197 insertions(+), 105 deletions(-) diff --git a/R/dynamiteformula.R b/R/dynamiteformula.R index a196723..5770786 100644 --- a/R/dynamiteformula.R +++ b/R/dynamiteformula.R @@ -619,12 +619,14 @@ get_dag <- function(x, project = FALSE, covariates = FALSE, lag_dep_pa <- lag_dep[lag_dep$resp == resp[i], ] lag_dep_ch <- lag_dep[lag_dep$var == resp[i], ] lag_dep_new <- vector(mode = "list", length = nrow(lag_dep_ch)) - for (j in seq_len(nrow(lag_dep_ch))) { - lag_dep_new[[j]] <- data.frame( - var = c(contemp_pa, lag_dep_pa$var), - order = c(rep(0L, k), lag_dep_pa$order) + lag_dep_ch$order[j], - resp = lag_dep_ch$resp[j] - ) + if (nrow(lag_dep_pa) > 0L) { + for (j in seq_len(nrow(lag_dep_ch))) { + lag_dep_new[[j]] <- data.frame( + var = c(contemp_pa, lag_dep_pa$var), + order = c(rep(0L, k), lag_dep_pa$order) + lag_dep_ch$order[j], + resp = lag_dep_ch$resp[j] + ) + } } lag_dep <- rbind( lag_dep[lag_dep$resp != resp[i] & lag_dep$var != resp[i], ], diff --git a/R/plot.R b/R/plot.R index 2b6a44e..d52a2da 100644 --- a/R/plot.R +++ b/R/plot.R @@ -545,8 +545,8 @@ plot_varying <- function(coefs, level, alpha, scales, n_params) { ) } title <- glue::glue( - "Posterior means and {100 * (1 - 2 * level)} %", - "intervals of the {title_spec}" + "Posterior means and {100 * (1 - 2 * level)} ", + "% intervals of the {title_spec}" ) # avoid NSE notes from R CMD check time <- mean <- category <- parameter <- NULL diff --git a/R/prepare_stan_input.R b/R/prepare_stan_input.R index bb418b6..d28a47d 100644 --- a/R/prepare_stan_input.R +++ b/R/prepare_stan_input.R @@ -479,8 +479,6 @@ initialize_multivariate_channel <- function(y, y_cg, y_name, cg_idx, list(channel = channel, sampling = sampling) } - - #' Default channel preparation #' #' Computes default channel-specific variables for Stan sampling, @@ -1220,7 +1218,6 @@ prepare_channel_student <- function(y, Y, channel, sampling, out } - #' Raise an error if factor type is not supported by a family #' #' @param y \[`character(1)`]\cr Response variable the error is related to. diff --git a/R/priors.R b/R/priors.R index bc8eca9..e298d18 100644 --- a/R/priors.R +++ b/R/priors.R @@ -71,19 +71,20 @@ extract_vectorizable_priors <- function(priors, y) { prepare_common_priors <- function(priors, M, shrinkage, P, correlated_nu, correlated_lf) { common_priors <- NULL - if (shrinkage) { - common_priors <- ifelse_( - is.null(priors), - data.frame( - parameter = "xi", - response = "", - prior = "normal(0, 1)", - type = "xi", - category = "" - ), - priors[priors$type == "xi", ] - ) - } + # Shrinkage feature removed for now + #if (shrinkage) { + # common_priors <- ifelse_( + # is.null(priors), + # data.frame( + # parameter = "xi", + # response = "", + # prior = "normal(0, 1)", + # type = "xi", + # category = "" + # ), + # priors[priors$type == "xi", ] + # ) + #} if (M > 1L && correlated_nu) { common_priors <- ifelse_( is.null(priors), diff --git a/R/stanblocks_families.R b/R/stanblocks_families.R index 5f993fd..3d8f6a0 100644 --- a/R/stanblocks_families.R +++ b/R/stanblocks_families.R @@ -704,13 +704,6 @@ loglik_lines_gaussian <- function(y, obs, idt, default, ...) { loglik_lines_multinomial <- function(idt, cvars, cgvars, backend, threading, ...) { - stopifnot_( - stan_version(backend) >= "2.24", - c( - "Multinomial family is not supported for this version of {.pkg {backend}}.", - `i` = "Please install a newer version of {.pkg {backend}}." - ) - ) cgvars$categories <- cgvars$y cgvars$y <- cgvars$y_cg cgvars$multinomial <- TRUE @@ -2143,9 +2136,12 @@ model_lines_categorical <- function(y, idt, obs, family, priors, onlyif(has_fixed || has_varying, c("J_{y}", "K_{y}")), onlyif(has_X, "X") ) - likelihood <- glue::glue( - "target += reduce_sum({distr}_loglik_{y}_lpmf, {seq1T}, grainsize, ", - "{fun_args});" + likelihood <- paste_rows( + paste0( + "target += reduce_sum({distr}_loglik_{y}_lpmf, {seq1T}, grainsize, ", + "{fun_args});" + ), + .indent = idt(1) ) } else { likelihood <- loglik_lines_categorical( @@ -2210,7 +2206,16 @@ model_lines_gaussian <- function(y, obs, idt, priors, paste_rows(priors, model_text, .parse = FALSE) } -model_lines_multinomial <- function(cvars, cgvars, idt, threading, ...) { +model_lines_multinomial <- function(cvars, cgvars, idt, backend, + threading, ...) { + stopifnot_( + stan_version(backend) >= "2.24", + c( + "Multinomial family is not supported for + this version of {.pkg {backend}}.", + `i` = "Please install a newer version of {.pkg {backend}}." + ) + ) cgvars$priors <- lapply( cgvars$y[-1L], function(s) { diff --git a/README.Rmd b/README.Rmd index 913b066..9eca856 100644 --- a/README.Rmd +++ b/README.Rmd @@ -43,7 +43,7 @@ The `dynamite` package is developed with the support of the Research Council of ## Installation -You can install the most recent stable version of `dynmite` from [CRAN](https://cran.r-project.org/package=dynamite) or the development version from [R-universe](https://r-universe.dev/search/) by running one the following lines: +You can install the most recent stable version of `dynamite` from [CRAN](https://cran.r-project.org/package=dynamite) or the development version from [R-universe](https://r-universe.dev/search/) by running one the following lines: ```{r, eval = FALSE} install.packages("dynamite") @@ -73,7 +73,8 @@ gaussian_example_fit <- dynamite( ```{r, echo = FALSE} set.seed(1) library("dynamite") -gaussian_example_fit <- update(gaussian_example_fit, +gaussian_example_fit <- update( + gaussian_example_fit, iter = 2000, warmup = 1000, thin = 1, chains = 2, cores = 2, refresh = 0 ) @@ -99,7 +100,7 @@ Traceplots and density plots for time-invariant parameters: plot(gaussian_example_fit, plot_type = "trace", types = "beta") ``` -Posterior predictive samples for the first 4 groups (samples based on the posterior distribution of model parameters and observed data on first time point): +Posterior predictive samples for the first 4 groups (using the samples based on the posterior distribution of the model parameters and observed data on the first time point): ```{r, warning=FALSE, fig.width = 9, fig.height = 4} library("ggplot2") diff --git a/README.md b/README.md index 176b43a..814544f 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ on DMPMs and the `dynamite` package, see the related ## Installation -You can install the most recent stable version of `dynmite` from +You can install the most recent stable version of `dynamite` from [CRAN](https://cran.r-project.org/package=dynamite) or the development version from [R-universe](https://r-universe.dev/search/) by running one the following lines: @@ -105,8 +105,8 @@ print(gaussian_example_fit) #> #> Elapsed time (seconds): #> warmup sample -#> chain:1 5.801 3.542 -#> chain:2 5.658 3.544 +#> chain:1 5.546 3.396 +#> chain:2 5.533 3.524 #> #> Summary statistics of the time- and group-invariant parameters: #> # A tibble: 6 × 10 @@ -144,9 +144,9 @@ plot(gaussian_example_fit, plot_type = "trace", types = "beta") -Posterior predictive samples for the first 4 groups (samples based on -the posterior distribution of model parameters and observed data on -first time point): +Posterior predictive samples for the first 4 groups (using the samples +based on the posterior distribution of the model parameters and observed +data on the first time point): ``` r library("ggplot2") diff --git a/tests/testthat/test-cmdstanr.R b/tests/testthat/test-cmdstanr.R index 186c9e9..fc79f14 100644 --- a/tests/testthat/test-cmdstanr.R +++ b/tests/testthat/test-cmdstanr.R @@ -144,8 +144,8 @@ test_that("multivariate gaussian with threading produces a valid model", { ) f <- obs(c(y1, y2) ~ -1 + lag(y1) | -1 + lag(y1) + lag(y2), "mvgaussian") + obs(x ~ -1 + lag(y1), "gaussian") - out <- dynamite( - dformula = f, + code <- get_code( + x = f, data = d, time = "t", group = "id", @@ -153,11 +153,10 @@ test_that("multivariate gaussian with threading produces a valid model", { threads_per_chain = 2, grainsize = 10, chains = 2, - parallel_chains = 1, - debug = list(no_compile = TRUE, model_code = TRUE) + parallel_chains = 1 ) e <- new.env() - e$file <- cmdstanr::write_stan_file(out$model_code) + e$file <- cmdstanr::write_stan_file(code) model <- with(e, {cmdstanr::cmdstan_model(file, compile = FALSE)}) expect_true(model$check_syntax()) }) @@ -176,6 +175,7 @@ test_that("threading produces valid model code for other distributions", { b = rbinom(n, 10, 0.4), s = rt(n, 15), bb = rbeta(n, 3, 6), + e = rexp(n, 2), time = rep(seq_len(n_time), each = n_id), id = rep(seq_len(n_id)), trials = rep(10, n) @@ -184,9 +184,10 @@ test_that("threading produces valid model code for other distributions", { obs(p ~ lag(g) + lag(b), family = "negbin") + obs(b ~ lag(b) + lag(b) * lag(g) + trials(trials), family = "binomial") + obs(s ~ lag(g), family = "student") + - obs(bb ~ lag(b), family = "beta") - out <- dynamite( - dformula = f, + obs(bb ~ lag(b), family = "beta") + + obs(e ~ lag(s), family = "exponential") + code <- get_code( + x = f, data = d, time = "time", group = "id", @@ -194,11 +195,10 @@ test_that("threading produces valid model code for other distributions", { threads_per_chain = 2, grainsize = 10, chains = 2, - parallel_chains = 1, - debug = list(no_compile = TRUE, model_code = TRUE) + parallel_chains = 1 ) e <- new.env() - e$file <- cmdstanr::write_stan_file(out$model_code) + e$file <- cmdstanr::write_stan_file(code) model <- with(e, {cmdstanr::cmdstan_model(file, compile = FALSE)}) expect_true(model$check_syntax()) }) @@ -232,8 +232,8 @@ test_that("threading produces a valid model for cumulative", { id = rep(seq_len(n), t) ) - out <- dynamite( - dformula = obs(y ~ x, family = "cumulative", link = "logit"), + code <- get_code( + x = obs(y ~ x, family = "cumulative", link = "logit"), data = d, time = "time", group = "id", @@ -241,17 +241,15 @@ test_that("threading produces a valid model for cumulative", { threads_per_chain = 2, grainsize = 10, chains = 2, - parallel_chains = 1, - debug = list(no_compile = TRUE, model_code = TRUE) + parallel_chains = 1 ) e <- new.env() - e$file <- cmdstanr::write_stan_file(out$model_code) + e$file <- cmdstanr::write_stan_file(code) model <- with(e, {cmdstanr::cmdstan_model(file, compile = FALSE)}) expect_true(model$check_syntax()) - out <- dynamite( - dformula = - obs(y ~ -1 + x + varying(~1), family = "cumulative", link = "logit") + + code <- get_code( + x = obs(y ~ -1 + x + varying(~1), family = "cumulative", link = "logit") + splines(df = 10), data = d, time = "time", @@ -260,18 +258,16 @@ test_that("threading produces a valid model for cumulative", { threads_per_chain = 2, grainsize = 10, chains = 2, - parallel_chains = 1, - debug = list(no_compile = TRUE, model_code = TRUE) + parallel_chains = 1 ) e <- new.env() - e$file <- cmdstanr::write_stan_file(out$model_code) + e$file <- cmdstanr::write_stan_file(code) model <- with(e, {cmdstanr::cmdstan_model(file, compile = FALSE)}) expect_true(model$check_syntax()) # no predictors - out <- dynamite( - dformula = - obs(y ~ -1 + varying(~1), family = "cumulative", link = "logit") + + code <- get_code( + x = obs(y ~ -1 + varying(~1), family = "cumulative", link = "logit") + splines(df = 10), data = d, time = "time", @@ -280,17 +276,15 @@ test_that("threading produces a valid model for cumulative", { threads_per_chain = 2, grainsize = 10, chains = 2, - parallel_chains = 1, - debug = list(no_compile = TRUE, model_code = TRUE) + parallel_chains = 1 ) e <- new.env() - e$file <- cmdstanr::write_stan_file(out$model_code) + e$file <- cmdstanr::write_stan_file(code) model <- with(e, {cmdstanr::cmdstan_model(file, compile = FALSE)}) expect_true(model$check_syntax()) - out <- dynamite( - dformula = - obs(y ~ 1, family = "cumulative", link = "logit") + + code <- get_code( + x = obs(y ~ 1, family = "cumulative", link = "logit") + splines(df = 10), data = d, time = "time", @@ -299,11 +293,47 @@ test_that("threading produces a valid model for cumulative", { threads_per_chain = 2, grainsize = 10, chains = 2, - parallel_chains = 1, - debug = list(no_compile = TRUE, model_code = TRUE) + parallel_chains = 1 + ) + e <- new.env() + e$file <- cmdstanr::write_stan_file(code) + model <- with(e, {cmdstanr::cmdstan_model(file, compile = FALSE)}) + expect_true(model$check_syntax()) +}) + +test_that("threading produces a valid model for multinomial", { + skip_if_not(run_extended_tests) + skip_on_os("mac") + + set.seed(1) + n_id <- 100L + n_time <- 20L + d <- data.frame( + y1 = sample(10, size = n_id * n_time, replace = TRUE), + y2 = sample(15, size = n_id * n_time, replace = TRUE), + y3 = sample(20, size = n_id * n_time, replace = TRUE), + z = rnorm(n_id * n_time), + time = seq_len(n_time), + id = rep(seq_len(n_id), each = n_time) + ) + d$n <- d$y1 + d$y2 + d$y3 + f <- obs( + c(y1, y2, y3) ~ z + lag(y1) + lag(y2) + lag(y3) + trials(n), + family = "multinomial" + ) + code <- get_code( + x = f, + data = d, + time = "time", + group = "id", + backend = "cmdstanr", + threads_per_chain = 2, + grainsize = 10, + chains = 2, + parallel_chains = 1 ) e <- new.env() - e$file <- cmdstanr::write_stan_file(out$model_code) + e$file <- cmdstanr::write_stan_file(code) model <- with(e, {cmdstanr::cmdstan_model(file, compile = FALSE)}) expect_true(model$check_syntax()) }) @@ -313,51 +343,49 @@ test_that("syntax is correct for various models", { skip_on_os("mac") # categorical_example_fit - out <- dynamite( - dformula = obs(x ~ z + lag(x) + lag(y), family = "categorical") + + code <- get_code( + x = obs(x ~ z + lag(x) + lag(y), family = "categorical") + obs(y ~ z + lag(x) + lag(y), family = "categorical"), data = categorical_example, time = "time", group = "id", - backend = "cmdstanr", - debug = list(no_compile = TRUE, model_code = TRUE) + backend = "cmdstanr" ) e <- new.env() - e$file <- cmdstanr::write_stan_file(out$model_code) + e$file <- cmdstanr::write_stan_file(code) model <- with(e, {cmdstanr::cmdstan_model(file, compile = FALSE)}) expect_true(model$check_syntax()) # gaussian_example_fit - out <- dynamite( - dformula = - obs(y ~ -1 + z + varying(~ x + lag(y)) + random(~1), family = "gaussian") + + code <- get_code( + x = obs( + y ~ -1 + z + varying(~ x + lag(y)) + random(~1), family = "gaussian" + ) + random_spec() + splines(df = 20), data = gaussian_example, time = "time", group = "id", - backend = "cmdstanr", - debug = list(no_compile = TRUE, model_code = TRUE) + backend = "cmdstanr" ) e <- new.env() - e$file <- cmdstanr::write_stan_file(out$model_code) + e$file <- cmdstanr::write_stan_file(code) model <- with(e, {cmdstanr::cmdstan_model(file, compile = FALSE)}) expect_true(model$check_syntax()) # categorical_example_fit - out <- dynamite( - dformula = obs(g ~ lag(g) + lag(logp), family = "gaussian") + + code <- get_code( + x = obs(g ~ lag(g) + lag(logp), family = "gaussian") + obs(p ~ lag(g) + lag(logp) + lag(b), family = "poisson") + obs(b ~ lag(b) * lag(logp) + lag(b) * lag(g), family = "bernoulli") + aux(numeric(logp) ~ log(p + 1)), data = multichannel_example, time = "time", group = "id", - backend = "cmdstanr", - debug = list(no_compile = TRUE, model_code = TRUE) + backend = "cmdstanr" ) e <- new.env() - e$file <- cmdstanr::write_stan_file(out$model_code) + e$file <- cmdstanr::write_stan_file(code) model <- with(e, {cmdstanr::cmdstan_model(file, compile = FALSE)}) expect_true(model$check_syntax()) @@ -387,16 +415,15 @@ test_that("syntax is correct for various models", { id = rep(seq_len(n), t) ) - out <- dynamite( - dformula = obs(y ~ x, family = "cumulative", link = "logit"), + code <- get_code( + x = obs(y ~ x, family = "cumulative", link = "logit"), data = d, time = "time", group = "id", - backend = "cmdstanr", - debug = list(no_compile = TRUE, model_code = TRUE) + backend = "cmdstanr" ) e <- new.env() - e$file <- cmdstanr::write_stan_file(out$model_code) + e$file <- cmdstanr::write_stan_file(code) model <- with(e, {cmdstanr::cmdstan_model(file, compile = FALSE)}) expect_true(model$check_syntax()) }) diff --git a/tests/testthat/test-errors.R b/tests/testthat/test-errors.R index 8344826..1b4a983 100644 --- a/tests/testthat/test-errors.R +++ b/tests/testthat/test-errors.R @@ -1431,3 +1431,40 @@ test_that("plot errors when the input is not a dynamitefit object", { "Argument `x` must be a object." ) }) + +# Model errors ------------------------------------------------------------ + +test_that("multinomial model fails if stan version < 2.24", { + set.seed(1) + n_id <- 10L + n_time <- 5L + d <- data.frame( + y1 = sample(10, size = n_id * n_time, replace = TRUE), + y2 = sample(15, size = n_id * n_time, replace = TRUE), + y3 = sample(20, size = n_id * n_time, replace = TRUE), + z = rnorm(n_id * n_time), + time = seq_len(n_time), + id = rep(seq_len(n_id), each = n_time) + ) + d$n <- d$y1 + d$y2 + d$y3 + f <- obs( + c(y1, y2, y3) ~ z + lag(y1) + lag(y2) + lag(y3) + trials(n), + family = "multinomial" + ) + expect_error( + mockthat::with_mock( + stan_version = function(...) "2.23", + dynamite( + dformula = f, + data = d, + time = "time", + group = "id", + backend = "rstan" + ) + ), + paste0( + "Multinomial family is not supported for this version of rstan\\.\n", + "i Please install a newer version of rstan\\." + ) + ) +}) diff --git a/tests/testthat/test-lfactor.R b/tests/testthat/test-lfactor.R index f821f00..b2ae642 100644 --- a/tests/testthat/test-lfactor.R +++ b/tests/testthat/test-lfactor.R @@ -190,7 +190,7 @@ test_that("psis can be plotted", { ) }) -test_that("new group levels can't be included if model has latent factor", { +test_that("new group levels can't be included if model has a latent factor", { skip_if_not(run_extended_tests) nd <- latent_factor_example nd$id[nd$id == 1] <- 100 @@ -208,3 +208,14 @@ test_that("new group levels can't be included if model has latent factor", { ) ) }) + +test_that("predict works with a latent factor", { + skip_if_not(run_extended_tests) + expect_error( + pred <- predict(latent_factor_example_fit, n_draws = 5), + NA + ) + expect_true( + all(is.finite(pred$y_new)) + ) +}) diff --git a/tests/testthat/test-output.R b/tests/testthat/test-output.R index 0826f6e..3076714 100644 --- a/tests/testthat/test-output.R +++ b/tests/testthat/test-output.R @@ -130,7 +130,7 @@ test_that("default formula plot works", { NA ) expect_error( - plot(f, show_auxiliary = TRUE), + plot(f, show_auxiliary = FALSE), NA ) expect_error( @@ -138,7 +138,7 @@ test_that("default formula plot works", { NA ) expect_error( - plot(f, show_auxiliary = TRUE, show_covariates = TRUE), + plot(f, show_auxiliary = FALSE, show_covariates = TRUE), NA ) multichannel_formula <- obs(g ~ lag(g) + lag(logp), family = "gaussian") + @@ -172,7 +172,7 @@ test_that("tikz formula plot works", { NA ) expect_error( - plot(f, show_auxiliary = TRUE, tikz = TRUE), + plot(f, show_auxiliary = FALSE, tikz = TRUE), NA ) expect_error( @@ -180,7 +180,7 @@ test_that("tikz formula plot works", { NA ) expect_error( - plot(f, show_auxiliary = TRUE, show_covariates = TRUE, tikz = TRUE), + plot(f, show_auxiliary = FALSE, show_covariates = TRUE, tikz = TRUE), NA ) multichannel_formula <- obs(g ~ lag(g) + lag(logp), family = "gaussian") + diff --git a/tests/testthat/test-predict.R b/tests/testthat/test-predict.R index 1df9d69..d14fff9 100644 --- a/tests/testthat/test-predict.R +++ b/tests/testthat/test-predict.R @@ -465,3 +465,14 @@ test_that("predict with loglik works", { dplyr::pull(y_loglik) expect_equal(manual, automatic) }) + +test_that("thin works", { + expect_error( + pred <- predict(gaussian_example_fit, thin = 10), + NA + ) + expect_equal( + unique(pred$.draw), + 1L:20L + ) +}) diff --git a/tests/testthat/test-warnings.R b/tests/testthat/test-warnings.R index 0134844..fba37b3 100644 --- a/tests/testthat/test-warnings.R +++ b/tests/testthat/test-warnings.R @@ -294,7 +294,7 @@ test_that("windows and old rstan warns on attach", { # Deprecated -------------------------------------------------------------- -test_that("deprecated work and warn", { +test_that("deprecated warn", { expect_warning( plot_betas(gaussian_example_fit), "'plot_betas' is deprecated"