tests/testthat/test-extended.R

run_extended_tests <- identical(Sys.getenv("DYNAMITE_EXTENDED_TESTS"), "true")

data.table::setDTthreads(1) # For CRAN

# Capture both message and output types
capture_all_output <- function(x) {
  utils::capture.output(
    utils::capture.output(x, type = "message"),
    type = "output"
  )
}

test_that("multivariate gaussian fit and predict work", {
  skip_if_not(run_extended_tests)

  set.seed(1)
  N <- 100
  T_ <- 50
  S <- crossprod(matrix(rnorm(4), 2, 2))
  L <- t(chol(S))
  y1 <- matrix(0, N, T_)
  y2 <- matrix(0, N, T_)
  x <- matrix(0, N, T_)
  for (t in 2:T_) {
    for (i in 1:N){
      mu <- c(0.7 * y1[i, t - 1], 0.4 * y2[i, t - 1] - 0.2 * y1[i, t - 1])
      y <- mu + L %*% rnorm(2)
      y1[i, t] <- y[1L]
      y2[i, t] <- y[2L]
      x[i, t] <- rnorm(1, c(0.5 * y1[i, t - 1]), 0.5)
    }
  }
  d <- data.frame(
    y1 = c(y1),
    y2 = c(y2),
    x = c(x),
    t = rep(1:T_, each = N),
    id = 1:N
  )
  f <- obs(c(y1, y2) ~ -1 + lag(y1) | -1 + lag(y1) + lag(y2), "mvgaussian") +
    obs(x ~ -1 + lag(y1), "gaussian")
  fit <- dynamite(
    dformula = f,
    data = d,
    time = "t",
    group = "id",
    chains = 1,
    iter = 2000,
    refresh = 0
  )

  expect_error(
    sumr <- summary(fit, types = "corr"),
    NA
  )
  expect_equal(
    sumr$mean,
    cov2cor(S)[2, 1],
    tolerance = 0.1
  )
  expect_error(
    sumr <- summary(fit, types = "sigma"),
    NA
  )
  expect_equal(
    sumr$mean,
    c(0.5, sqrt(diag(S))),
    tolerance = 0.1
  )
  expect_error(
    sumr <- summary(fit, types = "beta"),
    NA
  )
  expect_equal(
    sumr$mean,
    c(0.5, 0.7, -0.2, 0.4),
    tolerance = 0.1
  )

  expect_error(
    pred <- predict(fit, n_draws = 5),
    NA
  )
  expect_true(
    all(!is.na(pred[, c("y1_new", "y2_new", "x_new")])),
    NA
  )

  expect_error(
    pred <- predict(fit, n_draws = 5, type = "mean"),
    NA
  )
  pred <- pred[pred$t > 1, ]
  expect_true(
    all(!is.na(pred[, c("y1_mean", "y2_mean", "x_mean")])),
    NA
  )

  expect_error(
    pred <- predict(fit, n_draws = 5, type = "link"),
    NA
  )
  pred <- pred[pred$t > 1, ]
  expect_true(
    all(!is.na(pred[, c("y1_link", "y2_link", "x_link")])),
    NA
  )

  expect_error(
    pred <- fitted(fit, n_draws = 5),
    NA
  )
  pred <- pred[pred$t > 1, ]
  expect_true(
    all(!is.na(pred[, c("y1_fitted", "y2_fitted", "x_fitted")])),
    NA
  )
})

test_that("multinomial fit and predict work", {
  skip_if_not(run_extended_tests)

  set.seed(1)
  n_id <- 100L
  n_time <- 20L

  d <- data.frame(
    y1 = sample(10, size = n_id, replace = TRUE),
    y2 = sample(12, size = n_id, replace = TRUE),
    y3 = sample(15, size = n_id, replace = TRUE),
    time = 1,
    id = seq_len(n_id)
  )
  d$n <- d$y1 + d$y2 + d$y3

  d <- dplyr::right_join(
    d,
    data.frame(
      time = rep(seq_len(n_time), each = n_id),
      id = seq_len(n_id)
    )
  )

  d$z <- rnorm(nrow(d))
  d$n[is.na(d$n)] <- sample(37, size = sum(is.na(d$n)), replace = TRUE)

  f <- obs(
    c(y1, y2, y3) ~ z + lag(y1) + lag(y2) + lag(y3) + trials(n),
    family = "multinomial"
  )

  init <- list(
    beta_y2 = c(1.2, 0.8, 0.2, 0.1),
    beta_y3 = c(1, 0.5, 0.6, 0.3),
    a_y2 = -0.1,
    a_y3 = 0.2
  )

  expect_error(
    fit <- dynamite(
      dformula = f,
      data = d,
      time = "time",
      group = "id",
      chains = 1,
      iter = 1,
      algorithm = "Fixed_param",
      init = list(init),
    ),
    NA
  )
  expect_error(
    pred <- predict(fit, type = "response"),
    NA
  )
  expect_identical(
    pred$y1_new + pred$y2_new + pred$y3_new,
    pred$n
  )
  expect_error(
    predict(fit, type = "mean"),
    NA
  )
  expect_error(
    predict(fit, type = "link"),
    NA
  )
  expect_error(
    fitted(fit),
    NA
  )
})


test_that("time-invariant cumulative fit and predict work", {
  skip_if_not(run_extended_tests)

  set.seed(0)

  n <- 100
  t <- 30
  x <- matrix(0, n, t)
  y <- matrix(0, n, t)
  p <- matrix(0, n, 4)
  alpha <- c(-1, 0, 1)

  for (i in seq_len(t)) {
    x[, i] <- rnorm(n)
    eta <- 0.6 * x[, i]
    p[, 1] <- 1 - plogis(eta - alpha[1])
    p[, 2] <- plogis(eta - alpha[1]) - plogis(eta - alpha[2])
    p[, 3] <- plogis(eta - alpha[2]) - plogis(eta - alpha[3])
    p[, 4] <- plogis(eta - alpha[3])
    y[, i] <- apply(p, 1, sample, x = 1:4, size = 1, replace = FALSE)
  }

  d <- data.frame(
    y = factor(c(y), levels = 1:4), x = c(x),
    time = rep(seq_len(t), each = n),
    id = rep(seq_len(n), t)
  )

  expect_error(
    fit <- dynamite(
      dformula = obs(y ~ x, family = "cumulative", link = "logit"),
      data = d,
      time = "time",
      group = "id"
    ),
    NA
  )
  expect_error(
    plot(fit),
    NA
  )
  expect_error(
    as.data.table(fit, types = "cutpoint"),
    NA
  )
  expect_error(
    pred <- predict(fit, type = "response", n_draws = 10),
    NA
  )
  expect_true(
    all(pred$y_new %in% 1:4)
  )
  expect_error(
    pred <- predict(fit, type = "mean", n_draws = 10),
    NA
  )
  expect_true(
    all(!is.na(pred[, paste0("y_mean_", 1:4)]))
  )
  expect_error(
    pred <- predict(fit, type = "link", n_draws = 10),
    NA
  )
  expect_true(
    all(!is.na(pred$y_link))
  )
  expect_error(
    pred <- fitted(fit, n_draws = 10),
    NA
  )
  expect_true(
    all(!is.na(pred[, paste0("y_fitted_", 1:4)]))
  )
})

test_that("time-varying cutpoints for cumulative works", {
  skip_if_not(run_extended_tests)

  set.seed(34)

  n <- 50
  t <- 30
  x <- matrix(0, n, t)
  y <- matrix(0, n, t)
  alpha_spline <- t(replicate(3, cumsum(rnorm(t, sd = 0.5))))
  p <- array(0, c(n, 4, t))
  cutpoints <- matrix(0, 3, t)

  for (i in seq_len(t)) {
    tmp <- exp(c(0, alpha_spline[, i]))
    for (j in 1:3) {
      cutpoints[j, i] <- log(sum(tmp[1:j]) / sum(tmp[(j + 1):4]))
    }
    x[, i] <- rnorm(n)
    eta <- 0.6 * x[, i]
    p[, 1, i] <- 1 - plogis(eta - cutpoints[1, i])
    p[, 2, i] <- plogis(eta - cutpoints[1, i]) - plogis(eta - cutpoints[2, i])
    p[, 3, i] <- plogis(eta - cutpoints[2, i]) - plogis(eta - cutpoints[3, i])
    p[, 4, i] <- plogis(eta - cutpoints[3, i])
    y[, i] <- apply(p[, , i], 1, sample, x = 1:4, size = 1, replace = TRUE)
  }
  d <- data.frame(
    y = factor(y, levels = 1:4),
    x = c(x),
    time = rep(seq_len(t), each = n),
    id = rep(seq_len(n), t)
  )

  expect_error(
    fit <- dynamite(
      dformula =
        obs(y ~ -1 + x + varying(~1), family = "cumulative", link = "logit") +
        splines(10),
      data = d,
      time = "time",
      group = "id"
    ),
    NA
  )
  expect_error(
    plot(fit),
    NA
  )
  expect_error(
    as.data.table(fit, types = "cutpoint"),
    NA
  )
  expect_error(
    pred <- predict(fit, type = "response", n_draws = 10),
    NA
  )
  expect_true(
    all(pred$y_new %in% 1:4)
  )
  expect_error(
    pred <- predict(fit, type = "mean", n_draws = 10),
    NA
  )
  expect_true(
    all(!is.na(pred[, paste0("y_mean_", 1:4)]))
  )
  expect_error(
    pred <- predict(fit, type = "link", n_draws = 10),
    NA
  )
  expect_true(
    all(!is.na(pred$y_link))
  )
  expect_error(
    pred <- fitted(fit, n_draws = 10),
    NA
  )
  expect_true(
    all(!is.na(pred[, paste0("y_fitted_", 1:4)]))
  )
})

test_that("non-glm categorical fit works", {
  skip_if_not(run_extended_tests)
  skip_on_os("mac") # Seems to segfault on MacOS
  expect_error(
    mockthat::with_mock(
      stan_supports_categorical_logit_glm = function(...) FALSE,
      dynamite(
        dformula = obs(x ~ z + lag(x) + lag(y), family = "categorical") +
          obs(y ~ z + lag(x) + lag(y), family = "categorical"),
        data = categorical_example,
        time = "time",
        group = "id",
        iter = 2000,
        chains = 1,
        refresh = 0,
        verbose = FALSE,
        threads_per_chain = 2
      )
    ),
    NA
  )
})

test_that("get_parameter_dims works for dynamiteformula", {
  skip_if_not(run_extended_tests)

  expect_error(
    dims <- get_parameter_dims(
      x = obs(x ~ z + lag(x) + lag(y), family = "categorical") +
        obs(y ~ z + lag(x) + lag(y), family = "categorical"),
      data = categorical_example,
      time = "time",
      group = "id"
    ),
    NA
  )
  expect_identical(
    dims,
    get_parameter_dims(categorical_example_fit)
  )
})

test_that("predict with random variable trials works", {
  skip_if_not(run_extended_tests)

  set.seed(1)
  N <- 100
  T_ <- 50
  y <- matrix(0, N, T_)
  n <- matrix(0, N, T_)
  for (t in 2:T_) {
    for (i in 1:N) {
      n[i, t] <- rpois(1, 5)
      lp <- -3.0 + 1.25 * y[i, t - 1]
      y[i, t] <- rbinom(1, 1 + n[i, t], plogis(lp))
    }
  }
  d <- data.frame(
    n = c(n),
    y = c(y),
    t = rep(1:T_, each = N),
    id = 1:N
  )
  f <- obs(n ~ 1, family = "poisson") +
    aux(integer(nn) ~ n + 1) +
    obs(y ~ lag(y) + trials(nn), family = "binomial")
  fit <- dynamite(
    dformula = f,
    data = d,
    time = "t",
    group = "id",
    chains = 1,
    iter = 2000,
    refresh = 0
  )
  expect_error(sumr <- summary(fit), NA)
  expect_equal(sumr$mean, c(log(5), -3.0, 1.25), tolerance = 0.1)
  expect_error(predict(fit, n_draws = 5), NA)
})

test_that("shrinkage for splines is functional", {
  skip("Shrinkage feature removed at least for now.")

  set.seed(1)
  expect_error(
    fit <- dynamite(
      dformula =
        obs(
          y ~ -1 + z + varying(~ x + lag(y)) + random(~1),
          family = "gaussian"
        ) +
        random_spec() +
        splines(df = 20, shrinkage = TRUE),
      data = gaussian_example,
      time = "time",
      group = "id",
      iter = 2000,
      warmup = 1000,
      chains = 1,
      refresh = 0
    ),
    NA
  )
  expect_error(
    summary(fit, types = "xi"),
    NA
  )
})

test_that("update without recompile works", {
  skip_if_not(run_extended_tests)

  set.seed(0)
  gaussian_fit <- dynamite(
    dformula =
      obs(
        y ~ -1 + z + varying(~ x + lag(y)) + random(~1),
        family = "gaussian"
      ) +
      random_spec() +
      splines(df = 20),
    data = gaussian_example,
    time = "time",
    group = "id",
    iter = 2000,
    warmup = 1000,
    thin = 1,
    chains = 2,
    cores = 2,
    refresh = 0,
    save_warmup = FALSE,
    pars = c(
      "omega_alpha_1_y", "omega_raw_alpha_y", "nu_raw", "nu", "L",
      "sigma_nu", "a_y"
    ),
    include = FALSE
  )
  expect_error(
    fit <- update(
      gaussian_fit,
      data = gaussian_example,
      warmup = 1000,
      iter = 2000,
      thin = 1
    ),
    NA
  )
  # Internal update_ function
  expect_error(
    lfo(gaussian_fit, L = 20, chains = 4, verbose_stan = FALSE),
    NA
  )
})

test_that("custom stan model works", {
  skip_if_not(run_extended_tests)

  # The same as gaussian_example_fit, but we need to refit because
  # the results may not be reproducible across different platforms
  set.seed(1)
  initial_fit <- dynamite(
    dformula = obs(y ~ -1 + z + varying(~ x + lag(y)) +
                     random(~1), family = "gaussian") +
      random_spec() +
      splines(df = 20),
    data = gaussian_example,
    time = "time",
    group = "id",
    iter = 2000,
    warmup = 1000,
    thin = 10,
    chains = 2,
    cores = 2,
    refresh = 0,
    save_warmup = FALSE,
    pars = c(
      "omega_alpha_1_y", "omega_raw_alpha_y", "nu_raw", "nu", "L",
      "sigma_nu", "a_y"
    ),
    include = FALSE
  )
  code <- get_code(initial_fit)
  set.seed(1)
  expect_error(
    custom_fit <- dynamite(
      dformula = obs(y ~ -1 + z + varying(~ x + lag(y)) +
                       random(~1), family = "gaussian") +
        random_spec() +
        splines(df = 20),
      data = gaussian_example,
      time = "time",
      group = "id",
      iter = 2000,
      warmup = 1000,
      thin = 10,
      chains = 2,
      cores = 2,
      refresh = 0,
      save_warmup = FALSE,
      pars = c(
        "omega_alpha_1_y", "omega_raw_alpha_y", "nu_raw", "nu", "L",
        "sigma_nu", "a_y"
      ),
      include = FALSE,
      custom_stan_model = code
    ),
    NA
  )
  expect_equal(
    rstan::extract(initial_fit$stanfit, permuted = FALSE),
    rstan::extract(custom_fit$stanfit, permuted = FALSE)
  )
})

test_that("dynamice works", {
  skip_if_not(run_extended_tests)

  set.seed(1)
  n <- 50
  p <- 1000
  y <- replicate(p, stats::arima.sim(list(ar = 0.7), n, sd = 0.1))
  d <- data.frame(y = c(y), time = 1:n, id = rep(seq_len(p), each = n))
  dmiss <- d
  dmiss$y[sample(seq_len(nrow(d)), size = 0.2 * nrow(d))] <- NA

  # Long format imputation
  expect_error(
    fit_long <- dynamice(
      obs(y ~ lag(y), "gaussian"),
      time = "time",
      group = "id",
      data = dmiss,
      chains = 1,
      refresh = 0,
      backend = "rstan",
      impute_format = "long",
      keep_imputed = FALSE,
      mice_args = list(m = 3, print = FALSE)
    ),
    NA
  )

  # Wide format imputation
  expect_error(
    fit_wide <- dynamice(
      obs(y ~ lag(y), "gaussian"),
      time = "time",
      group = "id", data = dmiss,
      chains = 1,
      refresh = 0,
      backend = "rstan",
      impute_format = "wide",
      keep_imputed = FALSE,
      mice_args = list(m = 3, print = FALSE)
    ),
    NA
  )

  # single group
  set.seed(1)
  n <- 100
  y <- stats::arima.sim(list(ar = 0.7), n, sd = 0.1)
  d <- data.frame(y = c(y), time = 1:n)
  dmiss_single <- d
  dmiss_single$y[sample(seq_len(nrow(d)), size = 0.2 * nrow(d))] <- NA
  expect_error(
    fit_single <- dynamice(
      obs(y ~ lag(y), "gaussian"),
      time = "time",
      data = dmiss_single,
      chains = 1,
      refresh = 0,
      backend = "rstan",
      impute_format = "long",
      keep_imputed = FALSE,
      mice_args = list(m = 3, print = FALSE)
    ),
    NA
  )
})

test_that("information on >2 chains is summarized in print", {
  skip_if_not(run_extended_tests)

  set.seed(1)
  fit <- dynamite(
    dformula =
      obs(y ~ -1 + z +
            varying(~ x + lag(y)) + random(~1), family = "gaussian") +
      random_spec() +
      splines(df = 20),
    data = gaussian_example,
    time = "time",
    group = "id",
    iter = 2000,
    warmup = 1000,
    thin = 10,
    chains = 4,
    refresh = 0,
    save_warmup = FALSE,
    pars = c(
      "omega_alpha_1_y", "omega_raw_alpha_y", "nu_raw", "nu", "L",
      "sigma_nu", "a_y"
    ),
    include = FALSE
  )
  out <- capture_all_output(print(fit))
  expect_true(
    any(
      grepl(
        "Elapsed time (seconds) for fastest and slowest chains",
        out,
        fixed = TRUE
      )
    )
  )
})


test_that("latent factor models are identifiable", {
  skip_if_not(run_extended_tests)
  skip_on_os("windows") # Fails for Windows for some reason

  generate_data <- function(N, T_, D, alpha, mean_lambda, sd_lambda, sd_alpha) {
    y <- matrix(0, N, T_)
    x <- matrix(rnorm(N * T_), N, T_)
    psi <-
      c(splines::bs(seq_len(T_), df = D, intercept = TRUE) %*% cumsum(rnorm(D)))
    lambda <- rnorm(N, 0, sd_lambda)
    lambda <- mean_lambda + lambda - mean(lambda)
    a <- rnorm(N, alpha, sd_alpha)
    for (t in 1:T_) {
      y[, t] <- rnorm(N, a + lambda * psi[t] + x[, t])
    }
    list(data = data.frame(
      y = c(y), x = c(x),
      time = rep(seq_len(T_), each = N),
      id = rep(seq_len(N), times = T_)
    ),
    psi = psi, lambda = lambda,
    mean_lambda_psi = mean(lambda) * psi,
    alpha = alpha, beta = 1, kappa = sd_lambda / (1 + sd_lambda),
    sigma_lambda = sd_lambda, sigma_alpha = sd_alpha, sigma_y = 1, tau_psi = 1,
    zeta = 1 + sd_lambda)
  }

  set.seed(1)
  sim <- generate_data(
    N = 100,
    T_ = 50,
    D = 20,
    alpha = 1,
    mean_lambda = 1,
    sd_lambda = 1,
    sd_alpha = 1
  )
  dformula <- obs(y ~ x + random(~ 1), family = "gaussian") +
    lfactor() + splines(20)
  priors <- get_priors(dformula, data = sim$data, time = "time", group = "id")
  priors$prior[priors$parameter != "kappa_y"] <- "normal(0, 5)"
  fit1 <- dynamite(
    dformula, priors = priors,
    data = sim$data, time = "time", group = "id",
    backend = "cmdstanr", stanc_options = list("O1"),
    iter_sampling = 5000, iter_warmup = 5000,
    parallel_chains = 4, refresh = 0, seed = 1
  )
  sumr1 <- as_draws(fit1) |>
    posterior::summarise_draws()
  expect_true(all(sumr1$rhat < 1.1))
  expect_true(all(sumr1$ess_bulk > 500))
  expect_true(all(sumr1$ess_tail > 500))
  expect_equal(
    summary(fit1, type = "psi")$mean,
    sim$mean_lambda_psi,
    tolerance = 0.5
  )

  set.seed(2)
  sim <- generate_data(
    100, 50, 20,
    alpha = 0, mean_lambda = 1, sd_lambda = 0.1, sd_alpha = 2
  )
  dformula <- obs(y ~ -1 + x + random(~ 1), family = "gaussian") +
    lfactor() +
    splines(20)
  priors <- get_priors(dformula, data = sim$data, time = "time", group = "id")
  priors$prior[priors$parameter != "kappa_y"] <- "normal(0, 5)"
  fit2 <- dynamite(
    dformula, priors = priors,
    data = sim$data, time = "time", group = "id",
    backend = "cmdstanr", stanc_options = list("O1"),
    iter_sampling = 5000, iter_warmup = 5000,
    parallel_chains = 4, refresh = 0, seed = 1
  )
  sumr2 <- as_draws(fit2) |>
    posterior::summarise_draws()
  expect_true(all(sumr2$rhat < 1.1))
  expect_true(all(sumr2$ess_bulk > 500))
  expect_true(all(sumr2$ess_tail > 500))
  expect_equal(
    summary(fit2, type = "psi")$mean,
    sim$mean_lambda_psi,
    tolerance = 0.5
  )

  set.seed(3)
  sim <- generate_data(
    100, 50, 20,
    alpha = -1, mean_lambda = 0.5, sd_lambda = 2, sd_alpha = 0
  )
  dformula <- obs(y ~ x, family = "gaussian") +
    lfactor() +
    splines(20)
  priors <- get_priors(dformula, data = sim$data, time = "time", group = "id")
  priors$prior[priors$parameter != "kappa_y"] <- "normal(0, 5)"
  # need some informativeness on prior of kappa (or zeta?)
  priors$prior[priors$parameter == "kappa_y"] <- "beta(10, 10)"
  fit3 <- dynamite(
    dformula, priors = priors,
    data = sim$data, time = "time", group = "id",
    backend = "cmdstanr", stanc_options = list("O1"),
    iter_sampling = 5000, iter_warmup = 5000,
    parallel_chains = 4, refresh = 0, seed = 1
  )
  sumr3 <- as_draws(fit3) |>
    posterior::summarise_draws()

  expect_true(all(sumr3$rhat < 1.1))
  expect_true(all(sumr3$ess_bulk > 500))
  expect_true(all(sumr3$ess_tail > 500))
  expect_equal(
    summary(fit3, type = "psi")$mean,
    sim$mean_lambda_psi,
    tolerance = 0.5
  )

  set.seed(4)
  sim <- generate_data(
    100, 50, 20,
    alpha = 0, mean_lambda = -1, sd_lambda = 0.5, sd_alpha = 0.5
  )
  dformula <- obs(y ~ x + random(~ 1), family = "gaussian") +
    lfactor() + splines(20)
  priors <- get_priors(dformula, data = sim$data, time = "time", group = "id")
  priors$prior[priors$parameter != "kappa_y"] <- "normal(0, 5)"
  fit4 <- dynamite(
    dformula, priors = priors,
    data = sim$data, time = "time", group = "id",
    backend = "cmdstanr", stanc_options = list("O1"),
    iter_sampling = 5000, iter_warmup = 5000,
    parallel_chains = 4, refresh = 0, seed = 1
  )
  sumr4 <- as_draws(fit4) |>
    posterior::summarise_draws()

  expect_true(all(sumr4$rhat < 1.1))
  expect_true(all(sumr4$ess_bulk > 500))
  expect_true(all(sumr4$ess_tail > 500))
  expect_equal(
    summary(fit4, type = "psi")$mean,
    sim$mean_lambda_psi,
    tolerance = 0.5
  )

  set.seed(5)
  sim <- generate_data(
    100, 50, 20,
    alpha = 1, mean_lambda = 0, sd_lambda = 0.5, sd_alpha = 1
  )
  dformula <- obs(y ~ x + random(~ 1), family = "gaussian") +
    lfactor(nonzero_lambda = FALSE) +
    splines(20)
  priors <- get_priors(dformula, data = sim$data, time = "time", group = "id")
  priors$prior[] <- "normal(0, 5)"
  fit5 <- dynamite(
    dformula, priors = priors,
    data = sim$data, time = "time", group = "id",
    backend = "cmdstanr", stanc_options = list("O1"),
    iter_sampling = 5000, iter_warmup = 5000,
    parallel_chains = 4, refresh = 0, seed = 1
  )
  sumr5 <- as_draws(fit5) |>
    posterior::summarise_draws()

  expect_true(all(sumr5$rhat < 1.1))
  expect_true(all(sumr5$ess_bulk > 500))
  expect_true(all(sumr5$ess_tail > 500))
  expect_equal(
    summary(fit5, type = "psi")$mean,
    -sim$psi,
    tolerance = 0.5
  )

  set.seed(6)
  sim <- generate_data(
    100, 50, 20,
    alpha = 1, mean_lambda = 0, sd_lambda = 0.5, sd_alpha = 0
  )
  dformula <- obs(y ~ x, family = "gaussian") +
    lfactor(nonzero_lambda = FALSE) +
    splines(20)
  priors <- get_priors(dformula, data = sim$data, time = "time", group = "id")
  priors$prior[] <- "normal(0, 5)"
  fit6 <- dynamite(
    dformula, priors = priors,
    data = sim$data, time = "time", group = "id",
    backend = "cmdstanr", stanc_options = list("O1"),
    iter_sampling = 5000, iter_warmup = 5000,
    parallel_chains = 4, refresh = 0, seed = 1
  )
  sumr6 <- as_draws(fit6) |>
    posterior::summarise_draws()

  expect_true(all(sumr6$rhat < 1.1))
  expect_true(all(sumr6$ess_bulk > 500))
  expect_true(all(sumr6$ess_tail > 500))
  expect_equal(
    summary(fit6, type = "psi")$mean,
    -sim$psi,
    tolerance = 0.5
  )

  # Test bivariate case with nonzero_lambda
  set.seed(123)
  N <- 50
  T_ <- 50
  D <- 10
  x <- y <- matrix(0, N, T_)
  psi <- matrix(NA, 2, T_)
  lambda_y <- rnorm(N)
  lambda_y <- lambda_y - mean(lambda_y)
  lambda_x <- rnorm(N)
  lambda_x <- lambda_x - mean(lambda_x)
  L <- t(chol(matrix(c(1, 0.7, 0.7, 1), 2, 2)))
  B <- t(splines::bs(seq_len(T_), df = D, intercept = TRUE))
  omega <- matrix(NA, 2, D)
  omega[, 1] <- c(5, 2) + L %*% rnorm(2)
  for (i in 2:D) {
    omega[, i] <- omega[, i - 1] + L %*% rnorm(2)
  }
  psi[1, ] <- omega[1, ] %*% B
  psi[2, ] <- omega[2, ] %*% B
  for (t in 1:T_) {
    y[, t] <- rnorm(N, lambda_y * psi[1, t])
    x[, t] <- rnorm(N, lambda_x * psi[2, t])
  }
  d <- data.frame(
    y = c(y), x = c(x),
    time = rep(seq_len(T_), each = N),
    id = rep(seq_len(N), times = T_)
  )
  dformula <- obs(y ~ 1, family = "gaussian") +
    obs(x ~ 1, family = "gaussian") +
    lfactor(nonzero_lambda = FALSE, flip_sign = TRUE) +
    splines(10)
  fit <- dynamite(
    dformula,
    data = d, time = "time", group = "id",
    backend = "cmdstanr", stanc_options = list("O1"),
    iter_sampling = 5000, iter_warmup = 5000,
    parallel_chains = 4, refresh = 0, seed = 1
  )
  sumr <- as_draws(fit) |>
    posterior::summarise_draws()

  expect_true(all(sumr$rhat < 1.1))
  expect_true(all(sumr$ess_bulk > 500))
  expect_true(all(sumr$ess_tail > 500))
})
santikka/dynamite documentation built on April 17, 2025, 11:47 a.m.