NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true") knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = NOT_CRAN, fig.width = 7, fig.height = 4, dpi = 96 )
smoothbp is a bespoke Metropolis-within-Gibbs sampler for Bayesian
hierarchical piecewise regression with multiple logistic-smoothed
change-points. The recovery tests are a self-consistency check — simulator
and likelihood share the same code path. To trust the package one also
wants to see that it agrees with an independent implementation. brms
(via Stan) is the natural reference for fitting the same generative model
via NUTS on a custom non-linear formula. mcp (via JAGS) is the closest
specialist competitor and provides a complementary perspective: it fits
hard (sharp) change-points rather than the logistic-smoothed
transition that smoothbp uses.
Four scenarios are compared below:
b0 with all
structural parameters scalar (~ 1). Compared against brms.omega ~ 1 + treatment. Compared against brms.When the change-point parameter omega drifts outside the range of the
time variable, the smoothing term collapses and delta1 becomes effectively
unidentified. NUTS chains that initialise in that region can get stuck
there for the entire run, producing an artificially multimodal posterior
that has nothing to do with the data. The clean fix is to bound omega
above at max(tau) in both samplers, and to initialise brms chains near
the prior mean (which is what smoothbp does internally). Both fixes
applied below.
library(smoothbp) library(posterior) library(ggplot2) library(dplyr) library(tidyr) have_brms <- requireNamespace("brms", quietly = TRUE) && NOT_CRAN if (!have_brms && NOT_CRAN) { message("brms not installed -- install.packages('brms') to render the comparison.") } have_mcp <- requireNamespace("mcp", quietly = TRUE) && requireNamespace("rjags", quietly = TRUE) && NOT_CRAN if (!have_mcp && NOT_CRAN) { message("mcp or rjags not installed -- install JAGS then install.packages('mcp') ", "to render the mcp comparison.") }
set.seed(31) dat <- simulate_smoothbp( n_subj = 25, n_obs = 10, b0 = 5.0, b1 = -0.4, delta = 1.4, omega = 3.2, rho = 4.0, sigma = 0.4, sigma_u = 0.7, seed = 31L ) true_params(dat)
t0 <- Sys.time() fit_sbp <- smoothbp( formula = y ~ tau, b0 = ~ 1 + (1 | subject), b1 = ~ 1, deltas = list(~ 1), omega = list(~ 1), rho = list(~ 1), data = dat, priors = smoothbp_priors( b0 = prior_normal(0, 10), b1 = prior_normal(0, 2), omega = list(prior_normal(3, 2, lb = 0, ub = max(dat$tau))), rho = list(prior_normal(3, 2, lb = 0)), # Diffuse variance priors, matched (as closely as the two parameterisations # allow) to the brms half-t below. smoothbp's variance update is conjugate, # so the family must be inverse-gamma; (0.001, 0.001) makes it effectively # non-informative for the well-identified residual SD. sigma = prior_invgamma(0.001, 0.001), sigma_u = prior_invgamma(0.001, 0.001) ), chains = 4L, iter = 2000L, warmup = 1000L, seed = 31L, .verbose = FALSE ) time_sbp <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
suppressPackageStartupMessages(library(brms)) # Note: brms requires named non-linear parameters in its formula. Here `b2` # is the brms-side name for the slope-change parameter; it corresponds to # `delta` (delta_1) in smoothbp's parameterisation. bf_smoothed <- brms::bf( y ~ b0 + b1 * (tau - omega) + b2 * (tau - omega) / (1 + exp(-(tau - omega) * rho)), b0 ~ 1 + (1 | subject), b1 ~ 1, b2 ~ 1, omega ~ 1, rho ~ 1, # b2 = smoothbp's delta_1 nl = TRUE ) # brms::prior() captures arguments by non-standard evaluation, so # `ub = max(dat$tau)` would be embedded literally in the generated Stan # code. Pre-compute the value and pass it via prior_string() which uses # standard evaluation and accepts numeric bounds. ub_omega <- max(dat$tau) priors_brms <- c( brms::prior(normal(0, 10), nlpar = "b0"), brms::prior(normal(0, 2), nlpar = "b1"), brms::prior(normal(0, 2), nlpar = "b2"), brms::prior_string("normal(3, 2)", nlpar = "omega", lb = 0, ub = ub_omega), brms::prior(normal(3, 2), nlpar = "rho", lb = 0), # Diffuse variance priors, matched to smoothbp's near-non-informative # inverse-gamma above. A wide half-t leaves the residual SD likelihood- # dominated; the group SD remains weakly identified with only 25 subjects. # In a non-linear model the group SD belongs to its nlpar, so `nlpar = "b0"` # is required (a bare `class = "sd"` matches no parameter). brms::prior(student_t(3, 0, 10), class = "sigma"), brms::prior(student_t(3, 0, 10), class = "sd", nlpar = "b0") ) # Initialise every chain near the prior mean. Without this, Stan's default # uniform-on-(-2, 2) initialisation lands some chains in the unidentifiable # omega > max(tau) trap and they never escape. init_fun <- function() list( b_b0 = array(rnorm(1, 5, 1)), b_b1 = array(rnorm(1, 0, 0.3)), b_b2 = array(rnorm(1, 0, 0.3)), b_omega = array(rnorm(1, 3, 0.3)), b_rho = array(rnorm(1, 3, 0.5)) ) t0 <- Sys.time() fit_brms <- brms::brm( bf_smoothed, data = dat, prior = priors_brms, chains = 4, iter = 2000, warmup = 1000, seed = 31, refresh = 0, init = init_fun, control = list(adapt_delta = 0.95) ) time_brms <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
sbp_names <- c( "b0_(Intercept)", "b1_(Intercept)", "delta1_(Intercept)", "omega1_(Intercept)", "rho1_(Intercept)", "sigma", "sigma_u" ) brms_names <- c( "b_b0_Intercept", "b_b1_Intercept", "b_b2_Intercept", "b_omega_Intercept", "b_rho_Intercept", "sigma", "sd_subject__b0_Intercept" ) posterior::summarise_draws( posterior::as_draws_df(fit_brms), rhat, ess_bulk ) %>% dplyr::filter(variable %in% brms_names) %>% dplyr::transmute(parameter = sbp_names[match(variable, brms_names)], rhat, ess_bulk) %>% knitr::kable(digits = 3, caption = "brms convergence on population-level parameters.")
If any rhat exceeds 1.05, brms has not mixed and the comparison below is
contaminated; do not interpret disagreements as smoothbp errors. With the
bounded omega prior and the seeded init_fun, all four chains should land
in the same basin.
sbp_draws <- posterior::as_draws_df(fit_sbp$draws)[, sbp_names] brms_draws <- as.data.frame(posterior::as_draws_df(fit_brms))[, brms_names] colnames(brms_draws) <- sbp_names draws_long <- dplyr::bind_rows( tidyr::pivot_longer(sbp_draws, cols = dplyr::everything(), names_to = "parameter", values_to = "value") %>% dplyr::mutate(package = "smoothbp"), tidyr::pivot_longer(brms_draws, cols = dplyr::everything(), names_to = "parameter", values_to = "value") %>% dplyr::mutate(package = "brms") ) draws_long %>% dplyr::group_by(parameter, package) %>% dplyr::summarise( mean = mean(value), sd = sd(value), q025 = quantile(value, 0.025), q975 = quantile(value, 0.975), .groups = "drop" ) %>% tidyr::pivot_wider( names_from = package, values_from = c(mean, sd, q025, q975), names_glue = "{package}_{.value}" ) %>% dplyr::mutate( delta_mean_pct = 100 * abs(smoothbp_mean - brms_mean) / (abs(brms_mean) + 1e-8) ) %>% knitr::kable(digits = 3, caption = "Posterior summaries for both packages.")
p_overlay <- ggplot(draws_long, aes(x = value, fill = package, colour = package)) + geom_density(alpha = 0.35) + facet_wrap(~ parameter, scales = "free", ncol = 3) + scale_fill_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) + scale_colour_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) + labs( title = "Marginal posteriors: smoothbp vs brms", subtitle = sprintf( "Same simulated data, same priors. smoothbp: %.1fs brms: %.1fs", time_sbp, time_brms ), x = NULL, y = "density" ) + theme_minimal(base_size = 11) + theme(legend.position = "bottom") p_overlay
Agreement of posterior means is necessary but not sufficient: two posteriors can share a mean yet differ in spread or shape. We therefore compare the full marginal distributions with the 1-Wasserstein (earth-mover's) distance, standardised by the pooled posterior SD so it is scale-free. As a Monte Carlo reference we also report the distance between two independent smoothbp runs (seeds 31 and 32); cross-package distances of the same order as this within-sampler reference indicate marginals that are indistinguishable up to Monte Carlo error.
All priors, including the variance components, are matched as closely as the
two parameterisations allow. smoothbp's variance update is conjugate, so its
family must be inverse-gamma; here it is made near-non-informative
(prior_invgamma(0.001, 0.001)) and brms is given a correspondingly wide
half-t. Under matched diffuse priors the residual SD sigma is
likelihood-dominated and should agree to Monte Carlo error. The random-effect
SD sigma_u is only weakly identified by 25 subjects, so any small residual
cross-package difference there reflects that weak identification (and the
unavoidable inverse-gamma-versus-half-t tail difference) rather than a
discrepancy between the samplers.
# standardised 1-D 1-Wasserstein distance between two samples w1_std <- function(x, y) { p <- ppoints(1000) qx <- quantile(x, p, names = FALSE, type = 8) qy <- quantile(y, p, names = FALSE, type = 8) mean(abs(qx - qy)) / stats::sd(c(x, y)) } # second smoothbp run (different seed) as a within-sampler Monte Carlo reference fit_sbp_b <- smoothbp( formula = y ~ tau, b0 = ~ 1 + (1 | subject), b1 = ~ 1, deltas = list(~ 1), omega = list(~ 1), rho = list(~ 1), data = dat, priors = smoothbp_priors( b0 = prior_normal(0, 10), b1 = prior_normal(0, 2), omega = list(prior_normal(3, 2, lb = 0, ub = max(dat$tau))), rho = list(prior_normal(3, 2, lb = 0)), sigma = prior_invgamma(0.001, 0.001), sigma_u = prior_invgamma(0.001, 0.001) ), chains = 4L, iter = 2000L, warmup = 1000L, seed = 32L, .verbose = FALSE ) sbp_a <- posterior::as_draws_df(fit_sbp$draws)[, sbp_names] sbp_b <- posterior::as_draws_df(fit_sbp_b$draws)[, sbp_names] brms_w <- as.data.frame(posterior::as_draws_df(fit_brms))[, brms_names] colnames(brms_w) <- sbp_names data.frame( parameter = sbp_names, W1_cross = vapply(sbp_names, function(nm) w1_std(sbp_a[[nm]], brms_w[[nm]]), numeric(1)), W1_null = vapply(sbp_names, function(nm) w1_std(sbp_a[[nm]], sbp_b[[nm]]), numeric(1)), row.names = NULL ) |> knitr::kable(digits = 3, caption = "Standardised 1-Wasserstein distance (pooled-SD units): smoothbp vs brms (W1_cross) and a within-sampler reference from two smoothbp seeds (W1_null). Cross comparable to null => indistinguishable up to Monte Carlo error.")
To export the overlay figure for the manuscript, run (outside CRAN):
ggplot2::ggsave("figure_overlay_s1.png", p_overlay, width = 7, height = 5, dpi = 150)
ess_sbp <- posterior::summarise_draws(fit_sbp$draws, ess_bulk) %>% dplyr::filter(variable %in% sbp_names) %>% dplyr::transmute(parameter = variable, smoothbp_ess_per_sec = ess_bulk / time_sbp) ess_brms <- posterior::summarise_draws( posterior::as_draws_df(fit_brms), ess_bulk ) %>% dplyr::filter(variable %in% brms_names) %>% dplyr::mutate(parameter = sbp_names[match(variable, brms_names)]) %>% dplyr::transmute(parameter, brms_ess_per_sec = ess_bulk / time_brms) dplyr::full_join(ess_sbp, ess_brms, by = "parameter") %>% knitr::kable(digits = 1, caption = "Bulk ESS / second by package and parameter.")
data.frame( package = c("smoothbp", "brms (Stan/NUTS)"), wall_clock_s = round(c(time_sbp, time_brms), 1), speedup = c(sprintf("%.0fx faster", time_brms / time_sbp), "reference") ) |> knitr::kable(col.names = c("Package", "Wall-clock (s)", "Relative speed"), caption = "Scenario 1 wall-clock time. Single run on the same machine.")
This scenario exercises the HMC-within-Gibbs sampler path that is
activated when a non-linear parameter (omega or rho) has two or more
coefficients. Here omega ~ 1 + treatment gives a group-specific
change-point location.
set.seed(8147) n_subj <- 30 n_obs <- 10 tau_range <- c(0, 6) b0_true <- 5.0 b1_true <- -0.4 delta_true <- 1.4 omega_int <- 3.2 omega_trt <- -0.8 rho_true <- 4.0 sigma_true <- 0.4 sigma_u_true <- 0.7 treatment <- rep(c(0, 1), each = n_subj / 2) u_j <- rnorm(n_subj, 0, sigma_u_true) .sigmoid <- function(x) 1 / (1 + exp(-x)) rows <- vector("list", n_subj) for (j in seq_len(n_subj)) { tau_j <- seq(tau_range[1], tau_range[2], length.out = n_obs) omega_j <- omega_int + omega_trt * treatment[j] d_j <- tau_j - omega_j s_j <- .sigmoid(d_j * rho_true) mu_j <- (b0_true + u_j[j]) + b1_true * d_j + delta_true * d_j * s_j rows[[j]] <- data.frame( subject = factor(j), tau = tau_j, treatment = treatment[j], y = mu_j + rnorm(n_obs, 0, sigma_true) ) } dat_cov <- do.call(rbind, rows) cat(sprintf("True omega: intercept = %.1f, treatment effect = %.1f\n", omega_int, omega_trt)) cat(sprintf(" Control (treatment = 0): omega = %.1f\n", omega_int)) cat(sprintf(" Treated (treatment = 1): omega = %.1f\n", omega_int + omega_trt))
t0 <- Sys.time() fit_sbp_cov <- smoothbp( formula = y ~ tau, b0 = ~ 1 + (1 | subject), b1 = ~ 1, deltas = list(~ 1), omega = list(~ 1 + treatment), rho = list(~ 1), data = dat_cov, priors = smoothbp_priors( b0 = prior_normal(0, 10), b1 = prior_normal(0, 2), omega = list(list( "(Intercept)" = prior_normal(3, 2, lb = 0, ub = max(dat_cov$tau)), "treatment" = prior_normal(0, 2) )), rho = list(prior_normal(3, 2, lb = 0)) ), chains = 4L, iter = 2000L, warmup = 1000L, seed = 8147L, .verbose = FALSE ) time_sbp_cov <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
bf_cov <- brms::bf( y ~ b0 + b1 * (tau - omega) + b2 * (tau - omega) / (1 + exp(-(tau - omega) * rho)), b0 ~ 1 + (1 | subject), b1 ~ 1, b2 ~ 1, omega ~ 1 + treatment, rho ~ 1, nl = TRUE ) priors_brms_cov <- c( brms::prior(normal(0, 10), nlpar = "b0"), brms::prior(normal(0, 2), nlpar = "b1"), brms::prior(normal(0, 2), nlpar = "b2"), brms::prior_string("normal(3, 2)", nlpar = "omega", coef = "Intercept"), brms::prior(normal(0, 2), nlpar = "omega", coef = "treatment"), brms::prior(normal(3, 2), nlpar = "rho", lb = 0) ) init_fun_cov <- function() list( b_b0 = array(rnorm(1, 5, 1)), b_b1 = array(rnorm(1, 0, 0.3)), b_b2 = array(rnorm(1, 0, 0.3)), b_omega = array(c(rnorm(1, 3, 0.3), rnorm(1, -0.5, 0.3))), b_rho = array(rnorm(1, 3, 0.5)) ) t0 <- Sys.time() fit_brms_cov <- brms::brm( bf_cov, data = dat_cov, prior = priors_brms_cov, chains = 4, iter = 2000, warmup = 1000, seed = 8147, refresh = 0, init = init_fun_cov, control = list(adapt_delta = 0.95) ) time_brms_cov <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
sbp_names_cov <- c( "b0_(Intercept)", "b1_(Intercept)", "delta1_(Intercept)", "omega1_(Intercept)", "omega1_treatment", "rho1_(Intercept)", "sigma", "sigma_u" ) brms_names_cov <- c( "b_b0_Intercept", "b_b1_Intercept", "b_b2_Intercept", "b_omega_Intercept", "b_omega_treatment", "b_rho_Intercept", "sigma", "sd_subject__b0_Intercept" ) sbp_draws_cov <- posterior::as_draws_df(fit_sbp_cov$draws)[, sbp_names_cov] brms_draws_cov <- as.data.frame( posterior::as_draws_df(fit_brms_cov) )[, brms_names_cov] colnames(brms_draws_cov) <- sbp_names_cov draws_long_cov <- dplyr::bind_rows( tidyr::pivot_longer(sbp_draws_cov, cols = dplyr::everything(), names_to = "parameter", values_to = "value") %>% dplyr::mutate(package = "smoothbp"), tidyr::pivot_longer(brms_draws_cov, cols = dplyr::everything(), names_to = "parameter", values_to = "value") %>% dplyr::mutate(package = "brms") ) draws_long_cov %>% dplyr::group_by(parameter, package) %>% dplyr::summarise( mean = mean(value), sd = sd(value), q025 = quantile(value, 0.025), q975 = quantile(value, 0.975), .groups = "drop" ) %>% tidyr::pivot_wider( names_from = package, values_from = c(mean, sd, q025, q975), names_glue = "{package}_{.value}" ) %>% dplyr::mutate( delta_mean_pct = 100 * abs(smoothbp_mean - brms_mean) / (abs(brms_mean) + 1e-8) ) %>% knitr::kable(digits = 3, caption = "Posterior summaries: omega ~ 1 + treatment.")
true_vals_cov <- data.frame( parameter = c("b0_(Intercept)", "b1_(Intercept)", "delta1_(Intercept)", "omega1_(Intercept)", "omega1_treatment", "rho1_(Intercept)", "sigma", "sigma_u"), true_value = c(b0_true, b1_true, delta_true, omega_int, omega_trt, rho_true, sigma_true, sigma_u_true) ) ggplot(draws_long_cov, aes(x = value, fill = package, colour = package)) + geom_density(alpha = 0.35) + geom_vline(data = true_vals_cov, aes(xintercept = true_value), linetype = "dashed", linewidth = 0.5) + facet_wrap(~ parameter, scales = "free", ncol = 3) + scale_fill_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) + scale_colour_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) + labs( title = "Marginal posteriors: smoothbp vs brms (omega ~ 1 + treatment)", subtitle = sprintf( "smoothbp: %.1fs brms: %.1fs. Dashed lines = true values.", time_sbp_cov, time_brms_cov ), x = NULL, y = "density" ) + theme_minimal(base_size = 11) + theme(legend.position = "bottom")
ess_sbp_cov <- posterior::summarise_draws(fit_sbp_cov$draws, ess_bulk) %>% dplyr::filter(variable %in% sbp_names_cov) %>% dplyr::transmute(parameter = variable, smoothbp_ess_per_sec = ess_bulk / time_sbp_cov) ess_brms_cov <- posterior::summarise_draws( posterior::as_draws_df(fit_brms_cov), ess_bulk ) %>% dplyr::filter(variable %in% brms_names_cov) %>% dplyr::mutate(parameter = sbp_names_cov[match(variable, brms_names_cov)]) %>% dplyr::transmute(parameter, brms_ess_per_sec = ess_bulk / time_brms_cov) dplyr::full_join(ess_sbp_cov, ess_brms_cov, by = "parameter") %>% knitr::kable(digits = 1, caption = "Bulk ESS / second by package and parameter (covariate model).")
data.frame( package = c("smoothbp", "brms (Stan/NUTS)"), wall_clock_s = round(c(time_sbp_cov, time_brms_cov), 1), speedup = c(sprintf("%.0fx faster", time_brms_cov / time_sbp_cov), "reference") ) |> knitr::kable(col.names = c("Package", "Wall-clock (s)", "Relative speed"), caption = "Scenario 2 wall-clock time. Covariate-on-omega model.")
mcp (Lindeløv, 2020) is the closest R package to smoothbp in terms
of use case: Bayesian piecewise regression with flexible segment
specification and a formula interface. The fundamental modelling
difference is that mcp uses a hard (step-function) change-point
— the trajectory makes an instantaneous kink at cp_1 — while
smoothbp uses a logistic sigmoid to smooth the transition, with
the sharpness parameter rho controlling how abrupt it is. As
rho -> infinity the smooth transition converges to the hard kink, so
on data generated with large rho the two approaches should give
consistent estimates of the change-point location and slopes.
A second structural difference: Both packages support complex hierarchical designs including random intercepts and varying change-points. For this comparison, we use a fixed-effects dataset to focus on the core change-point estimation.
mcp's two-segment joined-slope model has parameters:
| mcp parameter | smoothbp equivalent |
|---------------|---------------------|
| cp_1 | omega_(Intercept) (change-point location) |
| time_1 | b1_(Intercept) (pre-change slope) |
| time_2 | b1_(Intercept) + delta1_(Intercept) (post-change slope) |
| int_1 | b0_(Intercept) - b1 * omega (level at tau = 0, not at change-point) |
| sigma_1 | sigma |
The intercept-level parameters are not directly comparable because
smoothbp anchors b0 at the change-point while mcp anchors
int_1 at tau = 0. The slopes and change-point location are
directly comparable.
set.seed(5501) dat_mcp <- simulate_smoothbp( n_subj = 1, n_obs = 150, b0 = 5.0, b1 = -0.4, delta = 1.4, omega = 3.2, rho = 5.0, # moderately sharp — should agree well with mcp sigma = 0.4, sigma_u = 0.0, # no random effects tau_range = c(0, 6), seed = 5501L ) # mcp expects a plain data.frame without the factor column dat_mcp_plain <- data.frame(tau = dat_mcp$tau, y = dat_mcp$y) cat(sprintf("n = %d | true omega = 3.2 | true b1 = -0.4 | true delta1 = 1.4\n", nrow(dat_mcp_plain)))
t0 <- Sys.time() fit_sbp_mcp <- smoothbp( formula = y ~ tau, b0 = ~ 1, b1 = ~ 1, deltas = list(~ 1), omega = list(~ 1), rho = list(fixed(100)), # Use fixed() to specify a sharp, hard kink data = dat_mcp, priors = smoothbp_priors( b0 = prior_normal(0, 10), b1 = prior_normal(0, 2), omega = list(prior_normal(3, 2, lb = 0, ub = max(dat_mcp$tau))) ), chains = 4L, iter = 2000L, warmup = 1000L, seed = 5501L, .verbose = FALSE ) time_sbp_mcp <- as.numeric(difftime(Sys.time(), t0, units = "secs")) cat(sprintf("smoothbp: %.1f s\n", time_sbp_mcp))
# Two-segment joined-slope model: the natural mcp equivalent model_mcp <- list( y ~ 1 + tau, # segment 1: intercept + pre-change slope ~ 0 + tau # segment 2: joined at cp_1, post-change slope ) # Restrict cp_1 to the observed time range, matching the smoothbp bound prior_mcp <- list(cp_1 = "dunif(0, 6)") set.seed(5501) t0 <- Sys.time() fit_mcp <- mcp::mcp( model_mcp, data = dat_mcp_plain, prior = prior_mcp, par_x = "tau", chains = 4L, iter = 2000L, adapt = 1000L, ) time_mcp <- as.numeric(difftime(Sys.time(), t0, units = "secs")) cat(sprintf("mcp: %.1f s\n", time_mcp))
The critical comparison is on omega / cp_1 (change-point location)
and the two slopes. The post-change slope from mcp (time_2) is
equivalent to b1 + delta1 in smoothbp.
sbp_sum_mcp <- posterior::summarise_draws(fit_sbp_mcp$draws) |> dplyr::filter(variable %in% c("b1_(Intercept)", "delta1_(Intercept)", "omega1_(Intercept)", "rho1_(Intercept)", "sigma")) mcp_sum <- summary(fit_mcp) |> dplyr::filter(name %in% c("cp_1", "int_1", "tau_1", "tau_2", "sigma_1")) # Compute post-change slope from smoothbp draws for direct comparison dm_sbp_mcp <- posterior::as_draws_matrix(fit_sbp_mcp$draws) post_slope <- as.numeric(dm_sbp_mcp[, "b1_(Intercept)"] + dm_sbp_mcp[, "delta1_(Intercept)"]) comparison_mcp <- data.frame( parameter = c("omega / cp_1", "b1 / time_1 (pre-change slope)", "b1+delta1 / time_2 (post-change slope)", "sigma"), truth = c(3.2, -0.4, -0.4 + 1.4, 0.4), sbp_mean = c( sbp_sum_mcp$mean[sbp_sum_mcp$variable == "omega1_(Intercept)"], sbp_sum_mcp$mean[sbp_sum_mcp$variable == "b1_(Intercept)"], mean(post_slope), sbp_sum_mcp$mean[sbp_sum_mcp$variable == "sigma"] ), sbp_lo = c( sbp_sum_mcp$q2.5[sbp_sum_mcp$variable == "omega1_(Intercept)"], sbp_sum_mcp$q2.5[sbp_sum_mcp$variable == "b1_(Intercept)"], quantile(post_slope, 0.025), sbp_sum_mcp$q2.5[sbp_sum_mcp$variable == "sigma"] ), sbp_hi = c( sbp_sum_mcp$q97.5[sbp_sum_mcp$variable == "omega1_(Intercept)"], sbp_sum_mcp$q97.5[sbp_sum_mcp$variable == "b1_(Intercept)"], quantile(post_slope, 0.975), sbp_sum_mcp$q97.5[sbp_sum_mcp$variable == "sigma"] ), mcp_mean = mcp_sum$mean[match(c("cp_1","tau_1","tau_2","sigma_1"), mcp_sum$name)], mcp_lo = mcp_sum$lower[match(c("cp_1","tau_1","tau_2","sigma_1"), mcp_sum$name)], mcp_hi = mcp_sum$upper[match(c("cp_1","tau_1","tau_2","sigma_1"), mcp_sum$name)] ) knitr::kable(comparison_mcp, digits = 3, caption = "Scenario 3: smoothbp vs mcp on comparable parameters. smoothbp post-change slope is b1 + delta1.")
The plot below overlays the posterior mean fitted trajectories from both
models. With rho = 5 the smoothbp transition is fairly sharp and the
two trajectories are visually similar near the change-point, but the
logistic rounding is visible in the smoothbp curve.
tau_grid <- seq(0, 6, length.out = 200) # smoothbp posterior mean trajectory draws_sbp_mcp <- posterior::as_draws_matrix(fit_sbp_mcp$draws) b0_m <- mean(draws_sbp_mcp[, "b0_(Intercept)"]) b1_m <- mean(draws_sbp_mcp[, "b1_(Intercept)"]) delta1_m <- mean(draws_sbp_mcp[, "delta1_(Intercept)"]) om_m <- mean(draws_sbp_mcp[, "omega1_(Intercept)"]) rho_m <- mean(draws_sbp_mcp[, "rho1_(Intercept)"]) sigmoid <- function(x) 1 / (1 + exp(-x)) d_grid <- tau_grid - om_m mu_sbp <- b0_m + b1_m * d_grid + delta1_m * d_grid * sigmoid(d_grid * rho_m) # mcp posterior mean trajectory cp_m <- mcp_sum$mean[mcp_sum$name == "cp_1"] int_m <- mcp_sum$mean[mcp_sum$name == "int_1"] t1_m <- mcp_sum$mean[mcp_sum$name == "tau_1"] t2_m <- mcp_sum$mean[mcp_sum$name == "tau_2"] mu_mcp <- ifelse(tau_grid < cp_m, int_m + t1_m * tau_grid, int_m + t1_m * cp_m + t2_m * (tau_grid - cp_m)) traj_df <- data.frame( tau = rep(tau_grid, 2), mu = c(mu_sbp, mu_mcp), model = rep(c("smoothbp (logistic)", "mcp (hard kink)"), each = 200) ) ggplot() + geom_point(aes(tau, y), data = dat_mcp_plain, alpha = 0.25, size = 0.8) + geom_line(aes(tau, mu, colour = model, linetype = model), data = traj_df, linewidth = 0.9) + geom_vline(xintercept = 3.2, linetype = "dotted", colour = "grey40") + scale_colour_manual(values = c("smoothbp (logistic)" = "#2166ac", "mcp (hard kink)" = "#d6604d")) + labs(title = "Posterior mean fitted trajectories", subtitle = "Dotted line = true change-point (omega = 3.2)", x = "tau", y = "y", colour = NULL, linetype = NULL) + theme_bw() + theme(legend.position = "bottom")
# smoothbp ESS on omega sbp_ess_mcp <- posterior::summarise_draws( fit_sbp_mcp$draws[,, c("omega1_(Intercept)", "b1_(Intercept)", "delta1_(Intercept)", "sigma")], ess_bulk = posterior::ess_bulk ) |> dplyr::mutate(ess_per_sec = ess_bulk / time_sbp_mcp, package = "smoothbp") # mcp ESS — extract draws from mcp object # mcp stores posterior draws as a coda::mcmc.list in $mcmc_post mcp_draws <- posterior::as_draws_array(fit_mcp$mcmc_post) mcp_ess_draws <- posterior::summarise_draws( mcp_draws[, , c("cp_1", "tau_1", "sigma_1")], ess_bulk = posterior::ess_bulk ) mcp_ess_raw <- setNames(mcp_ess_draws$ess_bulk, mcp_ess_draws$variable) data.frame( parameter = c("omega / cp_1", "b1 / time_1", "sigma"), sbp_ess_s = round(sbp_ess_mcp$ess_per_sec[ match(c("omega1_(Intercept)", "b1_(Intercept)", "sigma"), sbp_ess_mcp$variable)], 1), mcp_ess_s = round(c(mcp_ess_raw["cp_1"], mcp_ess_raw["tau_1"], mcp_ess_raw["sigma_1"]) / time_mcp, 1) ) |> knitr::kable(caption = "ESS/second: smoothbp vs mcp. Higher is better.")
data.frame( package = c("smoothbp", "mcp (JAGS)"), wall_clock_s = round(c(time_sbp_mcp, time_mcp), 1), speedup = c(sprintf("%.0fx faster", time_mcp / time_sbp_mcp), "reference") ) |> knitr::kable(col.names = c("Package", "Wall-clock (s)", "Relative speed"), caption = "Scenario 3 wall-clock time. Single-group, n = 150.")
Model equivalence at high rho: when rho is large (sharp
transitions), the smoothbp and mcp posteriors for the change-point
location and slopes are broadly consistent, validating that both
packages locate the structural break correctly.
Modelling the sharpness: smoothbp estimates rho as a free
parameter, quantifying transition sharpness as part of the posterior.
mcp's hard-kink model implicitly assumes rho = infinity; this can
lead to a slightly overconfident change-point posterior when
transitions are genuinely gradual.
Random effects: Both mcp and smoothbp support hierarchical models with random intercepts and varying change-points. smoothbp's conjugate Gibbs updates for random intercepts often lead to more efficient sampling of the population mean and group-level deviations compared to general-purpose MCMC.
Speed: smoothbp's compiled Rust back-end is substantially faster per iteration than mcp's JAGS back-end, and does not require JAGS to be installed separately.
Flexibility: Both packages support multiple change-points. mcp offers a wider range of likelihood families (non-Gaussian), variance/autoregressive terms, and non-linear segments. smoothbp offers unique strengths including spike-and-slab variable selection for structural break discovery and the ability to include covariates on all five structural parameters (b0, b1, delta, omega, rho) in a single model.
If both samplers have converged, the marginal posteriors overlay almost
exactly, the means agree to within Monte Carlo error (delta_mean_pct
should be a fraction of a per cent for well-mixed parameters), and the
credible intervals are within sampling variability of each other.
smoothbp jointly samples the intercept b0 and random effects u in a
single conjugate Gibbs block. This avoids the slow-mixing ridge that
arises when b0 and u are updated in separate blocks (since the
likelihood depends on b0 + u_j). As a result, b0 and sigma_u mix
well, with ESS comparable to or exceeding brms on these parameters.
For the non-linear parameters (omega, rho), NUTS typically achieves
higher ESS per iteration, but smoothbp's cheaper per-iteration cost keeps
ESS per second competitive.
If you remove the ub = max(dat$tau) bound or the init_fun, you will
likely see the brms posterior on omega and b2 go bimodal: one cluster
near the truth, and a second cluster in the unidentifiable region with
omega > max(tau) and b2 pinned at a ridiculous value matching the
right-edge slope. That is a sampler-mixing artefact, not a real feature of
the posterior, and worth demonstrating in your own analyses before trusting
results from change-point models that allow omega to escape the data
range.
A note on smoothbp's robustness: smoothbp initialises every chain near the
prior mean, which keeps it out of the degenerate region by construction.
That is excellent in practice when the user has a sensible prior on
omega, and is the typical case. It also means that on a problem where
the posterior is genuinely multimodal, the chains may all converge to
whichever mode is closest to the prior mean and fail to explore the
others. If you suspect multimodality, fit with deliberately overdispersed
initial values by varying the prior across runs.
This scenario validates the new multi-breakpoint architecture by simulating data with two true change-points and fitting both smoothbp and brms. If the two samplers agree — i.e., marginal posteriors overlap — we have strong evidence that the implementation is correct.
set.seed(9801) n <- 300 tau <- seq(0, 12, length.out = n) # True model: two breakpoints om1_true <- 3.5; rho1_true <- 5.0; delta1_true <- -1.2 om2_true <- 8.0; rho2_true <- 4.0; delta2_true <- 1.8 b0_true <- 5.0; b1_true <- 0.3; sigma_true <- 0.3 d1 <- tau - om1_true; s1 <- plogis(d1 * rho1_true) d2 <- tau - om2_true; s2 <- plogis(d2 * rho2_true) mu <- b0_true + b1_true * (tau - om1_true) + delta1_true * d1 * s1 + delta2_true * d2 * s2 y <- mu + rnorm(n, sd = sigma_true) dat_multi <- data.frame(y = y, tau = tau) cat(sprintf("True: b0=%.1f b1=%.1f delta1=%.1f omega1=%.1f delta2=%.1f omega2=%.1f sigma=%.1f\n", b0_true, b1_true, delta1_true, om1_true, delta2_true, om2_true, sigma_true))
t0 <- Sys.time() fit_sbp_multi <- smoothbp( formula = y ~ tau, b0 = ~ 1, b1 = ~ 1, deltas = list(~ 1, ~ 1), omega = list(~ 1, ~ 1), rho = list(~ 1, ~ 1), data = dat_multi, priors = smoothbp_priors( b0 = prior_normal(0, 10), b1 = prior_normal(0, 2), omega = list( prior_normal(3.5, 2, lb = 0, ub = 6), prior_normal(8.0, 2, lb = 4, ub = max(dat_multi$tau)) ), rho = list(prior_normal(4, 2, lb = 0), prior_normal(4, 2, lb = 0)) ), chains = 4L, iter = 2000L, warmup = 1000L, seed = 9801L, .verbose = FALSE ) time_sbp_multi <- as.numeric(difftime(Sys.time(), t0, units = "secs")) cat(sprintf("smoothbp: %.1f s\n", time_sbp_multi))
suppressPackageStartupMessages(library(brms)) # Two-breakpoint non-linear formula for brms bf_multi <- brms::bf( y ~ b0 + b1 * (tau - om1) + d1 * (tau - om1) / (1 + exp(-(tau - om1) * rho1)) + d2 * (tau - om2) / (1 + exp(-(tau - om2) * rho2)), b0 ~ 1, b1 ~ 1, d1 ~ 1, om1 ~ 1, rho1 ~ 1, d2 ~ 1, om2 ~ 1, rho2 ~ 1, nl = TRUE ) priors_brms_multi <- c( brms::prior(normal(0, 10), nlpar = "b0"), brms::prior(normal(0, 2), nlpar = "b1"), brms::prior(normal(0, 2), nlpar = "d1"), brms::prior_string("normal(3.5, 2)", nlpar = "om1", lb = 0, ub = 6), brms::prior(normal(4, 2), nlpar = "rho1", lb = 0), brms::prior(normal(0, 2), nlpar = "d2"), brms::prior_string("normal(8.0, 2)", nlpar = "om2", lb = 4, ub = 12), brms::prior(normal(4, 2), nlpar = "rho2", lb = 0) ) init_multi <- function() list( b_b0 = array(rnorm(1, 5, 0.5)), b_b1 = array(rnorm(1, 0.3, 0.1)), b_d1 = array(rnorm(1, -1, 0.3)), b_om1 = array(rnorm(1, 3.5, 0.3)), b_rho1 = array(rnorm(1, 5, 0.5)), b_d2 = array(rnorm(1, 1.5, 0.3)), b_om2 = array(rnorm(1, 8, 0.3)), b_rho2 = array(rnorm(1, 4, 0.5)) ) t0 <- Sys.time() fit_brms_multi <- brms::brm( bf_multi, data = dat_multi, prior = priors_brms_multi, chains = 4, iter = 2000, warmup = 1000, seed = 9801, refresh = 0, init = init_multi, control = list(adapt_delta = 0.95) ) time_brms_multi <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
sbp_names_multi <- c( "b0_(Intercept)", "b1_(Intercept)", "delta1_(Intercept)", "omega1_(Intercept)", "rho1_(Intercept)", "delta2_(Intercept)", "omega2_(Intercept)", "rho2_(Intercept)", "sigma" ) brms_names_multi <- c( "b_b0_Intercept", "b_b1_Intercept", "b_d1_Intercept", "b_om1_Intercept", "b_rho1_Intercept", "b_d2_Intercept", "b_om2_Intercept", "b_rho2_Intercept", "sigma" ) true_vals_multi <- c(b0_true, b1_true, delta1_true, om1_true, rho1_true, delta2_true, om2_true, rho2_true, sigma_true) sbp_draws_multi <- posterior::as_draws_df(fit_sbp_multi$draws)[, sbp_names_multi] brms_draws_multi <- as.data.frame( posterior::as_draws_df(fit_brms_multi) )[, brms_names_multi] colnames(brms_draws_multi) <- sbp_names_multi draws_long_multi <- dplyr::bind_rows( tidyr::pivot_longer(sbp_draws_multi, cols = dplyr::everything(), names_to = "parameter", values_to = "value") |> dplyr::mutate(package = "smoothbp"), tidyr::pivot_longer(brms_draws_multi, cols = dplyr::everything(), names_to = "parameter", values_to = "value") |> dplyr::mutate(package = "brms") ) draws_long_multi |> dplyr::group_by(parameter, package) |> dplyr::summarise(mean = mean(value), sd = sd(value), q025 = quantile(value, 0.025), q975 = quantile(value, 0.975), .groups = "drop") |> tidyr::pivot_wider(names_from = package, values_from = c(mean, sd, q025, q975), names_glue = "{package}_{.value}") |> dplyr::mutate( truth = true_vals_multi[match(parameter, sbp_names_multi)], delta_mean_pct = 100 * abs(smoothbp_mean - brms_mean) / (abs(brms_mean) + 1e-8) ) |> knitr::kable(digits = 3, caption = "Scenario 4: two-breakpoint smoothbp vs brms.")
The densities from smoothbp (blue) and brms (red) should overlap almost exactly if both samplers are targeting the same posterior. Dashed vertical lines mark the true data-generating values.
true_df_multi <- data.frame( parameter = sbp_names_multi, true_value = true_vals_multi ) ggplot(draws_long_multi, aes(x = value, fill = package, colour = package)) + geom_density(alpha = 0.35, linewidth = 0.6) + geom_vline(data = true_df_multi, aes(xintercept = true_value), linetype = "dashed", linewidth = 0.5, colour = "black") + facet_wrap(~ parameter, scales = "free", ncol = 3) + scale_fill_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) + scale_colour_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) + labs( title = "Scenario 4: Multi-breakpoint posteriors — smoothbp vs brms", subtitle = sprintf( "Two true change-points. smoothbp: %.1fs brms: %.1fs\nDashed = true value. Overlapping densities = agreement.", time_sbp_multi, time_brms_multi ), x = NULL, y = "Density" ) + theme_minimal(base_size = 11) + theme(legend.position = "bottom")
ess_sbp_multi <- posterior::summarise_draws( fit_sbp_multi$draws[, , sbp_names_multi], ess_bulk = posterior::ess_bulk ) |> dplyr::transmute(parameter = variable, smoothbp_ess_per_sec = ess_bulk / time_sbp_multi) ess_brms_multi <- posterior::summarise_draws( posterior::as_draws_df(fit_brms_multi), ess_bulk = posterior::ess_bulk ) |> dplyr::filter(variable %in% brms_names_multi) |> dplyr::mutate(parameter = sbp_names_multi[match(variable, brms_names_multi)]) |> dplyr::transmute(parameter, brms_ess_per_sec = ess_bulk / time_brms_multi) dplyr::full_join(ess_sbp_multi, ess_brms_multi, by = "parameter") |> knitr::kable(digits = 1, caption = "ESS/second: smoothbp vs brms (two-breakpoint model).")
data.frame( package = c("smoothbp", "brms (Stan/NUTS)"), wall_clock_s = round(c(time_sbp_multi, time_brms_multi), 1), speedup = c(sprintf("%.0fx faster", time_brms_multi / time_sbp_multi), "reference") ) |> knitr::kable(col.names = c("Package", "Wall-clock (s)", "Relative speed"), caption = "Scenario 4 wall-clock time. Two-breakpoint model (single group, fixed effects).")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.