smoothbp is a bespoke Metropolis-within-Gibbs sampler
for Bayesian hierarchical piecewise regression with
multiple logistic-smoothed change-points. The recovery
tests are a self-consistency check — simulator and likelihood share the
same code path. To trust the package one also wants to see that it
agrees with an independent implementation. brms (via Stan)
is the natural reference for fitting the same generative model via NUTS
on a custom non-linear formula. mcp (via JAGS) is the
closest specialist competitor and provides a complementary perspective:
it fits hard (sharp) change-points rather than the
logistic-smoothed transition that smoothbp uses.
Four scenarios are compared below:
omega ~ 1 + treatment. Compared against
brms.When the change-point parameter omega drifts outside the
range of the time variable, the smoothing term collapses and
delta1 becomes effectively unidentified. NUTS chains that
initialise in that region can get stuck there for the entire run,
producing an artificially multimodal posterior that has nothing to do
with the data. The clean fix is to bound omega above at
max(tau) in both samplers, and to initialise brms chains
near the prior mean (which is what smoothbp does internally). Both fixes
applied below.
library(smoothbp)
library(posterior)
library(ggplot2)
library(dplyr)
library(tidyr)
have_brms <- requireNamespace("brms", quietly = TRUE) && NOT_CRAN
if (!have_brms && NOT_CRAN) {
message("brms not installed -- install.packages('brms') to render the comparison.")
}
have_mcp <- requireNamespace("mcp", quietly = TRUE) &&
requireNamespace("rjags", quietly = TRUE) && NOT_CRAN
if (!have_mcp && NOT_CRAN) {
message("mcp or rjags not installed -- install JAGS then install.packages('mcp') ",
"to render the mcp comparison.")
}t0 <- Sys.time()
fit_sbp <- smoothbp(
formula = y ~ tau,
b0 = ~ 1 + (1 | subject),
b1 = ~ 1,
deltas = list(~ 1),
omega = list(~ 1),
rho = list(~ 1),
data = dat,
priors = smoothbp_priors(
b0 = prior_normal(0, 10),
b1 = prior_normal(0, 2),
omega = list(prior_normal(3, 2, lb = 0, ub = max(dat$tau))),
rho = list(prior_normal(3, 2, lb = 0))
),
chains = 4L, iter = 2000L, warmup = 1000L,
seed = 31L, .verbose = FALSE
)
time_sbp <- as.numeric(difftime(Sys.time(), t0, units = "secs"))suppressPackageStartupMessages(library(brms))
bf_smoothed <- brms::bf(
y ~ b0 + b1 * (tau - omega) +
b2 * (tau - omega) / (1 + exp(-(tau - omega) * rho)),
b0 ~ 1 + (1 | subject),
b1 ~ 1, b2 ~ 1, omega ~ 1, rho ~ 1,
nl = TRUE
)
# brms::prior() captures arguments by non-standard evaluation, so
# `ub = max(dat$tau)` would be embedded literally in the generated Stan
# code. Pre-compute the value and pass it via prior_string() which uses
# standard evaluation and accepts numeric bounds.
ub_omega <- max(dat$tau)
priors_brms <- c(
brms::prior(normal(0, 10), nlpar = "b0"),
brms::prior(normal(0, 2), nlpar = "b1"),
brms::prior(normal(0, 2), nlpar = "b2"),
brms::prior_string("normal(3, 2)", nlpar = "omega",
lb = 0, ub = ub_omega),
brms::prior(normal(3, 2), nlpar = "rho", lb = 0)
)
# Initialise every chain near the prior mean. Without this, Stan's default
# uniform-on-(-2, 2) initialisation lands some chains in the unidentifiable
# omega > max(tau) trap and they never escape.
init_fun <- function() list(
b_b0 = array(rnorm(1, 5, 1)),
b_b1 = array(rnorm(1, 0, 0.3)),
b_b2 = array(rnorm(1, 0, 0.3)),
b_omega = array(rnorm(1, 3, 0.3)),
b_rho = array(rnorm(1, 3, 0.5))
)
t0 <- Sys.time()
fit_brms <- brms::brm(
bf_smoothed,
data = dat,
prior = priors_brms,
chains = 4, iter = 2000, warmup = 1000,
seed = 31, refresh = 0,
init = init_fun,
control = list(adapt_delta = 0.95)
)
time_brms <- as.numeric(difftime(Sys.time(), t0, units = "secs"))sbp_names <- c(
"b0_(Intercept)", "b1_(Intercept)", "delta1_(Intercept)",
"omega1_(Intercept)", "rho1_(Intercept)",
"sigma", "sigma_u"
)
brms_names <- c(
"b_b0_Intercept", "b_b1_Intercept", "b_b2_Intercept",
"b_omega_Intercept", "b_rho_Intercept",
"sigma", "sd_subject__b0_Intercept"
)
posterior::summarise_draws(
posterior::as_draws_df(fit_brms), rhat, ess_bulk
) %>%
dplyr::filter(variable %in% brms_names) %>%
dplyr::transmute(parameter = sbp_names[match(variable, brms_names)],
rhat, ess_bulk) %>%
knitr::kable(digits = 3,
caption = "brms convergence on population-level parameters.")If any rhat exceeds 1.05, brms has not mixed and the
comparison below is contaminated; do not interpret disagreements as
smoothbp errors. With the bounded omega prior and the
seeded init_fun, all four chains should land in the same
basin.
sbp_draws <- posterior::as_draws_df(fit_sbp$draws)[, sbp_names]
brms_draws <- as.data.frame(posterior::as_draws_df(fit_brms))[, brms_names]
colnames(brms_draws) <- sbp_names
draws_long <- dplyr::bind_rows(
tidyr::pivot_longer(sbp_draws, cols = dplyr::everything(),
names_to = "parameter", values_to = "value") %>%
dplyr::mutate(package = "smoothbp"),
tidyr::pivot_longer(brms_draws, cols = dplyr::everything(),
names_to = "parameter", values_to = "value") %>%
dplyr::mutate(package = "brms")
)
draws_long %>%
dplyr::group_by(parameter, package) %>%
dplyr::summarise(
mean = mean(value),
sd = sd(value),
q025 = quantile(value, 0.025),
q975 = quantile(value, 0.975),
.groups = "drop"
) %>%
tidyr::pivot_wider(
names_from = package,
values_from = c(mean, sd, q025, q975),
names_glue = "{package}_{.value}"
) %>%
dplyr::mutate(
delta_mean_pct = 100 * abs(smoothbp_mean - brms_mean) /
(abs(brms_mean) + 1e-8)
) %>%
knitr::kable(digits = 3,
caption = "Posterior summaries for both packages.")ggplot(draws_long, aes(x = value, fill = package, colour = package)) +
geom_density(alpha = 0.35) +
facet_wrap(~ parameter, scales = "free", ncol = 3) +
scale_fill_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) +
scale_colour_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) +
labs(
title = "Marginal posteriors: smoothbp vs brms",
subtitle = sprintf(
"Same simulated data, same priors. smoothbp: %.1fs brms: %.1fs",
time_sbp, time_brms
),
x = NULL, y = "density"
) +
theme_minimal(base_size = 11) +
theme(legend.position = "bottom")ess_sbp <- posterior::summarise_draws(fit_sbp$draws, ess_bulk) %>%
dplyr::filter(variable %in% sbp_names) %>%
dplyr::transmute(parameter = variable,
smoothbp_ess_per_sec = ess_bulk / time_sbp)
ess_brms <- posterior::summarise_draws(
posterior::as_draws_df(fit_brms), ess_bulk
) %>%
dplyr::filter(variable %in% brms_names) %>%
dplyr::mutate(parameter = sbp_names[match(variable, brms_names)]) %>%
dplyr::transmute(parameter, brms_ess_per_sec = ess_bulk / time_brms)
dplyr::full_join(ess_sbp, ess_brms, by = "parameter") %>%
knitr::kable(digits = 1,
caption = "Bulk ESS / second by package and parameter.")This scenario exercises the HMC-within-Gibbs sampler
path that is activated when a non-linear parameter (omega
or rho) has two or more coefficients. Here
omega ~ 1 + treatment gives a group-specific change-point
location.
set.seed(8147)
n_subj <- 30
n_obs <- 10
tau_range <- c(0, 6)
b0_true <- 5.0
b1_true <- -0.4
delta_true <- 1.4
omega_int <- 3.2
omega_trt <- -0.8
rho_true <- 4.0
sigma_true <- 0.4
sigma_u_true <- 0.7
treatment <- rep(c(0, 1), each = n_subj / 2)
u_j <- rnorm(n_subj, 0, sigma_u_true)
.sigmoid <- function(x) 1 / (1 + exp(-x))
rows <- vector("list", n_subj)
for (j in seq_len(n_subj)) {
tau_j <- seq(tau_range[1], tau_range[2], length.out = n_obs)
omega_j <- omega_int + omega_trt * treatment[j]
d_j <- tau_j - omega_j
s_j <- .sigmoid(d_j * rho_true)
mu_j <- (b0_true + u_j[j]) + b1_true * d_j + delta_true * d_j * s_j
rows[[j]] <- data.frame(
subject = factor(j),
tau = tau_j,
treatment = treatment[j],
y = mu_j + rnorm(n_obs, 0, sigma_true)
)
}
dat_cov <- do.call(rbind, rows)
cat(sprintf("True omega: intercept = %.1f, treatment effect = %.1f\n",
omega_int, omega_trt))
cat(sprintf(" Control (treatment = 0): omega = %.1f\n", omega_int))
cat(sprintf(" Treated (treatment = 1): omega = %.1f\n", omega_int + omega_trt))t0 <- Sys.time()
fit_sbp_cov <- smoothbp(
formula = y ~ tau,
b0 = ~ 1 + (1 | subject),
b1 = ~ 1,
deltas = list(~ 1),
omega = list(~ 1 + treatment),
rho = list(~ 1),
data = dat_cov,
priors = smoothbp_priors(
b0 = prior_normal(0, 10),
b1 = prior_normal(0, 2),
omega = list(list(
"(Intercept)" = prior_normal(3, 2, lb = 0, ub = max(dat_cov$tau)),
"treatment" = prior_normal(0, 2)
)),
rho = list(prior_normal(3, 2, lb = 0))
),
chains = 4L, iter = 2000L, warmup = 1000L,
seed = 8147L, .verbose = FALSE
)
time_sbp_cov <- as.numeric(difftime(Sys.time(), t0, units = "secs"))bf_cov <- brms::bf(
y ~ b0 + b1 * (tau - omega) +
b2 * (tau - omega) / (1 + exp(-(tau - omega) * rho)),
b0 ~ 1 + (1 | subject),
b1 ~ 1,
b2 ~ 1,
omega ~ 1 + treatment,
rho ~ 1,
nl = TRUE
)
priors_brms_cov <- c(
brms::prior(normal(0, 10), nlpar = "b0"),
brms::prior(normal(0, 2), nlpar = "b1"),
brms::prior(normal(0, 2), nlpar = "b2"),
brms::prior_string("normal(3, 2)", nlpar = "omega", coef = "Intercept"),
brms::prior(normal(0, 2), nlpar = "omega", coef = "treatment"),
brms::prior(normal(3, 2), nlpar = "rho", lb = 0)
)
init_fun_cov <- function() list(
b_b0 = array(rnorm(1, 5, 1)),
b_b1 = array(rnorm(1, 0, 0.3)),
b_b2 = array(rnorm(1, 0, 0.3)),
b_omega = array(c(rnorm(1, 3, 0.3), rnorm(1, -0.5, 0.3))),
b_rho = array(rnorm(1, 3, 0.5))
)
t0 <- Sys.time()
fit_brms_cov <- brms::brm(
bf_cov,
data = dat_cov,
prior = priors_brms_cov,
chains = 4, iter = 2000, warmup = 1000,
seed = 8147, refresh = 0,
init = init_fun_cov,
control = list(adapt_delta = 0.95)
)
time_brms_cov <- as.numeric(difftime(Sys.time(), t0, units = "secs"))sbp_names_cov <- c(
"b0_(Intercept)", "b1_(Intercept)", "delta1_(Intercept)",
"omega1_(Intercept)", "omega1_treatment",
"rho1_(Intercept)", "sigma", "sigma_u"
)
brms_names_cov <- c(
"b_b0_Intercept", "b_b1_Intercept", "b_b2_Intercept",
"b_omega_Intercept", "b_omega_treatment",
"b_rho_Intercept", "sigma", "sd_subject__b0_Intercept"
)
sbp_draws_cov <- posterior::as_draws_df(fit_sbp_cov$draws)[, sbp_names_cov]
brms_draws_cov <- as.data.frame(
posterior::as_draws_df(fit_brms_cov)
)[, brms_names_cov]
colnames(brms_draws_cov) <- sbp_names_cov
draws_long_cov <- dplyr::bind_rows(
tidyr::pivot_longer(sbp_draws_cov, cols = dplyr::everything(),
names_to = "parameter", values_to = "value") %>%
dplyr::mutate(package = "smoothbp"),
tidyr::pivot_longer(brms_draws_cov, cols = dplyr::everything(),
names_to = "parameter", values_to = "value") %>%
dplyr::mutate(package = "brms")
)
draws_long_cov %>%
dplyr::group_by(parameter, package) %>%
dplyr::summarise(
mean = mean(value),
sd = sd(value),
q025 = quantile(value, 0.025),
q975 = quantile(value, 0.975),
.groups = "drop"
) %>%
tidyr::pivot_wider(
names_from = package,
values_from = c(mean, sd, q025, q975),
names_glue = "{package}_{.value}"
) %>%
dplyr::mutate(
delta_mean_pct = 100 * abs(smoothbp_mean - brms_mean) /
(abs(brms_mean) + 1e-8)
) %>%
knitr::kable(digits = 3,
caption = "Posterior summaries: omega ~ 1 + treatment.")true_vals_cov <- data.frame(
parameter = c("b0_(Intercept)", "b1_(Intercept)", "delta1_(Intercept)",
"omega1_(Intercept)", "omega1_treatment",
"rho1_(Intercept)", "sigma", "sigma_u"),
true_value = c(b0_true, b1_true, delta_true,
omega_int, omega_trt, rho_true, sigma_true, sigma_u_true)
)
ggplot(draws_long_cov, aes(x = value, fill = package, colour = package)) +
geom_density(alpha = 0.35) +
geom_vline(data = true_vals_cov, aes(xintercept = true_value),
linetype = "dashed", linewidth = 0.5) +
facet_wrap(~ parameter, scales = "free", ncol = 3) +
scale_fill_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) +
scale_colour_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) +
labs(
title = "Marginal posteriors: smoothbp vs brms (omega ~ 1 + treatment)",
subtitle = sprintf(
"smoothbp: %.1fs brms: %.1fs. Dashed lines = true values.",
time_sbp_cov, time_brms_cov
),
x = NULL, y = "density"
) +
theme_minimal(base_size = 11) +
theme(legend.position = "bottom")ess_sbp_cov <- posterior::summarise_draws(fit_sbp_cov$draws, ess_bulk) %>%
dplyr::filter(variable %in% sbp_names_cov) %>%
dplyr::transmute(parameter = variable,
smoothbp_ess_per_sec = ess_bulk / time_sbp_cov)
ess_brms_cov <- posterior::summarise_draws(
posterior::as_draws_df(fit_brms_cov), ess_bulk
) %>%
dplyr::filter(variable %in% brms_names_cov) %>%
dplyr::mutate(parameter = sbp_names_cov[match(variable, brms_names_cov)]) %>%
dplyr::transmute(parameter, brms_ess_per_sec = ess_bulk / time_brms_cov)
dplyr::full_join(ess_sbp_cov, ess_brms_cov, by = "parameter") %>%
knitr::kable(digits = 1,
caption = "Bulk ESS / second by package and parameter (covariate model).")mcp (Lindeløv, 2020) is the closest R package to
smoothbp in terms of use case: Bayesian piecewise
regression with flexible segment specification and a formula interface.
The fundamental modelling difference is that mcp uses a
hard (step-function) change-point — the trajectory
makes an instantaneous kink at cp_1 — while
smoothbp uses a logistic sigmoid to smooth
the transition, with the sharpness parameter rho
controlling how abrupt it is. As rho -> infinity the
smooth transition converges to the hard kink, so on data generated with
large rho the two approaches should give consistent
estimates of the change-point location and slopes.
A second structural difference: Both packages support complex hierarchical designs including random intercepts and varying change-points. For this comparison, we use a fixed-effects dataset to focus on the core change-point estimation.
mcp’s two-segment joined-slope model has parameters:
| mcp parameter | smoothbp equivalent |
|---|---|
cp_1 |
omega_(Intercept) (change-point location) |
time_1 |
b1_(Intercept) (pre-change slope) |
time_2 |
b1_(Intercept) + delta1_(Intercept) (post-change
slope) |
int_1 |
b0_(Intercept) - b1 * omega (level at tau = 0, not at
change-point) |
sigma_1 |
sigma |
The intercept-level parameters are not directly comparable because
smoothbp anchors b0 at the change-point while
mcp anchors int_1 at tau = 0. The
slopes and change-point location are directly comparable.
set.seed(5501)
dat_mcp <- simulate_smoothbp(
n_subj = 1,
n_obs = 150,
b0 = 5.0,
b1 = -0.4,
b2 = 1.4,
omega = 3.2,
rho = 5.0, # moderately sharp — should agree well with mcp
sigma = 0.4,
sigma_u = 0.0, # no random effects
tau_range = c(0, 6),
seed = 5501L
)
# mcp expects a plain data.frame without the factor column
dat_mcp_plain <- data.frame(tau = dat_mcp$tau, y = dat_mcp$y)
cat(sprintf("n = %d | true omega = 3.2 | true b1 = -0.4 | true delta1 = 1.4\n",
nrow(dat_mcp_plain)))t0 <- Sys.time()
fit_sbp_mcp <- smoothbp(
formula = y ~ tau,
b0 = ~ 1,
b1 = ~ 1,
deltas = list(~ 1),
omega = list(~ 1),
rho = list(fixed(100)), # Use fixed() to specify a sharp, hard kink
data = dat_mcp,
priors = smoothbp_priors(
b0 = prior_normal(0, 10),
b1 = prior_normal(0, 2),
omega = list(prior_normal(3, 2, lb = 0, ub = max(dat_mcp$tau)))
),
chains = 4L, iter = 2000L, warmup = 1000L,
seed = 5501L, .verbose = FALSE
)
time_sbp_mcp <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
cat(sprintf("smoothbp: %.1f s\n", time_sbp_mcp))# Two-segment joined-slope model: the natural mcp equivalent
model_mcp <- list(
y ~ 1 + tau, # segment 1: intercept + pre-change slope
~ 0 + tau # segment 2: joined at cp_1, post-change slope
)
# Restrict cp_1 to the observed time range, matching the smoothbp bound
prior_mcp <- list(cp_1 = "dunif(0, 6)")
set.seed(5501)
t0 <- Sys.time()
fit_mcp <- mcp::mcp(
model_mcp,
data = dat_mcp_plain,
prior = prior_mcp,
par_x = "tau",
chains = 4L, iter = 2000L, adapt = 1000L,
)
time_mcp <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
cat(sprintf("mcp: %.1f s\n", time_mcp))The critical comparison is on omega / cp_1
(change-point location) and the two slopes. The post-change slope from
mcp (time_2) is equivalent to b1 + delta1 in
smoothbp.
sbp_sum_mcp <- posterior::summarise_draws(fit_sbp_mcp$draws) |>
dplyr::filter(variable %in% c("b1_(Intercept)", "delta1_(Intercept)", "omega1_(Intercept)",
"rho1_(Intercept)", "sigma"))
mcp_sum <- summary(fit_mcp) |>
dplyr::filter(name %in% c("cp_1", "int_1", "tau_1", "tau_2", "sigma_1"))
# Compute post-change slope from smoothbp draws for direct comparison
dm_sbp_mcp <- posterior::as_draws_matrix(fit_sbp_mcp$draws)
post_slope <- as.numeric(dm_sbp_mcp[, "b1_(Intercept)"] +
dm_sbp_mcp[, "delta1_(Intercept)"])
comparison_mcp <- data.frame(
parameter = c("omega / cp_1",
"b1 / time_1 (pre-change slope)",
"b1+delta1 / time_2 (post-change slope)",
"sigma"),
truth = c(3.2, -0.4, -0.4 + 1.4, 0.4),
sbp_mean = c(
sbp_sum_mcp$mean[sbp_sum_mcp$variable == "omega1_(Intercept)"],
sbp_sum_mcp$mean[sbp_sum_mcp$variable == "b1_(Intercept)"],
mean(post_slope),
sbp_sum_mcp$mean[sbp_sum_mcp$variable == "sigma"]
),
sbp_lo = c(
sbp_sum_mcp$q2.5[sbp_sum_mcp$variable == "omega1_(Intercept)"],
sbp_sum_mcp$q2.5[sbp_sum_mcp$variable == "b1_(Intercept)"],
quantile(post_slope, 0.025),
sbp_sum_mcp$q2.5[sbp_sum_mcp$variable == "sigma"]
),
sbp_hi = c(
sbp_sum_mcp$q97.5[sbp_sum_mcp$variable == "omega1_(Intercept)"],
sbp_sum_mcp$q97.5[sbp_sum_mcp$variable == "b1_(Intercept)"],
quantile(post_slope, 0.975),
sbp_sum_mcp$q97.5[sbp_sum_mcp$variable == "sigma"]
),
mcp_mean = mcp_sum$mean[match(c("cp_1","tau_1","tau_2","sigma_1"),
mcp_sum$name)],
mcp_lo = mcp_sum$lower[match(c("cp_1","tau_1","tau_2","sigma_1"),
mcp_sum$name)],
mcp_hi = mcp_sum$upper[match(c("cp_1","tau_1","tau_2","sigma_1"),
mcp_sum$name)]
)
knitr::kable(comparison_mcp, digits = 3,
caption = "Scenario 3: smoothbp vs mcp on comparable parameters.
smoothbp post-change slope is b1 + delta1.")The plot below overlays the posterior mean fitted trajectories from
both models. With rho = 5 the smoothbp transition is fairly
sharp and the two trajectories are visually similar near the
change-point, but the logistic rounding is visible in the smoothbp
curve.
tau_grid <- seq(0, 6, length.out = 200)
# smoothbp posterior mean trajectory
draws_sbp_mcp <- posterior::as_draws_matrix(fit_sbp_mcp$draws)
b0_m <- mean(draws_sbp_mcp[, "b0_(Intercept)"])
b1_m <- mean(draws_sbp_mcp[, "b1_(Intercept)"])
delta1_m <- mean(draws_sbp_mcp[, "delta1_(Intercept)"])
om_m <- mean(draws_sbp_mcp[, "omega1_(Intercept)"])
rho_m <- mean(draws_sbp_mcp[, "rho1_(Intercept)"])
sigmoid <- function(x) 1 / (1 + exp(-x))
d_grid <- tau_grid - om_m
mu_sbp <- b0_m + b1_m * d_grid + delta1_m * d_grid * sigmoid(d_grid * rho_m)
# mcp posterior mean trajectory
cp_m <- mcp_sum$mean[mcp_sum$name == "cp_1"]
int_m <- mcp_sum$mean[mcp_sum$name == "int_1"]
t1_m <- mcp_sum$mean[mcp_sum$name == "tau_1"]
t2_m <- mcp_sum$mean[mcp_sum$name == "tau_2"]
mu_mcp <- ifelse(tau_grid < cp_m,
int_m + t1_m * tau_grid,
int_m + t1_m * cp_m + t2_m * (tau_grid - cp_m))
traj_df <- data.frame(
tau = rep(tau_grid, 2),
mu = c(mu_sbp, mu_mcp),
model = rep(c("smoothbp (logistic)", "mcp (hard kink)"), each = 200)
)
ggplot() +
geom_point(aes(tau, y), data = dat_mcp_plain, alpha = 0.25, size = 0.8) +
geom_line(aes(tau, mu, colour = model, linetype = model),
data = traj_df, linewidth = 0.9) +
geom_vline(xintercept = 3.2, linetype = "dotted", colour = "grey40") +
scale_colour_manual(values = c("smoothbp (logistic)" = "#2166ac",
"mcp (hard kink)" = "#d6604d")) +
labs(title = "Posterior mean fitted trajectories",
subtitle = "Dotted line = true change-point (omega = 3.2)",
x = "tau", y = "y", colour = NULL, linetype = NULL) +
theme_bw() +
theme(legend.position = "bottom")# smoothbp ESS on omega
sbp_ess_mcp <- posterior::summarise_draws(
fit_sbp_mcp$draws[,, c("omega1_(Intercept)", "b1_(Intercept)",
"delta1_(Intercept)", "sigma")],
ess_bulk = posterior::ess_bulk
) |>
dplyr::mutate(ess_per_sec = ess_bulk / time_sbp_mcp,
package = "smoothbp")
# mcp ESS — extract draws from mcp object
# mcp stores posterior draws as a coda::mcmc.list in $mcmc_post
mcp_draws <- posterior::as_draws_array(fit_mcp$mcmc_post)
mcp_ess_draws <- posterior::summarise_draws(
mcp_draws[, , c("cp_1", "tau_1", "sigma_1")],
ess_bulk = posterior::ess_bulk
)
mcp_ess_raw <- setNames(mcp_ess_draws$ess_bulk, mcp_ess_draws$variable)
data.frame(
parameter = c("omega / cp_1", "b1 / time_1", "sigma"),
sbp_ess_s = round(sbp_ess_mcp$ess_per_sec[
match(c("omega1_(Intercept)", "b1_(Intercept)", "sigma"),
sbp_ess_mcp$variable)], 1),
mcp_ess_s = round(c(mcp_ess_raw["cp_1"],
mcp_ess_raw["tau_1"],
mcp_ess_raw["sigma_1"]) / time_mcp, 1)
) |>
knitr::kable(caption = "ESS/second: smoothbp vs mcp.
Higher is better.")Model equivalence at high rho: when
rho is large (sharp transitions), the smoothbp and mcp
posteriors for the change-point location and slopes are broadly
consistent, validating that both packages locate the structural break
correctly.
Modelling the sharpness: smoothbp estimates
rho as a free parameter, quantifying transition sharpness
as part of the posterior. mcp’s hard-kink model implicitly assumes
rho = infinity; this can lead to a slightly overconfident
change-point posterior when transitions are genuinely gradual.
Random effects: Both mcp and
smoothbp support hierarchical models with random intercepts
and varying change-points. smoothbp’s conjugate Gibbs
updates for random intercepts often lead to more efficient sampling of
the population mean and group-level deviations compared to
general-purpose MCMC.
Speed: smoothbp’s compiled Rust back-end is substantially faster per iteration than mcp’s JAGS back-end, and does not require JAGS to be installed separately.
Flexibility: Both packages support
multiple change-points. mcp offers a wider
range of likelihood families (non-Gaussian), variance/autoregressive
terms, and non-linear segments. smoothbp offers unique
strengths including spike-and-slab variable selection
for structural break discovery and the ability to include
covariates on all five structural parameters (b0, b1,
delta, omega, rho) in a single model.
If both samplers have converged, the marginal posteriors overlay
almost exactly, the means agree to within Monte Carlo error
(delta_mean_pct should be a fraction of a per cent for
well-mixed parameters), and the credible intervals are within sampling
variability of each other.
smoothbp jointly samples the intercept b0
and random effects u in a single conjugate Gibbs block.
This avoids the slow-mixing ridge that arises when b0 and
u are updated in separate blocks (since the likelihood
depends on b0 + u_j). As a result, b0 and
sigma_u mix well, with ESS comparable to or exceeding brms
on these parameters.
For the non-linear parameters (omega, rho),
NUTS typically achieves higher ESS per iteration, but smoothbp’s cheaper
per-iteration cost keeps ESS per second competitive.
If you remove the ub = max(dat$tau) bound or the
init_fun, you will likely see the brms posterior on
omega and b2 go bimodal: one cluster near the
truth, and a second cluster in the unidentifiable region with
omega > max(tau) and b2 pinned at a
ridiculous value matching the right-edge slope. That is a sampler-mixing
artefact, not a real feature of the posterior, and worth demonstrating
in your own analyses before trusting results from change-point models
that allow omega to escape the data range.
A note on smoothbp’s robustness: smoothbp initialises every chain
near the prior mean, which keeps it out of the degenerate region by
construction. That is excellent in practice when the user has a sensible
prior on omega, and is the typical case. It also means that
on a problem where the posterior is genuinely multimodal, the chains may
all converge to whichever mode is closest to the prior mean and fail to
explore the others. If you suspect multimodality, fit with deliberately
overdispersed initial values by varying the prior across runs.
This scenario validates the new multi-breakpoint architecture by simulating data with two true change-points and fitting both smoothbp and brms. If the two samplers agree — i.e., marginal posteriors overlap — we have strong evidence that the implementation is correct.
set.seed(9801)
n <- 300
tau <- seq(0, 12, length.out = n)
# True model: two breakpoints
om1_true <- 3.5; rho1_true <- 5.0; delta1_true <- -1.2
om2_true <- 8.0; rho2_true <- 4.0; delta2_true <- 1.8
b0_true <- 5.0; b1_true <- 0.3; sigma_true <- 0.3
d1 <- tau - om1_true; s1 <- plogis(d1 * rho1_true)
d2 <- tau - om2_true; s2 <- plogis(d2 * rho2_true)
mu <- b0_true + b1_true * (tau - om1_true) +
delta1_true * d1 * s1 + delta2_true * d2 * s2
y <- mu + rnorm(n, sd = sigma_true)
dat_multi <- data.frame(y = y, tau = tau)
cat(sprintf("True: b0=%.1f b1=%.1f delta1=%.1f omega1=%.1f delta2=%.1f omega2=%.1f sigma=%.1f\n",
b0_true, b1_true, delta1_true, om1_true, delta2_true, om2_true, sigma_true))t0 <- Sys.time()
fit_sbp_multi <- smoothbp(
formula = y ~ tau,
b0 = ~ 1,
b1 = ~ 1,
deltas = list(~ 1, ~ 1),
omega = list(~ 1, ~ 1),
rho = list(~ 1, ~ 1),
data = dat_multi,
priors = smoothbp_priors(
b0 = prior_normal(0, 10),
b1 = prior_normal(0, 2),
omega = list(
prior_normal(3.5, 2, lb = 0, ub = 6),
prior_normal(8.0, 2, lb = 4, ub = max(dat_multi$tau))
),
rho = list(prior_normal(4, 2, lb = 0),
prior_normal(4, 2, lb = 0))
),
chains = 4L, iter = 2000L, warmup = 1000L,
seed = 9801L, .verbose = FALSE
)
time_sbp_multi <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
cat(sprintf("smoothbp: %.1f s\n", time_sbp_multi))suppressPackageStartupMessages(library(brms))
# Two-breakpoint non-linear formula for brms
bf_multi <- brms::bf(
y ~ b0 + b1 * (tau - om1) +
d1 * (tau - om1) / (1 + exp(-(tau - om1) * rho1)) +
d2 * (tau - om2) / (1 + exp(-(tau - om2) * rho2)),
b0 ~ 1, b1 ~ 1,
d1 ~ 1, om1 ~ 1, rho1 ~ 1,
d2 ~ 1, om2 ~ 1, rho2 ~ 1,
nl = TRUE
)
priors_brms_multi <- c(
brms::prior(normal(0, 10), nlpar = "b0"),
brms::prior(normal(0, 2), nlpar = "b1"),
brms::prior(normal(0, 2), nlpar = "d1"),
brms::prior_string("normal(3.5, 2)", nlpar = "om1", lb = 0, ub = 6),
brms::prior(normal(4, 2), nlpar = "rho1", lb = 0),
brms::prior(normal(0, 2), nlpar = "d2"),
brms::prior_string("normal(8.0, 2)", nlpar = "om2", lb = 4, ub = 12),
brms::prior(normal(4, 2), nlpar = "rho2", lb = 0)
)
init_multi <- function() list(
b_b0 = array(rnorm(1, 5, 0.5)),
b_b1 = array(rnorm(1, 0.3, 0.1)),
b_d1 = array(rnorm(1, -1, 0.3)),
b_om1 = array(rnorm(1, 3.5, 0.3)),
b_rho1 = array(rnorm(1, 5, 0.5)),
b_d2 = array(rnorm(1, 1.5, 0.3)),
b_om2 = array(rnorm(1, 8, 0.3)),
b_rho2 = array(rnorm(1, 4, 0.5))
)
t0 <- Sys.time()
fit_brms_multi <- brms::brm(
bf_multi,
data = dat_multi,
prior = priors_brms_multi,
chains = 4, iter = 2000, warmup = 1000,
seed = 9801, refresh = 0,
init = init_multi,
control = list(adapt_delta = 0.95)
)
time_brms_multi <- as.numeric(difftime(Sys.time(), t0, units = "secs"))sbp_names_multi <- c(
"b0_(Intercept)", "b1_(Intercept)",
"delta1_(Intercept)", "omega1_(Intercept)", "rho1_(Intercept)",
"delta2_(Intercept)", "omega2_(Intercept)", "rho2_(Intercept)",
"sigma"
)
brms_names_multi <- c(
"b_b0_Intercept", "b_b1_Intercept",
"b_d1_Intercept", "b_om1_Intercept", "b_rho1_Intercept",
"b_d2_Intercept", "b_om2_Intercept", "b_rho2_Intercept",
"sigma"
)
true_vals_multi <- c(b0_true, b1_true,
delta1_true, om1_true, rho1_true,
delta2_true, om2_true, rho2_true,
sigma_true)
sbp_draws_multi <- posterior::as_draws_df(fit_sbp_multi$draws)[, sbp_names_multi]
brms_draws_multi <- as.data.frame(
posterior::as_draws_df(fit_brms_multi)
)[, brms_names_multi]
colnames(brms_draws_multi) <- sbp_names_multi
draws_long_multi <- dplyr::bind_rows(
tidyr::pivot_longer(sbp_draws_multi, cols = dplyr::everything(),
names_to = "parameter", values_to = "value") |>
dplyr::mutate(package = "smoothbp"),
tidyr::pivot_longer(brms_draws_multi, cols = dplyr::everything(),
names_to = "parameter", values_to = "value") |>
dplyr::mutate(package = "brms")
)
draws_long_multi |>
dplyr::group_by(parameter, package) |>
dplyr::summarise(mean = mean(value), sd = sd(value),
q025 = quantile(value, 0.025),
q975 = quantile(value, 0.975), .groups = "drop") |>
tidyr::pivot_wider(names_from = package,
values_from = c(mean, sd, q025, q975),
names_glue = "{package}_{.value}") |>
dplyr::mutate(
truth = true_vals_multi[match(parameter, sbp_names_multi)],
delta_mean_pct = 100 * abs(smoothbp_mean - brms_mean) /
(abs(brms_mean) + 1e-8)
) |>
knitr::kable(digits = 3,
caption = "Scenario 4: two-breakpoint smoothbp vs brms.")The densities from smoothbp (blue) and brms (red) should overlap almost exactly if both samplers are targeting the same posterior. Dashed vertical lines mark the true data-generating values.
true_df_multi <- data.frame(
parameter = sbp_names_multi,
true_value = true_vals_multi
)
ggplot(draws_long_multi, aes(x = value, fill = package, colour = package)) +
geom_density(alpha = 0.35, linewidth = 0.6) +
geom_vline(data = true_df_multi,
aes(xintercept = true_value),
linetype = "dashed", linewidth = 0.5, colour = "black") +
facet_wrap(~ parameter, scales = "free", ncol = 3) +
scale_fill_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) +
scale_colour_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) +
labs(
title = "Scenario 4: Multi-breakpoint posteriors — smoothbp vs brms",
subtitle = sprintf(
"Two true change-points. smoothbp: %.1fs brms: %.1fs\nDashed = true value. Overlapping densities = agreement.",
time_sbp_multi, time_brms_multi
),
x = NULL, y = "Density"
) +
theme_minimal(base_size = 11) +
theme(legend.position = "bottom")ess_sbp_multi <- posterior::summarise_draws(
fit_sbp_multi$draws[, , sbp_names_multi], ess_bulk = posterior::ess_bulk
) |>
dplyr::transmute(parameter = variable,
smoothbp_ess_per_sec = ess_bulk / time_sbp_multi)
ess_brms_multi <- posterior::summarise_draws(
posterior::as_draws_df(fit_brms_multi), ess_bulk = posterior::ess_bulk
) |>
dplyr::filter(variable %in% brms_names_multi) |>
dplyr::mutate(parameter = sbp_names_multi[match(variable, brms_names_multi)]) |>
dplyr::transmute(parameter, brms_ess_per_sec = ess_bulk / time_brms_multi)
dplyr::full_join(ess_sbp_multi, ess_brms_multi, by = "parameter") |>
knitr::kable(digits = 1,
caption = "ESS/second: smoothbp vs brms (two-breakpoint model).")