Nothing
#' Internal function for the Clustered Grouped Calibration Curve (CGC)
#'
#' Estimates the calibration curves using the CGC approach. The function supports two grouping methods:
#' equal-sized groups (\code{"grouped"}) or interval-based groups (\code{"interval"}).
#' Optionally, a calibration plot can be produced with cluster-specific curves.
#'
#' @param data optional data frame containing the variables \code{p}, \code{y},
#' and \code{cluster}. If supplied, variable names should be given without
#' quotation marks.
#' @param p predicted probabilities (numeric vector) or name of the column in
#' \code{data}.
#' @param y binary outcome variable or the name of the column in \code{data}.
#' @param cluster cluster identifier (factor, character, or integer) or name of
#' the column in \code{data}.
#' @param ntiles integer, number of groups (tiles) for calibration. Default is \code{10}.
#' @param cluster_curves logical, whether to include cluster-specific calibration
#' curves in the plot. Default is \code{FALSE}.
#' @param plot logical, whether to return a calibration plot. Default is \code{TRUE}.
#' @param size numeric, point size for plotted curves. Default is \code{1}.
#' @param linewidth numeric, line width for plotted curves. Default is \code{0.4}.
#' @param univariate logical, whether to use univariate meta-analysis. Default is \code{FALSE}.
#' @param method character, grouping method: \code{"grouped"} (equal-sized groups) or
#' \code{"interval"} (interval-based). Default is \code{"grouped"}.
#' @param cl.level the confidence level for the calculation of the confidence interval. Default is \code{0.95}.
#'
#' @details
#' When \code{method = "grouped"}, the predictions are divided into equal-sized bins using quantiles.
#' Conversely, if \code{method ="interval"}, the predictions are divided into fixed-width bins across [0, 1].
#'
#' The function performs a meta-analysis within each group. This can be either a univariate or bivariate analysis,
#' which is specified in the \code{univariate} argument. The univariate analysis is performed using the
#' \code{\link[meta]{metaprop}} function and the bivariate analysis employs the \code{\link[metafor]{rma.mv}} function.
#' Hereafter, the results are aggregated and plotted as calibration curves.
#'
#' @return A list containing:
#' \describe{
#' \item{\code{plot_data}}{Data frame of meta-analysis calibration estimates.}
#' \item{\code{trad_grouped}}{Data frame with traditional grouped calibration results.}
#' \item{\code{observed_data}}{Data frame with per-observation calibration data.}
#' \item{\code{cluster_data}}{Data frame with cluster-specific calibration summaries.}
#' \item{\code{plot}}{A \code{ggplot2} object if \code{plot = TRUE}, otherwise \code{NULL}.}
#' }
#'
CGC <- function(data = NULL,
p,
y,
cluster,
cl.level = 0.95,
ntiles = 10,
cluster_curves = FALSE,
plot = TRUE,
size = 1,
linewidth = 0.4,
univariate = FALSE,
method = c("grouped", "interval")) {
# --- Arguments check ---
callFn = match.call()
method = match.arg(method)
alpha = 1 - cl.level
if (!is.null(data)) {
if(!all(sapply(c("p", "y", "cluster"), function(a) as.character(callFn[a])) %in% colnames(data)))
stop(paste("Variables", paste0(
callFn[c("p", "y", "cluster")], collapse = ", "
), "were not found in the data.frame."))
p = eval(callFn$p, data)
logit = Logit(p)
y = eval(callFn$y, data)
cluster = eval(callFn$cluster, data)
}
if(ntiles < 5) {
warning(sprintf("Number of groups is %s, which is too low. Will be set to 5", ntiles),
immediate. = TRUE)
ntiles = 5
}
# --- Base dataframe ---
df = data.frame(
p = as.numeric(p),
y = as.numeric(y),
cluster = as.factor(cluster)
)
# --- Assign decile/interval groups ---
if (method == "grouped") {
df <- df %>%
group_by(cluster) %>%
mutate(decile_group = ntile(p, ntiles))
} else if (method == "interval") {
df <- df %>%
group_by(cluster) %>%
mutate(decile_group = cut(p, breaks = seq(0, 1, length.out = ntiles + 1)))
}
# --- Cluster-level summaries ---
deciles_cluster <- df %>%
group_by(cluster, decile_group) %>%
mutate(n_tile = n()) %>%
ungroup() %>%
mutate(N_tile = n())
if(min(deciles_cluster$n_tile) < 50)
warning("There is a group with less than 50 observations. Consider decreasing the number of groups.")
decilesbycluster <- deciles_cluster %>%
group_by(cluster, decile_group) %>%
summarise(
n_tile = n(),
y_mean = mean(y),
x_mean = mean(p),
.groups = "drop"
)
# --- Traditional grouped calibration ---
deciles_all <- df %>%
ungroup() %>%
mutate(decile_group = if (method == "grouped") ntile(p, ntiles) else cut(p, breaks = seq(0, 1, length.out = ntiles + 1))) %>%
group_by(decile_group) %>%
summarise(
std_error_y = sd(y) / sqrt(length(y)),
std_error_x = sd(p) / sqrt(length(p)),
var_x = var(p),
var_y = var(y),
p = mean(p),
y = mean(y),
.groups = "drop"
)
# --- Within-cluster variances ---
var_150_cluster_decile <- deciles_cluster %>%
group_by(cluster, decile_group) %>%
summarise(
var_x_cluster_tile = var(p),
var_y_cluster_tile = var(y),
preds_150 = mean(p),
y_150 = mean(y),
ntile_150 = n(),
.groups = "drop"
)
# --- Meta-analysis across groups ---
data_all <- data.frame()
for (i in unique(var_150_cluster_decile$decile_group)) {
data_meta <- var_150_cluster_decile %>% filter(decile_group == i)
if (univariate) {
x_meta <- metaprop(
data = data_meta, event = preds_150 * ntile_150,
n = ntile_150, studlab = cluster,
method = "Inverse", backtransf = TRUE
)
y_meta <- metaprop(
data = data_meta, event = y_150 * ntile_150,
n = ntile_150, studlab = cluster,
method = "Inverse", backtransf = TRUE
)
data_meta_curve <- data.frame(
te_x = x_meta$TE.random,
ci_up_x = x_meta$upper.random,
ci_low_x = x_meta$lower.random,
pre_up_x = x_meta$upper.predict,
pre_low_x = x_meta$lower.predict,
te_y = y_meta$TE.random,
ci_up_y = y_meta$upper.random,
ci_low_y = y_meta$lower.random,
pre_up_y = y_meta$upper.predict,
pre_low_y = y_meta$lower.predict,
decile_group = i,
ntile_plot = sum(data_meta$ntile_150)
)
} else {
preds_escalc <- escalc(
measure = "PLO", xi = preds_150 * ntile_150,
ni = ntile_150, data = data_meta
)
preds_escalc$group <- "p"
y_escalc <- escalc(
measure = "PLO", xi = y_150 * ntile_150,
ni = ntile_150, data = data_meta
)
y_escalc$group <- "y"
dat <- rbind(preds_escalc, y_escalc)
res <- try(rma.mv(yi, vi,
mods = ~ group - 1,
random = ~ group | cluster,
struct = "UN", data = dat,
control = list(iter.max = 10000, rel.tol = 1e-6)
))
if (inherits(res, "try-error") | nrow(dat) == 2) {
warning("Meta-analysis failed for decile group ", i, ". Returning NA values.")
next
}
pred_up = res$b + qt(1 - alpha / 2, df = nrow(y_escalc) - 2) * sqrt(res$tau2 + res$se^2)
pred_low = res$b + qt(alpha / 2, df = nrow(y_escalc) - 2) * sqrt(res$tau2 + res$se^2)
data_meta_curve <- data.frame(
te_x = res$b[1],
ci_up_x = res$ci.ub[1],
ci_low_x = res$ci.lb[1],
pre_up_x = pred_up[1],
pre_low_x = pred_low[1],
te_y = res$b[2],
ci_up_y = res$ci.ub[2],
ci_low_y = res$ci.lb[2],
pre_up_y = pred_up[2],
pre_low_y = pred_low[2],
decile_group = i,
ntile_plot = sum(data_meta$ntile_150)
)
}
data_all <- rbind(data_all, data_meta_curve)
}
data_all <- data_all %>% mutate(across(-c(decile_group, ntile_plot), Ilogit))
# --- Plotting ---
curve <- NULL
if (plot) {
curve <- ggplot(data_all, aes(x = te_x, y = te_y)) +
geom_abline(linetype = "dashed", alpha = 0.1) +
geom_ribbon(aes(ymax = pre_up_y, ymin = pre_low_y, fill = "PI 95%"), alpha = 1) +
geom_ribbon(aes(ymax = ci_up_y, ymin = ci_low_y, fill = "CI 95%"), alpha = 1) +
geom_errorbar(aes(x = te_x, y = te_y, xmin = pre_low_x, xmax = pre_up_x),
alpha = 0.2, width = 0, lwd = linewidth, lty = "dashed"
) +
scale_fill_manual(name = "Heterogeneity", values = c("cornflowerblue", "lightblue")) +
geom_point(data = deciles_all, aes(x = p, y = y, color = "Traditional grouped"), size = size) +
geom_line(data = deciles_all, aes(x = p, y = y, color = "Traditional grouped"), linewidth = linewidth) +
geom_point(data = data_all, aes(color = paste0("CG-C(", method, ")")), size = size * 1.3) +
geom_line(data = data_all, aes(color = paste0("CG-C(", method, ")")), linewidth = linewidth) +
scale_color_manual(name = "Methodology", values = c("black", "gold")) +
xlab("Estimated probability") +
ylab("Observed proportion") +
scale_x_continuous(breaks = seq(0, 1, 0.1)) +
scale_y_continuous(breaks = seq(0, 1, 0.2)) +
coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
theme_classic(base_size = 8, base_family = "serif") +
theme(
legend.key.size = unit(0.3, "cm"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
if (cluster_curves) {
curve <- curve +
geom_line(
data = decilesbycluster, aes(x_mean, y_mean, group = cluster),
lwd = linewidth, alpha = 0.5, show.legend = FALSE
) +
geom_point(
data = decilesbycluster, aes(x_mean, y_mean, group = cluster),
alpha = 0.8, size = size / 3, show.legend = FALSE
)
}
}
# --- Return ---
list(
plot_data = data_all,
trad_grouped = deciles_all,
observed_data = deciles_cluster,
cluster_data = decilesbycluster,
plot = curve
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.