Nothing
#' Internal function for the Meta-Analytical Calibration Curve (MAC2)
#'
#' Computes meta-analytical calibration curves using multiple methods (logistic regression,
#' loess or splines) and performs meta-analysis across clusters
#' to generate aggregated calibration curves with confidence and prediction intervals.
#'
#' @param data optional data frame containing the variables \code{p}, \code{y},
#' and \code{cluster}. If supplied, variable names should be given without
#' quotation marks.
#' @param p predicted probabilities (numeric vector) or name of the column in
#' \code{data}.
#' @param y binary outcome variable or the name of the column in \code{data}.
#' @param cluster Cluster identifier (factor, character, or integer) or name of
#' the column in \code{data}.
#' @param grid the grid for the calibration curve evaluation
#' @param plot logical, indicates whether to plot the calibration curves. Default is \code{TRUE}.
#' @param cluster_curves logical, whether to include cluster-specific curves in the plot. Default is \code{FALSE}.
#' @param knots integer, number of knots for splines. Default is \code{3}.
#' @param transf character, transformation for predictions: \code{"logit"} or \code{"identity"}. Default is \code{"logit"}.
#' @param method_choice character, which method to use for meta-analysis. Options are:
#' \code{"log"}, \code{"loess"} or \code{"splines"}. Default is \code{"splines"}.
#' @param method.tau character, method for between-study heterogeneity estimation. Default is \code{"REML"}.
#' This argument is passed to the \code{\link[meta]{metagen}} function.
#' @param prediction logical, whether to compute prediction intervals. Default is \code{TRUE}.
#' This argument is passed to the \code{prediction} argument of the \code{\link[meta]{metagen}} function.
#' @param random logical, whether to use random-effects model. Default is \code{TRUE}.
#' This argument is passed to the \code{random} argument of the \code{\link[meta]{metagen}} function.
#' @param sm character, summary measure for meta-analysis. Default is \code{"PLOGIT"}.
#' This argument is passed to the \code{sm} argument of the \code{\link[meta]{metagen}} function.
#' @param hakn logical, whether to use Hartung-Knapp adjustment. Default is \code{FALSE}.
#' This argument is passed to the \code{method.random.ci} argument of the \code{\link[meta]{metagen}} function.
#' @param linewidth numeric, line width for the meta-curve. Default is \code{1}.
#' @param method.predict character, method for prediction intervals. Default is \code{"HTS"}.
#' This argument is passed to the \code{method.predict} argument of the \code{\link[meta]{metagen}} function.
#' @param verbose logical, indicates whether progress has to be printed in the console.
#' @param cl.level the confidence level for the calculation of the confidence interval. Default is \code{0.95}.
#' @param alpha.lr the alpha-level used for the likelihood ratio test, selecting the number of knots for the
#' restricted cubic splines
#'
#' @details
#' This function estimates the center-specific calibration curves using logistic regression,
#' loess or splines. Hereafter, it aggregates the calibration curves using meta-analytical techniques.
#' The meta-analysis is performed using the function \code{\link[meta]{metagen}} from the \code{\link[meta]{meta}}
#' package. The \code{method_choice} argument determines which method is for the meta-analytical aggregation.
#'
#' @return A list containing:
#' \describe{
#' \item{\code{cluster_data}}{Data frame with linear predictors and standard errors for each method per cluster}
#' \item{\code{plot_data}}{Data frame with meta-analysis results including predictions and intervals}
#' \item{\code{plot}}{A \code{ggplot2} object if \code{plot = TRUE}, otherwise \code{NULL}}
#' }
#'
#'
MAC2 <- function(data = NULL,
p,
y,
cluster,
grid,
cl.level = 0.95,
alpha.lr = 0.05 / 3,
plot = TRUE,
cluster_curves = FALSE,
knots = 3,
transf = "logit",
method_choice = c("splines", "log", "loess"),
method.tau = "REML",
prediction = TRUE,
random = TRUE,
sm = "PLOGIT",
hakn = FALSE,
linewidth = 1,
method.predict = "HTS",
verbose = FALSE) {
# --- Extract from data if provided ---
callFn = match.call()
method_choice = match.arg(method_choice)
if(knots < 3) {
warning("Number of knots cannot be lower than 3. Will be set to 3.", immediate. = TRUE)
knots = 3
}
if (!is.null(data)) {
if(!all(sapply(c("p", "y", "cluster"), function(a) as.character(callFn[a])) %in% colnames(data)))
stop(paste("Variables", paste0(
callFn[c("p", "y", "cluster")], collapse = ", "
), "were not found in the data.frame."))
p = eval(callFn$p, data)
logit = Logit(p)
y = eval(callFn$y, data)
cluster = eval(callFn$cluster, data)
}
# --- Base dataframe ---
df <- data.frame(
predictions = as.numeric(p),
outcome = as.numeric(y),
cluster = as.factor(cluster)
)
# --- Grid computation ---
transform_function <- if (transf == "logit") Logit else identity
data_all_lp <- data.frame()
# --- Process each cluster ---
for (subcluster in unique(df$cluster)) {
risk_cluster <- df %>% filter(cluster == subcluster)
observed_grid <- data.frame(
x = grid,
cluster = subcluster,
nsample = nrow(risk_cluster)
)
risk_cluster$transf_preds <- transform_function(risk_cluster$predictions)
# --- Logistic regression method ---
log_model <- lrm(data = risk_cluster, outcome ~ transf_preds)
log_data <- predict(log_model,
newdata = data.frame(transf_preds = transform_function(grid)),
type = "lp",
se.fit = TRUE
)
log_data <- data.frame(
log = log_data$linear.predictors,
log_se = log_data$se.fit
)
observed_grid <- cbind(observed_grid, log_data)
# --- Loess method ---
tryCatch(
{
loess_model <- loess.as(
x = risk_cluster$transf_preds,
y = risk_cluster$outcome,
degree = 2,
criterion = "aicc",
plot = FALSE,
control = loess.control(surface = "direct")
)
loess_data <- predict(loess_model,
newdata = data.frame(x = transform_function(grid)),
se = TRUE
)
# Handle NA values in loess predictions
# To-do: why do these result in NA?!
if (any(is.na(loess_data$fit)) || any(is.na(loess_data$se.fit))) {
loess_data$fit = na.approx(loess_data$fit, rule = 2)
loess_data$se.fit = na.approx(loess_data$se.fit, rule = 2)
}
loess_data$fit <- ifelse(loess_data$fit >= 1, 1 - 1e-6, loess_data$fit)
loess_data$fit <- ifelse(loess_data$fit <= 0, 1e-6, loess_data$fit)
loess_data <- data.frame(
loess = transform_function(loess_data$fit),
loess_se = abs(loess_data$se.fit / (loess_data$fit * (1 - loess_data$fit)))
)
observed_grid <- cbind(observed_grid, loess_data)
},
error = function(e) {
message("Fitting loess resulted in the following error message: ", e$message)
}
)
# --- Splines method ---
nkDecrease <- function(Argz) {
tryCatch(
do.call("lrm", Argz),
error = function(e) {
nk = Argz$formula[[3]][[3]]
warning(paste0("The number of knots led to estimation problems, nk will be set to ", nk - 1), immediate. = TRUE)
nk = nk - 1
cat(paste("fitting with", nk, "knots"))
Argz = list(
formula = eval(substitute(outcome ~ rcs(transf_preds, k), list(k = nk))),
data = risk_cluster
)
nkDecrease(Argz)
},
warning = function(w) {
nk = Argz$formula[[3]][[3]]
warning(paste0("The number of knots led to estimation problems, nk will be set to ", nk - 1), immediate. = TRUE)
nk = nk - 1
cat(paste("fitting with", nk, "knots"))
Argz = list(
formula = eval(substitute(outcome ~ rcs(transf_preds, k), list(k = nk))),
data = risk_cluster
)
nkDecrease(Argz)
})
}
knots_sub = knots
argzSplines = list(
formula = eval(substitute(outcome ~ rcs(transf_preds, k), list(k = knots_sub))),
data = risk_cluster
)
splines_model = nkDecrease(argzSplines)
knots_sub = splines_model$sformula[[3]][[3]]
if (knots_sub > 3) {
splines_model3 = lrm(data = risk_cluster, outcome ~ rcs(transf_preds, 3))
splines_model4 = lrm(data = risk_cluster, outcome ~ rcs(transf_preds, 4))
splines_model5 = lrm(data = risk_cluster, outcome ~ rcs(transf_preds, 5))
test3 = lrtest(splines_model3, splines_model)
test4 = lrtest(splines_model4, splines_model)
test5 = lrtest(splines_model5, splines_model)
if(test3$stats["P"] > alpha.lr) {
splines_model = splines_model3
knots_sub = 3
} else if(test4$stats["P"] > alpha.lr) {
splines_model = splines_model4
knots_sub = 4
} else if(test5$stats["P"] > alpha.lr) {
splines_model = splines_model5
knots_sub = 5
}
}
splines_data <- predict(splines_model,
newdata = data.frame(transf_preds = transform_function(grid)),
type = "lp",
se.fit = TRUE
)
splines_data <- data.frame(
splines = splines_data$linear.predictors,
splines_se = splines_data$se.fit,
knots_used = knots_sub
)
observed_grid <- cbind(observed_grid, splines_data)
if (verbose) {
message("Spline model for cluster ", subcluster, " fitted with ", knots_sub, " knots.")
}
# --- KDE method ---
# if ("kde" %in% methods) {
# tryCatch(
# {
# kde_data <- kde_curve(risk_cluster$outcome, risk_cluster$predictions)
# observed_grid <- cbind(observed_grid, kde = kde_data$y)
# },
# error = function(e) {
# message("KDE was not computed because: ", e$message)
# }
# )
# }
data_all_lp <- rbind(data_all_lp, observed_grid)
}
# --- Meta-analysis ---
adhoc.hakn.ci <- if (hakn) "IQWiG6" else NULL
adhoc.hakn.pi <- if (hakn) "se" else NULL
curve <- data.frame()
for (value in unique(data_all_lp$x)) {
data_v <- data_all_lp %>% filter(x == value)
meta_inputs <- switch(method_choice,
"log" = list(TE = data_v$log, seTE = data_v$log_se),
"loess" = list(TE = data_v$loess, seTE = data_v$loess_se),
"splines" = list(TE = data_v$splines, seTE = data_v$splines_se),
# "kde" = list(TE = data_v$kde, seTE = NA),
)
meta <- metagen(
TE = meta_inputs$TE,
seTE = meta_inputs$seTE,
studlab = data_v$cluster,
random = random,
prediction = prediction,
method.tau = method.tau,
sm = sm,
backtransf = TRUE,
method.random.ci = hakn,
method.predict = method.predict
)
data_v_b <- data.frame(
y = Ilogit(meta$TE.random),
upper = Ilogit(meta$upper.random),
lower = Ilogit(meta$lower.random),
up_pre = Ilogit(meta$upper.predict),
low_pre = Ilogit(meta$lower.predict),
tau = meta$tau2,
x = value
)
curve <- rbind(curve, data_v_b)
}
# --- Plotting ---
plot_obj <- NULL
if (plot) {
plot_obj <- ggplot(data = curve) +
geom_abline(linetype = "dashed", alpha = 0.1) +
geom_ribbon(aes(
x = x, ymin = pmax(0, pmin(lower, 1)),
ymax = pmax(0, pmin(upper, 1)),
fill = "CI 95%", alpha = "CI 95%"
)) +
geom_ribbon(aes(
x = x, ymin = pmax(0, pmin(low_pre, 1)),
ymax = pmax(0, pmin(up_pre, 1)),
fill = "PI 95%", alpha = "PI 95%"
)) +
geom_line(aes(x = x, y = y),
color = "black",
linewidth = linewidth, linetype = "dashed"
) +
xlab("Estimated probability") +
ylab("Observed proportion") +
theme_classic(base_size = 8, base_family = "serif") +
scale_x_continuous(breaks = seq(0, 1, 0.1)) +
scale_y_continuous(breaks = seq(0, 1, 0.2)) +
scale_alpha_manual(values = c(0.4, 0.2), name = "Heterogeneity") +
scale_fill_manual(values = c("red", "red"), name = "Heterogeneity") +
coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
theme(
legend.key.size = unit(0.3, "cm"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
if (cluster_curves) {
plot_obj <- plot_obj +
geom_line(
data = data_all_lp,
aes(x = x, y = Ilogit(data_all_lp[, method_choice]), group = cluster),
linewidth = linewidth / 2,
show.legend = FALSE,
linetype = "solid"
)
}
}
# --- Return ---
list(
cluster_data = data_all_lp,
plot_data = curve,
plot = plot_obj
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.