#' @title Convert from individual to summary data in meta-analyses
#'
#' @description Allows for one-way conversion from full to summary data
#' or for calculation of effects for binary data.
#' Usually used before calling [baggr].
#' Input must be pre-formatted appropriately.
#'
#' @param data __either__ a data.frame of individual-level observations
#' with columns for outcome (numeric), treatment (values 0 and 1) and
#' group (numeric, character or factor); __or__, a data frame with binary data
#' (must have columns `a`, `c`, `b`/`n1`, `d`/`n2`).
#' @param effect what effect to calculate? a `mean` (and SE) of outcome in groups or
#' (for binary data) `logOR` (odds ratio), `logRR` (risk ratio),
#' `RD` (risk difference);
#' @param log logical; log-transform the outcome variable?
#' @param rare_event_correction This correction is used when working with
#' binary data (effect `logOR` or `logRR`)
#' The value of correction is added to all cells in
#' either some or all rows (groups), depending on `correction_type`.
#' Using corrections may bias results but is the only alternative to
#' avoid infinite values.
#' @param correction_type If `"single"` then rare event correction is only applied to the
#' particular rows that have 0 cells, if `"all"`, then to all studies
#' @param cfb logical; calculate change from baseline? If yes, the outcome
#' variable is taken as a difference between values in `outcome` and
#' `baseline` columns
#' @param summarise logical; `TRUE` by default, but you can disable it to obtain
#' converted (e.g. logged) data with columns renamed
#' @param group name of the column with grouping variable
#' @param outcome name of column with outcome variable
#' @param treatment name of column with treatment variable
#' @param baseline name of column with baseline variable
#' @param pooling Internal use only, please ignore
#'
#' @return
#' * If you `summarise`: a data.frame with columns for `group`, `tau` and `se.tau`
#' (for `effect = "mean"`, also baseline means; for `"logRR"` or `"logOR"` also
#' `a`, `b`, `c`, `d`, which correspond to typical contingency table notation, that is:
#' `a` = events in exposed; `b` = no events in exposed, `c` = events in unexposed,
#' `d` = no events in unexposed).
#' * If you do not summarise data, individual level data will be returned, but
#' some columns may be renamed or transformed (see the arguments above).
#'
#' @details
#' The conversions done by this function are not typically needed and may happen automatically
#' when `data` is given to [baggr]. However, this function can be used to explicitly
#' convert from full to reduced (summarised) data without analysing it in any model.
#' It can be useful for examining your data and generating summary tables.
#'
#'
#' If multiple operations are performed, they are taken in this order:
#' 1) conversion to log scale,
#' 2) calculating change from baseline,
#' 3) summarising data (using appropriate `effect`)
#'
#' @author Witold Wiecek
#' @seealso [convert_inputs] for how any type of data is (internally) converted into
#' a list of Stan inputs; vignette `baggr_binary` for more details about
#' rare event corrections
#' @export
#' @import stats
#'
#' @examples
#'
#' # Example of working with binary outcomes data
#' # Make up some individual-level data first:
#' df_rare <- data.frame(group = paste("Study", LETTERS[1:5]),
#' a = c(0, 2, 1, 3, 1), c = c(2, 2, 3, 3, 5),
#' n1i = c(120, 300, 110, 250, 95),
#' n2i = c(120, 300, 110, 250, 95))
#' df_rare_ind <- binary_to_individual(df_rare)
#' # Calculate ORs; default rare event correction will be applied
#' prepare_ma(df_rare_ind, effect = "logOR")
#' # Add 0.5 to all rows
#' prepare_ma(df_rare_ind, effect = "logOR",
#' correction_type = "all",
#' rare_event_correction = 0.5)
prepare_ma <- function(data, #standardise = NULL,
effect = c("mean", "logOR", "logRR", "RD"),
rare_event_correction = 0.25,
correction_type = c("single", "all"),
log = FALSE, cfb = FALSE, summarise = TRUE,
treatment="treatment",
baseline = NULL,
group="group",
outcome="outcome",
pooling=FALSE) {
effect <- match.arg(effect)
correction_type <- match.arg(correction_type)
if(grepl("pool|unknown", detect_input_type(data, group, treatment, outcome))){
if(effect %in% c("logOR", "logRR", "RD")){
check_columns_binary(data)
data <- binary_to_individual(data, group)
group <- "group"
}else
stop("Data must be individual-level (if summarising) or binary (if converting), see ?prepare_ma")
}
check_columns(data, outcome, group, treatment, stop.for.na = FALSE)
# Input checks and prep
data <- data[,c(treatment, group, outcome, baseline)]
if(is.null(baseline))
names(data) <- c("treatment", "group", "outcome")
else
names(data) <- c("treatment", "group", "outcome", "baseline")
if(any(!stats::complete.cases(data))){
if(summarise)
warning("NA values present in data - they may be dropped when summarising")
else
check_columns(data, outcome, group, treatment, stop.for.na = TRUE)
}
if(effect %in% c("logOR", "logRR", "RD")) {
if(!is_binary(data$outcome))
stop("Outcome column is not binary (only 0 and 1 values allowed).")
}
# 1. transform data (for now only log)
if(log) {
data$outcome <- log(data$outcome)
if(!is.null(baseline))
data$baseline <- log(data$baseline)
}
# 2. Change from baseline
if(cfb){
if(is.null(baseline))
stop("Define baseline column to calculate change from baseline")
data$outcome <- data$outcome - data$baseline
data$baseline <- NULL
}
# 3. Standardise
standardise <- NULL #until standardisation is fixed, we cauterise this
if(!is.null(standardise)) {
# Whole sample
if(standardise == "all")
data$outcome <- (data$outcome - mean(data$outcome)) / sd(data$outcome)
else if(standardise == "bsl"){
if(cfb)
stop("Can't standardise by baseline value if outcome is change to baseline.")
data$outcome <- (data$outcome - mean(data$baseline)) / sd(data$baseline)
}
# In each group separately
else if(standardise %in% c("group", "by group", "by_group")){
agg <- stats::aggregate(outcome ~ group,
function(x) {c(mean=mean(x), sd=sd(x))},
data = data)
means <- agg$outcome[,"mean"]
sds <- agg$outcome[,"sd"]
names(means) <- names(sds) <- agg$group
data$outcome <- (data$outcome - means[data$group]) / sds[data$group]
}
else if(standardise %in% c("group_bsl")){
if(cfb)
stop("Can't standardise by baseline value if outcome is change to baseline.")
if(is.null(baseline))
stop("Can't standardise by baseline value if baseline=NULL.")
agg <- stats::aggregate(baseline ~ group,
function(x) {c(mean=mean(x), sd=sd(x))},
data = data)
means <- agg$baseline[,"mean"]
sds <- agg$baseline[,"sd"]
names(means) <- names(sds) <- agg$group
data$outcome <- (data$outcome - means[data$group]) / sds[data$group]
} else if(standardise %in% c("control")) {
mean_ctrl <- mean(data$outcome[data$treatment == 0])
sd_ctrl <- sd(data$outcome[data$treatment == 0])
data$outcome <- (data$outcome - mean_ctrl)/ sd_ctrl
} else if(standardise %in% c("group_control")) {
agg <- stats::aggregate(outcome ~ group,
function(x) {c(mean=mean(x), sd=sd(x))},
data = data[data$treatment == 0,])
means <- agg$outcome[,"mean"]
sds <- agg$outcome[,"sd"]
names(means) <- names(sds) <- agg$group
data$outcome <- (data$outcome - means[data$group]) / sds[data$group]
} else
stop("Wrong standardise argument, expecting 'all', 'bsl', 'group' or 'group_bsl'")
}
# 4. Summarising
if(summarise){
bdt_sort <- unique(data$group) #remember order in which names appear in data
if(effect == "mean") {
magg <- stats::aggregate(outcome ~ treatment + group,
mean, data = data)
seagg <- stats::aggregate(outcome ~ treatment + group,
function(x) sd(x)/sqrt(length(x)), data = data)
mwide <- stats::reshape(data = magg, timevar = "treatment",
idvar = "group", direction = "wide")
sewide <- stats::reshape(data = seagg, timevar = "treatment",
idvar = "group", direction = "wide")
nagg <- stats::aggregate(outcome ~ treatment + group, length, data = data)
nwide <- stats::reshape(data = nagg, timevar = "treatment",
idvar = "group", direction = "wide")
out <- data.frame(group = mwide$group,
mu = mwide$outcome.0,
tau = mwide$outcome.1 - mwide$outcome.0,
se.mu = sewide$outcome.0,
se.tau = sqrt(sewide$outcome.0^2 + sewide$outcome.1^2),
n.mu = nwide$outcome.0,
n.tau = nwide$outcome.1)
out <- out[order(factor(out$group, levels = bdt_sort)),]
rownames(out) <- NULL
}
# Prepare event counts for binary data models
# (including rare event corrections)
if(effect %in% c("logOR", "logRR", "RD")) {
bdt_by <- by(data, list(data$group), function(x) {
with(x,
data.frame(
group = unique(as.character(group)),
a = sum(outcome[treatment == 1]),
n1 = sum(treatment == 1),
c = sum(outcome[treatment == 0]),
n2 = sum(treatment == 0),
b = sum(treatment == 1) - sum(outcome[treatment == 1]),
d = sum(treatment == 0) - sum(outcome[treatment == 0])))
})
binary_data_table <- do.call(rbind, bdt_by[bdt_sort])
out <- apply_cont_corr(binary_data_table,
rare_event_correction,
correction_type,
pooling=pooling)
rownames(out) <- NULL
# Add OR or RR estimate
out <- calc_or_rr(out, effect)
}
} else {
out <- data
}
out
}
# see prepare_ma()
calc_or_rr <- function(out, effect) {
if(effect == "logRR") {
out$tau <- with(out, log((a/(a+b))/(c/(c+d))))
out$se <- with(out, sqrt(1/a + 1/c - 1/(a+b) - 1/(c+d)))
}
if(effect == "logOR") {
out$tau <- with(out, log((a*d)/(b*c)))
out$se <- with(out, sqrt(1/a + 1/b + 1/c + 1/d))
}
if(effect == "RD") {
out$tau <- with(out, (a/(a+b)) - (c/(c+d)))
out$se <- with(out, {
p1 <- a/(a+b); p2 <- c/(c+d);
v1 <- p1*(1-p1)/(a+b)
v2 <- p2*(1-p2)/(c+d)
sqrt(v1 + v2)
})
}
out
}
# see prepare_ma()
apply_cont_corr <- function(binary_data_table, v, correction_type,
add_or = FALSE,
pooling = FALSE #indicates that this was called to calc pooling
) {
rare_event_correction <- v
rare <- with(binary_data_table, (a == 0 | b == 0 | c == 0 | d == 0))
if(sum(rare) == 1 && rare_event_correction == 0.25 && pooling == 0)
message("Applied default rare event correction (0.25) in 1 study")
if(sum(rare) > 1 && rare_event_correction == 0.25 && pooling == 0)
message("Applied default rare event corrections (0.25) in ", sum(rare), " studies")
# if(sum(rare) > 0 && pooling == 1)
# message("Some pooling values calculated using rare event corrections.")
if(correction_type == "single")
cc_value <- v*rare
if(correction_type == "all")
cc_value <- rare_event_correction
binary_data_table$a <- cc_value + binary_data_table$a
binary_data_table$b <- cc_value + binary_data_table$b
binary_data_table$c <- cc_value + binary_data_table$c
binary_data_table$d <- cc_value + binary_data_table$d
binary_data_table$n1 <- cc_value + binary_data_table$n1
binary_data_table$n2 <- cc_value + binary_data_table$n2
if(!add_or)
return(binary_data_table)
else
return(calc_or_rr(binary_data_table, "logOR"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.