Nothing
#' Prepare the dataset in the proper format for R2jags
#'
#' @description
#' \code{data_preparation} prepares the dataset in the proper format for
#' R2jags and returns a list of elements that \code{\link{run_model}} inherits
#' via the argument \code{data}.
#'
#' @param data A data-frame of the one-trial-per-row format with arm-level data.
#' See 'Format' in \code{\link{run_model}}.
#' @param measure Character string indicating the effect measure. For a binary
#' outcome, the following can be considered: \code{"OR"}, \code{"RR"} or
#' \code{"RD"} for the odds ratio, relative risk, and risk difference,
#' respectively. For a continuous outcome, the following can be considered:
#' \code{"MD"}, \code{"SMD"}, or \code{"ROM"} for mean difference,
#' standardised mean difference and ratio of means, respectively.
#'
#' @return A list of data-frames on the following elements to be passed
#' to \code{\link{run_model}}:
#' \item{pseudo_m}{A pseudo-data-frame with values -1 and \strong{m} for the
#' corresponding trial-arms with unreported and reported missing participant
#' outcome data, respectively (see 'Details').}
#' \item{m}{The number of missing participant outcome data in each
#' trial-arm (see 'Details').}
#' \item{N}{The number of randomised participants in each trial-arm.}
#' \item{t}{The intervention identifier in each trial-arm.}
#' \item{I}{A pseudo-data-frame that indicates whether missing participant
#' outcome data have been reported or not for each observed trial-arm
#' (see 'Details').}
#' \item{measure}{The effect measure for the analysed outcome.}
#' \item{y0}{The observed mean value of the outcome in each trial-arm,
#' when the outcome is continuous.}
#' \item{se0}{The observed standard deviation of the outcome in each
#' trial-arm, when the outcome is continuous.}
#' \item{r}{The number of observed events of the outcome in each
#' trial-arm, when the outcome is binary.}
#'
#' @details \code{data_preparation} prepares the data for the Bayesian analysis
#' (See 'Format' in \code{\link{run_model}}). \code{data_preparation}
#' creates the pseudo-data-frames \code{m_new}, \code{I}, and \code{m_pseudo}
#' that have the same dimensions with the element \code{N}. \code{m_new} takes
#' the zero value for the observed trial-arms with unreported missing
#' participant outcome data (i.e., \strong{m} equals \code{NA} for the
#' corresponding trial-arms), the same value with \strong{m} for the observed
#' trial-arms with reported missing participant outcome data, and \code{NA}
#' for the unobserved trial-arms. \code{I} is a dummy data-frame and takes the
#' value one for the observed trial-arms with reported missing participant
#' outcome data, the zero value for the observed trial-arms with unreported
#' missing participant outcome data (i.e., \code{m_new} equals zero for the
#' corresponding trial-arms), and \code{NA} for the unobserved trial-arms.
#' Thus, \code{I} indicates whether missing participant outcome data have been
#' collected for the observed trial-arms. If the user has not defined the
#' element \strong{m} in \code{data_preparation}, \code{m_new} and \code{I}
#' take the zero value for all observed trial-arms to indicate that no missing
#' participant outcome data have been collected for the analysed outcome.
#' \code{I} and \code{m_new} are used from the following functions of the
#' package: \code{\link{run_model}}, \code{\link{run_metareg}},
#' \code{\link{prepare_model}}, \code{\link{run_nodesplit}},
#' \code{\link{prepare_nodesplit}}, \code{\link{run_ume}},
#' \code{\link{prepare_ume}}, and \code{\link{run_sensitivity}}.
#' Lastly, \code{m_pseudo} is a variant of \code{m_new}: it takes the value -1
#' for the observed trial-arms with unreported missing participant outcome
#' data (i.e., \strong{m} equals \code{NA} for the corresponding trial-arms),
#' the same value with \strong{m} for the observed trial-arms with reported
#' missing participant outcome data, and \code{NA} for the unobserved
#' trial-arms. It is used in function \code{\link{heatmap_missing_network}} to
#' calculate and illustrate the percentage of missing participant outcome data
#' across the observed comparisons and interventions of the network and the
#' function \code{\link{heatmap_missing_dataset}} to illustrate the trial-arms
#' with unreported missing participant outcome data. All pseudo-data-frames
#' aim to retain the trials without information on missing participant outcome
#' data.
#'
#' Furthermore, \code{data_preparation} sorts the interventions across
#' the arms of each trial in an ascending order and correspondingly the
#' remaining elements in \code{data} (See 'Format' in
#' \code{\link{run_model}}). \code{data_preparation} considers the first
#' column in \strong{t} as being the control arm for every trial. Thus,
#' this sorting ensures that interventions with a lower identifier are
#' consistently treated as the control arm in each trial. This case is
#' relevant in non-star-shaped networks.
#'
#' @author {Loukia M. Spineli}
#'
#' @seealso \code{\link{heatmap_missing_dataset}},
#' \code{\link{heatmap_missing_network}},
#' \href{https://CRAN.R-project.org/package=R2jags}{R2jags},
#' \code{\link{run_metareg}}, \code{\link{run_model}},
#' \code{\link{run_nodesplit}}, \code{\link{run_sensitivity}},
#' \code{\link{run_ume}}, \code{\link{prepare_model}},
#' \code{\link{prepare_nodesplit}}, \code{\link{prepare_ume}}
#'
#' @export
data_preparation <- function(data, measure) {
# Intervention studied in each arm of every trial
treat <- if (dim(data[, startsWith(colnames(data), "t")])[2] == 0) {
stop("The information on the individual arms is missing", call. = FALSE)
} else {
data[, startsWith(colnames(data), "t")]
}
# Total number of included trials
ns <- length(treat[, 1])
# Number of interventions investigated in every trial
na <- apply(treat, 1, function(x) length(which(!is.na(x))))
# Total number of interventions
nt <- length(table(as.matrix(treat)))
measure <- if (missing(measure)) {
stop("The argument 'measure' needs to be defined.", call. = FALSE)
} else if ((dim(data[, startsWith(colnames(data), "r")])[2] > 0) &
!is.element(measure, c("OR", "RR", "RD"))) {
stop("Insert 'OR', 'RR', or 'RD' for a binary outcome.", call. = FALSE)
} else if ((dim(data[, startsWith(colnames(data), "r")])[2] == 0) &
!is.element(measure, c("MD", "SMD", "ROM"))) {
stop("Insert 'MD', 'SMD' or 'ROM' for a continuous outcome.", call. = FALSE)
} else {
measure
}
# When no missing outcome data are collected
mod <- if (dim(data[, startsWith(colnames(data), "m")])[2] == 0) {
message("Missing participant outcome data have *not* been collected.")
as.data.frame(matrix(NA, nrow = nrow(treat), ncol = ncol(treat)))
} else {
# Number of missing participants in each arm of every trial
data[, startsWith(colnames(data), "m")]
}
# For a continuous outcome
if (is.element(measure, c("MD", "SMD", "ROM"))) {
# Observed mean value in each arm of every trial
y_obs <- data[, startsWith(colnames(data), "y")]
# Observed standard deviation in each arm of every trial
sd_obs <- data[, startsWith(colnames(data), "sd")]
# Number of randomised participants in each arm of every trial
rand <- data[, startsWith(colnames(data), "n")]
# Observed standard error in each arm of every trial
se_obs <- sd_obs / sqrt(rand - mod)
if ((dim(y_obs)[2] != max(na)) |
(dim(sd_obs)[2] != max(na)) |
(dim(mod)[2] != max(na)) |
(dim(rand)[2] != max(na))) {
stop("All elements must have the same dimension.", call. = FALSE)
}
if ((dim(y_obs)[1] != ns) |
(dim(sd_obs)[1] != ns) |
(dim(mod)[1] != ns) |
(dim(rand)[1] != ns)) {
stop("All elements must have the same dimension.", call. = FALSE)
}
# Order by 'id of t1' < 'id of t1'
treat_list <- rand_list <- list()
y_obs_list <- sd_obs_list <- se_obs_list <- mod_list <- treat_list
for (i in 1:ns) {
treat_list[[i]] <- treat[i, ]
y_obs_list[[i]] <- y_obs[i, ]
sd_obs_list[[i]] <- sd_obs[i, ]
se_obs_list[[i]] <- se_obs[i, ]
mod_list[[i]] <- mod[i, ]
rand_list[[i]] <- rand[i, ]
}
y0 <- sd0 <- se0 <- m <- N <- t <- treat
for (i in 1:ns) {
y0[i, ] <- unlist(y_obs_list[[i]])[order(unlist(treat_list[[i]]),
na.last = TRUE)]
sd0[i, ] <- unlist(sd_obs_list[[i]])[order(unlist(treat_list[[i]]),
na.last = TRUE)]
se0[i, ] <- unlist(se_obs_list[[i]])[order(unlist(treat_list[[i]]),
na.last = TRUE)]
m[i, ] <- unlist(mod_list[[i]])[order(unlist(treat_list[[i]]),
na.last = TRUE)]
N[i, ] <- unlist(rand_list[[i]])[order(unlist(treat_list[[i]]),
na.last = TRUE)]
t[i, ] <- sort(unlist(treat_list[[i]]), na.last = TRUE)
}
names(y0) <- paste0("y", seq_len(max(na)))
names(sd0) <- paste0("sd", seq_len(max(na)))
names(se0) <- paste0("se", seq_len(max(na)))
names(m) <- paste0("m", seq_len(max(na)))
names(N) <- paste0("n", seq_len(max(na)))
names(t) <- paste0("t", seq_len(max(na)))
} else {
# For a binary outcome:
# Number of observed events in each arm of every trial
event <- data[, startsWith(colnames(data), "r")]
# Number of randomised participants in each arm of every trial
rand <- data[, startsWith(colnames(data), "n")]
if ((dim(event)[2] != max(na)) |
(dim(mod)[2] != max(na)) |
(dim(rand)[2] != max(na))) {
stop("All elements must have the same dimension.", call. = FALSE)
}
if ((dim(event)[1] != ns) |
(dim(mod)[1] != ns) |
(dim(rand)[1] != ns)) {
stop("All elements must have the same dimension.", call. = FALSE)
}
# Order by 'id of t1' < 'id of t2' < 'id of t3', and so on
treat_list <- event_list <- mod_list <- rand_list <- list()
for (i in 1:ns) {
treat_list[[i]] <- treat[i, ]
event_list[[i]] <- event[i, ]
mod_list[[i]] <- mod[i, ]
rand_list[[i]] <- rand[i, ]
}
r <- m <- N <- t <- treat
for (i in 1:ns) {
r[i, ] <- unlist(event_list[[i]])[order(unlist(treat_list[[i]]),
na.last = TRUE)]
m[i, ] <- unlist(mod_list[[i]])[order(unlist(treat_list[[i]]),
na.last = TRUE)]
N[i, ] <- unlist(rand_list[[i]])[order(unlist(treat_list[[i]]),
na.last = TRUE)]
t[i, ] <- sort(unlist(treat_list[[i]]), na.last = TRUE)
}
names(r) <- paste0("r", seq_len(max(na)))
names(m) <- paste0("m", seq_len(max(na)))
names(N) <- paste0("n", seq_len(max(na)))
names(t) <- paste0("t", seq_len(max(na)))
}
# Indicate the trials with fully reported missing outcome data (for each arm)
# If a trial reports missing participant outcome data partially or not at all,
# insert 'NA'.
I <- m_new <- m_pseudo <- m
for (i in 1:ns) {
for (k in 1:na[i]) {
I[i, k] <- if (is.na(m[i, k]) & !is.na(N[i, k])) {
0
} else if (is.na(m[i, k]) & is.na(N[i, k])) {
NA
} else if (!is.na(m[i, k]) & !is.na(N[i, k])) {
1
}
m_new[i, k] <- if (is.na(m[i, k]) & !is.na(N[i, k])) {
0
} else if (is.na(m[i, k]) & is.na(N[i, k])) {
NA
} else if (!is.na(m[i, k]) & !is.na(N[i, k])) {
m[i, k]
}
# Matrix that indicates with -1 the trial-arms with non-reported missing
# participants. It is used by the following functions:
# 'heatmap_missing_dataset' and 'heatmap_missing_network'.
m_pseudo[i, k] <- if (!is.na(t[i, k]) & is.na(m[i, k])) {
-1
} else if (is.na(m[i, k]) & is.na(N[i, k])) {
NA
} else if (!is.na(m[i, k]) & !is.na(N[i, k])) {
m[i, k]
}
}
}
names(I) <- paste0("I", seq_len(max(na)))
# Return results
results <- list(m_pseudo = m_pseudo,
m = m_new,
N = N,
t = t,
I = I,
na = na,
nt = nt,
ns = ns,
measure = measure)
results <- if (is.element(measure, c("MD", "SMD", "ROM"))) {
append(results, list(y0 = y0, se0 = se0, sd0 = sd0))
} else if (is.element(measure, c("OR", "RR", "RD"))) {
append(results, list(r = r))
}
return(results)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.