#' Prepare meta-analysis dataset for meta_stan function
#'
#' \code{metastan_data} creates datasets suitable for meta_stan function
#'
#' The resulting data.frame can be used as data argument in \code{meta_stan}.
#'
#' @param id a vector of study IDs (labels)
#' @param treatment (optional) vector indicating treatment groups
#' @param x (optional) covariable/regressor vector or matrix
#' @param type type of outcome
#' @param y estimates (for normal outcomes)
#' @param se associated standard errors (normal outcomes)
#' @param v variances / squared standard errors (normal outcomes)
#' @param count event count (for binomial or Poisson outcomes)
#' @param total sample size (for binomial outcomes)
#' @param exposure exposure (for Poisson outcomes)
#' @param labels (optional) vector of row labels
#' @param data a data frame from which (some of) above variables may be taken
#' @param sort if TRUE (the default), output rows will be sorted (keeping entries corresponding to the same study ("id") next to each other, and sorting by "treatment" and "x")
#' @return The returned object is a "list" of class "metastan_data"
#' containing the following elements:
#' * "id" : study identifier (a vector of type factor)
#' * "treatment" : (optional) a treatment identifier (a vector of type factor, or NULL)
#' * "x" : (optional) covariable(s), (a numeric matrix, or NULL)
#' * "type" : a flag identifying the outcome type ("normal", "binomial", or "poisson")
#' * "outcome" : the actual outcome data (a two-column numeric matrix)
#' * "call" : the original function call
#' @details NB: arguments "id", "treatment", "x", "y", "se", "v", "count",
#' "total", "exposure", "labels"
#' may be taken from local environment OR from "data" (data frame) argument.
#'
#' @examples
#' # 3 studies, binomial endpoint, one covariable (named "dose"):
#' msd <- metastan_data(id=c("Smith","Smith", "Taylor","Taylor",
#' "Jones", "Jones","Taylor"),
#' treatment=c("placebo","verum","placebo","verum",
#' "verum","placebo","verum"),
#' type="binomial",
#' count=c(3, 2, 7, 5, 4, 5, 6),
#' total=c(10, 10, 25, 27, 15, 16, 23),
#' x=cbind("dose"=c(0, 50, 0, 50, 30, 0, 20)))
#' msd
#' print(msd, n=7) # (show all 7 lines of data)
#' print.default(msd) # default view of returned object
#' @export
metastan_data <- function(id,
treatment,
x,
type = c("normal", "binomial", "poisson"),
y,
se,
v, # for normal outcomes
count,
total, # for binomial outcomes (event count, sample size)
exposure, # for Poisson outcomes (count, exposure)
labels, # (optional) vector of row labels
data, # a data frame from which (some of) above variables may be taken
sort = TRUE)
{
##############################
# gather data from arguments;
# check whether a "data" argument was supplied:
if (missing(data))
data <- NULL
if (is.null(data)) {
dataArg <- FALSE
data <- sys.frame(sys.parent())
} else {
dataArg <- TRUE
if (!is.data.frame(data))
data <- data.frame(data)
}
mc <- match.call()
# gather arguments from environment _OR_ from "data":
mc.id <- mc[[match("id", names(mc))]]
id <- eval(mc.id, data, enclos=sys.frame(sys.parent()))
mc.treatment <- mc[[match("treatment", names(mc))]]
treatment <- eval(mc.treatment, data, enclos=sys.frame(sys.parent()))
mc.x <- mc[[match("x", names(mc))]]
x <- eval(mc.x, data, enclos=sys.frame(sys.parent()))
mc.y <- mc[[match("y", names(mc))]]
y <- eval(mc.y, data, enclos=sys.frame(sys.parent()))
mc.se <- mc[[match("se", names(mc))]]
se <- eval(mc.se, data, enclos=sys.frame(sys.parent()))
mc.v <- mc[[match("v", names(mc))]]
v <- eval(mc.v, data, enclos=sys.frame(sys.parent()))
mc.count <- mc[[match("count", names(mc))]]
count <- eval(mc.count, data, enclos=sys.frame(sys.parent()))
mc.total <- mc[[match("total", names(mc))]]
total <- eval(mc.total, data, enclos=sys.frame(sys.parent()))
mc.exposure <- mc[[match("exposure", names(mc))]]
exposure <- eval(mc.exposure, data, enclos=sys.frame(sys.parent()))
mc.labels <- mc[[match("labels", names(mc))]]
labels <- eval(mc.labels, data, enclos=sys.frame(sys.parent()))
###################################
# check for (potential) conflicts:
if (dataArg) {
dataName <- deparse(substitute(data))
id2 <- try(eval(mc.id), silent=TRUE)
if ((!is.element("try-error", class(id2))) && (!identical(id, id2)))
warning(paste0("potential conflict -- interpreted \"id=", deparse(substitute(mc.id)),
"\" argument within data frame \"", dataName, "\""))
treatment2 <- try(eval(mc.treatment), silent=TRUE)
if ((!is.element("try-error", class(treatment2))) && (!identical(treatment, treatment2)))
warning(paste0("potential conflict -- interpreted \"id=", deparse(substitute(mc.treatment)),
"\" argument within data frame \"", dataName, "\""))
x2 <- try(eval(mc.x), silent=TRUE)
if ((!is.element("try-error", class(x2))) && (!identical(x, x2)))
warning(paste0("potential conflict -- interpreted \"x=", deparse(substitute(mc.x)),
"\" argument within data frame \"", dataName, "\""))
y2 <- try(eval(mc.y), silent=TRUE)
if ((!is.element("try-error", class(y2))) && (!identical(y, y2)))
warning(paste0("potential conflict -- interpreted \"y=", deparse(substitute(mc.y)),
"\" argument within data frame \"", dataName, "\""))
se2 <- try(eval(mc.se), silent=TRUE)
if ((!is.element("try-error", class(se2))) && (!identical(se, se2)))
warning(paste0("potential conflict -- interpreted \"se=", deparse(substitute(mc.se)),
"\" argument within data frame \"", dataName, "\""))
v2 <- try(eval(mc.v), silent=TRUE)
if ((!is.element("try-error", class(v2))) && (!identical(v, v2)))
warning(paste0("potential conflict -- interpreted \"v=", deparse(substitute(mc.v)),
"\" argument within data frame \"", dataName, "\""))
count2 <- try(eval(mc.count), silent=TRUE)
if ((!is.element("try-error", class(count2))) && (!identical(count, count2)))
warning(paste0("potential conflict -- interpreted \"count=", deparse(substitute(mc.count)),
"\" argument within data frame \"", dataName, "\""))
total2 <- try(eval(mc.total), silent=TRUE)
if ((!is.element("try-error", class(total2))) && (!identical(total, total2)))
warning(paste0("potential conflict -- interpreted \"total=", deparse(substitute(mc.total)),
"\" argument within data frame \"", dataName, "\""))
exposure2 <- try(eval(mc.exposure), silent=TRUE)
if ((!is.element("try-error", class(exposure2))) && (!identical(exposure, exposure2)))
warning(paste0("potential conflict -- interpreted \"exposure=", deparse(substitute(mc.exposure)),
"\" argument within data frame \"", dataName, "\""))
labels2 <- try(eval(mc.labels), silent=TRUE)
if ((!is.element("try-error", class(labels2))) && (!identical(labels, labels2)))
warning(paste0("potential conflict -- interpreted \"labels=", deparse(substitute(mc.labels)),
"\" argument within data frame \"", dataName, "\""))
rm(list=c("id2", "y2", "se2", "v2", "count2",
"total2", "exposure2", "x2", "labels2"))
}
rm(list=c("mc.id", "mc.treatment", "mc.x", "mc.y", "mc.se", "mc.v",
"mc.count", "mc.total", "mc.exposure", "mc.labels"))
##########
type <- match.arg(type)
# some sanity checks:
stopifnot(!is.null(id), is.vector(id), length(id)>0)
if (!is.null(treatment))
stopifnot(is.vector(treatment), (length(treatment)==length(id)))
stopifnot(is.vector(sort), is.logical(sort), length(sort)==1, !is.na(sort))
# process outcome data:
if (type=="binomial") { # (proper binomial counts & totals required)
stopifnot(!is.null(count) & !is.null(total),
is.vector(count), is.numeric(count), all(is.finite(count)),
all(count>=0), length(count)==length(id),
is.vector(total), is.numeric(total), all(is.finite(total)),
all(total>0), all(total>=count),
length(total)==length(id))
outcome <- matrix(NA_integer_, ncol=2, nrow=length(id),
dimnames=list(NULL, c("count","total")))
outcome[,"count"] <- count
outcome[,"total"] <- total
}
if (type=="poisson") { # (proper Poisson counts & exposures required)
stopifnot(!is.null(count) & !is.null(exposure),
is.vector(count), is.numeric(count), all(is.finite(count)),
all(count>=0), length(count)==length(id),
is.vector(exposure), is.numeric(exposure), all(is.finite(exposure)),
all(exposure>0), length(exposure)==length(id))
outcome <- matrix(NA_integer_, ncol=2, nrow=length(id),
dimnames=list(NULL, c("count","exposure")))
outcome[,"count"] <- count
outcome[,"exposure"] <- exposure
}
if (type=="normal") { # (estimates and standard errors / variances required)
stopifnot(!is.null(y) & (!is.null(v) | !is.null(se)),
is.vector(y), is.numeric(y), all(is.finite(y)),
length(y)==length(id))
if (!is.null(v)) { # check variances, convert to standard errors:
stopifnot(is.vector(v), is.numeric(v), all(is.finite(v)),
all(v>0), length(v)==length(id))
se <- sqrt(v)
} # check standard errors:
stopifnot(is.vector(se), is.numeric(se), all(is.finite(se)),
all(se>0), length(se)==length(id))
outcome <- matrix(NA_integer_, ncol=2, nrow=length(id),
dimnames=list(NULL, c("y","se")))
outcome[,"y"] <- y
outcome[,"se"] <- se
}
# check "id" vector, convert to factor (if necessary):
if (!is.factor(id)) {
id <- as.factor(id)
} else { # check whether all levels are actually used:
if (length(unique(id)) != length(levels(id)))
warning("\"id\" argument contains un-used factor levels.")
}
# check "treatment" vector, convert to factor (if necessary):
if (!is.null(treatment)) {
if (!is.factor(treatment)) {
treatment <- as.factor(treatment)
} else { # check whether all levels are actually used:
if (length(unique(treatment)) != length(levels(treatment)))
warning("\"treatment\" argument contains un-used factor levels.")
}
}
# set up regressor matrix
# (NB: always a matrix, but commonly just a single column):
if (!is.null(x)) {
stopifnot(is.vector(x) | is.matrix(x))
if (is.vector(x)) {
stopifnot(is.numeric(x), all(is.finite(x)), length(x)==length(id))
x <- matrix(x, nrow=length(id), ncol=1,
dimnames=list(NULL, "x"))
} else if (is.matrix(x)) {
stopifnot(is.numeric(x), all(is.finite(x)), nrow(x)==length(id))
matcolnames <- colnames(x)
colnames(x) <- make.names(matcolnames, unique=TRUE)
}
} else { # (the default)
x <- NULL
}
if (sort) { # sort output rows sensibly:
# create a "sorted ID" vector (in order to keep studies "in order of appearance"):
sortedID <- factor(id, levels=as.character(unique(id)))
if (!is.null(treatment)) {
if (!is.null(x)) {
rowOrder <- order(sortedID, treatment, x[,1])
} else {
rowOrder <- order(sortedID, treatment)
}
} else {
if (!is.null(x)) {
rowOrder <- order(sortedID, x[,1])
} else {
rowOrder <- order(sortedID)
}
}
# permute/sort rows of output elements:
id <- id[rowOrder]
if (!is.null(treatment)) treatment <- treatment[rowOrder]
if (!is.null(x)) x <- x[rowOrder,,drop=FALSE]
outcome <- outcome[rowOrder,,drop=FALSE]
if (!is.null(labels)) labels <- labels[rowOrder]
}
# generate a sensible row label vector (essentially for "internal use" only):
if (!is.null(labels)) {
stopifnot(is.vector(labels), is.character(labels),
!any(duplicated(labels)))
} else {
labels <- make.names(as.character(id))
countvec <- rep(NA_integer_, length(id))
for (i in 1:nlevels(id)) {
idx <- which(id == levels(id)[i])
if (length(idx)>0) {
countvec[idx] <- 1:length(idx)
# check for duplicate regressor entries:
if (length(idx)>1) {
if (is.null(treatment) & is.null(x)) {
warning("Duplicate entries in \"id\" argument (id=\"",levels(id)[i],"\")!")
} else if (any(duplicated(cbind(as.numeric(treatment), x)[idx,]))) {
# (NB: "duplicated()" works for vectors and matrices/data.frames (row-wise)!)
warning("Duplicate covariable entries (id=\"",levels(id)[i],"\")!")
}
}
}
}
if (max(table(id)) > 1) {
labels <- paste(labels, sprintf("%.02d",countvec), sep=".")
}
}
# apply row labels throughout:
rownames(outcome) <- labels
if (!is.null(treatment)) names(treatment) <- labels
if (!is.null(x)) rownames(x) <- labels
# set up object to hold results:
result <- list("id" = id, # study ID
"treatment" = treatment, # (optional) treatment ID
"x" = x, # (optional) covariable(s)
"type" = type, # type of endpoint
"outcome" = outcome, # outcome data
"call" = match.call(expand.dots=FALSE))
class(result) <- "metastan_data"
return(result)
}
#' @export
print.metastan_data <- function(x, n=6, ...)
{
cat("\"metastan_data\" object.\n")
cat(paste0("outcome type : \"", x$type, "\"\n"))
cat(paste0("number of studies : ", length(unique(x$id)), "\n"))
cat(paste0("number of treatments : ",
ifelse(is.null(x$treatment), 1, length(unique(x$treatment))),
"\n"))
tab <- table(x$id)
if (length(unique(tab))==1)
cat(paste0("outcomes per study : ", tab[1], "\n"))
else
cat(paste0("outcomes per study : ", min(tab), "-", max(tab), "\n"))
if (!is.null(x$x)) { # (only printed if present)
cat(paste0("covariable(s) : ", as.character(paste0("\"",colnames(x$x),
"\"",collapse=", ")), "\n"))
}
cat("top of data set:\n")
dat <- cbind.data.frame("id"=as.character(x$id))
if (!is.null(x$treatment)) {
dat <- cbind.data.frame(dat, "treatment"=x$treatment)
}
if (!is.null(x$x)) {
dat <- cbind.data.frame(dat, x$x)
}
dat <- cbind.data.frame(dat, x$outcome)
rownames(dat) <- NULL
print(head(dat, n=n), ...)
invisible(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.