Nothing
#' Summary a single covariate, test across treatment and/or population
#'
#' This function performs demographics imbalance checking of a single covariate across multiple groups.
#'
#' @param data Input data frame. Rows are patients and columns are variables (e.g. demographics variables, time to event variables,
#' biomarker variables, treatment indicator, etc.). One patient per row.
#' @param var name of the clinical covariate to test. name should be in the column names of data. entries with empty value (nchar() == 0) will be imputed as NA
#' @param trt name of the treatment column. If trt is specified, the analysis will be performed within treatment arm.
#' if it is NULL, the comparison will be performed using all samples.
#' @param trt.name preferred display name of the treatment variable
#' If it is NULL, trt will be used.
#' @param subgroup name of the column which indicates subpopulation (e.g. biomarker evaluable population)
#' @param subgroup.name preferred display name of the subpopulation (e.g. biomarker evaluable population).
#' If it is NULL, subgroup will be used.
#' @param itt.name preferred display name of the full population (e.g. ITT).
#' If it is NULL, "All" will be used.
#' @param subgroup.indicator In the subpopulation column, which value is used
#' to define the subpopulation (e.g. biomarker evaluable population). Default is 1. It can also be character or logical.
#' Default is 1. The non-subpopulation enrties is not allowed to be specified as NA.
#' @param var.class class of the variable. possible categories are "numeric", "categorical" and
#' "ordered.factor". "ordered.factor" can be used to categorical variable with
#' ordered levels - e.g. IC score 0/1/2/3. If class is ordered.factor ,
#' ordered.factor.levels need to be specified.
#' If it is not specified, will try to use the class of the column.
#' "numeric","integer" will be treated as "numeric"
#' "logical""character","factor" will be treated as "categorical".
#' @param ordered.factor.levels ordered levels of the ordered factor.
#' @param cont.show what summary statistics to show for a continuous covariate.
#' Default is c("N" ,"Mean","Median", "Min-Max","NA's").
#' Possible options are "N" ,"Mean","SEM", "SD","Median",
#' "Min","Max" ,"Min-Max","1st Qrtl.","3rd Qrtl.","IQR" ,"NA's"
#' @param digits digits for rounding
#' @param trt.order If the user wants to display the treatments in a certain
#' order, it can be defined here. All elements in trt.order should be the same
#' unique values in the treatment column.
#' @param na.action defaults to "na.omit". Possible options are "na.omit", "error"
#' When it is specified as "na.omit", entries with missing trt or subgroup
#' will be automatically removed before calculation.
#' @param compare.subgroup If it is TRUE,
#' the output will show summary statistics of subgroup and others. Default is FALSE. If it is FALSE,
#' will show summary statistics of subgroup vs. All patients
#'
#' @note This function provides summary statistics of a single clinical covariate. Using default parameters,
#' the function provides a table to compare summary statistics in All patients vs. in BEP (biomarker evaluable population),
#' within treatment arm
#' @note trt allows for more than 2 levels. However, only 2 levels are allowed for subgroup.
#' For more general use, a user can specify trt to get summary statistics for any
#' sub-group defination (and leave subgroup as NULL).
#'
#' @importFrom stats as.formula complete.cases fisher.test kruskal.test sd
#' @noRd
#'
SummarySingle <- function(data, var,
trt = NULL, trt.name = NULL,
subgroup = NULL, subgroup.name = NULL, subgroup.indicator = 1, compare.subgroup = FALSE, itt.name = "All",
var.class = NULL, ordered.factor.levels = NULL,
cont.show = c("N", "Mean", "Median", "Min-Max", "NA's"),
digits = 2, trt.order = NULL,
na.action = "error") {
stopifnot(na.action %in% c("na.omit", "error"))
stopifnot(class(data) == "data.frame")
if (!all(c(var, trt, subgroup) %in% colnames(data))) stop("var, trt and subgroup should have matched column names in the input data!")
if (!is.null(subgroup)) if (nlevels(as.factor(data[, subgroup])) < 2) stop("subpopulation column has only one unique value!")
possible.show <- c(
"N", "Mean", "SEM", "SD", "Median",
"Min", "Max", "Min-Max", "1st Qrtl.", "3rd Qrtl.",
"IQR", "NA's"
)
if (length(setdiff(cont.show, possible.show)) > 0) {
stop(paste("possible cont.show elements are:", possible.show))
}
data[[var]][which(nchar(as.character(data[[var]])) == 0)] <- NA
possible.class <- c("categorical", "numeric", "ordered.factor")
if (is.null(var.class) || !all(var.class %in% possible.class)) {
if (class(data[, var]) %in% c("numeric", "integer")) var.class <- "numeric"
if (class(data[, var]) %in% c("logical")) class(data[, var]) <- "character"
if (class(data[, var]) %in% c("character", "factor")) var.class <- "categorical"
}
if (is.null(var.class) || !all(var.class %in% possible.class)) stop(paste("var.class should be in", paste(possible.class, collapse = ",")))
if (var.class == "ordered.factor") {
if (is.null(ordered.factor.levels)) stop("If class is ordered.factor,
ordered.factor.levels need to be specified.")
if (!is.null(ordered.factor.levels)) {
ordered.factor.levels <- as.character(ordered.factor.levels)
if (!identical(sort(ordered.factor.levels), sort(levels(factor(data[, var]))))) stop("ordered factor levels should match unique elements in the variable!")
data[, var] <- factor(data[, var], levels = ordered.factor.levels, ordered = TRUE)
Lev <- ordered.factor.levels
}
}
if ((!is.null(subgroup))) subgroup.name <- ifelse(is.null(subgroup.name), subgroup, subgroup.name)
if ((!is.null(trt))) trt.name <- ifelse(is.null(trt.name), trt, trt.name)
# TRT missing value
if (!is.null(trt)) {
if (any(is.na(data[, trt]))) {
if (na.action == "na.omit") {
data <- data[complete.cases(data[, trt]), ]
warning("There were observations without 'trt' information! These were omitted (na.action=\"na.omit\")!")
}
else {
stop("There were observations without 'trt' information!")
}
}
}
# subgroup missing value
if (!is.null(subgroup)) {
if (length(subgroup) == 1 && subgroup %in% colnames(data)) {
if (any(is.na(data[, subgroup]))) {
if (na.action == "na.omit") {
data <- data[complete.cases(data[, subgroup]), ]
warning("There were observations without 'subgroup' information! These were omitted (na.action=\"na.omit\")!")
}
else {
stop("There were observations without 'subgroup' information!")
}
}
}
}
# if trt is NULL, create a column with all 1s as indicator
if (is.null(trt)) {
data$trt <- rep(1, nrow(data))
trt.lev <- unique(data$trt)
} else {
if (!is.null(trt.order)) {
tmp <- levels(factor(data[, trt]))
if (!identical(sort(trt.order), sort(tmp))) stop("trt.order should match levels in trt column!")
data$trt <- factor(data[, trt], levels = trt.order)
}
else {
data$trt <- factor(data[, trt])
}
trt.lev <- levels(data$trt)
}
# if subgroup is NULL, create a column with all 1s as indicator
data$ITT <- rep(1, nrow(data))
compare.itt <- !compare.subgroup
if (!is.null(subgroup)) {
if (compare.itt) {
data[, subgroup] <- ifelse(data[, subgroup] %in% subgroup.indicator, 1, 0)
subgroup.name <- c(itt.name, subgroup.name)
subgroup <- c("ITT", subgroup)
}
if (!compare.itt) {
subgroup.l <- paste0(subgroup.name, "_", c(subgroup.indicator, paste0("not_", subgroup.indicator)))
tmp <- unique(data[which(!data[, subgroup] %in% subgroup.indicator), subgroup])
if (length(tmp == 1)) subgroup.l[2] <- paste0(subgroup.name, "_", tmp)
data[, subgroup.l[1]] <- ifelse(data[, subgroup] %in% subgroup.indicator, 1, 0)
data[, subgroup.l[2]] <- ifelse(data[, subgroup] %in% subgroup.indicator, 0, 1)
subgroup <- subgroup.l
subgroup.name <- subgroup.l
}
}
if (is.null(subgroup)) {
subgroup <- "ITT"
subgroup.name <- " "
}
# output matrix (trt by population)
res <- matrix(
ncol = sum(1, length(trt.lev) * length(subgroup)),
nrow = 0
)
if (var.class == "categorical") {
data[, var] <- factor(data[, var])
Lev <- levels(data[, var])
}
result <- vector("list", length(trt.lev))
names(result) <- rep("", length(trt.lev))
if (!is.null(trt.name)) {
names(result) <- trt.lev
}
res.ind <- 1
for (i in 1:length(trt.lev)) {
trt.mat <- data[which(data$trt == trt.lev[i]), ]
for (j in 1:length(subgroup.name)) {
subgroup.mat <- trt.mat[which(trt.mat[, subgroup[j]] == 1), ]
Nna <- length(which(is.na(subgroup.mat[, var])))
Na <- nrow(subgroup.mat) - Nna
if (var.class %in% c("categorical", "ordered.factor")) {
res.subgroup <- data.frame(1)
res.subgroup$"Total (non-NA)" <- Na
res.subgroup$"NA's" <- Nna
for (k in Lev) {
res.subgroup[, k] <- length(which(subgroup.mat[, var] == k))
res.subgroup[, k] <- paste(res.subgroup[, k], " ",
"(", 100 * round(res.subgroup[, k] / Na, digits +
2), "%)",
sep = ""
)
}
# }
res.subgroup <- t(res.subgroup[, -1])
} else {
smry <- summary(subgroup.mat[, var], digits = 16)
res.subgroup <- data.frame(1)
res.subgroup$N <- Na
res.subgroup$Mean <- round(ifelse(is.nan(smry["Mean"]),
NA, smry["Mean"]
), digits = digits)
res.subgroup$SEM <- round(sd(subgroup.mat[, var],
na.rm = TRUE
) / sqrt(length(which(!is.na(subgroup.mat[
,
var
])))), digits = digits)
res.subgroup$SD <- round(sd(subgroup.mat[, var],
na.rm = TRUE
), digits = digits)
res.subgroup$Median <- round(smry["Median"],
digits = digits
)
res.subgroup$Min <- round(smry["Min."], digits = digits)
res.subgroup$Max <- round(smry["Max."], digits = digits)
if (any(is.na(smry[c("Min.", "Max.")]))) {
res.subgroup$"Min-Max" <- NA
} else {
res.subgroup$"Min-Max" <- paste(round(smry["Min."],
digits = digits
),
round(smry["Max."], digits = digits),
sep = "..."
)
}
res.subgroup$"1st Qrtl." <- round(smry["1st Qu."], digits = digits)
res.subgroup$"3rd Qrtl." <- round(smry["3rd Qu."], digits = digits)
res.subgroup$IQR <- round(smry["3rd Qu."] - smry["1st Qu."], digits = digits)
res.subgroup$"NA's" <- Nna
res.subgroup <- t(res.subgroup[cont.show])
}
if (j == 1) {
p.res <- res.subgroup
} else {
p.res <- cbind(p.res, res.subgroup)
}
}
colnames(p.res) <- subgroup.name
if (length(result) > 1) colnames(p.res) <- paste0(colnames(p.res), "(", names(result)[i], ")")
result[[i]] <- p.res
}
colns <- unlist(sapply(result, colnames, simplify = FALSE))
rowns <- unique(unlist(sapply(result, rownames, simplify = FALSE)))
outmat <- matrix("", nrow = length(rowns), ncol = length(colns), dimnames = list(rowns, colns))
for (i in 1:length(result)) outmat[rownames(result[[i]]), colnames(result[[i]])] <- result[[i]]
outmat
}
#' Summary multiple covariates, test across treatment and/or population
#'
#' This function performs demographics imbalance checking of a single covariate across multiple groups.
#'
#' @param var a vector of covariate names - the clinical covariate to test
#' @param var.name preferred display names of the clinical covariates
#' If it is NULL, var will be used.
#' @param ordered.factor.levels.list a list indicates ordered levels for ordered.factor. Each ordered.factor
#' should have a corresponding element in this list.
#' @param var.class a vector that indicates class of the variables. possible categories are "numeric", "categorical" and
#' "ordered.factor". "ordered.factor" can be used to categorical variable with
#' ordered levels - e.g. IC score 0/1/2/3. If class is ordered.factor ,
#' ordered.factor.levels need to be specified.
#' If the user doesn't specify class of all variables (the length of the var.class is less than length of var),
#' The program will try to use the class of the column.
#' "numeric","integer" will be treated as "numeric"
#' "logical""character","factor" will be treated as "categorical".
#' In this case the program request that names of the vector var.class is a subset of the var vector.
#'
#' @return output object is a matrix with summary statistics. It can be passed to knitr::kable().
#'
#' @note trt allows for more than 2 levels. However, only 2 levels are allowed for subgroup.
#' For more general use, a user can specify trt to get summary statistics for any
#' sub-group defination (and leave subgroup as NULL).
#' @note This function provides summary statistics of a vector of clinical covariates. Using default parameters,
#' the function provides a table to compare summary statistics in All population vs. in BEP (biomarker evaluable population),
#' within treatment arm.
#'
#' @inheritParams SummarySingle
#' @noRd
#'
SummaryVars <- function(data, var, var.name = NULL,
trt = NULL, trt.name = NULL,
subgroup = NULL, subgroup.name = NULL, subgroup.indicator = 1, compare.subgroup = FALSE, itt.name = "All",
var.class = NULL, ordered.factor.levels.list = NULL,
cont.show = c("N", "Mean", "Median", "Min-Max", "NA's"),
digits = 2, trt.order = NULL,
na.action = "error") {
stopifnot(na.action %in% c("na.omit", "error"))
stopifnot(class(data) == "data.frame")
if (!all(c(var, trt, subgroup) %in% colnames(data))) stop("var, trt and subgroup should have matched column names in the input data!")
if (!is.null(subgroup)) if (nlevels(as.factor(data[, subgroup])) < 2) stop("subpopulation column has only one unique value!")
possible.show <- c(
"N", "Mean", "SEM", "SD", "Median",
"Min", "Max", "Min-Max", "1st Qrtl.", "3rd Qrtl.",
"IQR", "NA's"
)
if (length(setdiff(cont.show, possible.show)) > 0) {
stop(paste("possible cont.show elements are:", possible.show))
}
possible.class <- c("categorical", "numeric", "ordered.factor")
if (!all(var.class %in% possible.class)) stop(paste("var.class should be in", paste(possible.class, collapse = ",")))
if (length(var) == length(var.class)) names(var.class) <- var
if (length(var) != length(var.class)) if (!all(names(var.class) %in% var)) stop("length of var.class doesn't match length of var -
In this case the program request that names of the vector var.class is a subset of the var vector")
k.var <- length(var)
for (i in 1:k.var) {
tmp <- try(SummarySingle(
data = data, var = var[i],
trt = trt, trt.name = trt.name,
subgroup = subgroup, subgroup.name = subgroup.name, subgroup.indicator = subgroup.indicator,
compare.subgroup = compare.subgroup, itt.name = itt.name,
var.class = var.class[i], ordered.factor.levels = ordered.factor.levels.list[[var[i]]],
cont.show = cont.show,
digits = digits, trt.order = trt.order,
na.action = na.action
))
if (class(tmp) == "try-error") stop(paste("error in", var[i]))
if (i == 1) kcol <- ncol(tmp)
line1 <- rep("", kcol)
tmp2 <- rbind(line1, tmp)
rownames(tmp2)[1] <- ifelse(is.null(var.name[i]), var[i], var.name[i])
if (i == 1) mat <- tmp2
if (i > 1) mat <- rbind(mat, tmp2)
}
mat
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.