# Todo
# Add headers for Black, White, and Total
# Add method to use the demographics function and tidy up output table
# Probs need to set up t-test to account for equal or unequal variances
# You would also want to test the assumption of the t-test
# to decide whether you want a parametric or non-parametric value
# Could remove the -88's and -99's by using the "exclude =" argument
# Probably want to set this scheme up so that it will use the native values all the way to the end
# Create a function that will return continuous values up to a certain cutoff
dem_tbl <- function(data, outcome, continuous.covariates = NULL,
continuous.tests = "wilcox.test",
categorical.covariates = NULL,
categorical.tests = "likelihood.ratio.chi.square"){
###### Make a table for total values #######
# Crosstabulate based on a specified outcome
totals <- as.matrix(xtabs(as.formula(paste0("~",
"`", outcome, "`")), data=data))
# Calculate Percentages
per.tbl <- prop.table(totals)
# Combine percentages with the raw numbers
totals <- paste0(totals, " (", round(per.tbl * 100, 1), ")")
# This will inadvertantly get rid of the names, return them using those in
# the percentage table
names(totals) <- paste0(names(per.tbl), ".", outcome)
# Add a description of the sub-table, a label to explain what is contained in each cell,
# and a P-value place-holder
########################################
totals <- c(sub.tbl = "Totals", quant = "N (%)", Total = nrow(data), totals, P = "", test="")
############# Make table for continuous covariates #########################
# Compute a table for continuous covariates if they are specified
if (!is.null(continuous.covariates)){
###################################
# Just specify what is missing and terminate the function
###################################
# Check whether any continuous covariates do not match
if(sum(!(continuous.covariates %in% colnames(data))) > 0){
print("An element in 'continuous.covarates' does not match with the column names of 'data'")
# Subset continuous.covariates to contain only those elements which match with columns in data
continuous.covariates <- intersect(continuous.covariates, colnames(data))
}
# If anything in continuous.covariates matches, then implement
# the cont.tbl function as appropriate
if (length(continuous.covariates) > 0){
cont.tbl <- multi_cont_compare(data = data,
continuous.covariates = continuous.covariates,
outcome = outcome, continuous.tests = continuous.tests)
} else {
print("None of 'continuous.covariates' matched with columns of 'data'")
cont.tbl <- NULL
}
}
############### Make table for categorical covariates #####################
# Compute a table for continuous covariates if they are specified
if (!is.null(categorical.covariates)){
# Check whether any continuous covariates do not match
if(sum(!(categorical.covariates %in% colnames(data))) > 0){
print("An element in 'categorical.covarates' does not match with the column names of 'data'")
# Subset categorical.covariates to contain only those elements which match with columns in data
categorical.covariates <- intersect(categorical.covariates %in% colnames(data))
}
# If anything in categorical.covariates matches, then implement
# the cont.tbl function as appropriate
if (length(categorical.covariates) > 0){
# Create a categorical table
cat.tbl <- multi_cat_compare(data = data,
categorical.covariates = categorical.covariates,
outcome = outcome, categorical.tests = categorical.tests)
} else {
print("None of 'categorical.covariates' matched with columns of 'data'")
cat.tbl <- NULL
}
}
############# Pull everything together #################
# if (!is.null(cat.tbl)){
# cat.tbl <- cat.tbl[,names(totals)]
# }
#
# if (!is.null(cont.tbl)){
# cont.tbl <- cont.tbl[,names(totals)]
# }
out.tbl <- rbind(totals, cat.tbl, cont.tbl)
return(out.tbl)
}
#' Create a stratified demographics table
#'
#' @param data A set of data with all covariates in separate columns and each row representing exactly 1 individual
#' @param outcome Column containing indicating the primary outcome of interest (e.g. case/control or disease subtypes)
#' @param continuous.covariates Columns containing continuous covariates of interest
#' @param continuous.tests A vector of tests to run on the continuous covariates
#' @param categorical.covariates Columns containing the categorical covariates of interest
#' @param categorical.tests A vector of tests to run on categorical covariates
#' @param strata.col Column containing the categories by which the data will be stratified
#' @return A demographics table in the format typically implemented by the iMAGE study.
#' @export
strat_dem_tbl <- function(data, outcome, continuous.covariates = NULL,
continuous.tests = "wilcox.test", categorical.covariates = NULL,
categorical.tests = "likelihood.ratio.chi.square", strata.col = NULL){
# Make base table
out.data <- dem_tbl(data = data, outcome = outcome,
continuous.covariates = continuous.covariates,
continuous.tests = continuous.tests,
categorical.covariates = categorical.covariates,
categorical.tests = categorical.tests)
# Indicate that this is the unstratified data
colnames(out.data) <- paste0(colnames(out.data), ".Overall")
# Create uniquely identifying rownames to help line up the stratified tables
rownames(out.data) <- paste0(out.data[,1], out.data[,2])
if (!(is.null(strata.col))){
# Find each individual stratum
if (is.factor(data[,strata.col])){
strata <- levels(data[,strata.col])
} else {
strata <- unique(data[,strata.col])
}
# Make a demographics table for each individual stratum and add to the base table
for (stratum in strata){
# Make a temporary subset of data
tmp.data <- data[data[,strata.col] == stratum,]
# Run demographics table function on this temporary set of data
tmp.dem.data <- dem_tbl(data = tmp.data, outcome = outcome,
continuous.covariates = continuous.covariates,
continuous.tests = continuous.tests,
categorical.covariates = categorical.covariates,
categorical.tests = categorical.tests)
# Create uniquely identifying rownames to help line up the stratified tables
rownames(tmp.dem.data) <- paste0(tmp.dem.data[,1], tmp.dem.data[,2])
# Find the rownames that are not included in the sub table
idx <- which(!(rownames(out.data) %in% rownames(tmp.dem.data)))
# Loop through each position where there is a lack of alignment
# Replace one line at a time with empty space
for (i in idx){
tmp.dem.data <- rbind(tmp.dem.data[1:(i-1),], "", tmp.dem.data[i:nrow(tmp.dem.data),])
}
# Rename the columns to reflect which stratum we are referring to
colnames(tmp.dem.data) <- paste0(colnames(tmp.dem.data), ".", strata.col, ".", stratum)
# Once this is done, bind the two tables together
out.data <- cbind(out.data, "", tmp.dem.data[,-(1:2)])
}
}
out.data <- out.data[,-1]
rownames(out.data) <- 1:nrow(out.data)
return(out.data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.