#' Exports the table 1 in a word file
#'
#' @param import.col.names (Character) the column names as specifed in the imported data.frame or excel file.
#' @param output.var.names (Character) the variable names as they will be displayed in the output. If not specified we will use the excel column names.
#' @param dichotomous (Character) a vector of character values that specifies which of the excel columns contain dichotomous data.
#' @param ordinal (Character) a vector of character values that specifies which of the excel columns contain ordinal data.
#' @param median.iqr (Character) a vector of character values that specifies for which continuous excel columns we should calculate median and iqr instead of mean and sd.
#' @param group.col.name (Character) the column name that specifies the group (control or treatment).
#' @param control.value (Character) the value that specifies the control group. (e.g. 0 or 'control').
#' @param treatment.value (Character) the value that specifies the treatment group (e.g. 1 or 'treatment').
#' @param data (data.frame) (optional) (default: NULL) the data in a data.frame format that will be loaded. If you do not provide data, a pop up window will be displayed asking for an excel file to load data from.
#' @param excel.path (Character) (optional) the path that the excel file can be read from. By default it is set to NULL so that a pop up window will ask for the path.
#' @param export.path (Character) (optional) the path that the Word Document will be exported to.
#' @param sheet (Character) (optional) (default: NULL) the sheet inside excel file that the data are stored. By default it gets the first one.
#' @param tableone.col.names (Character) (optional) a vector for the column names of the exported table. Default are: c('Variable', 'Treatment Group', 'Control Group', 'p-value', 'Mean Difference', 'OR', 'Test Stat.')
#' @param export.filename (Character) (optional) the name of the file that will be exported. Do not include the .docx extension. (default filename is TableOne.docx)
#' @param show.stats (Character) (optional) a vector of characters that specifies which statistics will be displayed in the produced table One. By default it displays the MD, OR and Test Stat. value as additional statistics except from p-value
#' @param export.word (Boolean) (optional) (default: TRUE) specify if you want to have a Word Document that contains the table one exported. Whether you export the table to Word or not, you will also get the results in a data.frame format after running the package.
#'
#' @author
#' Agapios Panos <panosagapios@gmail.com>
#'
#' @importFrom easycsv choose_dir
#' @importFrom officer read_docx body_add_blocks block_list fp_border body_end_section_landscape body_add_par
#' @importFrom flextable regulartable theme_zebra autofit vline vline_right align bold
#' @importFrom stats sd t.test pnorm chisq.test quantile median
#' @export
#'
tableone <- function(import.col.names, output.var.names, dichotomous = c(), ordinal = c(), median.iqr = c(), group.col.name, control.value, treatment.value, data = NULL, excel.path = NULL, export.path = NULL, sheet = NULL, tableone.col.names = NULL, export.filename = NULL, show.stats = NULL, export.word = TRUE) {
# checking if the user specified an export.path argument. If not, a prompt window will be displayed to ask for a path.
if (is.null(export.path)) {
print('Please choose a folder to export the Word Document...')
export.path <- choose_dir()
}
# checking if the user has supplied the correct amount of names for all variables
if (length(output.var.names) != length(import.col.names)) {
stop("The length of the variables 'output.var.names' and 'import.col.names' is not equal. Please make sure you have entered the same number of items in these vectors.")
}
# if the user did not specify any output.var.names we will use the excel column names
if (is.null(output.var.names) | length(output.var.names) == 0) {
output.var.names <- import.col.names
}
# checking if the export.path is specified
if (length(export.path) == 0) {
stop('You have to choose a folder to have the Word Document exported')
} else if (is.na(export.path)) {
stop('You did not specify a valid export.path argument')
} else {
cat('You selected the folder ', export.path, '\n')
}
# checking if export.filename is specified
if (length(export.filename) == 0) {
if (!is.null(export.filename)) {
stop('The export filename must be of length > 0')
} else {
export.filename <- 'TableOne'
cat('No export.filename is specified. The file will be saved as TableOne.docx.', '\n')
}
} else if (is.na(export.filename)) {
stop('You did not specify a valid export.filename argument')
}
# data will be loaded from excel
if (is.null(data)) {
# getting data from the excel file
excel_data <- import_excel_data(excel.path, sheet, import.col.names, output.var.names, group.col.name, control.value, treatment.value)
imported_data <- excel_data$data
group <- excel_data$group
# data will be loaded from the data argument
} else {
if (is.data.frame(data)) {
if (!group.col.name %in% names(data)) {
stop('The group.col.name is not present in the data data.frame that you loaded')
}
colnames.verification <- import.col.names %in% names(data)
if (!any(colnames.verification)) {
unmatched.pos <- which(colnames.verification == F)
stop('The columns ', import.col.names[unmatched.pos], ' are not present in the data data.frame that you loaded.')
}
imported_data <- data
group <- data[group.col.name]
} else {
stop('The data that you provide must be in data.frame format.')
}
}
# checking if the user specified which statistics will be displayed
if (is.null(show.stats)) {
stats <- c("MD", "OR", "test-value")
} else {
stats <- show.stats
}
# calculating the number of columns according to the user's chosen stats. By default we have 4 columns + chosen stats.
dataframe.ncols <- length(stats) + 4
# initializing the data.frame that will be exported to Word
table.to.export <- data.frame( matrix( ncol = dataframe.ncols, nrow = length(output.var.names) ) )
# keeping treatment and control group values in a separate var
treatment <- imported_data[which(group == treatment.value),]
control <- imported_data[which(group == control.value),]
# number of participants in the treatment group
n.t <- nrow(treatment)
# number of participants in the control group
n.c <- nrow(control)
# variable to check if continuity correction is applied to add note
continuity_correction <- F
# keep a vector of the vars for which we applied continuity correction
continuity_correction_vars <- c()
# checking if the user specified custom column names for the exported table
if (is.null(tableone.col.names)) {
tableone.col.names <- c('Variable', 'Treatment Group', 'Control Group', 'p-value')
colname.ind <- 5
if ("MD" %in% stats) {
tableone.col.names[colname.ind] <- 'Mean Difference'
colname.ind <- colname.ind + 1
}
if ("OR" %in% stats) {
tableone.col.names[colname.ind] <- 'OR'
colname.ind <- colname.ind + 1
}
if ("test-value" %in% stats) {
tableone.col.names[colname.ind] <- 'Test Stat.'
}
} else {
if (length(tableone.col.names) < 4 + dataframe.ncols) {
stop(paste('you must provide a vector with', 4 + dataframe.ncols, 'column names for the argument tableone.col.names. Use " " inside the vector to keep empty column names'))
} else {
tableone.col.names[which(tableone.col.names == '' | is.na(tableone.col.names))] <- ' '
}
}
# adding the number of participants in each group
tableone.col.names[2] <- paste0(tableone.col.names[2], ' (n=', n.t, ')')
tableone.col.names[3] <- paste0(tableone.col.names[3], ' (n=', n.c, ')')
# adding the names in table that will be exported
names(table.to.export) <- tableone.col.names
# we must use a padding variable to compensate for the rows added by the ordinal data. The padding will only be used in the table.to.export variable
# for each row of ordinal data we will increase the padding by 1
padding <- 0
# generating the table that will be exported
for (i in 1:length(import.col.names)) {
if (import.col.names[i] %in% dichotomous) {
# get data as factors for treatment group
t.factors <- table(treatment[i])
t.f1name <- names(t.factors[1])
t.f1count <- ifelse(is.na(t.factors[1]), 0, t.factors[1])
t.f2name <- names(t.factors[2])
t.f2count <- ifelse(is.na(t.factors[2]), 0, t.factors[2])
# t.f2name <- names(t.factors[2]) TODO remove
t.totalcount <- t.f1count + t.f2count
# t.f1percent <- t.f1count / t.totalcount TODO remove
t.f2percent <- t.f2count / t.totalcount
# get data as factors for control group
c.factors <- table(control[i])
c.f1name <- names(c.factors[1])
c.f1count <- ifelse(is.na(c.factors[1]), 0, c.factors[1])
# c.f1name <- names(c.factors[1]) TODO remove
c.f2name <- names(c.factors[2])
c.f2count <- ifelse(is.na(c.factors[2]), 0, c.factors[2])
# c.f2name <- names(c.factors[2]) TODO remove
c.totalcount <- c.f1count + c.f2count
# c.f1percent <- c.f1count / c.totalcount TODO remove
c.f2percent <- c.f2count / c.totalcount
# creating the table columns
# column name
# TODO IMPROVEMENT add the option to choose which factor to consider for every dichotomous var as the baseline.
if (output.var.names[i] == '') {
varname <- t.f2name # we get the 2nd factor's name because we use the 2nd factor as a baseline.
} else {
varname <- output.var.names[i]
}
# calculate and display OR
t.events <- length(which(unlist(treatment[i]) == t.f2name)) # we use f2name because we have the 2nd factor as the baseline.
t.no_events <- length(which(unlist(treatment[i]) == t.f1name))
c.events <- length(which(unlist(control[i]) == c.f2name))
c.no_events <- length(which(unlist(control[i]) == c.f1name))
if (t.events == 0 | c.events == 0 | t.no_events == 0 | c.no_events == 0) {
t.events = t.events + 0.5
c.events = c.events + 0.5
t.no_events = t.no_events + 0.5
c.no_events = c.no_events + 0.5
continuity_correction = T
continuity_correction_vars <- cbind(continuity_correction_vars, varname)
}
or.value <- (c.no_events * t.events) / (t.no_events * c.events)
var.log.or <- 1/c.no_events + 1/t.no_events + 1/t.events + 1/c.events
logor.ci.lower <- log(or.value) - 1.96*sqrt(var.log.or)
logor.ci.upper <- log(or.value) + 1.96*sqrt(var.log.or)
testval <- round(log(or.value)/sqrt(var.log.or), digits = 3)
pval <- round(2*(1 - pnorm(abs(testval))), digits = 3)
table.to.export[padding + i, 1] <- paste(varname, '(%)')
# treatment group value - percentage
table.to.export[padding + i, 2] <- paste0(format(round(t.f2percent*100, digits = 2), nsmall = 2), '%') # we use f2percent because we have the 2nd factor as the baseline.
# control group value - percentage
table.to.export[padding + i, 3] <- paste0(format(round(c.f2percent*100, digits = 2), nsmall = 2), '%')
# p-value
table.to.export[padding + i, 4] <- format(pval, nsmall = 3)
# get a start value for the column index as we dynamically show statistics
col.ind = 5
if ("MD" %in% stats) {
# mean difference with 95 percent confidence interval
table.to.export[padding + i,col.ind] <- ''
col.ind <- col.ind + 1
}
if ("OR" %in% stats) {
# odds ratio - 95 percent OR confidence interval
table.to.export[padding + i,col.ind] <- paste0(format(round(or.value, digits = 2), nsmall = 2), ' [', format(round(exp(logor.ci.lower), digits = 2), nsmall = 2), ', ', format(round(exp(logor.ci.upper), digits = 2), nsmall = 2), ']')
col.ind <- col.ind + 1
}
if ("test-value" %in% stats) {
table.to.export[padding + i,col.ind] <- format(testval, nsmall = 3)
}
} else if (import.col.names[i] %in% ordinal) {
test.values <- chisq.test(unlist(imported_data[import.col.names[i]]), unlist(group))
var.levels <- rownames(test.values$observed)
c.count <- test.values$observed[,1]
t.count <- test.values$observed[,2]
# column name
table.to.export[padding + i, 1] <- output.var.names[i]
# treatment group value stays empty for the ordinal data
table.to.export[padding + i, 2] <- ''
# control group value stays empty as well
table.to.export[padding + i, 3] <- ''
# p-value
table.to.export[padding + i, 4] <- format(round(test.values$p.value, digits = 3), nsmall = 3)
# get a start value for the column index as we dynamically show statistics
col.ind = 5
if ("MD" %in% stats) {
# mean difference with 95 percent confidence interval
table.to.export[padding + i, col.ind] <- ''
col.ind <- col.ind + 1
}
if ("OR" %in% stats) {
# leaving empty the odds ratio column
table.to.export[padding + i, col.ind] <- ''
col.ind <- col.ind + 1
}
if ("test-value" %in% stats) {
table.to.export[padding + i, col.ind] <- format(round(test.values$statistic, digits = 3), nsmall = 3)
}
# we add a row for each factor of the ordinal data
for (k in 1:length(var.levels)) {
padding <- padding + 1
# name for the factor
table.to.export[padding + i, 1] <- paste0('---- level: ', var.levels[k])
# treatment group value stays empty for the ordinal data
table.to.export[padding + i, 2] <- paste0(format(round((t.count[k] / n.t) * 100, digits = 2), nsmall = 2), '%')
# control group value stays empty as well
table.to.export[padding + i, 3] <- paste0(format(round((c.count[k] / n.c) * 100, digits = 2), nsmall = 2), '%')
# other columns will be empty
table.to.export[padding + i, 4] <- ''
table.to.export[padding + i, 5] <- ''
table.to.export[padding + i, 6] <- ''
table.to.export[padding + i, 7] <- ''
}
} else { # continuous data case
test.values <- t.test(as.numeric(unlist(imported_data[import.col.names[i]]))~unlist(group), imported_data)
# mean for control and treatment group
c.mean <- test.values$estimate[1]
t.mean <- test.values$estimate[2]
mean.difference <- t.mean - c.mean
if (import.col.names[i] %in% median.iqr) {
# calculating quantiles and median for control and treatment group
c.q25 <- quantile(as.numeric(unlist(control[i])), na.rm = T)[2]
c.q75 <- quantile(as.numeric(unlist(control[i])), na.rm = T)[4]
t.q25 <- quantile(as.numeric(unlist(treatment[i])), na.rm = T)[2]
t.q75 <- quantile(as.numeric(unlist(treatment[i])), na.rm = T)[4]
c.median <- median(as.numeric(unlist(control[i])), na.rm = T)
t.median <- median(as.numeric(unlist(treatment[i])), na.rm = T)
# column name
table.to.export[padding + i, 1] <- paste0(output.var.names[i], ' (median, IQR)')
# treatment group value with standard deviation
table.to.export[padding + i, 2] <- paste0(t.median, ' (', t.q25, '-', t.q75, ')')
# control group value with standard deviation
table.to.export[padding + i, 3] <- paste0(c.median, ' (', c.q25, '-', c.q75, ')')
} else {
# standard deviation for control and treament group
c.sd <- round(sd(as.numeric(unlist(control[i])), na.rm = T), digits = 2)
t.sd <- round(sd(as.numeric(unlist(treatment[i])), na.rm = T), digits = 2)
# column name
table.to.export[padding + i, 1] <- paste0(output.var.names[i], ' (mean, sd)')
# treatment group value with standard deviation
table.to.export[padding + i, 2] <- paste0(format(round(t.mean, digits = 2), nsmall = 2), '\U00B1', format(t.sd, nsmall = 2))
# control group value with standard deviation
table.to.export[padding + i, 3] <- paste0(format(round(c.mean, digits = 2), nsmall = 2), '\U00B1', format(c.sd, nsmall = 2))
}
# p-value
pval <- round(test.values$p.value, digits = 3)
if (pval < 0.001) pval <- '<0.001'
# p-value
table.to.export[padding + i, 4] <- format(pval, nsmall = 3)
# get a start value for the column index as we dynamically show statistics
col.ind = 5
if ("MD" %in% stats) {
# mean difference with 95 percent confidence interval
table.to.export[padding + i, col.ind] <- paste0(format(round(mean.difference, digits = 2), nsmall = 2), ' [', format(round(test.values$conf.int[1], digits = 2), nsmall = 2), ', ', format(round(test.values$conf.int[2], digits = 2), nsmall = 2), ']')
col.ind <- col.ind + 1
}
if ("OR" %in% stats) {
# leaving empty the odds ratio column
table.to.export[padding + i, col.ind] <- ''
col.ind <- col.ind + 1
}
if ("test-value" %in% stats) {
table.to.export[padding + i, col.ind] <- format(round(test.values$statistic, digits = 3), nsmall = 3)
}
}
}
if (export.word) {
# initializing a blank docx document
doc <- read_docx()
# converting the data.frame to regulartable so that we can apply styling using a theme from flextable package
output <- regulartable(table.to.export)
# auto-adjust width for table columns
output <- autofit(output, add_w = 0.2, add_h = 0)
# applying the styling to the table
output <- theme_zebra(output, odd_header = "#CFCFCF", odd_body = "#F8F8F8",
even_header = "transparent", even_body = "transparent")
# adding vertical borders between columns
output <- vline( output, border = fp_border(color = "gray80", width = 1), part = "all" )
# remove right table border
output <- vline_right(output, border = fp_border(width = 0), part = "all")
# align text to center
output <- align(output, align = "center", part = "all")
# align first column to right
output <- align(output, j = 1, align = "right", part = "body")
# make header bold
output <- bold(output, bold = TRUE, part = "header")
# make first column bold
output <- bold(output, j = 1, bold = TRUE, part = "body")
# adding the table to the document body
body_add_blocks(doc, blocks = block_list(output))
# adding note for continuity correction if applied
if (continuity_correction) body_add_par(doc, value = paste('Note: Continuity correction was applied for the variables: ', paste(continuity_correction_vars, collapse = ', ')), style = "centered")
# make the orientation landscape
body_end_section_landscape(doc)
# finaly exporting the docx file
print(doc, target = paste0(export.path, '/', export.filename, '.docx'))
print('The file is exported successfully! You can find it in the following directory:')
print(paste0(export.path, '/', export.filename, '.docx'))
}
print('Here is the table one in a data.frame format:')
if (continuity_correction) print(paste('Note: Continuity correction was applied for the variables: ', paste(continuity_correction_vars, collapse = ', ')))
print(table.to.export)
output <- list()
output$table <- table.to.export
output$continuity_correction <- continuity_correction
output$continuity_correction_vars <- continuity_correction_vars
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.