# Use "#>" as the prefix for printed chunk output in the rendered document.
knitr::opts_chunk$set(comment = "#>")

This package builds a 'Table One' for a medical journal. It adds summaries of each variable based on user specifications, and outputs the result as a dataframe which can be written to Excel, saved as a CSV file, or embedded in a markdown document.

It takes a 'strata' variable which is an ordered factor. The table is divided into a 'Total' column plus one column for each level of the factor. The make_output_df() function creates an empty dataframe with the counts of each observation included in the column headings.

The get_() functions add rows to that dataframe.

library(TableOneDataframe)
library(knitr)
data(mtcars)
# Turn the strata variable into a factor. The level set is declared up front
# and includes "Extra", a level that no row uses.
mtcars$cyl <- factor(mtcars$cyl, levels = c("4", "6", "8", "Extra"))
# Build the empty output table that the get_*() calls below add rows to.
# The 'strata' variable must be a factor; empty levels get a separate column.
# include_tests determines whether the functions called afterwards do
# p-value testing.
output <- make_output_df(mtcars, "cyl", include_tests = TRUE)
# Each call below appends a summary of one variable, stratified by 'cyl', to
# the output table; the fourth argument is the label shown for that row.

# Median IQR
# `disp` in mtcars is engine displacement, so the row is labelled accordingly.
output <- get_median_iqr(mtcars, "cyl", "disp", "Displacement", output)
# Mean (SD)
output <- get_mean_sd(mtcars, "cyl", "hp", "HP", output)

# Count non missing
output <- get_count(mtcars, "cyl", "wt", "WT", output)
# Count unique non missing
output <- get_unique_count(mtcars, "cyl", "gear", "Gears", output)
# Count specific value (here: rows where vs equals 1)
output <- get_n_percent_value(mtcars, "cyl", "vs", 1, "VS = 1", output)
# Sum a numeric variable
output <- get_sum(mtcars, "cyl", "disp", "Displacement", output)

# N %
# Blank out one 'gear' value so the example contains a missing observation.
mtcars$gear[3] <- NA
# Gears should be handled as categories, not as a number.
mtcars$gear <- factor(mtcars$gear)
output <- get_n_percent(mtcars, "cyl", "gear", "Gear", output)

# A variable that is not already a factor gets converted anyway.
output <- get_n_percent(mtcars, "cyl", "carb", "Carb", output)

# Rows can be sorted by frequency instead of factor/alphabetical ordering.
output <- get_n_percent(
  mtcars, "cyl", "carb", "Carb", output,
  sort_by_freq = TRUE
)

# Availability can be reported for both numeric and non-numeric variables.
output <- get_availability(mtcars, "cyl", "gear", "Gear", output)
# Blank out several 'wt' values to demonstrate missing data.
mtcars$wt[12:16] <- NA
output <- get_availability(mtcars, "cyl", "wt", "wt", output)

# The result is just a dataframe, so it can be rendered with kable or
# written to excel/csv.
knitr::kable(output)


aasiyahrashan/TableOneDataframe documentation built on Dec. 9, 2024, 4:33 p.m.