#' Round numbers with 0-padding.
#'
#' Utility functions to round numbers, similar the the base functions \code{signif}
#' and \code{round}, but resulting in character representations that keep zeros at
#' the right edge if they are significant.
#'
#' @param x A numeric vector.
#' @param digits An integer specifying the number of significant digits to keep
#' (for \code{signif_pad}) or the number of digits after the decimal point (for
#' \code{round_pad}).
#' @param round.integers Should rounding be limited to digits to the right of
#' the decimal point?
#' @param round5up Should numbers with 5 as the last digit always be rounded
#' up? The standard R approach is "go to the even digit" (IEC 60559 standard,
#' see \code{\link{round}}), while some other softwares (e.g. SAS, Excel)
#' always round up.
#' @param dec The character symbol to use as decimal mark (locale
#' specific). [Deprecated; use \code{decimal.mark} instead]
#' @param ... Further options, passed to \code{formatC} (which is used
#' internally). Not all options will work, but some might be useful (e.g.
#' \code{big.mark}, \code{decimal.mark}).
#'
#' @return A character vector containing the rounded numbers.
#'
#' @seealso
#' \code{\link{signif}}
#' \code{\link{round}}
#' \code{\link{formatC}}
#' \code{\link{prettyNum}}
#' \code{\link{format}}
#'
#' @examples
#' x <- c(0.9001, 12345, 1.2, 1., 0.1, 0.00001 , 1e5)
#' signif_pad(x, digits=3)
#' signif_pad(x, digits=3, round.integers=TRUE)
#'
#' # Compare:
#' as.character(signif(x, digits=3))
#' format(x, digits=3, nsmall=3)
#' prettyNum(x, digits=3, drop0trailing=TRUE)
#' prettyNum(x, digits=3, drop0trailing=FALSE)
#'
#' # This is very close.
#' formatC(x, format="fg", flag="#", digits=3)
#' formatC(signif(x, 3), format="fg", flag="#", digits=3)
#'
#' # Could always remove the trailing "."
#' sub("[.]$", "", formatC(x, format="fg", flag="#", digits=3))
#'
#' @keywords utilities
#' @export
signif_pad <-
function(x,
digits = 3,
round.integers = TRUE,
round5up = TRUE,
dec,
...) {
args <- list(...)
if (!missing(dec)) {
warning("argument dec is deprecated; please use decimal.mark instead.",
call. = FALSE)
args$decimal.mark <- dec
}
eps <- if (round5up)
x * (10 ^ (-(digits + 3)))
else
0
rx <- ifelse(x >= 10 ^ digits & .isFALSE(round.integers),
round(x),
signif(x + eps, digits))
cx <- do.call(formatC,
c(list(
x = rx,
digits = digits,
format = "fg",
flag = "#"
),
args[names(args) %in% names(formals(formatC))]))
cx[is.na(x)] <- "0" # Put in a dummy value for missing x
cx <- gsub("[^0-9]*$", "", cx) # Remove any trailing non-digit characters
ifelse(is.na(x), NA, cx)
}
#' @rdname signif_pad
#' @export
round_pad <- function (x,
digits = 2,
round5up = TRUE,
dec,
...) {
args <- list(...)
if (!missing(dec)) {
warning("argument dec is deprecated; please use decimal.mark instead.",
call. = FALSE)
args$decimal.mark <- dec
}
eps <- if (round5up)
10 ^ (-(digits + 3))
else
0
rx <- round(x + eps, digits)
cx <- do.call(formatC,
c(list(
x = rx,
digits = digits,
format = "f",
flag = "0"
),
args[names(args) %in% names(formals(formatC))]))
ifelse(is.na(x), NA, cx)
}
#' Compute some basic descriptive statistics.
#'
#' Values of type \code{factor}, \code{character} and \code{logical} are
#' treated as categorical. For logicals, the two categories are given the
#' labels `Yes` for \code{TRUE}, and `No` for \code{FALSE}. Factor levels with
#' zero counts are retained.
#'
#' @param x A vector or numeric, factor, character or logical values.
#' @param quantile.type An integer from 1 to 9, passed as the \code{type}
#' argument to function \code{\link[stats]{quantile}}.
#' @param ... Further arguments (ignored).
#'
#' @return A list. For numeric \code{x}, the list contains the numeric elements:
#' \itemize{
#' \item \code{N}: the number of non-missing values
#' \item \code{NMISS}: the number of missing values
#' \item \code{SUM}: the sum of the non-missing values
#' \item \code{MEAN}: the mean of the non-missing values
#' \item \code{SD}: the standard deviation of the non-missing values
#' \item \code{MIN}: the minimum of the non-missing values
#' \item \code{MEDIAN}: the median of the non-missing values
#' \item \code{CV}: the percent coefficient of variation of the non-missing values
#' \item \code{GMEAN}: the geometric mean of the non-missing values if non-negative, or \code{NA}
#' \item \code{GCV}: the percent geometric coefficient of variation of the
#' non-missing values if non-negative, or \code{NA}
#' \item \code{qXX}: various quantiles (percentiles) of the non-missing
#' values (q01: 1\%, q02.5: 2.5\%, q05: 5\%, q10: 10\%, q25: 25\% (first
#' quartile), q33.3: 33.33333\% (first tertile), q50: 50\% (median, or second
#' quartile), q66.7: 66.66667\% (second tertile), q75: 75\% (third quartile),
#' q90: 90\%, q95: 95\%, q97.5: 97.5\%, q99: 99\%)
#' \item \code{Q1}: the first quartile of the non-missing values (alias \code{q25})
#' \item \code{Q2}: the second quartile of the non-missing values (alias \code{q50} or \code{Median})
#' \item \code{Q3}: the third quartile of the non-missing values (alias \code{q75})
#' \item \code{IQR}: the inter-quartile range of the non-missing values (i.e., \code{Q3 - Q1})
#' \item \code{T1}: the first tertile of the non-missing values (alias \code{q33.3})
#' \item \code{T2}: the second tertile of the non-missing values (alias \code{q66.7})
#' }
#' If \code{x} is categorical (i.e. factor, character or logical), the list
#' contains a sublist for each category, where each sublist contains the
#' numeric elements:
#' \itemize{
#' \item \code{FREQ}: the frequency count
#' \item \code{PCT}: the percent relative frequency, including NA in the denominator
#' \item \code{PCTnoNA}: the percent relative frequency, excluding NA from the denominator
#' }
#'
#' @examples
#' x <- exp(rnorm(100, 1, 1))
#' stats.default(x)
#'
#' y <- factor(sample(0:1, 99, replace=TRUE), labels=c("Female", "Male"))
#' y[1:10] <- NA
#' stats.default(y)
#' stats.default(is.na(y))
#'
#' @keywords utilities
#' @export
#' @importFrom stats sd median quantile IQR na.omit
stats.default <- function(x, quantile.type = 7, ...) {
if (is.logical(x)) {
x <- factor(1 - x,
levels = c(0, 1),
labels = c("Yes", "No"))
}
if (is.factor(x) || is.character(x)) {
y <- table(x, useNA = "no")
nn <- names(y)
nn[is.na(nn)] <- "Missing"
names(y) <- nn
lapply(y, function(z)
list(
FREQ = z,
PCT = 100 * z / length(x),
PCTnoNA = 100 * z / sum(y),
N = sum(y)
))
} else if (is.numeric(x) && sum(!is.na(x)) == 0) {
list(
N = sum(!is.na(x)),
NMISS = sum(is.na(x)),
SUM = NA,
MEAN = NA,
SD = NA,
CV = NA,
GMEAN = NA,
GCV = NA,
MEDIAN = NA,
MIN = NA,
MAX = NA,
q01 = NA,
q025 = NA,
q05 = NA,
q10 = NA,
q25 = NA,
q50 = NA,
q75 = NA,
q90 = NA,
q95 = NA,
q975 = NA,
q99 = NA,
Q1 = NA,
Q2 = NA,
Q3 = NA,
IQR = NA,
T1 = NA,
T2 = NA
)
} else if (is.numeric(x)) {
q <-
quantile(
x,
probs = c(
0.01,
0.025,
0.05,
0.1,
0.25,
1 / 3,
0.5,
2 / 3,
0.75,
0.9,
0.95,
0.975,
0.99
),
na.rm = TRUE,
type = quantile.type
)
list(
N = sum(!is.na(x)),
NMISS = sum(is.na(x)),
SUM = sum(x, na.rm = TRUE),
MEAN = mean(x, na.rm = TRUE),
SD = sd(x, na.rm = TRUE),
CV = 100 * sd(x, na.rm = TRUE) / abs(mean(x, na.rm = TRUE)),
GMEAN = if (any(na.omit(x) <= 0))
NA
else
exp(mean(log(x), na.rm = TRUE)),
GCV = if (any(na.omit(x) <= 0))
NA
else
100 * sqrt(exp(sd(
log(x), na.rm = TRUE
) ^ 2) - 1),
MEDIAN = median(x, na.rm = TRUE),
MIN = min(x, na.rm = TRUE),
MAX = max(x, na.rm = TRUE),
q01 = q["1%"],
q02.5 = q["2.5%"],
q05 = q["5%"],
q10 = q["10%"],
q25 = q["25%"],
q50 = q["50%"],
q75 = q["75%"],
q90 = q["90%"],
q95 = q["95%"],
q97.5 = q["97.5%"],
q99 = q["99%"],
Q1 = q["25%"],
Q2 = q["50%"],
Q3 = q["75%"],
IQR = q["75%"] - q["25%"],
T1 = q["33.33333%"],
T2 = q["66.66667%"]
)
} else {
stop(paste("Unrecognized variable type:", class(x)))
}
}
#' Apply rounding to basic descriptive statistics.
#'
#' Not all statistics should be rounded in the same way, or at all. This
#' function will apply rounding selectively to a list of statistics as returned
#' by \code{\link{stats.default}}. In particular we don't round counts (N and
#' FREQ), and for MIN, MAX and MEDIAN the \code{digits} is interpreted as the
#' \emph{minimum} number of significant digits, so that we don't loose any
#' precision. Percentages are rounded to a fixed number of decimal places
#' (default 1) rather than a specific number of significant digits.
#'
#' @param x A list, such as that returned by \code{\link{stats.default}}.
#' @param digits An integer specifying the number of significant digits to keep.
#' @param digits.pct An integer specifying the number of digits after the
#' decimal place for percentages.
#' @param round.median.min.max Should rounding applied to median, min and max?
#' @param round.integers Should rounding be limited to digits to the right of
#' the decimal point?
#' @param round5up Should numbers with 5 as the last digit always be rounded
#' up? The standard R approach is "go to the even digit" (IEC 60559 standard,
#' see \code{\link{round}}), while some other softwares (e.g. SAS, Excel)
#' always round up.
#' @param ... Further arguments.
#'
#' @return A list with the same number of elements as \code{x}. The rounded
#' values will be \code{character} (not \code{numeric}) and will have 0 padding
#' to ensure consistent number of significant digits.
#'
#' @seealso
#' \code{\link{signif_pad}}
#' \code{\link{stats.default}}
#' @examples
#' x <- round(exp(rnorm(100, 1, 1)), 6)
#' stats.default(x)
#' stats.apply.rounding(stats.default(x), digits=3)
#' stats.apply.rounding(stats.default(round(x, 1)), digits=3)
#'
#' @keywords utilities
#' @export
stats.apply.rounding <- function(x,
digits = 3,
digits.pct = 1,
round.median.min.max = TRUE,
round.integers = TRUE,
round5up = TRUE,
...) {
mindig <- function(x, digits) {
cx <- format(x)
ndig <- nchar(gsub("\\D", "", cx))
ifelse(
ndig > digits,
cx,
signif_pad(
x,
digits = digits,
round.integers = round.integers,
round5up = round5up,
...
)
)
}
format.percent <- function(x, digits) {
if (x == 0)
"0"
else if (x == 100)
"100"
else
formatC(x, digits = digits.pct, format = "f")
}
if (!is.list(x)) {
stop("Expecting a list")
}
if (is.list(x[[1]])) {
# Apply recursively
lapply(
x,
stats.apply.rounding,
digits = digits,
digits.pct = digits.pct,
round.integers = round.integers,
round5up = round5up,
...
)
} else {
cx <- lapply(x, format)
r <- lapply(
x,
signif_pad,
digits = digits,
round.integers = round.integers,
round5up = round5up,
...
)
nr <- c("N", "NMISS", "FREQ") # No rounding
nr <- nr[nr %in% names(x)]
nr <- nr[!is.na(x[nr])]
r[nr] <- cx[nr]
if (!round.median.min.max) {
sr <-
c("MEDIAN", "MIN", "MAX") # Only add significant digits, don't remove any
sr <- sr[sr %in% names(x)]
r[sr] <- lapply(x[sr], mindig, digits = digits)
}
pr <- c("PCT", "CV", "GCV") # Percentages
pr <- pr[pr %in% names(x)]
pr <- pr[!is.na(x[pr])]
r[pr] <-
lapply(as.numeric(x[pr]), format.percent, digits = digits.pct)
r
}
}
#' Render values for table output.
#'
#' Called from \code{\link{tab1}} by default to render values for
#' displaying in the table. This function forwards the call to separate
#' functions for rendering continuous, categorical and missing values.
#' The idea is that each of these functions can be overridden to customize
#' the table output.
#'
#' @param x A vector or numeric, factor, character or logical values.
#' @param name Name of the variable to be rendered (ignored).
#' @param missing Should missing values be included?
#' @param render.empty A \code{character} to return when \code{x} is empty.
#' @param render.continuous A function to render continuous (i.e.
#' \code{numeric}) values. Can also be a \code{character} string, in which case
#' it is passed to \code{\link{parse.abbrev.render.code}}.
#' @param render.categorical A function to render categorical (i.e.
#' \code{factor}, \code{character} or \code{logical}) values. Can also be a
#' \code{character} string, in which case it is passed to
#' \code{\link{parse.abbrev.render.code}}.
#' @param render.missing A function to render missing (i.e. \code{NA}) values.
#' Can also be a \code{character} string, in which case it is passed to
#' \code{\link{parse.abbrev.render.code}}. Set to \code{NULL} to ignore missing
#' values.
#' @param ... Further arguments, passed to \code{\link{stats.apply.rounding}}.
#'
#' @return A \code{character} vector. Each element is to be displayed in a
#' separate cell in the table. The \code{\link{names}} of the vector are the
#' labels to use in the table. However, the first names should be empty as it
#' will be replaced by the name of the variable. Empty strings are allowed and
#' result in empty table cells.
#'
#' @examples
#' x <- exp(rnorm(100, 1, 1))
#' render.default(x)
#' render.default(x, TRUE)
#'
#' y <- factor(sample(0:1, 99, replace=TRUE), labels=c("Female", "Male"))
#' y[1:10] <- NA
#' render.default(y)
#'
#' @keywords utilities
#' @export
render.default <-
function(x,
name,
missing = any(is.na(x)),
render.empty = "NA",
render.continuous = render.continuous.default,
render.categorical = render.categorical.default,
render.missing = render.missing.default,
...) {
if (is.character(render.continuous)) {
render.continuous <-
parse.abbrev.render.code(code = render.continuous, ...)
}
if (is.character(render.categorical)) {
render.categorical <-
parse.abbrev.render.code(code = render.categorical, ...)
}
if (!is.null(render.missing) && is.character(render.missing)) {
render.missing <- parse.abbrev.render.code(code = render.missing, ...)
}
if (length(x) == 0) {
return(render.empty)
}
if (is.logical(x)) {
x <- factor(x,
levels = c(T, F),
labels = c("Yes", "No"))
}
if (is.factor(x) || is.character(x)) {
r <- do.call(render.categorical, c(list(x = x), list(...)))
} else if (is.numeric(x)) {
r <- do.call(render.continuous, c(list(x = x), list(...)))
} else {
stop(paste("Unrecognized variable type:", class(x)))
}
if (missing && !is.null(render.missing)) {
r <- c(r, do.call(render.missing, c(list(x = x), list(...))))
}
# Remove empty levels from data
r <- gsub("NA% \\(0/0\\)", "", r)
r <- gsub("0% \\(0/\\d+\\)", "", r)
gsub("0 \\(0%\\)", "", r)
}
#' Parse abbreviated code for rendering table output.
#'
#' @param code A \code{character} vector specifying the statistics to display
#' in abbreviated code. See Details.
#' @param ... Further arguments, passed to \code{\link{stats.apply.rounding}}.
#'
#' @return A function that takes a single argument and returns a
#' \code{character} vector.
#'
#' @details In abbreviated code, the words N, NMISS, MEAN, SD, MIN, MEDIAN,
#' MAX, IQR, CV, GMEAN, GCV, FREQ and PCT are substituted for their respective
#' values (see \code{\link{stats.default}}). The substitution is case
#' insensitive, and the substituted values are rounded appropriately (see
#' \code{\link{stats.apply.rounding}}). Other text is left unchanged. The
#' \code{code} can be a vector, in which case each element is displayed in its
#' own row in the table. The \code{names} of \code{code} are used as row
#' labels; if no names are present, then the \code{code} itself is used unless
#' \code{code} is of length 1, in which case no label is used (for numeric
#' variables only, categorical variables are always labeled by the class
#' label). The special name '.' also indicates that \code{code} itself be is
#' used as the row label.
#'
#' @examples
#' \dontrun{
#' x <- round(exp(rnorm(100, log(20), 1)), 2)
#' stats.default(x)
#' f <- parse.abbrev.render.code(c("Mean (SD)", "Median [Min, Max]"), 3)
#' f(x)
#' f2 <- parse.abbrev.render.code(c("Geo. Mean (Geo. CV%)" = "GMean (GCV%)"), 3)
#' f2(x)
#' f3 <- parse.abbrev.render.code(c("Mean (SD)"), 3)
#' f3(x)
#'
#' x <- sample(c("Male", "Female"), 30, replace=T)
#' stats.default(x)
#' f <- parse.abbrev.render.code("Freq (Pct%)")
#' f(x)
#' }
#'
#' @keywords utilities
#' @export
parse.abbrev.render.code <-
function(code, ...) {
codestr <- code
if (is.null(names(codestr)) && length(codestr) > 1) {
names(codestr) <- codestr
}
names(codestr)[names(codestr) == "."] <-
codestr[names(codestr) == "."]
function(x, ...) {
s <- stats.apply.rounding(stats.default(x, ...), ...)
g <- function(ss) {
res <- codestr
for (nm in names(ss)) {
res <- gsub(paste0("\\b", nm, "\\b"),
ss[[nm]],
res,
ignore.case = T)
}
names(res) <- names(codestr)
res
}
if (!is.list(s)) {
stop("Expecting a list")
}
if (is.list(s[[1]])) {
res <- lapply(s, g)
nm <- ifelse(sapply(res, seq_along) == 1, "1", "")
nm[nm == "1"] <- names(s)
res <- unlist(res)
names(res) <- nm
c("", res)
} else {
if (length(codestr) == 1 && is.null(names(codestr))) {
g(s)
} else {
c("", g(s))
}
}
}
}
#' Render continuous values for table output.
#'
#' Called from \code{\link{tab1}} by default to render continuous (i.e.
#' \code{numeric}) values for displaying in the table.
#'
#' @param x A numeric vector.
#' @param ... Further arguments, passed to \code{\link{stats.apply.rounding}}.
#'
#' @return A \code{character} vector. Each element is to be displayed in a
#' separate cell in the table. The \code{\link{names}} of the vector are the
#' labels to use in the table. However, the first names should be empty as it
#' will be replaced by the name of the variable. Empty strings are allowed and
#' result in empty table cells.
#'
#' @examples
#' x <- exp(rnorm(100, 1, 1))
#' render.continuous.default(x)
#'
#' @keywords utilities
#' @export
render.continuous.default <- function(x, ...) {
with(
stats.apply.rounding(stats.default(x, ...), ...),
c(
"",
"Valid Obs." = sprintf("%s", N),
"Mean (SD)" = sprintf("%s (%s)", MEAN, SD),
"Median [Min, Max]" = sprintf("%s [%s, %s]", MEDIAN, MIN, MAX)
)
)
}
#' Render categorical values for table output.
#'
#' Called from \code{\link{tab1}} by default to render categorical (i.e.
#' \code{factor}, \code{character} or \code{logical}) values for displaying in the table.
#'
#' @param x A vector of type \code{factor}, \code{character} or \code{logical}.
#' @param ... Further arguments, passed to \code{\link{stats.apply.rounding}}.
#' @param na.is.category Include missing values in the denominator for
#' calculating percentages or omit them (the default). This option do not for
#' the moment.
#'
#' @return A \code{character} vector. Each element is to be displayed in a
#' separate cell in the table. The \code{\link{names}} of the vector are the
#' labels to use in the table. However, the first names should be empty as it
#' will be replaced by the name of the variable. Empty strings are allowed and
#' result in empty table cells.
#'
#' @examples
#' y <- factor(sample(0:1, 99, replace=TRUE), labels=c("Female", "Male"))
#' y[1:10] <- NA
#' render.categorical.default(y)
#' @keywords utilities
#' @export
render.categorical.default <- function(x, ..., na.is.category = FALSE) {
c("", sapply(stats.apply.rounding(stats.default(x, ...), ...),
function(y)
with(y, ifelse(FREQ == 0, "",
sprintf("%s/%s (%s%%)", FREQ, N, PCTnoNA)))))
}
#' Render missing values for table output.
#'
#' Called from \code{\link{tab1}} by default to render missing (i.e.
#' \code{NA}) values for displaying in the table.
#'
#' @param x A vector.
#' @param ... Further arguments, passed to \code{\link{stats.apply.rounding}}.
#'
#' @return A \code{character} vector. Each element is to be displayed in a
#' separate cell in the table. The \code{\link{names}} of the vector are the
#' labels to use in the table. Empty strings are allowed and
#' result in empty table cells.
#'
#' @examples
#' y <- factor(sample(0:1, 99, replace=TRUE), labels=c("Female", "Male"))
#' y[1:10] <- NA
#' render.missing.default(y)
#' @keywords utilities
#' @export
render.missing.default <- function(x, ...) {
with(stats.apply.rounding(stats.default(is.na(x), ...), ...)$Yes,
c(Missing = sprintf("%s (%s%%)", FREQ, PCT)))
}
#' Render variable labels for table output.
#'
#' Called from \code{\link{tab1}} by default to render variable labels
#' for displaying in the table.
#'
#' @param x A vector, usually with the \code{\link{var_lab}} and (if appropriate)
#' \code{\link{unit}} attributes.
#' @param transpose Logical indicating whether on not the table is transposed.
#'
#' @return A \code{character}, which may contain HTML markup.
#'
#' @examples
#' x <- exp(rnorm(100, 1, 1))
#' var_lab(x) <- "Weight"
#' render.varlabel(x)
#'
#' y <- factor(sample(0:1, 99, replace=TRUE), labels=c("Female", "Male"))
#' y[1:10] <- NA
#' var_lab(y) <- "Sex"
#' render.varlabel(y)
#' @keywords utilities
#' @export
render.varlabel <- function(x, transpose = F) {
sprintf("%s", var_lab(x))
}
#' Generate an flextable of descriptive statistics.
#'
#' There are two interfaces, the default, which typically takes a list of
#' \code{data.frame}s for \code{x}, and the formula interface. The formula
#' interface is less flexible, but simpler to use and designed to handle the
#' most common use cases. It is important to use factors appropriately for
#' categorical variables (i.e. have the levels labeled properly and in the
#' desired order). The contents of the table can be customized by providing
#' user-defined `renderer' functions. To facilitate this, some tags (such as
#' row labels) are given specific classes for easy CSS selection.
#'
#' @details For the default version, is is expected that \code{x} is a named
#' list of \code{data.frame}s, one for each stratum, with names corresponding to
#' strata labels.
#'
#' @param vars Variables to be used for summary table.
#' @param data A \code{data.frame} from which the variables in \code{vars}
#' should be taken.
#' @param group Name of the grouping variable.
#' @param row_split Variable that used for splitting table rows, rows will be
#' splited using this variable. Useful for repeated measures.
#' @param overall A label for the "Total" column. Specify \code{NULL} or
#' \code{FALSE} to omit the column altogether.
#' @param select a named vector with as many components as row-variables. Every
#' element of `select` will be used to select the individuals to be analyzed
#' for every row-variable. Name of the vector corresponds to the row variable,
#' element is the selection.
#' @param render A function to render the table cells (see Details).
#' @param drop_lev Should empty factor levels be dropped?
#' @param indent Indent symbol of the table for statistic values.
#' @param ... Further arguments, passed to \code{render}.
#'
#' @return An object of class "tab1".
#'
#' @examples
#'
#' dat <- expand.grid(id=1:10, sex=c("Male", "Female"), treat=c("Treated", "Placebo"))
#' dat$age <- runif(nrow(dat), 10, 50)
#' dat$age[3] <- NA # Add a missing value
#' dat$wt <- exp(rnorm(nrow(dat), log(70), 0.2))
#'
#' var_lab(dat$sex) <- "Sex"
#' var_lab(dat$age) <- "Age"
#' var_lab(dat$treat) <- "Treatment Group"
#' var_lab(dat$wt) <- "Weight"
#'
#'
#' # Something more complicated
#'
#' dat$dose <- ifelse(dat$treat=="Placebo", "Placebo",
#' sample(c("5 mg", "10 mg"), nrow(dat), replace=TRUE))
#' dat$dose <- factor(dat$dose, levels=c("Placebo", "5 mg", "10 mg"))
#'
#'
#'
#' my.render.cont <- function(x) {
#' with(stats.default(x),
#' sprintf("%0.2f (%0.1f)", MEAN, SD))
#' }
#'
#' tab1(c("age", "sex", "wt"),
#' data = dat,
#' group = "treat",
#' render.continuous=my.render.cont)
#'
#'
#' @keywords utilities
#' @export
tab1 <- function(vars,
data,
group = NULL,
row_split = NULL,
overall = TRUE,
select = NULL,
render = render.default,
drop_lev = TRUE,
indent = "\t",
...) {
if (is.character(render)) {
render <- parse.abbrev.render.code(code = render, ...)
}
vars_list <- c(vars, group, row_split)
if (!all(vars_list %in% names(data))) {
stop(
"Variable ",
paste(vars_list[!vars_list %in% names(data)], collapse = ", "),
" not in the dataset, please check!"
)
}
if(base::anyDuplicated(vars_list))
stop("vars, group and row_splot duplicated.")
# Select records with non-missing group and row split
df <- data[stats::complete.cases(data[c(group, row_split)]),
vars_list, drop = FALSE]
# Variable names to labels if no variable label
with_varlab <- sapply(df, has.label)
for (i in names(df)[!with_varlab]) {
var_lab(df[[i]]) <- i
}
variables <- sapply(names(df), function(v)
render.varlabel(df[[v]]), simplify = FALSE)
# Group variable to factor
if (!is.null(group)) {
if (has.labels(df[[group]]) | !is.factor(df[[group]]))
df[[group]] <- to_factor(df[[group]])
group_label <- var_lab(df[[group]])
}
if (!is.null(row_split)) {
if (has.labels(df[[row_split]]) | !is.factor(df[[row_split]]))
df[[row_split]] <- to_factor(df[[row_split]])
}
# Check if missing
any.missing <- sapply(vars, function(v)sum(is.na(df[[v]]))) > 0
# Generate selection vector function
gen_selec <- function(dat, var, select = NULL) {
if (is.null(select) | !var %in% names(select)) {
return(rep(TRUE, length(dat[[var]])))
} else{
r <- eval(str2expression(select[var]), envir = dat)
r & !is.na(r)
}
}
# Create value labels for characters variables to avoid missing levels between groups
df[sapply(df, is.character)] <- lapply(df[sapply(df, is.character)],
to_character)
# Tabulation main function
calc_tab <- function(dat) {
# Transform data to list for loop
if (overall & !is.null(group)) {
x <- c(split(dat, dat[[group]]), list(Total = dat))
} else if (!is.null(group)) {
x <- split(dat, dat[[group]])
} else{
x <- list(Total = dat)
}
out <- lapply(vars, function(v) {
# Apply stats
y <- do.call(cbind, lapply(x, function(s) {
z <- s[[v]][gen_selec(s, v, select)] # Apply subset
# Convert character to factor
if(has.labels(z) | is.character(z))
z <- to_factor(z)
render(x = z, name = v, missing = any.missing[v], ...)
}))
rownames(y) <- paste(indent, rownames(y), sep = "")
rownames(y)[1] <- variables[[v]]
return(y)
})
out <- lapply(out, function(x) {
cbind(x, c(1, rep(2, nrow(x) - 1)))
})
out <- do.call(rbind, out)
rbind("Observation" = c(sapply(x, nrow), 1), out)
}
if (is.null(row_split)) {
tbody <- calc_tab(df)
} else{
split_lab <- variables[[row_split]]
dfm <- split(df, df[[row_split]])
tbody <- lapply(names(dfm), function(x) {
out <- calc_tab(dfm[[x]])
out <- rbind(c(rep("", ncol(out) - 1), 0), out)
row.names(out)[1] <- paste(split_lab, "=", x)
return(out)
})
tbody <- do.call("rbind", tbody)
}
# Remove empty rows
if (drop_lev)
tbody <- tbody[!(rowSums(tbody != "") == 1 &
!tbody[, ncol(tbody)] %in% c(0, 1)), ]
# Position of variable to bold and merge horizontal
# pos <- tbody[, ncol(tbody)]
res <- tbody[,-ncol(tbody), drop = FALSE]
structure(res,
position = unname(tbody[, ncol(tbody)]),
class = c("tab1", class(res))
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.