#' Explore and summarize data
#'
#' @details See \url{https://radiant-rstats.github.io/docs/data/explore.html} for an example in Radiant
#'
#' @param dataset Dataset to explore
#' @param vars (Numeric) variables to summarize
#' @param byvar Variable(s) to group data by
#' @param fun Functions to use for summarizing
#' @param top Use functions ("fun"), variables ("vars"), or group-by variables as column headers
#' @param tabfilt Expression used to filter the table (e.g., "Total > 10000")
#' @param tabsort Expression used to sort the table (e.g., "desc(Total)")
#' @param tabslice Expression used to filter table (e.g., "1:5")
#' @param nr Number of rows to display
#' @param data_filter Expression used to filter the dataset before creating the table (e.g., "price > 10000")
#' @param arr Expression to arrange (sort) the data on (e.g., "color, desc(price)")
#' @param rows Rows to select from the specified dataset
#' @param envir Environment to extract data from
#'
#' @return A list of all variables defined in the function as an object of class explore
#'
#' @examples
#' explore(diamonds, c("price", "carat")) %>% str()
#' explore(diamonds, "price:x")$tab
#' explore(diamonds, c("price", "carat"), byvar = "cut", fun = c("n_missing", "skew"))$tab
#'
#' @seealso See \code{\link{summary.explore}} to show summaries
#'
#' @export
explore <- function(dataset, vars = "", byvar = "", fun = c("mean", "sd"),
top = "fun", tabfilt = "", tabsort = "", tabslice = "",
nr = Inf, data_filter = "", arr = "", rows = NULL,
envir = parent.frame()) {
tvars <- vars
if (!is.empty(byvar)) tvars <- unique(c(tvars, byvar))
df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset))
dataset <- get_data(dataset, tvars, filt = data_filter, arr = arr, rows = rows, na.rm = FALSE, envir = envir)
rm(tvars)
## in case : was used
vars <- base::setdiff(colnames(dataset), byvar)
## converting data as needed for summarization
dc <- get_class(dataset)
fixer <- function(x, fun = as_integer) {
if (is.character(x) || is.Date(x)) {
x <- rep(NA, length(x))
} else if (is.factor(x)) {
x_num <- sshhr(as.integer(as.character(x)))
if (length(na.omit(x_num)) == 0) {
x <- fun(x)
} else {
x <- x_num
}
}
x
}
fixer_first <- function(x) {
x <- fixer(x, function(x) as_integer(x == levels(x)[1]))
}
mean <- function(x, na.rm = TRUE) sshhr(base::mean(fixer_first(x), na.rm = na.rm))
sum <- function(x, na.rm = TRUE) sshhr(base::sum(fixer_first(x), na.rm = na.rm))
var <- function(x, na.rm = TRUE) sshhr(stats::var(fixer_first(x), na.rm = na.rm))
sd <- function(x, na.rm = TRUE) sshhr(stats::sd(fixer_first(x), na.rm = na.rm))
se <- function(x, na.rm = TRUE) sshhr(radiant.data::se(fixer_first(x), na.rm = na.rm))
me <- function(x, na.rm = TRUE) sshhr(radiant.data::me(fixer_first(x), na.rm = na.rm))
cv <- function(x, na.rm = TRUE) sshhr(radiant.data::cv(fixer_first(x), na.rm = na.rm))
prop <- function(x, na.rm = TRUE) sshhr(radiant.data::prop(fixer_first(x), na.rm = na.rm))
varprop <- function(x, na.rm = TRUE) sshhr(radiant.data::varprop(fixer_first(x), na.rm = na.rm))
sdprop <- function(x, na.rm = TRUE) sshhr(radiant.data::sdprop(fixer_first(x), na.rm = na.rm))
seprop <- function(x, na.rm = TRUE) sshhr(radiant.data::seprop(fixer_first(x), na.rm = na.rm))
meprop <- function(x, na.rm = TRUE) sshhr(radiant.data::meprop(fixer_first(x), na.rm = na.rm))
varpop <- function(x, na.rm = TRUE) sshhr(radiant.data::varpop(fixer_first(x), na.rm = na.rm))
sdpop <- function(x, na.rm = TRUE) sshhr(radiant.data::sdpop(fixer_first(x), na.rm = na.rm))
median <- function(x, na.rm = TRUE) sshhr(stats::median(fixer(x), na.rm = na.rm))
min <- function(x, na.rm = TRUE) sshhr(base::min(fixer(x), na.rm = na.rm))
max <- function(x, na.rm = TRUE) sshhr(base::max(fixer(x), na.rm = na.rm))
p01 <- function(x, na.rm = TRUE) sshhr(radiant.data::p01(fixer(x), na.rm = na.rm))
p025 <- function(x, na.rm = TRUE) sshhr(radiant.data::p025(fixer(x), na.rm = na.rm))
p05 <- function(x, na.rm = TRUE) sshhr(radiant.data::p05(fixer(x), na.rm = na.rm))
p10 <- function(x, na.rm = TRUE) sshhr(radiant.data::p10(fixer(x), na.rm = na.rm))
p25 <- function(x, na.rm = TRUE) sshhr(radiant.data::p25(fixer(x), na.rm = na.rm))
p75 <- function(x, na.rm = TRUE) sshhr(radiant.data::p75(fixer(x), na.rm = na.rm))
p90 <- function(x, na.rm = TRUE) sshhr(radiant.data::p90(fixer(x), na.rm = na.rm))
p95 <- function(x, na.rm = TRUE) sshhr(radiant.data::p95(fixer(x), na.rm = na.rm))
p975 <- function(x, na.rm = TRUE) sshhr(radiant.data::p975(fixer(x), na.rm = na.rm))
p99 <- function(x, na.rm = TRUE) sshhr(radiant.data::p99(fixer(x), na.rm = na.rm))
skew <- function(x, na.rm = TRUE) sshhr(radiant.data::skew(fixer(x), na.rm = na.rm))
kurtosi <- function(x, na.rm = TRUE) sshhr(radiant.data::kurtosi(fixer(x), na.rm = na.rm))
isLogNum <- "logical" == dc & names(dc) %in% base::setdiff(vars, byvar)
if (sum(isLogNum) > 0) {
dataset[, isLogNum] <- select(dataset, which(isLogNum)) %>%
mutate_all(as.integer)
dc[isLogNum] <- "integer"
}
if (is.empty(byvar)) {
byvar <- c()
tab <- summarise_all(dataset, fun, na.rm = TRUE)
} else {
## convert categorical variables to factors if needed
## needed to deal with empty/missing values
dataset[, byvar] <- select_at(dataset, .vars = byvar) %>%
mutate_all(~ empty_level(.))
tab <- dataset %>%
group_by_at(.vars = byvar) %>%
summarise_all(fun, na.rm = TRUE)
}
## adjust column names
if (length(vars) == 1 || length(fun) == 1) {
rng <- (length(byvar) + 1):ncol(tab)
colnames(tab)[rng] <- paste0(vars, "_", fun)
rm(rng)
}
## setup regular expression to split variable/function column appropriately
rex <- paste0("(.*?)_", glue('({glue_collapse(fun, "$|")}$)'))
## useful answer and comments: http://stackoverflow.com/a/27880388/1974918
tab <- gather(tab, "variable", "value", !!-(seq_along(byvar))) %>%
extract(variable, into = c("variable", "fun"), regex = rex) %>%
mutate(fun = factor(fun, levels = !!fun), variable = factor(variable, levels = vars)) %>%
# mutate(variable = paste0(variable, " {", dc[variable], "}")) %>%
spread("fun", "value")
## flip the table if needed
if (top != "fun") {
tab <- list(tab = tab, byvar = byvar, fun = fun) %>%
flip(top)
}
nrow_tab <- nrow(tab)
## filtering the table if desired from Report > Rmd
if (!is.empty(tabfilt)) {
tab <- filter_data(tab, tabfilt)
}
## sorting the table if desired from Report > Rmd
if (!identical(tabsort, "")) {
tabsort <- gsub(",", ";", tabsort)
tab <- tab %>% arrange(!!!rlang::parse_exprs(tabsort))
}
## ensure factors ordered as in the (sorted) table
if (!is.empty(byvar) && top != "byvar") {
for (i in byvar) tab[[i]] <- tab[[i]] %>% (function(x) factor(x, levels = unique(x)))
rm(i)
}
## frequencies converted to doubles during gather/spread above
check_int <- function(x) {
if (is.double(x) && length(na.omit(x)) > 0) {
x_int <- sshhr(as.integer(round(x, .Machine$double.rounding)))
if (isTRUE(all.equal(x, x_int, check.attributes = FALSE))) x_int else x
} else {
x
}
}
tab <- ungroup(tab) %>% mutate_all(check_int)
## slicing the table if desired
if (!is.empty(tabslice)) {
tab <- tab %>%
slice_data(tabslice) %>%
droplevels()
}
## convert to data.frame to maintain attributes
tab <- as.data.frame(tab, stringsAsFactors = FALSE)
attr(tab, "radiant_nrow") <- nrow_tab
if (!isTRUE(is.infinite(nr))) {
ind <- if (nr > nrow(tab)) 1:nrow(tab) else 1:nr
tab <- tab[ind, , drop = FALSE]
rm(ind)
}
list(
tab = tab,
df_name = df_name,
vars = vars,
byvar = byvar,
fun = fun,
top = top,
tabfilt = tabfilt,
tabsort = tabsort,
tabslice = tabslice,
nr = nr,
data_filter = data_filter,
arr = arr,
rows = rows
) %>% add_class("explore")
}
#' Summary method for the explore function
#'
#' @details See \url{https://radiant-rstats.github.io/docs/data/explore.html} for an example in Radiant
#'
#' @param object Return value from \code{\link{explore}}
#' @param dec Number of decimals to show
#' @param ... further arguments passed to or from other methods
#'
#' @examples
#' result <- explore(diamonds, "price:x")
#' summary(result)
#' result <- explore(diamonds, "price", byvar = "cut", fun = c("n_obs", "skew"))
#' summary(result)
#' explore(diamonds, "price:x", byvar = "color") %>% summary()
#'
#' @seealso \code{\link{explore}} to generate summaries
#'
#' @export
summary.explore <- function(object, dec = 3, ...) {
cat("Explore\n")
cat("Data :", object$df_name, "\n")
if (!is.empty(object$data_filter)) {
cat("Filter :", gsub("\\n", "", object$data_filter), "\n")
}
if (!is.empty(object$arr)) {
cat("Arrange :", gsub("\\n", "", object$arr), "\n")
}
if (!is.empty(object$rows)) {
cat("Slice :", gsub("\\n", "", object$rows), "\n")
}
if (!is.empty(object$tabfilt)) {
cat("Table filter:", object$tabfilt, "\n")
}
if (!is.empty(object$tabsort[1])) {
cat("Table sorted:", paste0(object$tabsort, collapse = ", "), "\n")
}
if (!is.empty(object$tabslice)) {
cat("Table slice :", object$tabslice, "\n")
}
nr <- attr(object$tab, "radiant_nrow")
if (!isTRUE(is.infinite(nr)) && !isTRUE(is.infinite(object$nr)) && object$nr < nr) {
cat(paste0("Rows shown : ", object$nr, " (out of ", nr, ")\n"))
}
if (!is.empty(object$byvar[1])) {
cat("Grouped by :", object$byvar, "\n")
}
cat("Functions :", paste0(object$fun, collapse = ", "), "\n")
cat("Top :", c("fun" = "Function", "var" = "Variables", "byvar" = "Group by")[object$top], "\n")
cat("\n")
format_df(object$tab, dec = dec, mark = ",") %>%
print(row.names = FALSE)
invisible()
}
#' Deprecated: Store method for the explore function
#'
#' @details Return the summarized data. See \url{https://radiant-rstats.github.io/docs/data/explore.html} for an example in Radiant
#'
#' @param dataset Dataset
#' @param object Return value from \code{\link{explore}}
#' @param name Name to assign to the dataset
#' @param ... further arguments passed to or from other methods
#'
#' @seealso \code{\link{explore}} to generate summaries
#'
#' @export
store.explore <- function(dataset, object, name, ...) {
if (missing(name)) {
object$tab
} else {
stop(
paste0(
"This function is deprecated. Use the code below instead:\n\n",
name, " <- ", deparse(substitute(object)), "$tab\nregister(\"",
name, ")"
),
call. = FALSE
)
}
}
#' Flip the DT table to put Function, Variable, or Group by on top
#'
#' @details See \url{https://radiant-rstats.github.io/docs/data/explore.html} for an example in Radiant
#'
#' @param expl Return value from \code{\link{explore}}
#' @param top The variable (type) to display at the top of the table ("fun" for Function, "var" for Variable, and "byvar" for Group by. "fun" is the default
#'
#' @examples
#' explore(diamonds, "price:x", top = "var") %>% summary()
#' explore(diamonds, "price", byvar = "cut", fun = c("n_obs", "skew"), top = "byvar") %>% summary()
#'
#' @seealso \code{\link{explore}} to calculate summaries
#' @seealso \code{\link{summary.explore}} to show summaries
#' @seealso \code{\link{dtab.explore}} to create the DT table
#'
#' @export
flip <- function(expl, top = "fun") {
cvars <- expl$byvar %>%
(function(x) if (is.empty(x[1])) character(0) else x)
if (top[1] == "var") {
expl$tab %<>% gather(".function", "value", !!-(1:(length(cvars) + 1))) %>%
spread("variable", "value")
expl$tab[[".function"]] %<>% factor(., levels = expl$fun)
} else if (top[1] == "byvar" && length(cvars) > 0) {
expl$tab %<>% gather(".function", "value", !!-(1:(length(cvars) + 1))) %>%
spread(!!cvars[1], "value")
expl$tab[[".function"]] %<>% factor(., levels = expl$fun)
## ensure we don't have invalid column names
colnames(expl$tab) <- fix_names(colnames(expl$tab))
}
expl$tab
}
#' Make an interactive table of summary statistics
#'
#' @details See \url{https://radiant-rstats.github.io/docs/data/explore.html} for an example in Radiant
#'
#' @param object Return value from \code{\link{explore}}
#' @param dec Number of decimals to show
#' @param searchCols Column search and filter
#' @param order Column sorting
#' @param pageLength Page length
#' @param caption Table caption
#' @param ... further arguments passed to or from other methods
#'
#' @examples
#' \dontrun{
#' tab <- explore(diamonds, "price:x") %>% dtab()
#' tab <- explore(diamonds, "price", byvar = "cut", fun = c("n_obs", "skew"), top = "byvar") %>%
#' dtab()
#' }
#'
#' @seealso \code{\link{pivotr}} to create a pivot table
#' @seealso \code{\link{summary.pivotr}} to show summaries
#'
#' @export
dtab.explore <- function(object, dec = 3, searchCols = NULL,
order = NULL, pageLength = NULL,
caption = NULL, ...) {
style <- if (exists("bslib_current_version") && "4" %in% bslib_current_version()) "bootstrap4" else "bootstrap"
tab <- object$tab
cn_all <- colnames(tab)
cn_num <- cn_all[sapply(tab, is.numeric)]
cn_cat <- cn_all[-which(cn_all %in% cn_num)]
isInt <- sapply(tab, is.integer)
isDbl <- sapply(tab, is_double)
dec <- ifelse(is.empty(dec) || dec < 0, 3, round(dec, 0))
top <- c("fun" = "Function", "var" = "Variables", "byvar" = paste0("Group by: ", object$byvar[1]))[object$top]
sketch <- shiny::withTags(
table(
thead(
tr(
th(" ", colspan = length(cn_cat)),
lapply(top, th, colspan = length(cn_num), class = "text-center")
),
tr(lapply(cn_all, th))
)
)
)
if (!is.empty(caption)) {
## from https://github.com/rstudio/DT/issues/630#issuecomment-461191378
caption <- shiny::tags$caption(style = "caption-side: bottom; text-align: left; font-size:100%;", caption)
}
## for display options see https://datatables.net/reference/option/dom
dom <- if (nrow(tab) < 11) "t" else "ltip"
fbox <- if (nrow(tab) > 5e6) "none" else list(position = "top")
dt_tab <- DT::datatable(
tab,
container = sketch,
caption = caption,
selection = "none",
rownames = FALSE,
filter = fbox,
## must use fillContainer = FALSE to address
## see https://github.com/rstudio/DT/issues/367
## https://github.com/rstudio/DT/issues/379
fillContainer = FALSE,
style = style,
options = list(
dom = dom,
stateSave = TRUE, ## store state
searchCols = searchCols,
order = order,
columnDefs = list(list(orderSequence = c("desc", "asc"), targets = "_all")),
autoWidth = TRUE,
processing = FALSE,
pageLength = {
if (is.null(pageLength)) 10 else pageLength
},
lengthMenu = list(c(5, 10, 25, 50, -1), c("5", "10", "25", "50", "All"))
),
## https://github.com/rstudio/DT/issues/146#issuecomment-534319155
callback = DT::JS('$(window).on("unload", function() { table.state.clear(); })')
) %>%
DT::formatStyle(., cn_cat, color = "white", backgroundColor = "grey")
## rounding as needed
if (sum(isDbl) > 0) {
dt_tab <- DT::formatRound(dt_tab, names(isDbl)[isDbl], dec)
}
if (sum(isInt) > 0) {
dt_tab <- DT::formatRound(dt_tab, names(isInt)[isInt], 0)
}
## see https://github.com/yihui/knitr/issues/1198
dt_tab$dependencies <- c(
list(rmarkdown::html_dependency_bootstrap("bootstrap")),
dt_tab$dependencies
)
dt_tab
}
###########################################
## turn functions below into functional ...
###########################################
#' Number of observations
#' @param x Input variable
#' @param ... Additional arguments
#' @return number of observations
#' @examples
#' n_obs(c("a", "b", NA))
#'
#' @export
n_obs <- function(x, ...) length(x)
#' Number of missing values
#' @param x Input variable
#' @param ... Additional arguments
#' @return number of missing values
#' @examples
#' n_missing(c("a", "b", NA))
#'
#' @export
n_missing <- function(x, ...) sum(is.na(x))
#' Calculate percentiles
#' @param x Numeric vector
#' @param na.rm If TRUE missing values are removed before calculation
#' @examples
#' p01(0:100)
#'
#' @rdname percentiles
#' @export
p01 <- function(x, na.rm = TRUE) quantile(x, .01, na.rm = na.rm)
#' @rdname percentiles
#' @export
p025 <- function(x, na.rm = TRUE) quantile(x, .025, na.rm = na.rm)
#' @rdname percentiles
#' @export
p05 <- function(x, na.rm = TRUE) quantile(x, .05, na.rm = na.rm)
#' @rdname percentiles
#' @export
p10 <- function(x, na.rm = TRUE) quantile(x, .1, na.rm = na.rm)
#' @rdname percentiles
#' @export
p25 <- function(x, na.rm = TRUE) quantile(x, .25, na.rm = na.rm)
#' @rdname percentiles
#' @export
p75 <- function(x, na.rm = TRUE) quantile(x, .75, na.rm = na.rm)
#' @rdname percentiles
#' @export
p90 <- function(x, na.rm = TRUE) quantile(x, .90, na.rm = na.rm)
#' @rdname percentiles
#' @export
p95 <- function(x, na.rm = TRUE) quantile(x, .95, na.rm = na.rm)
#' @rdname percentiles
#' @export
p975 <- function(x, na.rm = TRUE) quantile(x, .975, na.rm = na.rm)
#' @rdname percentiles
#' @export
p99 <- function(x, na.rm = TRUE) quantile(x, .99, na.rm = na.rm)
#' Coefficient of variation
#' @param x Input variable
#' @param na.rm If TRUE missing values are removed before calculation
#' @return Coefficient of variation
#' @examples
#' cv(runif(100))
#'
#' @export
cv <- function(x, na.rm = TRUE) {
m <- mean(x, na.rm = na.rm)
if (m == 0) {
message("Mean should be greater than 0")
NA
} else {
sd(x, na.rm = na.rm) / m
}
}
#' Standard error
#' @param x Input variable
#' @param na.rm If TRUE missing values are removed before calculation
#' @return Standard error
#' @examples
#' se(rnorm(100))
#'
#' @export
se <- function(x, na.rm = TRUE) {
if (na.rm) x <- na.omit(x)
sd(x) / sqrt(length(x))
}
#' Margin of error
#' @param x Input variable
#' @param conf_lev Confidence level. The default is 0.95
#' @param na.rm If TRUE missing values are removed before calculation
#' @return Margin of error
#'
#' @importFrom stats qt
#'
#' @examples
#' me(rnorm(100))
#'
#' @export
me <- function(x, conf_lev = 0.95, na.rm = TRUE) {
if (na.rm) x <- na.omit(x)
se(x) * qt(conf_lev / 2 + .5, length(x) - 1, lower.tail = TRUE)
}
#' Calculate proportion
#' @param x Input variable
#' @param na.rm If TRUE missing values are removed before calculation
#' @return Proportion of first level for a factor and of the maximum value for numeric
#' @examples
#' prop(c(rep(1L, 10), rep(0L, 10)))
#' prop(c(rep(4, 10), rep(2, 10)))
#' prop(rep(0, 10))
#' prop(factor(c(rep("a", 20), rep("b", 10))))
#'
#' @export
prop <- function(x, na.rm = TRUE) {
if (na.rm) x <- na.omit(x)
if (is.numeric(x)) {
mean(x == max(x, 1)) ## gives proportion of max value in x
} else if (is.factor(x)) {
mean(x == levels(x)[1]) ## gives proportion of first level in x
} else if (is.logical(x)) {
mean(x)
} else {
NA
}
}
#' Variance for proportion
#' @param x Input variable
#' @param na.rm If TRUE missing values are removed before calculation
#' @return Variance for proportion
#' @examples
#' varprop(c(rep(1L, 10), rep(0L, 10)))
#'
#' @export
varprop <- function(x, na.rm = TRUE) {
p <- prop(x, na.rm = na.rm)
p * (1 - p)
}
#' Standard deviation for proportion
#' @param x Input variable
#' @param na.rm If TRUE missing values are removed before calculation
#' @return Standard deviation for proportion
#' @examples
#' sdprop(c(rep(1L, 10), rep(0L, 10)))
#'
#' @export
sdprop <- function(x, na.rm = TRUE) sqrt(varprop(x, na.rm = na.rm))
#' Standard error for proportion
#' @param x Input variable
#' @param na.rm If TRUE missing values are removed before calculation
#' @return Standard error for proportion
#' @examples
#' seprop(c(rep(1L, 10), rep(0L, 10)))
#'
#' @export
seprop <- function(x, na.rm = TRUE) {
if (na.rm) x <- na.omit(x)
sqrt(varprop(x, na.rm = FALSE) / length(x))
}
#' Margin of error for proportion
#' @param x Input variable
#' @param conf_lev Confidence level. The default is 0.95
#' @param na.rm If TRUE missing values are removed before calculation
#' @return Margin of error
#'
#' @importFrom stats qnorm
#'
#' @examples
#' meprop(c(rep(1L, 10), rep(0L, 10)))
#'
#' @export
meprop <- function(x, conf_lev = 0.95, na.rm = TRUE) {
if (na.rm) x <- na.omit(x)
seprop(x) * qnorm(conf_lev / 2 + .5, lower.tail = TRUE)
}
#' Variance for the population
#' @param x Input variable
#' @param na.rm If TRUE missing values are removed before calculation
#' @return Variance for the population
#' @examples
#' varpop(rnorm(100))
#'
#' @export
varpop <- function(x, na.rm = TRUE) {
if (na.rm) x <- na.omit(x)
n <- length(x)
var(x) * ((n - 1) / n)
}
#' Standard deviation for the population
#' @param x Input variable
#' @param na.rm If TRUE missing values are removed before calculation
#' @return Standard deviation for the population
#' @examples
#' sdpop(rnorm(100))
#'
#' @export
sdpop <- function(x, na.rm = TRUE) sqrt(varpop(x, na.rm = na.rm))
#' Natural log
#' @param x Input variable
#' @param na.rm Remove missing values (default is TRUE)
#' @return Natural log of vector
#' @examples
#' ln(runif(10, 1, 2))
#'
#' @export
ln <- function(x, na.rm = TRUE) {
if (na.rm) log(na.omit(x)) else log(x)
}
#' Does a vector have non-zero variability?
#' @param x Input variable
#' @param na.rm If TRUE missing values are removed before calculation
#' @return Logical. TRUE is there is variability
#' @examples
#' summarise_all(diamonds, does_vary) %>% as.logical()
#'
#' @export
does_vary <- function(x, na.rm = TRUE) {
## based on http://stackoverflow.com/questions/4752275/test-for-equality-among-all-elements-of-a-single-vector
if (length(x) == 1L) {
FALSE
} else {
if (is.factor(x) || is.character(x)) {
length(unique(x)) > 1
} else {
abs(max(x, na.rm = na.rm) - min(x, na.rm = na.rm)) > .Machine$double.eps^0.5
}
}
}
#' Convert categorical variables to factors and deal with empty/missing values
#' @param x Categorical variable used in table
#' @return Variable with updated levels
#' @export
empty_level <- function(x) {
if (!is.factor(x)) x <- as.factor(x)
levs <- levels(x)
if ("" %in% levs) {
levs[levs == ""] <- "NA"
x <- factor(x, levels = levs)
x[is.na(x)] <- "NA"
} else if (any(is.na(x))) {
x <- factor(x, levels = unique(c(levs, "NA")))
x[is.na(x)] <- "NA"
}
x
}
#' Calculate the mode (modal value) and return a label
#'
#' @details From https://www.tutorialspoint.com/r/r_mean_median_mode.htm
#' @param x A vector
#' @param na.rm If TRUE missing values are removed before calculation
#'
#' @examples
#' modal(c("a", "b", "b"))
#' modal(c(1:10, 5))
#' modal(as.factor(c(letters, "b")))
#' modal(runif(100) > 0.5)
#'
#' @export
modal <- function(x, na.rm = TRUE) {
if (na.rm) x <- na.omit(x)
unv <- unique(x)
unv[which.max(tabulate(match(x, unv)))]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.