#' Print data.frame as message
#'
#' Print \code{data.frame} or \code{data.table} object as a message.
#'
#' @param x \code{data.frame} or \code{data.table} object
#' @param digits number of significant digits to be printed; defaults to 3
#' @param row_names boolean; if TRUE, row names of \code{x} are printed. Defaults to FALSE
#' @param col_names boolean; if TRUE, column names of \code{x} are printed. Defaults to TRUE
#' @return prints out the content of the data.table as a \code{message}
#' @export
df_to_message = function(x, digits = 3L, row_names = FALSE, col_names = TRUE)
{
if (!col_names)
names(x) = NULL
message(
paste(
utils::capture.output(
print(round(x, digits), row.names = row_names, digits = digits)
),
collapse = "\n"
)
)
}
#' Reorder categories according to frequency
#'
#' Takes a integer, character, or factor vector and reorder the labels according to the frequency with which they appear.
#'
#' @param x integer or character vector, or factor variable
#' @param decreasing boolean; if TRUE, orders in decreasing order, otherwise in increasing order
#' @return returns \code{x} with labels ordered according to frequencies
#' @details It is assumed that the labels of \code{x} have an inherent ordering. \code{reorder_labels} reorders the labels such that the first label is assigned to the category with the highest frequency, the second label to the second most frequent category, and so on.
#' @export
reorder_labels = function(x, decreasing = TRUE)
{
# get class of x
x_class = class(x)
if (x_class == "character") {
freq = table(x)
labs = names(freq)
l_tab = labs[order(freq, decreasing = decreasing)]
return(labs[match(x, l_tab)])
} else if (x_class == "integer") {
match(x, order(table(x), decreasing = decreasing))
} else if (x_class == "factor") {
freq = table(x)
labs = names(freq)[order(freq, decreasing = decreasing)]
return(factor(x, levels = labs))
} else {
stop("x must be either a character or integer vector, or a factor variable")
}
}
#' Round numeric object to string while keeping trailing zeros
#'
#' Wrapper function to round a numeric value (or vector) to character string (or vector) for printing purposes keeping trailing zeros
#'
#' @param x a numeric vector
#' @param digits integer; number of decimals to keep
#' @return character string/vector with \code{x} rounded to \code{digit} digits while keeping trailing zeros
#' @export
round_to_char = function(x, digits = 1L)
{
if (!is.null(dim(x)))
stop("dimension of x has to be NULL")
if (digits %% 1 != 0)
stop("digits has to be an integer value")
return(sprintf(paste0("%.", digits, "f"), round(x, digits)))
}
#' Change first character of string to upper case
#'
#' @param x string to modify
#' @param rest_to_lower boolean; if TRUE, changes all other characters to lower case
#' @return returns \code{x} with the first character changed to upper-case
upper_first_char = function(x, rest_to_lower = FALSE)
{
x1 = toupper(substr(x, 1, 1))
x2 = if (rest_to_lower) {
tolower(substr(x, 2, nchar(x)))
} else {
substr(x, 2, nchar(x))
}
return(paste0(x1, x2))
}
#' Generate factor using quantiles
#'
#' Generates a factor out of a numeric vector, where the levels are determined based on the quantiles
#'
#' @param x a numeric vector
#' @param n the number of categories to create
#' @param return_labels whether the labels containing the intervals should be returned; if \code{FALSE} a \code{numeric} variable is returned
#' @return returns a factor variable where the levels are intervals of \code{x} determined by the quantiles of \code{x}. The number of quantiles to use is determined by \code{n}.
quantilize = function(x, n, return_labels = TRUE)
{
if (!is.numeric(x))
stop("x has to be numeric")
if (n < 0)
stop("n has to be positive")
cut(
x,
quantile(
x,
probs = seq(0, 1, by = 1/n),
na.rm = TRUE
),
include.lowest = TRUE,
labels = if (return_labels) NULL else FALSE
)
}
#' Find interval containing a specific value from an \code{quantilefactor} object
#'
#' @param x a \code{factor} object created with \code{gen_quantiles}
#' @param val a \code{numeric} vector containing the value(s) to search for
#' @param return_labels if \code{TRUE}; returns the label of the quantile, otherwise the number
#' @return returns the number of the quantile to which each element in \code{x} belongs. The lowest quantile has number 1 (not 0).
#' @details function checks the pattern of the factor labels, which have to be in the form of an interval. The specific regular expression that is checked is \code{^(\\(|\\[)-*[0-9.]+(e-\\d+)*,-*[0-9.]+(e-\\d+)*(\\)|\\])$}.
find_interval = function(x, val, return_labels = FALSE)
{
if (!is.factor(x))
stop("x has to a factor")
if (!is.numeric(val))
stop("val has to be numeric")
# get levels
l = levels(x)
# check pattern
if (!all(grepl("^(\\(|\\[)-*[0-9.]+(e-\\d+)*,-*[0-9.]+(e-\\d+)*(\\)|\\])$", l)))
stop("factor levels have to be a pattern of an interval (see details in ?find_quantile)")
# drop (,[ at start and end
l = gsub("^\\(|^\\[|\\)$|\\]$", "", levels(x))
# get cutpoints for the intervals
cuts = as.numeric(unique(do.call(c, strsplit(l, ","))))
# find intervals
res = findInterval(val, cuts, left.open = TRUE)
# check values
if (any(val < min(cuts) | val > max(cuts))) {
warning("some elements of val are outside of the data range")
# adjust when values are outside of data-range
res[res == 0] = 1
res[res == length(cuts)] = length(cuts) - 1L
}
if (return_labels)
return(levels(x)[res])
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.