#' @importFrom stats ave cor dist lm median model.frame model.response na.fail pchisq pnorm prcomp predict predict.lm quantile sd terms
#' @importFrom utils tail
NULL
#' Wrapper for several methods to test if a variable is empty
#'
#' This is mainly an internal function but as other dependent packages also
#' use it sometimes and it generally is quite handy to have it is exported for
#' public use.
#'
#' @param x A variable.
#' @param false_triggers Whether \code{FALSE} should be considered as empty.
#' @return Logical telling if variable is blank.
#' @examples
#' is_blank(NULL)
#' @author Christofer \enc{Bäcklin}{Backlin}
#' @export
is_blank <- function(x, false_triggers=FALSE){
if(is.function(x)) return(FALSE) # Some of the tests below trigger warnings when used on functions
is.null(x) ||
length(x) == 0 ||
all(is.na(x)) ||
all(x=="") ||
(false_triggers && all(!x))
}
#' Detect if modeling results contains multiple procedures
#'
#' @param result Modeling results, as returned by \code{\link{evaluate}}.
#' @return Logical scalar.
#' @author Christofer \enc{Bäcklin}{Backlin}
#' @export
is_multi_procedure <- function(result){
belongs_to_method <- function(r){
!is.null(names(r)) &&
identical(sort(names(r)),
c("error", "importance", "model", "prediction"))
}
if(inherits(result, "modeling_result")){
if(all(sapply(result, belongs_to_method))){
return(FALSE)
} else if(all(sapply(result, sapply, belongs_to_method)) &&
all(sapply(result, length) == length(result[[1]]))){
return(TRUE)
}
}
stop("Invalid modeling result.")
}
#' Get names for modeling procedures
#'
#' @param procedure List of modeling procedures.
#' @return A character vector of suitable non-duplicate names.
#' @author Christofer \enc{Bäcklin}{Backlin}
name_procedure <- function(procedure){
if(inherits(procedure, "modeling_procedure"))
stop("`name_procedure` only takes lists of modeling procedures.")
if(is.null(names(procedure))){
name <- subtree(procedure, TRUE, "method", error_value=NA, warn=-1)
name <- ifelse(is.na(name), "model", name)
} else {
name <- names(procedure)
}
ave(name, name, FUN=function(x){
if(length(x) == 1) x else sprintf("%s (%i)", x, seq_along(x))
})
}
#' Replace values with something else
#'
#' @param x Variable containing NAs.
#' @param pattern The values in \code{x} to be replaced. Can also be a
#' function.
#' @param replacement The value which is to replace the values matching
#' \code{pattern}.
#' @param invert Whether to fill all values except the ones matching
#' \code{pattern}.
#' @return An imputed version of \code{x}.
#' @author Christofer \enc{Bäcklin}{Backlin}
#' @examples
#' fill(1:10, function(x) x %% 2 == 1, 0)
#' na_fill(c(1,2,NA,4,5), 3)
#' @export
fill <- function(x, pattern, replacement, invert=FALSE){
if(is.function(pattern)){
x[xor(invert, pattern(x))] <- replacement
} else {
x[xor(invert, x %in% pattern)] <- replacement
}
x
}
#' @rdname fill
#' @export
na_fill <- function(x, replacement){
fill(x, is.na, replacement)
}
#' Load a package and offer to install if missing
#'
#' If running R in interactive mode, the user is prompted for installing
#' missing packages. If running in batch mode an error is thrown.
#'
#' @param pkg Package name.
#' @param reason A status message that informs the user why the package is
#' needed.
#' @return Nothing
#' @examples
#' nice_require("base", "is required to do anything at all")
#' @author Christofer \enc{Bäcklin}{Backlin}
#' @importFrom utils install.packages
#' @export
nice_require <- function(pkg, reason){
pkg.loaded <- sapply(pkg, requireNamespace)
if(!all(pkg.loaded) && interactive()){
pkg <- pkg[!pkg.loaded]
if(missing(reason))
reason <- paste(if(length(pkg) > 1) "are" else "is", "required but not installed")
cat(sprintf("Package%s %s %s. Install now? (Y/n) ",
if(length(pkg) > 1) "s" else "",
paste("`", pkg, "`", sep="", collapse=", "),
reason))
if(grepl("^(y(es)?)?$", tolower(readline()))){
install.packages(pkg)
pkg.loaded <- sapply(pkg, requireNamespace)
if(!all(pkg.loaded))
stop("Cannot load package `%s`.", pkg[!pkg.loaded])
} else {
stop(sprintf("%s was not installed.",
paste("`", pkg, "`", sep="", collapse=", ")))
}
}
}
#' Trapezoid rule numerical integration
#'
#' Only intended for internal use.
#'
#' @param x Integrand.
#' @param y Function values.
#' @return Area under function.
#' @examples
#' x <- seq(0, pi, length.out=100)
#' trapz(x, sin(x))
#' @author Christofer \enc{Bäcklin}{Backlin}
#' @noRd
trapz <- function(x,y){
idx <- order(x)
x <- x[idx]
y <- y[idx]
idx <- !is.na(x) & !is.na(y)
x <- x[idx]
y <- y[idx]
n <- length(x)
if(n != length(y)) stop("x and y must have same length")
sum((y[-1]+y[-n])/2 * (x[-1] - x[-n]))
}
#' List all available methods
#'
#' This function searches all attached packages for methods compatible with the
#' \pkg{emil} framework.
#'
#' @param pos Location to search in, see \code{\link{ls}}.
#' @return A data frame.
#' @examples
#' list_method()
#' @author Christofer \enc{Bäcklin}{Backlin}
#' @export
list_method <- function(pos=search()){
method <- lapply(pos, function(p){
grep("^(fit|predict|importance)_", ls(p), value=TRUE)
})
data.frame(location = rep(pos, sapply(method, length)),
method = unlist(method)) %>%
extract_("method", c("plugin", "method"), c("^(fit|predict|importance)_(.*)$")) %>%
mutate_(plugin = "factor(plugin, c('fit', 'predict', 'importance'))", dummy = TRUE) %>%
spread_("plugin", "dummy") %>%
mutate_(fit = "!is.na(fit)", predict = "!is.na(predict)", importance = "!is.na(importance)")
}
#' Convert subsetting vectors.
#'
#' @param y Response vector.
#' @param subset A subsetting vector on arbitrary form.
#' @return A subsetting vector.
#' @examples
#' y <- runif(20)
#' ind <- sort(sample(20, 10))
#' identical(y[ind], y[logical_subset(ind)])
#' @author Christofer \enc{Bäcklin}{Backlin}
#' @noRd
positive_integer_subset <- function(y, subset){
# Not quite sure why this line is needed,
# but if not present the function will use subset=TRUE if subset is missing!
# Must be some funky evaluation problem...
stopifnot(!missing(y) && !missing(subset))
UseMethod("positive_integer_subset", subset)
}
#' @method positive_integer_subset default
#' @export
positive_integer_subset.default <- function(y, subset){
seq_along(y)[subset]
}
#' @method positive_integer_subset fold
#' @export
positive_integer_subset.fold <- function(y, subset){
index_fit(subset)
}
#' @noRd
logical_subset <- function(y, subset){
stopifnot(!missing(y) && !missing(subset))
UseMethod("logical_subset", subset)
}
#' @method logical_subset default
#' @export
logical_subset.default <- function(y, subset){
seq_along(y) %in% seq_along(y)[subset]
}
#' @method logical_subset fold
#' @export
logical_subset.fold <- function(y, subset){
subset > 0
}
#' Convert factors to logicals
#'
#' Factors are converted to logical vectors or matrices depending on the number
#' of levels. Ordered factors are converted to matrices where each column
#' represent a level, coded \code{TRUE} for observations that match the level
#' and \code{FALSE} otherwise.
#' Unordered factors are converted in a similar way but coded \code{TRUE} for
#' observations that match the level \emph{or a higher level}.
#' Interpred in words, the star rating example below returns a matrix containing
#' a column named \dQuote{3 stars} that contains \code{TRUE} for observations
#' with at least three stars and \code{FALSE} for observations with fewer than
#' three stars.
#'
#' @param x Factor.
#' @param base Level to consider as the basis for comparison. Can be either
#' integer or character.
#' Note that \code{base = 4} is interpreted as a level named "4",
#' but \code{base = 4L} is interpreted as the fourth level.
#' @param drop Whether to keep the base level. The base level column never holds
#' any information that cannot be deduced from the remaining columns.
#' @examples
#' # Binary factor
#' email <- factor(sample(2, 20, TRUE), labels=c("unverified", "verified"))
#' factor_to_logical(email)
#'
#' # Unordered multi-level factors
#' wine_preferences <- factor(sample(3, 20, TRUE),
#' labels=c("red", "white", "none"))
#' factor_to_logical(wine_preferences, base="none")
#'
#' fruit <- factor(sample(4, 20, TRUE),
#' labels = c("apple", "banana", "cantaloup", "durian"))
#' fruit[sample(length(fruit), 3)] <- NA
#' factor_to_logical(fruit, drop=FALSE)
#'
#' # Ordered factor
#' rating <- factor(1:5, labels = paste(1:5, "stars"), ordered=TRUE)
#' factor_to_logical(rating)
#'
#' # Ordered factor with custom base
#' tie_break <- factor(1:5,
#' labels=c("SetAlice", "AdvAlice", "Deuce", "AdvBob", "SetBob"),
#' ordered = TRUE)
#' tie_status <- as.data.frame(
#' factor_to_logical(tie_break, base="Deuce", drop=FALSE)
#' )
#' print(tie_status)
#' tie_break[tie_status$AdvAlice]
#' tie_break[tie_status$SetBob]
#' tie_break[tie_status$Deuce]
#' @author Christofer \enc{Bäcklin}{Backlin}
#' @export
factor_to_logical <- function(x, base=1L, drop=TRUE){
stopifnot(length(base) <= 1)
stopifnot(is.factor(x))
my_drop <- drop
if(length(base) == 0){
if(is.ordered(x))
stop("Ordered factors cannot be converted without a base level.")
baseL <- 1L
my_drop <- FALSE
} else if(is.integer(base)){
if(base < 1L)
stop("Invalid base level (non-positive).")
if(base > nlevels(x))
stop("Invalid base level (larger than number of levels).")
baseL <- base
} else {
if(is.numeric(base))
warning("`base` is numeric but not an integer, interpreting it as a character.")
if(!base %in% levels(x))
stop("Invalid base level.")
baseL <- match(base, levels(x))
}
levelsL <- 1:nlevels(x) - baseL
if(is.ordered(x)){
topleft <- diag(baseL-1)
topleft[upper.tri(topleft)] <- 1
bottomleft <- matrix(0, nrow = nlevels(x)-baseL+1, ncol = baseL-1)
bottomright <- diag(nlevels(x)-baseL)
bottomright[lower.tri(bottomright)] <- 1L
topright <- matrix(0, nrow = baseL, ncol = nlevels(x)-baseL)
newx <- cbind(rbind(topleft, bottomleft), 1L, rbind(topright, bottomright))
} else {
newx <- diag(nlevels(x))
}
mode(newx) <- "logical"
colnames(newx) <- levels(x)
if(my_drop) newx <- newx[, -baseL, drop=FALSE]
newx[as.integer(x), , drop=my_drop]
}
#' Get the most common value
#'
#' @param x Vector.
#' @param na.rm Whether to ignore missing values when calculating the mode.
#' Note that modes may be identified even if \code{x} contains missing values
#' as long as they are too few to affect the result.
#' @param allow_multiple Controls what is returned if \code{x} contains more
#' than one mode. If \code{TRUE} all modes are returned, if \code{FALSE}
#' \code{NA} is returned.
#' @return The most common values or values in \code{x} or \code{NA} if could
#' not be determined.
#' @examples
#' mode(mtcars$cyl)
#' mode(chickwts$feed)
#' mode(unlist(strsplit("Hello Dolly!", "")))
#'
#' # Multiple modes
#' mode(iris$Species)
#'
#' # Missing values
#' x <- rep(1:4, 4)
#' x[2:4] <- NA
#' mode(x)
#' mode(x, na.rm=TRUE)
#'
#' x <- c(rep(1:3, c(4,2,1)), NA)
#' mode(x, na.rm=FALSE)
#' @author Christofer \enc{Bäcklin}{Backlin}
#' @export
mode <- function(x, na.rm=FALSE, allow_multiple=TRUE){
if(na.rm){
count <- table(x, useNA="no")
count_max <- count == max(count)
} else {
count <- table(x, useNA="always")
na_count <- tail(count, 1)
count <- count[-length(count)]
count_max <- count == max(count)
if(any(count[!count_max] + na_count >= count[count_max]))
return(NA)
}
if(is.factor(x)){
x_mode <- factor(unname(which(count_max)), seq_len(nlevels(x)), levels(x))
} else {
x_mode <- names(count)[count_max]
if(!is.character(x)) x_mode <- as(x_mode, class(x))
}
if(!allow_multiple && length(x_mode) > 1) NA
else x_mode
}
#' Summarize a potentially long character vector to a compact form
#'
#' @param type The type of entities listed in \code{names}.
#' @param name Names of entities that should be summarized.
#' @return Character scalar.
#' @examples
#' example_string("letter", LETTERS)
#' @author Christofer \enc{Bäcklin}{Backlin}
#' @noRd
example_string <- function(type, names){
stopifnot(length(names) > 0)
sprintf("%s `%s`%s", type, names[1],
if(length(names) > 1)
sprintf(" and %i other %s%s", length(names)-1, type,
if(length(names) > 2) "s" else ""))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.