#### Programming helpers #####################################################
## Quicker way to get last item of vector
last <- function(x) {return(x[length(x)])}
## Just so code reads more clearly when using last(x)
first <- function(x) {return(x[1])}
check_if_zero_base <- function(x) {
# this is the default tolerance used in all.equal
tolerance <- .Machine$double.eps^0.5
# If the absolute deviation between the number and zero is less than
# the tolerance of the floating point arithmetic, then return TRUE.
# This means, to me, that I can treat the number as 0 rather than
# -3.20469e-16 or some such.
abs(x - 0) < tolerance
}
# This seems to give about a 80%-90% speed boost
check_if_zero <- Vectorize(check_if_zero_base)
# Try to anticipate which S3 will be called (sloop package should have
# something like this when it is released)
# Code adapted from G. Grothendieck's at Stack Overflow:
# https://stackoverflow.com/questions/42738851/r-how-to-find-what-s3-method-
# will-be-called-on-an-object
#' @importFrom utils .S3methods getS3method
find_S3_class <- function(generic, ..., package) {
# not going to provide function, just function name as character
# ch <- deparse(substitute(generic))
f <- X <- function(x, ...) UseMethod("X")
for (m in .S3methods(generic, envir = getNamespace(package))) {
assign(sub(generic, "X", m, fixed = TRUE), "body<-"(f, value = m))
}
char_meth <- tryCatch(X(...), error = function(e) {return(NA)})
if (is.na(char_meth)) {return(char_meth)}
# Return the stub for dispatch to getS3method as class
return(reg_match("(?<=\\.).*", char_meth, perl = TRUE))
}
# I'm sure stingr/stringi have this, but I don't want to import them
reg_match <- function(pattern, text, ignore.case = FALSE, perl = FALSE,
fixed = FALSE, useBytes = FALSE, invert = FALSE) {
matches <- gregexpr(pattern, text, ignore.case, perl, fixed, useBytes)
# If only 1 match, return just the one match rather than a list
if (length(matches) == 1) {matches <- matches[[1]]}
regmatches(text, matches, invert)
}
### Handle NA better #########################################################
# Since I accept data from users and the global environment, there may be
# missing cases including in the raw data that weren't used in the model fit.
# I can't use complete.cases since these data frames may include extra columns.
# Rather than guess at which variables were used to determine missingness
# (straightforward with lm, but could have hidden problems with glms/lmer/etc.)
# I use the model.frame features for doing so.
drop_missing <- function(model, data) {
na_action <- attr(model.frame(model), "na.action")
# If no "na.action" attribute, nothing to remove
if (is.null(na_action)) {
return(data)
}
to_remove <- names(na_action)
data <- data[rownames(data) %not% to_remove, ]
return(data)
}
### Formula helpers ##########################################################
any_transforms <- function(formula, rhs.only = TRUE) {
if (rhs.only == TRUE) {
any(all_vars(formula) %nin% rownames(attr(terms(formula), "factor")))
} else {
any(all.vars(formula) %nin% rownames(attr(terms(formula), "factor")))
}
}
which_terms <- function(formula, var) {
terms <- terms(formula)
factors <- attr(terms, "factors")
names(factors[var,] %not% 0)
}
original_terms <- function(formula) {
o <- all.vars(formula)
names(o) <- rownames(attr(terms(formula), "factors"))
o
}
# get_response <- function(formula) {
# original_terms(formula)[attr(terms(formula), "response")]
# }
# Adapted from formula.tools
two_sided <- function(x, ...) {
# from operator.tools::operators()
operators <- c("::", ":::", "@", "$", "[", "[[", ":", "+", "-", "*", "/", "^",
"%%", "%/%", "<", "<=", ">", ">=", "==", "!=", "%in%", "%!in%",
"!", "&", "&&", "|", "||", "~", "<-", "<<-", "=", "?", "%*%",
"%x%", "%o%", "%>%", "%<>%", "%T>%")
is.name(x[[1]]) && deparse(x[[1]]) %in% operators && length(x) == 3
}
# Adapted from formula.tools
one_sided <- function(x, ...) {
# from operator.tools::operators()
operators <- c("::", ":::", "@", "$", "[", "[[", ":", "+", "-", "*", "/", "^",
"%%", "%/%", "<", "<=", ">", ">=", "==", "!=", "%in%", "%!in%",
"!", "&", "&&", "|", "||", "~", "<-", "<<-", "=", "?", "%*%",
"%x%", "%o%", "%>%", "%<>%", "%T>%")
is.name(x[[1]]) && deparse(x[[1]]) %in% operators && length(x) == 2
}
# Adapted from formula.tools
get_lhs <- function(x) {
if (two_sided(x) == TRUE) {
x[[2]]
} else if (one_sided(x)) {
NULL
} else {
stop_wrap(x, "does not appear to be a one- or two-sided formula.")
}
}
is_lhs_transformed <- function(x) {
final <- as.character(deparse(get_lhs(x)))
bare_vars <- all_vars(get_lhs(x))
any(final != bare_vars)
}
# Adapted from formula.tools
get_rhs <- function(x) {
# from operator.tools::operators()
operators <- c("::", ":::", "@", "$", "[", "[[", ":", "+", "-", "*", "/", "^",
"%%", "%/%", "<", "<=", ">", ">=", "==", "!=", "%in%", "%!in%",
"!", "&", "&&", "|", "||", "~", "<-", "<<-", "=", "?", "%*%",
"%x%", "%o%", "%>%", "%<>%", "%T>%")
if (as.character(x[[1]]) %nin% operators) {
stop_wrap(x[[1]], "does not appear to be an operator.")
}
if (two_sided(x) == TRUE) {x[[3]]} else if (one_sided(x) == TRUE) {x[[2]]}
}
all_vars <- function(formula) {
if (is.name(formula) || is.call(formula)) {
all.vars(formula)
} else if (two_sided(formula)) {
c(as.character(deparse(get_lhs(formula))), all.vars(get_rhs(formula)))
} else if (one_sided(formula)) {
all.vars(get_rhs(formula))
}
}
#### Weighted helpers ########################################################
#' @title Weighted standard deviation calculation
#' @description This function calculates standard deviations with weights and
#' is a counterpart to the built-in `weighted.mean` function.
#' @param x A vector of values for which you want the standard deviation
#' @param weights A vector of weights equal in length to `x`
#' @rdname wtd.sd
#' @export
wtd.sd <- function(x, weights) {
# Get the mean
xm <- weighted.mean(x, weights, na.rm = TRUE)
# Squaring the weighted deviations and dividing by weighted N - 1
variance <- sum((weights * (x - xm)^2) / (sum(weights[!is.na(x)]) - 1), na.rm = TRUE)
# Standard deviation is sqrt(variance)
sd <- sqrt(variance)
# Return the SD
return(sd)
}
wtd.table <- function(x, weights = NULL, na.rm = TRUE) {
if (is.null(weights)) {
weights <- rep(1, length(x))
}
if (length(x) != length(weights)) {
stop("x and weights lengths must be the same")
}
if (na.rm) {
s <- !is.na(x) & !is.na(weights)
x <- x[s, drop = FALSE]
weights <- weights[s]
}
result <- tapply(weights, x, sum, simplify = TRUE)
result[is.na(result)] <- 0
as.table(result)
}
### weighted effects coding ##################################################
#' @importFrom stats contr.treatment
contr.weighted <- function(x, base = 1, weights = NULL) {
frequencies <- wtd.table(x, weights = weights)
n.cat <- length(frequencies)
# If base level is named, get the index
if (is.character(base)) {
base <- which(levels(x) == base)
}
new.contrasts <- contr.treatment(n.cat, base = base)
new.contrasts[base, ] <- -1 * frequencies[-base]/frequencies[base]
colnames(new.contrasts) <- names(frequencies[-base])
return(new.contrasts)
}
#### Regex helper ############################################################
# Taken from Hmisc
escapeRegex <- function(string) {
gsub('([.|()\\^{}+$*?]|\\[|\\])', '\\\\\\1', string)
}
### Hadley update #############################################################
# from https://stackoverflow.com/questions/13690184/update-inside-a-function-
# only-searches-the-global-environment
#' @importFrom stats update.formula
j_update <- function(mod, formula = NULL, data = NULL, offset = NULL,
weights = NULL, call.env = parent.frame(), ...) {
call <- getCall(mod)
if (is.null(call)) {
stop("Model object does not support updating (no call)", call. = FALSE)
}
term <- terms(mod)
if (is.null(term)) {
stop("Model object does not support updating (no terms)", call. = FALSE)
}
if (!is.null(data)) call$data <- data
if (!is.null(formula)) call$formula <- update.formula(call$formula, formula)
env <- attr(term, ".Environment")
# Jacob add
# if (!is.null(offset))
call$offset <- offset
# if (!is.null(weights))
call$weights <- weights
extras <- as.list(match.call())[-1]
extras <- extras[which(names(extras) %nin% c("mod", "formula", "data",
"offset", "weights",
"call.env"))]
for (i in seq_along(extras)) {
if (is.name(extras[[i]])) {
extras[[i]] <- eval(extras[[i]], envir = call.env)
}
}
existing <- !is.na(match(names(extras), names(call)))
for (a in names(extras)[existing]) call[[a]] <- extras[[a]]
if (any(!existing)) {
call <- c(as.list(call), extras[!existing])
call <- as.call(call)
}
if (is.null(call.env)) {call.env <- parent.frame()}
eval(call, env, call.env)
}
### coeftest ##################################################################
## Taken from lmtest package with changes
test_coefs <- function(x, the_vcov = NULL, df = NULL, ...) {
UseMethod("test_coefs")
}
#' @importFrom stats pnorm
# Adapted from lmtest package
test_coefs.default <- function(x, the_vcov = NULL, df = NULL, ...) {
# Grab the coefs
est <- coef(x)
# Need to check what kind of SEs user requested and generate vcov matrix
if (is.null(the_vcov)) {
the_vcov <- vcov(x) # out of the box
} else {
if (is.function(the_vcov)) {
the_vcov <- the_vcov(x, ...) # use the supplied function
} else {
the_vcov <- the_vcov # use the supplied variance-covariance matrix
}
}
# Calculate the SEs
se <- sqrt(diag(the_vcov))
# Now we need to calculate test statistics
if (!is.null(names(est)) && !is.null(names(se))) {
if (length(unique(names(est))) == length(names(est)) &&
length(unique(names(se))) == length(names(se))) {
anames <- names(est)[names(est) %in% names(se)]
est <- est[anames]
se <- se[anames]
}
}
tvals <- as.vector(est)/se
# Determine DFs
if (is.null(df)) {
df <- try(df.residual(x), silent = TRUE)
if (inherits(df, "try-error")) df <- NULL
}
if (is.null(df)) df <- 0
# Calculate the p values
if (is.finite(df) && df > 0) {
pvals <- 2 * pt(abs(tvals), df = df, lower.tail = FALSE)
cols <- c("Est.", "S.E.", "t value", "p")
} else {
pvals <- 2 * pnorm(abs(tvals), lower.tail = FALSE)
cols <- c("Est.", "S.E.", "z value", "p")
}
tbl <- cbind(est, se, tvals, pvals)
colnames(tbl) <- cols
return(tbl)
}
#' @exportS3Method NULL
test_coefs.glm <- function(x, the_vcov = NULL, df = Inf, ...) {
# Only difference is default DF
test_coefs.default(x, the_vcov = the_vcov, df = df, ...)
}
#### robust predictions #####################################################
#' @importFrom stats vcov model.frame terms delete.response
predict_rob <- function(model, .vcov = vcov(model), newdata = NULL,
se.fit = TRUE, dispersion = NULL, terms = NULL,
type = c("link", "response", "terms"),
na.action = na.pass, ...) {
if (is.null(newdata)) {newdata <- model.frame(model)}
tt <- terms(model)
Terms <- delete.response(tt)
m <- model.frame(Terms, newdata, na.action = na.action,
xlev = model$xlevels)
m.mat <- model.matrix(Terms, m, contrasts.arg = model$contrasts)
m.coef <- coef(model)
# if (inherits(object, "survreg")) {dispersion <- 1}
# if (is.null(dispersion) || dispersion == 0) {
# dispersion <- summary(object, dispersion = dispersion)$dispersion
# }
# residual.scale <- as.vector(sqrt(dispersion))
# pred <- predict.lm(object, newdata, se.fit, scale = residual.scale,
# type = ifelse(type == "link", "response", type),
# terms = terms, na.action = na.action)
offset <- rep(0, nrow(m.mat))
if (!is.null(off.num <- attr(tt, "offset"))) {
for (i in off.num) {
offset <- offset + eval(attr(tt, "variables")[[i + 1]], newdata)
}
}
if (!is.null(model$call$offset)) {
offset <- offset + eval(model$call$offset, newdata)
}
p <- model$rank
p1 <- seq_len(p)
piv <- if (p) {qr(model)$pivot[p1]}
if (p < ncol(m.mat) && !(missing(newdata) || is.null(newdata))) {
warning("prediction from a rank-deficient fit may be misleading")
}
fit <- drop(m.mat[, piv, drop = FALSE] %*% m.coef[piv])
if (!is.null(offset)) {
fit <- fit + offset
}
# fit <- as.vector(m.mat %*% m.coef)
se.fit <- sqrt(diag(m.mat %*% .vcov %*% t(m.mat)))
type <- type[1]
switch(type, response = {
se.fit <- se.fit * abs(family(model)$mu.eta(fit))
fit <- family(model)$linkinv(fit)
}, link = , terms = )
return(list(fit = fit, se.fit = se.fit))
}
#' @import broom
NULL
#' @import broom.mixed
NULL
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.