Nothing
#' Generic function for extracting the right-hand side from a model
#'
#' @keywords internal
#'
#' @param model A fitted model
#' @param \dots additional arguments passed to the specific extractor
#' @noRd
extract_rhs <- function(model, ...) {
UseMethod("extract_rhs", model)
}
#' Extract right-hand side
#'
#' Extract a data frame with list columns for the primary terms and subscripts
#' from all terms in the model
#'
#' @keywords internal
#'
#' @param model A fitted model
#'
#' @return A list with one element per future equation term. Term components
#' like subscripts are nested inside each list element. List elements with two
#' or more terms are interactions.
#' @noRd
#' @examples
#' \dontrun{
#' library(palmerpenguins)
#' mod1 <- lm(body_mass_g ~ bill_length_mm + species * flipper_length_mm, penguins)
#'
#' extract_rhs(mod1)
#' # > # A tibble: 7 x 8
#' # > term estimate ... primary subscripts
#' # > 1 (Intercept) -3341.615846 ...
#' # > 2 bill_length_mm 59.304539 ... bill_length_mm
#' # > 3 speciesChinstrap -27.292519 ... species Chinstrap
#' # > 4 speciesGentoo -2215.913323 ... species Gentoo
#' # > 5 flipper_length_mm 24.962788 ... flipper_length_mm
#' # > 6 speciesChinstrap:flipper_length_mm -3.484628 ... flipper_length_mm Chinstrap,
#' # > 7 speciesGentoo:flipper_length_mm 11.025972 ... flipper_length_mm Gentoo,
#'
#' str(extract_rhs(mod1))
#' # > Classes ‘lm’ and 'data.frame': 7 obs. of 8 variables:
#' # > $ term : chr "(Intercept)" "bill_length_mm" "speciesChinstrap" "speciesGentoo" ...
#' # > $ estimate : num -3341.6 59.3 -27.3 -2215.9 25 ...
#' # > $ std.error : num 810.14 7.25 1394.17 1328.58 4.34 ...
#' # > $ statistic : num -4.1247 8.1795 -0.0196 -1.6679 5.7534 ...
#' # > $ p.value : num 4.69e-05 5.98e-15 9.84e-01 9.63e-02 1.97e-08 ...
#' # > $ split :List of 7
#' # > ..$ : chr "(Intercept)"
#' # > ..$ : chr "bill_length_mm"
#' # > ..$ : chr "speciesChinstrap"
#' # > ..$ : chr "speciesGentoo"
#' # > ..$ : chr "flipper_length_mm"
#' # > ..$ : chr "speciesChinstrap" "flipper_length_mm"
#' # > ..$ : chr "speciesGentoo" "flipper_length_mm"
#' # > $ primary :List of 7
#' # > ..$ : chr
#' # > ..$ : chr "bill_length_mm"
#' # > ..$ : chr "species"
#' # > ..$ : chr "species"
#' # > ..$ : chr "flipper_length_mm"
#' # > ..$ : chr "species" "flipper_length_mm"
#' # > ..$ : chr "species" "flipper_length_mm"
#' # > $ subscripts:List of 7
#' # > ..$ : chr ""
#' # > ..$ : chr ""
#' # > ..$ : chr "Chinstrap"
#' # > ..$ : chr "Gentoo"
#' # > ..$ : chr ""
#' # > ..$ : Named chr "Chinstrap" ""
#' # > .. ..- attr(*, "names")= chr [1:2] "species" "flipper_length_mm"
#' # > ..$ : Named chr "Gentoo" ""
#' # > .. ..- attr(*, "names")= chr [1:2] "species" "flipper_length_mm"
#' }
#'
extract_rhs.default <- function(model, index_factors, ...) {
check_dots(...)
# Extract RHS from formula
formula_rhs <- labels(terms(formula(model)))
# Extract unique (primary) terms from formula (no interactions)
formula_rhs_terms <- formula_rhs[!grepl(":", formula_rhs)]
# Extract coefficient names and values from model
full_rhs <- broom::tidy(model)
# Split interactions split into character vectors
full_rhs$split <- strsplit(full_rhs$term, ":")
full_rhs$primary <- extract_primary_term(
formula_rhs_terms,
full_rhs$term
)
full_rhs$subscripts <- extract_all_subscripts(
full_rhs$primary,
full_rhs$split
)
if (index_factors) {
full_rhs <- distinct(full_rhs, "primary")
unique_ss <- unique(unlist(full_rhs$subscripts))
unique_ss <- unique_ss[vapply(unique_ss, nchar, FUN.VALUE = integer(1)) > 0]
replacement_ss <- letters[seq(9, (length(unique_ss) + 8))]
full_rhs$subscripts <- lapply(full_rhs$subscripts, function(x) {
out <- replacement_ss[match(x, unique_ss)]
ifelse(is.na(out), "", out)
})
}
class(full_rhs) <- c("data.frame", class(model))
full_rhs
}
#' @noRd
extract_rhs.lmerMod <- function(model, return_variances, ...) {
check_dots(...)
# Extract RHS from formula
formula_rhs <- labels(terms(formula(model)))
# Extract unique (primary) terms from formula (no interactions)
formula_rhs_terms <- formula_rhs[!grepl(":", formula_rhs)]
formula_rhs_terms <- gsub("^`?(.+)`$?", "\\1", formula_rhs_terms)
# Extract coefficient names and values from model
if(return_variances) {
full_rhs <- broom.mixed::tidy(model, scales = c("vcov", NA))
# Make the names like they are sdcor, so it doesn't break other code
full_rhs$term <- gsub("var__", "sd__", full_rhs$term)
full_rhs$term <- gsub("cov__", "cor__", full_rhs$term)
} else {
full_rhs <- broom.mixed::tidy(model)
}
full_rhs$term <- vapply(full_rhs$term, order_interaction,
FUN.VALUE = character(1)
)
full_rhs$group <- recode_groups(full_rhs)
full_rhs$original_order <- seq_len(nrow(full_rhs))
full_rhs$term <- gsub("^`?(.+)`$?", "\\1", full_rhs$term)
# Split interactions split into character vectors
full_rhs$split <- strsplit(full_rhs$term, ":")
full_rhs$primary <- lapply(full_rhs$term, function(x) "")
full_rhs$primary[full_rhs$effect == "fixed"] <- extract_primary_term(
formula_rhs_terms,
full_rhs$term[full_rhs$effect == "fixed"]
)
# make sure split and primary are in the same order
full_rhs$primary[full_rhs$effect == "fixed"] <- Map(
function(prim, splt) {
ord <- vapply(prim, function(x) grep(x, splt, fixed = TRUE), FUN.VALUE = integer(1))
names(sort(ord))
},
full_rhs$primary[full_rhs$effect == "fixed"],
full_rhs$split[full_rhs$effect == "fixed"]
)
full_rhs$subscripts <- lapply(full_rhs$term, function(x) "")
full_rhs$subscripts[full_rhs$effect == "fixed"] <- extract_all_subscripts(
full_rhs$primary[full_rhs$effect == "fixed"],
full_rhs$split[full_rhs$effect == "fixed"]
)
randvars <- pull_randvar_names(full_rhs)
fixedvars <- unique(full_rhs$term[full_rhs$effect == "fixed"])
if(any(!randvars %in% fixedvars)) {
stop(
paste(
paste0(
"{equatiomatic} only supports models where each random effect ",
"has a corresponding fixed effect. You specified the following ",
"variables as randomly varying without including the ",
"corresponding fixed effect:"),
paste0(setdiff(randvars, fixedvars), collapse = ", ")
),
call. = FALSE
)
}
group_coefs <- detect_group_coef(model, full_rhs)
all_terms <- unique(unlist(full_rhs$primary[full_rhs$effect == "fixed"]))
l1_terms <- setdiff(all_terms, names(group_coefs))
l1_terms <- setNames(rep("l1", length(l1_terms)), l1_terms)
var_levs <- c(l1_terms, group_coefs)
full_rhs$pred_level <- lapply(full_rhs$primary, function(x) {
var_levs[names(var_levs) %in% x]
})
full_rhs$pred_level[full_rhs$effect == "fixed"] <- Map(
function(predlev, splt) {
ord <- vapply(names(predlev),
function(x) grep(x, splt, fixed = TRUE),
FUN.VALUE = integer(1))
ord <- names(sort(ord))
predlev[ord]
},
full_rhs$pred_level[full_rhs$effect == "fixed"],
full_rhs$split[full_rhs$effect == "fixed"]
)
full_rhs$l1 <- vapply(full_rhs$pred_level, function(x) {
length(x) > 0 & all(x == "l1")
}, FUN.VALUE = logical(1))
full_rhs$l1 <- ifelse(full_rhs$term == "(Intercept)",
TRUE,
full_rhs$l1
)
full_rhs$crosslevel <- detect_crosslevel(
full_rhs$primary,
full_rhs$pred_level
)
class(full_rhs) <- c("data.frame", class(model))
full_rhs
}
#' @noRd
extract_rhs.glmerMod <- function(model, ...) {
extract_rhs.lmerMod(model, ...)
}
pull_randvar_names <- function(rhs) {
rows_selected <- (rhs$effect == "ran_pars") & (rhs$group != "Residual")
random <- rhs[rows_selected, "term", drop = TRUE]
random <- random[!grepl("^cor", random)]
unique(gsub("sd__", "", random))
}
#' Extract right-hand side of an forecast::Arima object
#'
#' Extract a dataframe of S/MA components
#'
#' @keywords internal
#'
#' @inheritParams extract_eq
#'
#' @return A dataframe
#' @noRd
extract_rhs.forecast_ARIMA <- function(model, ...) {
# RHS of ARIMA is the Moving Average side
# Consists of a Non-Seasonal MA (p), Seasonal MA (P), Seasonal Differencing.
# This is more than needed, but we"re being explicit for readability.
# Orders structure in Arima model: c(p, q, P, Q, m, d, D)
ords <- model$arma
names(ords) <- c("p", "q", "P", "Q", "m", "d", "D")
# Following the rest of the package.
# Pull the full model with broom::tidy
full_mdl <- broom::tidy(model)
# Filter down to only the MA terms and seasonal drift
full_rhs <- full_mdl[grepl("^s?ma", full_mdl$term), ]
# Add a Primary column and set it to the backshift operator.
full_rhs$primary <- "B"
# Get the superscript for the backshift operator.
## This is equal to the number on the term for MA
## and the number on the term * the seasonal frequency for SMA.
## Powers of 1 are replaced with an empty string.
rhs_super <- as.numeric(gsub("^s?ma", "", full_rhs$term))
rhs_super[grepl("^sma", full_rhs$term)] <- rhs_super[grepl("^sma", full_rhs$term)] * ords["m"]
rhs_super <- as.character(rhs_super)
full_rhs$superscript <- rhs_super
# The RHS (MA side) has no differencing.
# Previous versions of this function were erroneous
# in that it included a seasonal difference on this side.
# Reduce any "1" superscripts to not show the superscript
full_rhs[full_rhs$superscript == "1", "superscript"] <- ""
# Set subscripts so that create_term works later
full_rhs$subscripts <- ""
# Set the class
class(full_rhs) <- c(class(model), "data.frame")
# Explicit return
return(full_rhs)
}
order_interaction <- function(interaction_term) {
if (grepl("^cor__", interaction_term)) {
ran_part <- gsub("(.+\\.).+", "\\1", interaction_term)
interaction_term <- gsub(ran_part, "", interaction_term, fixed = TRUE)
} else if (grepl("^sd__", interaction_term)) {
ran_part <- "sd__"
interaction_term <- gsub(paste0("^", ran_part), "", interaction_term)
}
terms <- strsplit(interaction_term, ":")[[1]]
terms_ordered <- sort(terms)
out <- paste0(terms_ordered, collapse = ":")
if (exists("ran_part")) {
# check/handle if there's an interaction in the random part
# sd or cor
type <- gsub("(^.+__).+", "\\1", ran_part)
# remove type and period at end
ran <- gsub(type, "", ran_part)
ran <- gsub("\\.$", "", ran)
# handle interaction (if present)
ran <- strsplit(ran, ":")[[1]]
ran <- paste0(sort(ran), collapse = ":")
# paste it all back together
if (grepl("^cor", ran_part)) {
out <- paste0(type, ran, ".", out)
} else {
out <- paste0(type, ran, out)
}
}
out
}
recode_groups <- function(rhs) {
rhs_splt <- split(rhs, rhs$group)
rhs_splt <- rhs_splt[!grepl("Residual", names(rhs_splt))]
names_collapsed <- collapse_groups(names(rhs_splt))
intercept_vary <- vapply(rhs_splt, function(x) {
any(grepl("sd__(Intercept)", x$term, fixed = TRUE))
}, FUN.VALUE = logical(1))
check <- split(intercept_vary, names_collapsed)
# collapse these groups
collapse <- vapply(check, all, FUN.VALUE = logical(1))
collapse_term <- function(term, v) {
ifelse(grepl(term, v), collapse_groups(v), v)
}
out <- rhs$group
for (i in seq_along(collapse[!collapse])) {
out <- collapse_term(names(collapse[!collapse])[i], out)
}
out
}
collapse_groups <- function(group) {
gsub("(.+)\\.\\d\\d?$", "\\1", group)
}
order_split <- function(split, pred_level) {
if (length(pred_level) == 0) {
return(pred_level)
}
var_order <- vapply(names(pred_level), function(x) {
exact <- split %in% x
detect <- grepl(x, split)
# take exact if it's there, if not take detect
if (any(exact)) {
out <- exact
} else {
out <- detect
}
seq_along(out)[out]
}, FUN.VALUE = integer(1))
split[var_order]
}
#' Pull just the random variables
#' @param rhs output from \code{extract_rhs}
#' @keywords internal
#' @noRd
extract_random_vars <- function(rhs) {
order <- rhs[rhs$group != "Residual", ]
order <- sort(tapply(order$original_order, order$group, min))
vc <- rhs[rhs$group != "Residual" & rhs$effect == "ran_pars", ]
splt <- split(vc, vc$group)[names(order)]
lapply(splt, function(x) {
vars <- x[!grepl("cor__", x$term), ]
gsub("sd__(.+)", "\\1", vars$term)
})
}
detect_crosslevel <- function(primary, pred_level) {
mapply_lgl(function(prim, predlev) {
if (length(prim) > 1) {
if (length(prim) != length(predlev)) {
TRUE
} else if (length(unique(predlev)) != 1) {
TRUE
} else {
FALSE
}
} else {
FALSE
}
},
prim = primary,
predlev = pred_level
)
}
#### Consider refactoring the below too
detect_covar_level <- function(predictor, group) {
nm <- names(group)
if (is.numeric(predictor)) {
if (is.matrix(predictor)) {
predictor <- predictor[ ,1]
}
predictor <- round(predictor, 5)
}
v <- paste(predictor, group[, 1], sep = " _|_ ")
unique_v <- unique(v)
test <- gsub(".+\\s\\_\\|\\_\\s(.+)", "\\1", unique_v)
if (all(!duplicated(test))) {
return(nm)
}
}
detect_X_level <- function(X, group) {
lapply(X, detect_covar_level, group)
}
collapse_list <- function(x, y) {
null_x <- vapply(x, function(x) {
if (any(is.null(x))) {
return(is.null(x))
} else {
return(is.na(x))
}
}, FUN.VALUE = logical(1))
null_y <- vapply(y, function(x) {
if (any(is.null(x))) {
return(is.null(x))
} else {
return(is.na(x))
}
}, FUN.VALUE = logical(1))
y[null_x & !null_y] <- y[null_x & !null_y]
y[!null_x & null_y] <- x[!null_x & null_y]
y[!null_x & !null_y] <- x[!null_x & !null_y]
unlist(lapply(y, function(x) ifelse(is.null(x), NA_character_, x)))
}
detect_group_coef <- function(model, rhs) {
outcome <- all.vars(formula(model))[1]
d <- model@frame
random_lev_names <- names(extract_random_vars(rhs))
random_levs <- unlist(strsplit(random_lev_names, ":"))
random_levs <- gsub("^\\(|\\)$", "", random_levs)
random_levs <- unique(collapse_groups(random_levs))
random_lev_ids <- d[random_levs]
ranef_order <- vapply(random_lev_ids, function(x) {
length(unique(x))
}, FUN.VALUE = numeric(1))
ranef_order <- rev(sort(ranef_order))
random_lev_ids <- random_lev_ids[, names(ranef_order), drop = FALSE]
# Make sure there are explicit ids
random_lev_ids <- make_explicit_id(random_lev_ids)
X <- d[!(names(d) %in% c(random_levs, outcome))]
lev_assign <- vector("list", length(random_levs))
for (i in seq_along(random_lev_ids)) {
lev_assign[[i]] <- detect_X_level(X, random_lev_ids[, i, drop = FALSE])
}
levs <- Reduce(collapse_list, rev(lev_assign))
# reassign acutal names (in cases where ranef contains ":")
out <- random_lev_names[match(levs, random_levs)]
names(out) <- names(levs)
unlist(out[!is.na(out)])
}
row_paste <- function(d) {
apply(d, 1, paste, collapse = "-")
}
#' Makes the grouping variables explicit, which is neccessary for
#' detecting group-level predictors
#' @param ranef_df A data frame that includes only the random
#' effect ID variables (i.e., random_lev_ids)
#' @noRd
make_explicit_id <- function(ranef_df) {
for(i in seq_along(ranef_df)[-length(ranef_df)]) {
ranef_df[[i]] <- row_paste(ranef_df[ ,i:length(ranef_df)])
}
ranef_df
}
#' Extract the primary terms from all terms
#'
#' @inheritParams detect_primary
#'
#' @keywords internal
#'
#' @param all_terms A list of all the equation terms on the right hand side,
#' usually the result of \code{broom::tidy(model, quick = TRUE)$term}.
#' @examples
#' \dontrun{
#' primaries <- c("partyid", "age", "race")
#'
#' full_terms <- c(
#' "partyidDon't know", "partyidOther party", "age",
#' "partyidNot str democrat", "age", "raceBlack", "age", "raceBlack"
#' )
#'
#' extract_primary_term(primaries, full_terms)
#' }
#' @noRd
extract_primary_term <- function(primary_term_v, all_terms) {
detected <- lapply(all_terms, detect_primary, primary_term_v)
lapply(detected, function(pull) primary_term_v[pull])
}
#' Detect if a given term is part of a vector of full terms
#'
#' @keywords internal
#'
#' @param full_term The full name of a single term, e.g.,
#' \code{"partyidOther party"}
#' @param primary_term_v A vector of primary terms, e.g., \code{"partyid"}.
#' Usually the result of \code{formula_rhs[!grepl(":", formula_rhs)]}
#'
#' @return A logical vector the same length of \code{primary_term_v} indicating
#' whether the \code{full_term} is part of the given \code{primary_term_v}
#' element
#'
#' @examples
#' \dontrun{
#' detect_primary("partyidStrong republican", c("partyid", "age", "race"))
#' detect_primary("age", c("partyid", "age", "race"))
#' detect_primary("raceBlack", c("partyid", "age", "race"))
#' }
#' @noRd
detect_primary <- function(full_term, primary_term_v) {
if (full_term %in% primary_term_v) {
primary_term_v %in% full_term
} else {
# escape parens
primary_term_v <- gsub("(", "\\(", primary_term_v, fixed = TRUE)
primary_term_v <- gsub(")", "\\)", primary_term_v, fixed = TRUE)
splt <- strsplit(full_term, ":")[[1]]
m_logical <- vapply(primary_term_v, function(indiv_term) {
vapply(splt, function(x) grepl(paste0("^", indiv_term), x),
FUN.VALUE = logical(1))
},
logical(length(splt))
)
if (is.null(dim(m_logical))) {
return(m_logical)
}
apply(m_logical, 2, any)
}
}
#' Extract all subscripts
#'
#' @keywords internal
#'
#' @param primary_list A list of primary terms
#' @param full_term_list A list of full terms
#'
#' @return A list with the subscripts. If full term has no subscript,
#' returns \code{""}.
#'
#' @examples
#' \dontrun{
#' p_list <- list(
#' "partyid",
#' c("partyid", "age"),
#' c("age", "race"),
#' c("partyid", "age", "race")
#' )
#'
#' ft_list <- list(
#' "partyidNot str republican",
#' c("partyidInd,near dem", "age"),
#' c("age", "raceBlack"),
#' c("partyidInd,near dem", "age", "raceBlack")
#' )
#'
#' extract_all_subscripts(p_list, ft_list)
#' }
#' @noRd
extract_all_subscripts <- function(primary_list, full_term_list) {
Map(extract_subscripts, primary_list, full_term_list)
}
#' Extract the subscripts from a given term
#'
#' @keywords internal
#'
#' @param primary A single primary term, e.g., \code{"partyid"}
#' @param full_term_v A vector of full terms, e.g.,
#' \code{c("partyidDon't know", "partyidOther party"}. Can be of length 1.
#' @examples
#' \dontrun{
#' extract_subscripts("partyid", "partyidDon't know")
#' extract_subscripts(
#' "partyid",
#' c(
#' "partyidDon't know", "partyidOther party",
#' "partyidNot str democrat"
#' )
#' )
#' }
#' @noRd
extract_subscripts <- function(primary, full_term_v) {
out <- switch(as.character(length(primary)),
"0" = "",
"1" = gsub(primary, "", full_term_v, fixed = TRUE),
mapply_chr(function(x, y) gsub(x, "", y, fixed = TRUE),
x = primary,
y = full_term_v
)
)
out
}
#' Generic function for wrapping the RHS of a model equation in something, like
#' how the RHS of probit is wrapped in φ()
#'
#' @keywords internal
#'
#' @param model A fitted model
#' @param tex The TeX version of the RHS of the model (as character), built as
#' \code{rhs_combined} or \code{eq_raw$rhs} in \code{extract_eq()}
#' @param \dots additional arguments passed to the specific extractor
#' @noRd
wrap_rhs <- function(model, tex, ...) {
UseMethod("wrap_rhs", model)
}
#' @keywords internal
#' @noRd
wrap_rhs.default <- function(model, tex, ...) {
return(tex)
}
#' @keywords internal
#' @noRd
wrap_rhs.glm <- function(model, tex, ...) {
if (model$family$link == "probit") {
rhs <- probitify(tex)
} else {
rhs <- tex
}
return(rhs)
}
#' @keywords internal
#' @noRd
wrap_rhs.polr <- function(model, tex, ...) {
if (model$method == "probit") {
rhs <- probitify(tex)
} else {
rhs <- tex
}
return(rhs)
}
#' @keywords internal
#' @noRd
wrap_rhs.clm <- function(model, tex, ...) {
if (model$info$link == "probit") {
rhs <- probitify(tex)
} else {
rhs <- tex
}
return(rhs)
}
#' @keywords internal
#' @noRd
probitify <- function(tex) {
# Replace existing beginning-of-line \quad space with `\\qquad\` to account for \Phi
tex <- gsub("&\\\\quad", "&\\\\qquad\\\\", tex)
# It would be cool to use \left[ and \right] someday, but they don't work when
# the equation is split across multiple lines (see
# https://tex.stackexchange.com/q/21290/11851)
paste0("\\Phi[", tex, "]")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.