Nothing
#' Tidying methods for mixed effects models
#'
#' These methods tidy the coefficients of \code{lme4::lmer} and \code{lme4::glmer}
#' models (i.e., \code{merMod} objects). Methods are also provided for \code{allFit}
#' objects.
#'
#' @aliases glance.allFit
#' @aliases tidy.allFit
#' @param x An object of class \code{merMod}, such as those from \code{lmer},
#' \code{glmer}, or \code{nlmer}
#' @param ... Additional arguments (passed to \code{confint.merMod} for \code{tidy}; \code{augment_columns} for \code{augment}; ignored for \code{glance})
#'
#' @return All tidying methods return a \code{data.frame} without rownames.
#' The structure depends on the method chosen.
#'
#' @name lme4_tidiers
#'
#' @examples
#'
#' if (require("lme4")) {
#' ## original model
#' \dontrun{
#' lmm1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
#' }
#' ## load stored object
#' load(system.file("extdata", "lme4_example.rda", package="broom.mixed"))
#' (tt <- tidy(lmm1))
#' tidy(lmm1, effects = "fixed")
#' tidy(lmm1, effects = "fixed", conf.int=TRUE)
#' tidy(lmm1, effects = "fixed", conf.int=TRUE, conf.method="profile")
#' ## lmm1_prof <- profile(lmm1) # generated by extdata/runexamples
#' tidy(lmm1, conf.int=TRUE, conf.method="profile", profile=lmm1_prof)
#' ## conditional modes (group-level deviations from population-level estimate)
#' tidy(lmm1, effects = "ran_vals", conf.int=TRUE)
#' ## coefficients (group-level estimates)
#' (rcoef1 <- tidy(lmm1, effects = "ran_coefs"))
#' if (require(tidyr) && require(dplyr)) {
#' ## reconstitute standard coefficient-by-level table
#' spread(rcoef1,key=term,value=estimate)
#' ## split ran_pars into type + term; sort fixed/sd/cor
#' (tt %>% separate(term,c("type","term"),sep="__",fill="left")
#' %>% arrange(!is.na(type),desc(type)))
#' }
#' head(augment(lmm1, sleepstudy))
#' glance(lmm1)
#'
#' glmm1 <- glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
#' data = cbpp, family = binomial)
#' tidy(glmm1)
#' tidy(glmm1,exponentiate=TRUE)
#' tidy(glmm1, effects = "fixed")
#' ## suppress warning about influence.merMod
#' head(suppressWarnings(augment(glmm1, cbpp)))
#' glance(glmm1)
#'
#' startvec <- c(Asym = 200, xmid = 725, scal = 350)
#' nm1 <- nlmer(circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym|Tree,
#' Orange, start = startvec)
#' ## suppress warnings about var-cov matrix ...
#' op <- options(warn=-1)
#' tidy(nm1)
#' tidy(nm1, effects = "fixed")
#' options(op)
#' head(augment(nm1, Orange))
#' glance(nm1)
#' detach("package:lme4")
#' }
#' if (require("lmerTest")) {
#' lmm1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
#' tidy(lmm1)
#' glance(lmm1)
#' detach("package:lmerTest") # clean up
#' }
NULL
#' @export
confint.rlmerMod <- function(object, parm, level,
method = "Wald", ...) {
cc <- class(object)
class(object) <- "merMod"
if (method != "Wald") {
warning("only Wald method implemented for rlmerMod objects")
}
return(confint(object, parm = parm, level = level, method = "Wald", ...))
}
## FIXME: relatively trivial/only used in one place, could
## probably be eliminated? OR move to utils (also in glmmTMB?)
fix_ran_vals <- function(g) {
term <- level <- estimate <- NULL
r <- (g
%>% tibble::rownames_to_column("level")
%>% gather(term, estimate, -level)
)
return(r)
}
#' @rdname lme4_tidiers
#'
#' @param debug print debugging output?
#' @param effects A character vector including one or more of "fixed" (fixed-effect parameters); "ran_pars" (variances and covariances or standard deviations and correlations of random effect terms); "ran_vals" (conditional modes/BLUPs/latent variable estimates); or "ran_coefs" (predicted parameter values for each group, as returned by \code{\link[lme4]{coef.merMod}})
#' @param conf.int whether to include a confidence interval
#' @param conf.level confidence level for CI
#' @param conf.method method for computing confidence intervals (see \code{lme4::confint.merMod})
#' @param scales scales on which to report the variables: for random effects, the choices are \sQuote{"sdcor"} (standard deviations and correlations: the default if \code{scales} is \code{NULL}) or \sQuote{"vcov"} (variances and covariances). \code{NA} means no transformation, appropriate e.g. for fixed effects.
#' @param exponentiate whether to exponentiate the fixed-effect coefficient estimates and confidence intervals (common for logistic regression); if \code{TRUE}, also scales the standard errors by the exponentiated coefficient, transforming them to the new scale
#' @param exponentiate_ran_coefs whether to exponentiate the predicted paramater values for each group
#' @param ran_prefix a length-2 character vector specifying the strings to use as prefixes for self- (variance/standard deviation) and cross- (covariance/correlation) random effects terms
#' @param profile pre-computed profile object, for speed when using \code{conf.method="profile"}
#' @param ddf.method the method for computing the degrees of freedom and t-statistics (only applicable when using the \pkg{lmerTest} package: see \code{\link[lmerTest]{summary.lmerModLmerTest}}
#'
#' @return \code{tidy} returns one row for each estimated effect, either
#' with groups depending on the \code{effects} parameter.
#' It contains the columns
#' \item{group}{the group within which the random effect is being estimated: \code{"fixed"} for fixed effects}
#' \item{level}{level within group (\code{NA} except for modes)}
#' \item{term}{term being estimated}
#' \item{estimate}{estimated coefficient}
#' \item{std.error}{standard error}
#' \item{statistic}{t- or Z-statistic (\code{NA} for modes)}
#' \item{p.value}{P-value computed from t-statistic (may be missing/NA)}
#'
#' @importFrom dplyr mutate bind_rows bind_cols
#' @importFrom tibble rownames_to_column
#' @importFrom tidyr gather spread
#' @importFrom purrr map
#' @importFrom nlme VarCorr ranef
#' @importFrom methods is selectMethod
#' @importFrom stats cov2cor
#' @export
tidy.merMod <- function(x, effects = c("ran_pars", "fixed"),
scales = NULL, ## c("sdcor","vcov",NA),
exponentiate = FALSE,
exponentiate_ran_coefs = FALSE,
ran_prefix = NULL,
conf.int = FALSE,
conf.level = 0.95,
conf.method = "Wald",
ddf.method = NULL,
profile = NULL,
debug = FALSE,
...) {
## R CMD check false positives
variable <- level <- term <- estimate <- std.error <- grpvar <-
.id <- grp <- condval <- condsd <- NULL
if(is.null(ddf.method)) {
ddf.method <- if (inherits(x,"lmerModLmerTest")) "Satterthwaite" else NULL
}
effect_names <- c("ran_pars", "fixed", "ran_vals", "ran_coefs")
if (!is.null(scales)) {
if (length(scales) != length(effects)) {
stop(
"if scales are specified, values (or NA) must be provided ",
"for each effect"
)
}
}
if (length(miss <- setdiff(effects, effect_names)) > 0) {
stop("unknown effect type ", miss)
}
if (conf.int && conf.method == "profile" && !is.null(profile)) {
p <- profile
} else {
p <- x
}
ret_list <- list()
if ("fixed" %in% effects) {
## return tidied fixed effects
## not quite sure why implicit/automatic method dispatch doesn't work,
## but we seem to need this to get the right summary for merModLmerTest
## objects ...
sum_fun <- selectMethod("summary", class(x))
if (inherits(x,"lmerModLmerTest")) {
ss <- sum_fun(x, ddf=ddf.method)
} else {
ss <- sum_fun(x)
}
ret <- stats::coef(ss) %>%
data.frame(check.names = FALSE) %>%
rename_regex_match()
if (debug) {
cat("output from coef(summary(x)):\n")
print(coef(ss))
}
## need to save rownames before dplyr (mutate(), bind_cols())
## destroys them ...
ret <- tibblify(ret)
if (conf.int) {
if (is(x, "merMod") || is(x, "rlmerMod")) {
cifix <- cifun(p, parm = "beta_", method = conf.method,
level = conf.level, ddf.method = ddf.method, ...)
} else {
## ?? for glmmTMB? check ...
cifix <- cifun(p, level = conf.level, ...)
}
ret <- dplyr::bind_cols(ret, cifix)
}
ran_effs <- sprintf("ran_%s", c("pars", "modes", "coefs"))
## if (any(purrr::map_lgl(ran_effs, ~. %in% effects))) {
## add group="fixed" to tidy table for fixed effects
## ret <- mutate(ret,group="fixed")
## }
if (exponentiate) {
vv <- intersect(c("estimate", "conf.low", "conf.high"), names(ret))
ret <- (ret
%>% mutate_at(vars(vv), ~exp(.))
%>% mutate(across(std.error, ~ . * estimate))
)
}
ret_list$fixed <- reorder_frame(ret)
}
if ("ran_pars" %in% effects) {
if (is.null(scales)) {
rscale <- "sdcor"
} else {
rscale <- scales[effects == "ran_pars"]
}
if (!rscale %in% c("sdcor", "vcov")) {
stop(sprintf("unrecognized ran_pars scale %s", sQuote(rscale)))
}
vc <- VarCorr(x)
if (!is(x, "merMod") && grepl("^VarCorr", class(vc)[1])) {
if (!is(x, "rlmerMod")) {
## hack: attempt to augment glmmADMB (or other)
## values so we can use as.data.frame.VarCorr.merMod
vc <- lapply(
vc,
function(x) {
attr(x, "stddev") <- sqrt(diag(x))
attr(x, "correlation") <- stats::cov2cor(x)
x
}
)
attr(vc, "useScale") <- (stats::family(x)$family == "gaussian")
}
class(vc) <- "VarCorr.merMod"
}
## n.b. use order="lower.tri" here so that term order matches
## that returned by confint() !
ret <- as.data.frame(vc, order="lower.tri")
## purrr::map_at?
ret[] <- lapply(ret, function(x) if (is.factor(x)) {
as.character(x)
} else {
x
} )
if (is.null(ran_prefix)) {
ran_prefix <- switch(rscale,
vcov = c("var", "cov"),
sdcor = c("sd", "cor")
)
}
## don't try to assign as rowname (non-unique anyway),
## make it directly into a term column
ret[["term"]] <- apply(ret[c("var1", "var2")], 1,
ran_pars_name,
ran_prefix = ran_prefix
)
## keep only desired term, rename
ret <- setNames(
ret[c("grp", "term", rscale)],
c("group", "term", "estimate")
)
## these are in 'lower.tri' order, need to make sure this
## is matched in as.data.frame() below
if (conf.int) {
ciran <- cifun(p, parm = "theta_", method = conf.method, level = conf.level, ...)
ret <- data.frame(ret, ciran, stringsAsFactors = FALSE)
}
ret_list$ran_pars <- ret
}
if ("ran_vals" %in% effects) {
## fix each group to be a tidy data frame
ret <- (ranef(x, condVar = TRUE)
%>% as.data.frame(stringsAsFactors = FALSE)
%>% dplyr::rename(
group = grpvar, level = grp,
estimate = condval, std.error = condsd
)
%>% dplyr::mutate_if(is.factor, as.character)
)
if (conf.int) {
if (conf.method != "Wald") {
stop("only Wald CIs available for conditional modes")
}
mult <- qnorm((1 + conf.level) / 2)
ret <- ret %>% mutate(
conf.low = estimate - mult * std.error,
conf.high = estimate + mult * std.error
)
}
ret_list$ran_vals <- ret
}
if ("ran_coefs" %in% effects) {
ret <- coef(x) %>%
purrr::map(fix_ran_vals) %>%
bind_rows(.id = "group")
if (conf.int) {
warning("CIs not available for random-effects coefficients: returning NA")
ret <- ret %>% mutate(conf.low = NA, conf.high = NA)
}
if (exponentiate_ran_coefs) {
rc <- intersect("estimate", names(ret))
ret <- (ret %>% mutate_at(vars(rc), ~exp(.)))
}
ret_list$ran_coefs <- ret
}
if (exponentiate_ran_coefs) {
if(!"ran_coefs" %in% effects) {
warning("Exponentiated random coefficients were specified, but random coefficients were not requested. Not returning random coefficients.")
}
}
ret <- (ret_list
%>%
dplyr::bind_rows(.id = "effect")
%>%
as_tibble()
%>%
reorder_cols()
)
return(ret)
}
#' @rdname lme4_tidiers
#' @export
tidy.rlmerMod <- tidy.merMod
#' @rdname lme4_tidiers
#'
#' @param data original data this was fitted on; if not given this will
#' attempt to be reconstructed
#' @param newdata new data to be used for prediction; optional
#'
#' @template augment_NAs
#'
#' @return \code{augment} returns one row for each original observation,
#' with columns (each prepended by a .) added. Included are the columns
#' \item{.fitted}{predicted values}
#' \item{.resid}{residuals}
#' \item{.fixed}{predicted values with no random effects}
#'
#' Also added for "merMod" objects, but not for "mer" objects,
#' are values from the response object within the model (of type
#' \code{lmResp}, \code{glmResp}, \code{nlsResp}, etc). These include \code{".mu",
#' ".offset", ".sqrtXwt", ".sqrtrwt", ".eta"}.
#'
#' @importFrom broom augment augment_columns
#' @importFrom dplyr bind_cols
#' @export
augment.merMod <- function(x, data = stats::model.frame(x), newdata, ...) {
# move rownames if necessary
if (missing(newdata)) {
newdata <- NULL
}
ret <- suppressMessages(augment_columns(x, data, newdata, se.fit = NULL, ...))
# add predictions with no random effects (population means)
predictions <- stats::predict(x, re.form = NA)
# some cases, such as values returned from nlmer, return more than one
# prediction per observation. Not clear how those cases would be tidied
if (length(predictions) == nrow(ret)) {
ret$.fixed <- predictions
}
# columns to extract from resp reference object
# these include relevant ones that could be present in lmResp, glmResp,
# or nlsResp objects
respCols <- c(
"mu", "offset", "sqrtXwt", "sqrtrwt", "weights",
"wtres", "gam", "eta"
)
cols <- lapply(respCols, function(cc) x@resp[[cc]])
names(cols) <- paste0(".", respCols)
## remove too-long fields and empty fields
n_vals <- vapply(cols,length,1L)
min_n <- min(n_vals[n_vals>0])
cols <- dplyr::bind_cols(cols[n_vals==min_n])
cols <- insert_NAs(cols, ret)
if (length(cols) > 0) {
ret <- dplyr::bind_cols(ret, cols)
}
return(unrowname(ret))
}
#' @rdname lme4_tidiers
#'
#' @return \code{glance} returns one row with the columns
#' \item{nobs}{the number of observations}
#' \item{sigma}{the square root of the estimated residual variance}
#' \item{logLik}{the data's log-likelihood under the model}
#' \item{AIC}{the Akaike Information Criterion}
#' \item{BIC}{the Bayesian Information Criterion}
#' \item{deviance}{deviance}
#'
#' @rawNamespace if(getRversion()>='3.3.0') importFrom(stats, sigma) else importFrom(lme4,sigma)
#' @importFrom broom glance
#' @export
glance.merMod <- function(x, ...) {
deviance <- NULL ## false-positive code checks
ff <- finish_glance(x = x)
if (lme4::isREML(x)) ff <- rename(ff, REMLcrit = deviance)
return(ff)
}
##' Augmentation for random effects (for caterpillar plots etc.)
##'
##' @param x ranef (conditional mode) information from an lme4 fit, using \code{ranef(.,condVar=TRUE)}
##' @param ci.level level for confidence intervals
##' @param reorder reorder levels by conditional mode values?
##' @param order.var numeric or character: which variable to use for ordering levels?
##' @param \dots additional arguments (unused: for generic consistency)
##' @examples
##' if (require("lme4")) {
##' load(system.file("extdata","lme4_example.rda",package="broom.mixed"))
##' rr <- ranef(lmm1,condVar=TRUE)
##' aa <- broom::augment(rr)
##' ## Q-Q plot:
##' if (require(ggplot2) && require(dplyr)) {
##' g0 <- ggplot(aa,aes(estimate,qq,xmin=lb,xmax=ub))+
##' geom_errorbarh(height=0)+
##' geom_point()+facet_wrap(~variable,scale="free_x")
##' ## regular caterpillar plot:
##' g1 <- ggplot(aa,aes(estimate,level,xmin=lb,xmax=ub))+
##' geom_errorbarh(height=0)+
##' geom_vline(xintercept=0,lty=2)+
##' geom_point()+facet_wrap(~variable,scale="free_x")
##' ## emphasize extreme values
##' aa2 <- group_by(aa,grp,level)
##' aa3 <- mutate(aa2, keep=any(estimate/std.error>2))
##' ## Update caterpillar plot with extreme levels highlighted
##' ## (highlight all groups with *either* extreme intercept *or*
##' ## extreme slope)
##' ggplot(aa3, aes(estimate,level,xmin=lb,xmax=ub,colour=factor(keep)))+
##' geom_errorbarh(height=0)+
##' geom_vline(xintercept=0,lty=2)+
##' geom_point()+facet_wrap(~variable,scale="free_x")+
##' scale_colour_manual(values=c("black","red"), guide=FALSE)
##' }
##' }
##' @importFrom stats ppoints
##' @export
augment.ranef.mer <- function(x,
ci.level = 0.9,
reorder = TRUE,
order.var = 1, ...) {
variable <- level <- estimate <- std.error <- grpvar <-
grp <- condval <- condsd <- NULL
tmpf <- function(z) {
if (is.character(order.var) && !order.var %in% names(z)) {
order.var <- 1
warning("order.var not found, resetting to 1")
}
## would use plyr::name_rows, but want levels first
zz <- data.frame(level = rownames(z), z, check.names = FALSE)
if (reorder) {
## if numeric order var, add 1 to account for level column
ov <- if (is.numeric(order.var)) order.var + 1 else order.var
zz$level <- reorder(zz$level, zz[, order.var + 1], FUN = identity)
}
## Q-Q values, for each column separately
qq <- c(apply(z, 2, function(y) {
qnorm(stats::ppoints(nrow(z)))[order(order(y))]
}))
rownames(zz) <- NULL
pv <- attr(z, "postVar")
cols <- 1:(dim(pv)[1])
se <- unlist(lapply(cols, function(i) sqrt(pv[i, i, ])))
## n.b.: depends on explicit column-major ordering of se/melt
zzz <- zz %>%
tidyr::pivot_longer(-level, values_to = "estimate", names_to = "variable") %>%
dplyr::arrange(variable, level)
zzz <- cbind(zzz, qq = qq, std.error = se)
## reorder columns:
subset(zzz, select = c(variable, level, estimate, qq, std.error))
}
dd <- purrr::map_dfr(x, tmpf, .id = "grp")
ci.val <- -qnorm((1 - ci.level) / 2)
transform(dd,
## p=2*pnorm(-abs(estimate/std.error)), ## 2-tailed p-val
lb = estimate - ci.val * std.error,
ub = estimate + ci.val * std.error
)
}
## experimental
##' @export
tidy.gamm4 <- function(x, ...) {
## gamm4 returns an *unclassed* list of length two
## (mer, gam)
r <- tidy(x$mer, ...)
r$term <- gsub("^X", "", r$term)
return(r)
}
##' @export
augment.gamm4 <- function(x, ...) {
return(augment(x$mer, ...))
}
##' @export
glance.gamm4 <- function(x, ...) {
return(glance(x$mer, ...))
}
##' @export
tidy.lmList4 <- function(x, conf.int = FALSE,
conf.level = 0.95, ...) {
cols <- estimate <- std.error <- NULL ## R CMD check false positives
ss <- summary(x)$coefficients
names(dimnames(ss)) <- c("group","cols","terms")
# flatten results cube
tmp <- list()
for (i in 1:dim(ss)[3]) {
tmp[[i]] <- ss[, , i, drop=TRUE] %>%
as.data.frame %>%
tibble::rownames_to_column(var = "group") %>%
rename_regex_match() %>%
dplyr::mutate(`terms` = dimnames(ss)$terms[i])
}
tmp <- dplyr::bind_rows(tmp)
tmp <- tmp[, unique(c("group", "terms", sort(colnames(tmp))))]
tmp <- tmp[order(tmp$group, tmp$terms),]
ret <- tibble::as_tibble(tmp)
if (conf.int) {
qq <- qnorm((1+conf.level)/2)
ret <- (ret %>%
mutate(conf.low=estimate-qq*std.error,
conf.high=estimate+qq*std.error)
)
## don't think reorder_cols is necessary ...?
}
return(ret)
}
#' @export
tidy.allFit <- function(x, ...) {
bad <- purrr::map_lgl(x, is, "error")
purrr::map_dfr(x[!bad], tidy, ..., .id = "optimizer")
}
##' @importFrom purrr possibly
##' @export
glance.allFit <- function(x, ...) {
bad <- purrr::map_lgl(x, is, "error")
if (all(bad)) stop("all models bad")
## find first non-bad and fill with NA values
dummy <- glance(x[!bad][[1]]) %>% mutate(across(everything(), ~ NA))
res <- (purrr::map_dfr(x, purrr::possibly(glance, ..., otherwise = dummy),
.id = "optimizer")
## compute relative negative log-likelihood
%>% mutate(NLL_rel = ifelse(is.na(logLik), Inf, max(logLik, na.rm=TRUE) - logLik))
)
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.