##############################################################################
# Copyright (c) 2012-2017 Russell V. Lenth #
# #
# This file is part of the emmeans package for R (*emmeans*) #
# #
# *emmeans* is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 2 of the License, or #
# (at your option) any later version. #
# #
# *emmeans* is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with R and *emmeans*. If not, see #
# <https://www.r-project.org/Licenses/> and/or #
# <http://www.gnu.org/licenses/>. #
##############################################################################
#' Support functions for model extensions
#'
#' This documents some functions and methods that may be useful to package
#' developers wishing to add support for \pkg{emmeans} for their model objects.A user
#' or package developer may add \pkg{emmeans} support for a model
#' class by writing \code{recover_data} and \code{emm_basis} methods
#' for that class. (Users in need for a quick way to obtain results for a model
#' that is not supported may be better served by the \code{\link{qdrg}} function.)
#' There are several other exported functions that may be useful. See the
#' "xtending" vignette for more details.
#'
## #' @rdname extending-emmeans
#' @name extending-emmeans
#' @param object An object of the same class as is supported by a new method.
#' @param ... Additional parameters that may be supported by the method.
#'
#' @section Details:
#' To create a reference grid, the \code{ref_grid} function needs to reconstruct
#' the data used in fitting the model, and then obtain a matrix of linear
#' functions of the regression coefficients for a given grid of predictor
#' values. These tasks are performed by calls to \code{recover_data} and
#' \code{emm_basis} respectively. A vignette giving details and examples
#' is available via \href{../doc/xtending.html}{vignette("xtending", "emmeans")}
#'
#' To extend \pkg{emmeans}'s support to additional model types, one need only
#' write S3 methods for these two functions. The existing methods serve as
#' helpful guidance for writing new ones. Most of the work for
#' \code{recover_data} can be done by its method for class \code{"call"},
#' providing the \code{terms} component and \code{na.action} data as additional
#' arguments. Writing an \code{emm_basis} method is more involved, but the
#' existing methods (e.g., \code{emmeans:::emm_basis.lm}) can serve as models.
#' Certain \code{recover_data} and \code{emm_basis} methods are exported from
#' \pkg{emmeans}. (To find out, do \code{methods("recover_data")}.) If your
#' object is based on another model-fitting object, it
#' may be that all that is needed is to call one of these exported methods and
#' perhaps make modifications to the results. Contact the developer if you need
#' others of these exported.
#'
#' If the model has a multivariate response, \code{bhat} needs to be
#' \dQuote{flattened} into a single vector, and \code{X} and \code{V} must be
#' constructed consistently.
#'
#' In models where a non-full-rank result is possible (often, you can tell by
#' seeing if there is a \code{singular.ok} argument in the model-fitting
#' function), \code{\link{summary.emmGrid}} and its relatives check the
#' estimability of each
#' prediction, using the \code{\link[estimability]{nonest.basis}} function in
#' the \pkg{estimability} package.
#'
#' The models already supported are detailed in \href{../doc/models.html}{the
#' "models" vignette}. Some packages may provide additional \pkg{emmeans}
#' support for its object classes.
#'
#'
#' @return The \code{recover_data} method must return a \code{\link{data.frame}}
#' containing all the variables that appear as predictors in the model,
#' and attributes \code{"call"}, \code{"terms"}, \code{"predictors"},
#' and \code{"responses"}. (\code{recover_data.call} will
#' provide these attributes.)
#'
#' @note Without an explicit \code{data} argument, \code{recover_data} returns
#' the \emph{current version} of the dataset. If the dataset has changed
#' since the model was fitted, then this will not be the data used to fit
#' the model. It is especially important to know this in simulation studies
#' where the data are randomly generated or permuted, and in cases where
#' several datasets are processed in one step (e.g., using \code{dplyr}).
#' In those cases, users should be careful to provide the actual data
#' used to fit the model in the \code{data} argument.
#'
#' @seealso \href{../doc/xtending.html}{Vignette on extending emmeans}
#'
#' @export
#' @order 1
recover_data = function(object, ...) {
# look for outside methods first
for (cl in .chk.cls(object)) {
rd <- .get.outside.method("recover_data", cl)
if(!is.null(rd))
return(rd(object, ...))
}
## look for an inside method
for (cl in class(object)) {
mth = utils::getAnywhere(paste("recover_data", cl, sep = "."))
if (length(mth$objs) > 0)
return((mth$objs[[1]])(object, ...))
}
UseMethod("recover_data") ## This call has to be here to establish recover_data as a generic
}
# get classes that are OK for external code to modify
# We don't allow overriding certain anchor classes,
# nor ones in 3rd place or later in inheritance
.chk.cls = function(object) {
sacred = c("call", "lm", "glm", "mlm", "aovlist", "lme", "qdrg")
setdiff(head(class(object), 2), sacred)
}
### My internal method dispatch -- we prefer outside methods
.get.outside.method = function(generic, cls) {
mth = utils::getAnywhere(paste(generic, cls, sep = "."))
from = sapply(strsplit(mth[[3]], "[ :]"), function(x) rev(x)[1])
if (length(from) == 0)
return (NULL)
if(any(outside <- (from != "emmeans")))
mth[which(outside)[1]]
else
NULL
}
#--------------------------------------------------------------
### call' objects
# This recover_data method serves as the workhorse for the others
# For model objects, call this with the object's call and its terms component
# Late addition: if data is non-null, use it in place of recovered data
# Later addition: na.action arg req'd - vector of row indexes removed due to NAs
# na.action is ignored when data is non-NULL
#' @rdname extending-emmeans
#' @param trms The \code{\link{terms}} component of \code{object} (typically with
#' the response deleted, e.g. via \code{\link{delete.response}})
#' @param na.action Integer vector of indices of observations to ignore; or
#' \code{NULL} if none
#' @param data Data frame. Usually, this is \code{NULL}. However, if non-null,
#' this is used in place of the reconstructed dataset. It must have all of the
#' predictors used in the model, and any factor levels must match those used
#' in fitting the model.
#' @param params Character vector giving the names of any variables in the model
#' formula that are \emph{not} predictors. For example, a spline model may involve
#' a local variable \code{knots} that is not a predictor, but its value is
#' needed to fit the model. Names of parameters not actually used are harmless,
#' and the default value \code{"pi"} (the only numeric constant in base R)
#' is provided in case the model involves it. An example involving splines
#' may be found at \url{https://github.com/rvlenth/emmeans/issues/180}.
#' @param frame Optional \code{data.frame}. Many model objects contain the
#' model frame used when fitting the model. In cases where there are no
#' predictor transformations, this model frame has all the original predictor
#' values and so is usable for recovering the data. Thus, if \code{frame} is
#' non-missing and \code{data} is \code{NULL}, a check is made on \code{trms}
#' and if there are no function calls, we use \code{data = frame}. This
#' can be helpful because it provides a modicum of security against the
#' possibility that the original data used when fitting the model has been
#' altered or removed.
#' @param pwts Optional vector of prior weights. Typically, this may be obtained
#' from the fitted \code{model} via \code{weights(model)}. If this is provided,
#' it is used to set weights as long as it is non-\code{NULL} and the same length
#' as the number of rows of the data.
#' @param addl.vars Character value or vector specifying additional predictors
#' to include in the reference grid. These must be names of variables that
#' exist, or you will get an error.
#' This may be useful if you need to do
#' additional computations later on that depend on these variables; e.g.,
#' bias adjustments for random slopes of variables not among the fixed predictors.
#'
#' @exportS3Method recover_data call
#' @export
#' @order 2
recover_data.call = function(object, trms, na.action, data = NULL,
params = "pi", frame, pwts, addl.vars, ...) {
fcall = object # because I'm easily confused
vars = setdiff(.all.vars(trms), params)
offarg = fcall$offset
if(missing(addl.vars))
addl.vars = character(0)
vars = union(vars, addl.vars)
if (!missing(frame)) {
if(is.null(data) && !.has.fcns(trms))
data = frame
if("(offset)" %in% names(data))
vars = union(vars, "(offset)")
}
else if(!is.null(offarg))
vars = union(vars, .all.vars(reformulate(deparse(offarg))))
tbl = data
if (length(vars) == 0 || vars[1] == "1") {
tbl = data.frame(c(1,1))
vars = names(tbl) = 1
}
if (is.null(tbl)) {
possibly.random = FALSE
m = match(c("formula", "data", "subset", "weights"), names(fcall), 0L)
fcall = fcall[c(1L, m)]
# check to see if there are any function calls to worry about
# [e.g., subset = sample(1:n, 50) will give us a
# different subset than model used]
mm = match(c("data", "subset"), names(fcall), 0L)
if(any(mm > 0)) {
# Flag cases where there is a function call in data or subset
# May indicate a situation where data are randomized
fcns = unlist(lapply(fcall[mm],
function(x) setdiff(all.names(x),
c("::",":::","[[","]]",all.vars(x)))))
possibly.random = (max(nchar(c("", fcns))) > 1)
}
fcall$drop.unused.levels = TRUE
fcall[[1L]] = quote(stats::model.frame)
fcall$xlev = NULL # we'll ignore xlev
if(!is.numeric(na.action)) ### In case na.action is not a vector of indices
na.action = NULL
# If we have an explicit list of cases to exclude, let everything through now
if (!is.null(na.action))
fcall$na.action = na.pass
else # exclude incomplete cases
fcall$na.action = na.omit
form = .reformulate(vars)
fcall$formula = update(trms, form)
env = environment(trms)
if (is.null(env))
env = parent.frame()
tbl = try(eval(fcall, env, parent.frame()), silent = TRUE)
if(inherits(tbl, "try-error"))
return(.rd.error(vars, fcall))
if (possibly.random) {
chk = eval(fcall, env, parent.frame())
if (!all(chk == tbl))
stop("Data appear to be randomized -- ",
"cannot consistently recover the data\n",
"Move the randomization ",
"outside of the model-fitting call.")
}
# Now we can drop na.action's rows
if (!is.null(na.action))
tbl = tbl[-(na.action), , drop=FALSE]
}
else {
tbl = tbl[, vars, drop = FALSE] # consider only the variables actually needed
tbl = tbl[complete.cases(tbl), , drop=FALSE]
}
# if there is a separate offset argument, calculate the static offset
if(!is.null(offarg)) {
if ("(offset)" %in% names(tbl)) { # don't have to calculate it
offval = tbl[["(offset)"]]
vars = setdiff(vars, "(offset)")
}
else
offval = eval(offarg, tbl, enclos = environment(trms))
if(!is.null(offval)) {
tbl$.static.offset. = offval
addl.vars = c(addl.vars, ".static.offset.")
}
}
if(!missing(pwts) && !is.null(pwts)) {
if ((npw <- length(pwts)) == nrow(tbl))
tbl[["(weights)"]] = pwts
else if (length(unique(pwts)) > 1)
warning("Model has ", npw, " prior weights, but we recovered ",
nrow(tbl), " rows of data.\nSo prior weights were ignored.",
call. = FALSE)
}
attr(tbl, "call") = object # the original call
attr(tbl, "terms") = trms
attr(tbl, "predictors") = setdiff(.all.vars(delete.response(trms)), params)
if(!missing(addl.vars)) {
attr(tbl, "predictors") = union(attr(tbl, "predictors"), addl.vars)
vars = setdiff(vars, addl.vars)
}
attr(tbl, "responses") = setdiff(vars, union(attr(tbl, "predictors"), params))
tbl
}
# error message for recover_data.call
.rd.error = function(vars, fcall) {
if ("pi" %in% vars)
return("\nTry re-running with 'params = c\"pi\", ...)'")
if (is.list(fcall$data)) fcall$data = "(raw data structure)"
dataname = as.character(fcall$data)[1]
if ((!is.na(dataname)) && (nchar(dataname) > 50))
dataname = paste(substring(dataname, 1, 50), "...")
mesg = "We are unable to reconstruct the data.\n"
mesg = paste0(mesg, "The variables needed are:\n\t",
paste(vars, collapse = " "), "\n",
"Are any of these actually constants? (specify via 'params = ')\n")
if (is.na(dataname))
mesg = paste(mesg, "Try re-running with 'data = \"<name of dataset>\"'\n")
else
mesg = paste0(mesg, "The dataset name is:\n\t", dataname, "\n",
"Does the data still exist? Or you can specify a dataset via 'data = '\n")
mesg
}
#----------------------------------------------------------
### emm_basis methods create a basis for the reference grid
#
# Required args:
# object - the model object
# trms - terms component of object
# xlev - named list of factor levels (but not the coerced ones)
# grid - reference grid
# All methods must return a list with these elements:
# X - basis for linear fcns over reference grid
# bhat - regression coefficients for fixed effects (INCLUDING any NAs)
# nbasis - matrix whose columns for a basis for non-estimable functions of beta; all.estble if none
# V - estimated covariance matrix of bhat
# dffun - function(k, dfargs) to find df for k'bhat having std error se
# dfargs - additional arguments, if any, for dffun
# misc - Extra info ...
# -- if extra levels need to be added (e.g. mlm, polr),
# put them in misc$ylevs
# -- For transformations or link fcns, use misc$tran
# for name (see 'make.link'), and use misc$inv.lbl
# for label to use in 'summary' when tran is inverted
# (ref_grid looks at lhs of model for tran if none found)
# Note: if no df exists, set dffun = function(...) NA and dfargs = list()
#--------------------------------------------------------------
#' @rdname extending-emmeans
#' @order 11
#' @param xlev Named list of factor levels (\emph{excluding} ones coerced to
#' factors in the model formula)
#' @param grid A \code{data.frame} (provided by \code{ref_grid}) containing
#' the predictor settings needed in the reference grid
#'
#' @return The \code{emm_basis} method should return a \code{list} with the
#' following elements:
#' \describe{
#' \item{X}{The matrix of linear functions over \code{grid}, having the same
#' number of rows as \code{grid} and the number of columns equal to the length
#' of \code{bhat}.}
#' \item{bhat}{The vector of regression coefficients for fixed effects. This
#' should \emph{include} any \code{NA}s that result from rank deficiencies.}
#' \item{nbasis}{A matrix whose columns form a basis for non-estimable functions
#' of beta, or a 1x1 matrix of \code{NA} if there is no rank deficiency.}
#' \item{V}{The estimated covariance matrix of \code{bhat}.}
#' \item{dffun}{A function of \code{(k, dfargs)} that returns the degrees of
#' freedom associated with \code{sum(k * bhat)}.}
#' \item{dfargs}{A \code{list} containing additional arguments needed for
#' \code{dffun}}.
#' } %%% end of describe
#' @export
#'
#' @section Communication between methods:
#' If the \code{recover_data} method generates information needed by \code{emm_basis},
#' that information may be incorporated by creating a \code{"misc"} attribute in the
#' returned recovered data. That information is then passed as the \code{misc}
#' argument when \code{ref_grid} calls \code{emm_basis}.
#'
#' @section Optional hooks:
#' Some models may need something other than standard linear estimates and
#' standard errors. If so, custom functions may be pointed to via the items
#' \code{misc$estHook}, \code{misc$vcovHook} and \code{misc$postGridHook}. If
#' just the name of the hook function is provided as a character string, then it
#' is retrieved using \code{\link{get}}.
#'
#' The \code{estHook} function should have arguments \samp{(object, do.se, tol,
#' ...)} where \code{object} is the \code{emmGrid} object,
#' \code{do.se} is a logical flag for whether to return the standard error, and
#' \code{tol} is the tolerance for assessing estimability. It should return a
#' matrix with 3 columns: the estimates, standard errors (\code{NA} when
#' \code{do.se==FALSE}), and degrees of freedom (\code{NA} for asymptotic). The
#' number of rows should be the same as \samp{object@linfct}. The
#' \code{vcovHook} function should have arguments \samp{(object, tol, ...)} as
#' described. It should return the covariance matrix for the estimates. Finally,
#' \code{postGridHook}, if present, is called at the very end of
#' \code{ref_grid}; it takes one argument, the constructed \code{object}, and
#' should return a suitably modified \code{emmGrid} object.
emm_basis = function(object, trms, xlev, grid, ...) {
# look for outside methods first
for (cl in .chk.cls(object)) {
emb <- .get.outside.method("emm_basis", cl)
if(!is.null(emb))
return(emb(object, trms, xlev, grid, ...))
}
for (cl in class(object)) {
mth = utils::getAnywhere(paste("emm_basis", cl, sep = "."))
if (length(mth$objs) > 0)
return((mth$objs[[1]])(object, trms, xlev, grid, ...))
}
UseMethod("emm_basis") ## This call has to be here to establish emm_basis as a generic
}
# Hidden courtesy function that provides access to all recover_data methods
#' @rdname extending-emmeans
#' @order 21
#' @export
.recover_data = function(object, ...)
recover_data(object, ...)
# Hidden courtesy function that provides access to all emm_basis methods
#' @rdname extending-emmeans
#' @order 22
#' @return \code{.recover_data} and \code{.emm_basis} are hidden exported versions of
#' \code{recover_data} and \code{emm_basis}, respectively. They run in \pkg{emmeans}'s
#' namespace, thus providing access to all existing methods.
#' @export
.emm_basis = function(object, trms, xlev, grid, ...)
emm_basis(object, trms, xlev, grid, ...)
#--------------------------------------------------------------
### DEFAULT METHODS (we hit these when a model is NOT supported)
# I'll have it return the message if we caught the error in this way
# Then caller can use try() to check for other types of errors,
# and just print this message otherwise
# NOT @exported
#' @exportS3Method recover_data default
recover_data.default = function(object, ...) {
paste("Can't handle an object of class ", dQuote(class(object)[1]), "\n",
paste(.show_supported(), collapse=""))
}
# NOT @exported
#' @exportS3Method emm_basis default
emm_basis.default = function(object, trms, xlev, grid, ...) {
stop("Can't handle an object of class", dQuote(class(object)[1]), "\n",
.show_supported())
}
# Private fcn to get a list of supported objects
# does this by looking in namespace [ns] and methods [meth]
# then strips that off leaving extensions
.show_supported = function(ns = "emmeans", meth = "emm_basis") {
"Use help(\"models\", package = \"emmeans\") for information on supported models."
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.