##############################################################################
# Copyright (c) 2012-2024 Russell V. Lenth #
# #
# This file is part of the emmeans package for R (*emmeans*) #
# #
# *emmeans* is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 2 of the License, or #
# (at your option) any later version. #
# #
# *emmeans* is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with R and *emmeans*. If not, see #
# <https://www.r-project.org/Licenses/> and/or #
# <http://www.gnu.org/licenses/>. #
##############################################################################
### This file has summary.emmGrid S3 method and related functions
# as.data.frame for a summary just passes it through (with display digits set).
# Without a method like this, using `as.data.frame` to coerce a summary_emm
# object to a regular data frame could set off an infinite loop.
#' @export
as.data.frame.summary_emm = function(x, ...) {
attr(x, "digits") = getOption("digits")
x
}
# S3 summary method
#' Summaries, predictions, intervals, and tests for \code{emmGrid} objects
#'
#' These are the primary methods for obtaining numerical or tabular results from
#' an \code{emmGrid} object. \code{summary.emmGrid} is the general function for
#' summarizing \code{emmGrid} objects. It also serves as the print method for
#' these objects; so for convenience, \code{summary()} arguments may be included
#' in calls to functions such as \code{\link{emmeans}} and
#' \code{\link{contrast}} that construct \code{emmGrid} objects. Note that by
#' default, summaries for Bayesian models are diverted to
#' \code{\link{hpd.summary}}.
#'
#' \code{confint.emmGrid} is equivalent to \code{summary.emmGrid} with
#' \code{infer = c(TRUE, FALSE)}. The function \code{test.emmGrid}, when called with
#' \code{joint = FALSE}, is equivalent to \code{summary.emmGrid} with \code{infer = c(FALSE, TRUE)}.
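#'
#' To illustrate (a sketch, with \code{emm} standing in for any \code{emmGrid}
#' object), the following pairs of calls display the same results:
#' \preformatted{
#' confint(emm)   # same as summary(emm, infer = c(TRUE, FALSE))
#' test(emm)      # same as summary(emm, infer = c(FALSE, TRUE))
#' }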
#'
#' With \code{joint = TRUE}, \code{test.emmGrid} calculates the Wald test of the
#' hypothesis \code{linfct \%*\% bhat = null}, where \code{linfct} and
#' \code{bhat} refer to slots in \code{object} (possibly subsetted according to
#' \code{by} or \code{rows}). An error is thrown if any row of \code{linfct} is
#' non-estimable. It is permissible for the rows of \code{linfct} to be linearly
#' dependent, as long as \code{null == 0}, in which case a reduced set of
#' contrasts is tested. Linear dependence and nonzero \code{null} cause an
#' error. The returned object has an additional \code{"est.fcns"} attribute, which
#' is a list of the linear functions associated with the joint test.
#'
#' @param object An object of class \code{"emmGrid"} (see \link{emmGrid-class})
#' @param infer A vector of one or two logical values. The first determines
#' whether confidence intervals are displayed, and the second determines
#' whether \emph{t} tests and \emph{P} values are displayed. If only one value
#' is provided, it is used for both.
#' @param level Numerical value between 0 and 1. Confidence level for confidence
#' intervals, if \code{infer[1]} is \code{TRUE}.
#' @param adjust Character value naming the method used to adjust \eqn{p} values
#' or confidence limits; or to adjust comparison arrows in \code{plot}. See
#' the P-value adjustments section below.
#' @param by Character name(s) of variables to use for grouping into separate
#' tables. This affects the family of tests considered in adjusted \emph{P}
#' values.
#' @param cross.adjust Character: \eqn{p}-value adjustment method to
#' additionally apply \emph{across}
#' the \code{by} groups. See the section on P-value adjustments for details.
#' @param type Character: type of prediction desired. This only has an effect if
#' there is a known transformation or link function. \code{"response"}
#' specifies that the inverse transformation be applied. \code{"mu"} (or
#' equivalently, \code{"unlink"}) is usually the same as \code{"response"},
#' but in the case where the model has both a link function and a response
#' transformation, only the link part is back-transformed. Other valid values
#' are \code{"link"}, \code{"lp"}, and \code{"linear.predictor"}; these are
#' equivalent, and request that results be shown for the linear predictor,
#' with no back-transformation. The default is \code{"link"}, unless the
#' \code{"predict.type"} option is in force; see \code{\link{emm_options}},
#' and also the section below on transformations and links.
#' @param df Numeric. If non-missing, a constant number of degrees of freedom to
#' use in constructing confidence intervals and \emph{P} values (\code{NA}
#' specifies asymptotic results).
#' @param calc Named list of character value(s) or formula(s).
#' The expressions in \code{calc} are evaluated and appended to the
#' summary, just after the \code{df} column. The expression may include
#' any names up through \code{df} in the summary, any additional names in
#' \code{object@grid} (such as \code{.wgt.} or \code{.offset.}), or any
#' earlier elements of \code{calc}.
#' @param null Numeric. Null hypothesis value(s), on the linear-predictor scale,
#' against which estimates are tested. May be a single value used for all, or
#' a numeric vector of length equal to the number of tests in each family
#' (i.e., \code{by} group in the displayed table).
#' @param delta Numeric value (on the linear-predictor scale). If zero, ordinary
#' tests of significance are performed. If positive, this specifies a
#' threshold for testing equivalence (using the TOST or two-one-sided-test
#' method), non-inferiority, or non-superiority, depending on \code{side}. See
#' Details for how the test statistics are defined.
#' @param side Numeric or character value specifying whether the test is
#' left-tailed (\code{-1}, \code{"-"}, \code{"<"}, \code{"left"}, or
#' \code{"nonsuperiority"}); right-tailed (\code{1}, \code{"+"}, \code{">"},
#' \code{"right"}, or \code{"noninferiority"}); or two-sided (\code{0},
#' \code{2}, \code{"!="}, \code{"two-sided"}, \code{"both"},
#' \code{"equivalence"}, or \code{"="}). See the special section below for
#' more details.
#' @param frequentist Ignored except when a Bayesian model was fitted. If missing
#' or \code{FALSE}, the object is passed to \code{\link{hpd.summary}}. If
#' \code{TRUE}, a frequentist summary is returned instead.
#' @param bias.adjust Logical value for whether to adjust for bias in
#' back-transforming (\code{type = "response"}). This requires a valid value of
#' \code{sigma} to exist in the object or be specified.
#' @param sigma Error SD assumed for bias correction (when
#' \code{type = "response"} and a transformation
#' is in effect), or for constructing prediction intervals. If not specified,
#' \code{object@misc$sigma} is used, and a warning is issued if it is not found
#' or not valid.
#' \emph{Note:} \code{sigma} may be a vector, but be careful that it correctly
#' corresponds (perhaps after recycling) to the order of the reference grid.
#' @param ... Optional arguments such as \code{scheffe.rank}
#' (see \dQuote{P-value adjustments}).
#' In \code{confint.emmGrid},
#' \code{predict.emmGrid}, and
#' \code{test.emmGrid}, these arguments are passed to
#' \code{summary.emmGrid}.
#'
#' @return \code{summary.emmGrid}, \code{confint.emmGrid}, and
#' \code{test.emmGrid} return an object of class \code{"summary_emm"}, which
#' is an extension of \code{\link{data.frame}} but with a special \code{print}
#' method that displays it with custom formatting. For models fitted using
#' MCMC methods, the call is diverted to \code{\link{hpd.summary}} (with
#' \code{prob} set to \code{level}, if specified); one may
#' alternatively use general MCMC summarization tools with the
#' results of \code{as.mcmc}.
#'
#' @section Defaults:
#' The \code{misc} slot in \code{object} may contain default values for
#' \code{by}, \code{calc}, \code{infer}, \code{level}, \code{adjust},
#' \code{type}, \code{null}, \code{side}, and \code{delta}.
#' These defaults vary depending
#' on the code that created the object. The \code{\link{update}} method may be
#' used to change these defaults. In addition, any options set using
#' \samp{emm_options(summary = ...)} will trump those stored in the object's
#' \code{misc} slot.
#'
#' @section Transformations and links:
#' With \code{type = "response"}, the transformation assumed can be found in
#' \samp{object@misc$tran}, and its label for the summary is in
#' \samp{object@misc$inv.lbl}. Any \eqn{t} or \eqn{z} tests are still performed
#' on the scale of the linear predictor, not the inverse-transformed one.
#' Similarly, confidence intervals are computed on the linear-predictor scale,
#' then inverse-transformed.
#'
#' Be aware that only univariate transformations and links are
#' supported in this way. Some multivariate transformations are supported by
#' \code{\link{mvregrid}}.
#'
#' @section Bias adjustment when back-transforming:
#' When \code{bias.adjust} is \code{TRUE}, then back-transformed estimates
#' are adjusted by adding
#' \eqn{0.5 h''(u)\sigma^2}, where \eqn{h} is the inverse transformation and
#' \eqn{u} is the linear predictor. This is based on a second-order Taylor
#' expansion. There are better or exact adjustments for certain specific
#' cases, and these may be incorporated in future updates.
#'
#' Note: In certain models, e.g., those with non-gaussian families,
#' \code{sigma} is initialized as \code{NA}, and so by default, bias adjustment
#' is skipped and a warning is issued. You may override this by specifying a
#' value for \code{sigma}. However, \emph{with ordinary generalized linear models,
#' bias adjustment is inappropriate} and you should not try to do it. With GEEs and GLMMs,
#' you probably should \emph{not} use \code{sigma(model)}, and instead you should create an
#' appropriate value using the estimated random effects, e.g., from \code{VarCorr(model)}.
#' An example is provided in the \dQuote{transformations} vignette.
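#'
#' As a numerical sketch, consider a log transformation, so that \eqn{h} is
#' the exponential function and \eqn{h'' = h}. The adjusted estimate is then
#' \eqn{e^u (1 + \sigma^2/2)}, which approximates the exact lognormal mean
#' \eqn{e^{u + \sigma^2/2}}:
#' \preformatted{
#' u <- 2;  sigma <- 0.5
#' exp(u) * (1 + sigma^2 / 2)   # second-order adjustment: 8.312688
#' exp(u + sigma^2 / 2)         # exact lognormal mean:    8.372897
#' }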
#'
#' @section P-value adjustments:
#' The \code{adjust} argument specifies a multiplicity adjustment for tests or
#' confidence intervals. This adjustment always is applied \emph{separately}
#' to each table or sub-table that you see in the printed output (see
#' \code{\link{rbind.emmGrid}} for how to combine tables). If there are non-estimable
#' cases in a \code{by} group, those cases are \emph{excluded} before determining
#' the adjustment; that means there could be different adjustments in different groups.
#'
#' The valid values of \code{adjust} are as follows:
#' \describe{
#' \item{\code{"tukey"}}{Uses the Studentized range distribution with the number
#' of means in the family. (Available for two-sided cases only.)}
#' \item{\code{"scheffe"}}{Computes \eqn{p} values from the \eqn{F}
#' distribution, according to the Scheffe critical value of
#' \eqn{\sqrt{rF(\alpha; r, d)}}{sqrt[r*qf(alpha, r, d)]}, where \eqn{d} is
#' the error degrees of freedom and \eqn{r} is the rank of the set of linear
#' functions under consideration. By default, the value of \code{r} is
#' computed from \code{object@linfct} for each by group; however, if the
#' user specifies an argument matching \code{scheffe.rank}, its value will
#' be used instead. Ordinarily, if there are \eqn{k} means involved, then
#' \eqn{r = k - 1} for a full set of contrasts involving all \eqn{k} means, and
#' \eqn{r = k} for the means themselves. (The Scheffe adjustment is available
#' for two-sided cases only.)}
#' \item{\code{"sidak"}}{Makes adjustments as if the estimates were independent
#' (a conservative adjustment in many cases).}
#' \item{\code{"bonferroni"}}{Multiplies \eqn{p} values, or divides significance
#' levels by the number of estimates. This is a conservative adjustment.}
#' \item{\code{"dunnettx"}}{Uses our own\emph{ad hoc} approximation to the
#' Dunnett distribution for a family of estimates having pairwise
#' correlations of \eqn{0.5} (as is true when comparing treatments with a
#' control with equal sample sizes). The accuracy of the approximation
#' improves with the number of simultaneous estimates, and the computation is
#' much faster than \code{"mvt"}. (Available for two-sided cases only.)}
#' \item{\code{"mvt"}}{Uses the multivariate \eqn{t} distribution to assess the
#' probability or critical value for the maximum of \eqn{k} estimates. This
#' method produces the same \eqn{p} values and intervals as applying the
#' default \code{summary} or \code{confint} methods to the results of
#' \code{\link{as.glht}}. In the context of pairwise comparisons or comparisons
#' with a control, this produces \dQuote{exact} Tukey or Dunnett adjustments,
#' respectively. However, the algorithm (from the \pkg{mvtnorm} package) uses a
#' Monte Carlo method, so results are not exactly repeatable unless the same
#' random-number seed is used (see \code{\link[base:Random]{set.seed}}). As the family
#' size increases, the required computation time will become noticeable or even
#' intolerable, making the \code{"tukey"}, \code{"dunnettx"}, or others more
#' attractive.}
#' \item{\code{"none"}}{Makes no adjustments to the \eqn{p} values.}
#' } %%%%%%%%%%%%%%%% end \describe {}
#'
#' For tests, not confidence intervals, the Bonferroni-inequality-based adjustment
#' methods in \code{\link{p.adjust}} are also available (currently, these
#' include \code{"holm"}, \code{"hochberg"}, \code{"hommel"},
#' \code{"bonferroni"}, \code{"BH"}, \code{"BY"}, \code{"fdr"}, and
#' \code{"none"}). If a \code{p.adjust.methods} method other than
#' \code{"bonferroni"} or \code{"none"} is specified for confidence limits, the
#' straight Bonferroni adjustment is used instead. Also, if an adjustment method
#' is not appropriate (e.g., using \code{"tukey"} with one-sided tests, or with
#' results that are not pairwise comparisons), a more appropriate method
#' (usually \code{"sidak"}) is substituted.
#'
#' In some cases, confidence and \eqn{p}-value adjustments are only approximate
#' -- especially when the degrees of freedom or standard errors vary greatly
#' within the family of tests. The \code{"mvt"} method is always the correct
#' one-step adjustment, but it can be very slow. One may use
#' \code{\link{as.glht}} with methods in the \pkg{multcomp} package to obtain
#' non-conservative multi-step adjustments to tests.
#'
#' \emph{Note:} As stated above, non-estimable cases are \emph{excluded} from
#' the family to which adjustments are applied. You may subset the object
#' using the \code{[]} operator if you wish to control the family directly.
#'
#' The \code{cross.adjust} argument is a way of specifying a multiplicity
#' adjustment across the \code{by} groups (otherwise by default, each group is
#' treated as a separate family in regards to multiplicity adjustments). It
#' applies only to \eqn{p} values. Valid options are one of the
#' \code{p.adjust.methods} or \code{"sidak"}. This argument is ignored unless
#' it is other than \code{"none"}, there is more than one \code{by} group, and
#' they are all the same size. Under those conditions, we first use
#' \code{adjust} to determine the within-group adjusted \eqn{p} values.
#' Imagine each group's adjusted \eqn{p} values arranged in side-by-side
#' columns, thus forming a matrix with the number of columns equal to the
#' number of \code{by} groups. Then we use the \code{cross.adjust} method to
#' further adjust the adjusted \eqn{p} values in each row of this matrix. Note
#' that an \emph{overall} Bonferroni (or Sidak) adjustment is obtainable by
#' specifying \emph{both} \code{adjust} and \code{cross.adjust} as
#' \code{"bonferroni"} (or \code{"sidak"}). However, less conservative (but
#' yet conservative) overall adjustments are available when it is possible to
#' use an \dQuote{exact} within-group method (e.g., \code{adjust = "tukey"}
#' for pairwise comparisons) and \code{cross.adjust} as a conservative
#' adjustment. [\code{cross.adjust} methods other than \code{"none"},
#' \code{"bonferroni"}, or \code{"sidak"} do not seem advisable, but other
#' \code{p.adjust} methods are available if you can make sense of them.]
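#'
#' As a sketch of the computation (using hypothetical within-group adjusted
#' \eqn{p} values, one column per \code{by} group):
#' \preformatted{
#' p <- cbind(grp1 = c(0.01, 0.20), grp2 = c(0.03, 0.50))
#' t(apply(p, 1, p.adjust, method = "bonferroni"))  # row-wise cross-adjustment
#' }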
#'
#' @section Tests of significance, nonsuperiority, noninferiority, or equivalence:
#' When \code{delta = 0}, test statistics are the usual tests of significance.
#' They are of the form
#' \samp{(estimate - null)/SE}. Notationally:
#' \describe{
#' \item{Significance}{\eqn{H_0: \theta = \theta_0} versus \cr
#' \eqn{H_1: \theta < \theta_0} (left-sided), or\cr
#' \eqn{H_1: \theta > \theta_0} (right-sided), or\cr
#' \eqn{H_1: \theta \ne \theta_0} (two-sided)\cr
#' The test statistic is\cr
#' \eqn{t = (Q - \theta_0)/SE}\cr
#' where \eqn{Q} is our estimate of \eqn{\theta};
#' then left, right, or two-sided \eqn{p} values are produced,
#' depending on \code{side}.}
#' }
#' When \code{delta} is positive, the test statistic depends on \code{side} as
#' follows.
#' \describe{
#' \item{Left-sided (nonsuperiority)}{\eqn{H_0: \theta \ge \theta_0 + \delta}
#' versus \eqn{H_1: \theta < \theta_0 + \delta}\cr
#' \eqn{t = (Q - \theta_0 - \delta)/SE}\cr
#' The \eqn{p} value is the lower-tail probability.}
#' \item{Right-sided (noninferiority)}{\eqn{H_0: \theta \le \theta_0 - \delta}
#' versus \eqn{H_1: \theta > \theta_0 - \delta}\cr
#' \eqn{t = (Q - \theta_0 + \delta)/SE}\cr
#' The \eqn{p} value is the upper-tail probability.}
#' \item{Two-sided (equivalence)}{\eqn{H_0: |\theta - \theta_0| \ge \delta}
#' versus \eqn{H_1: |\theta - \theta_0| < \delta}\cr
#' \eqn{t = (|Q - \theta_0| - \delta)/SE}\cr
#' The \eqn{p} value is the \emph{lower}-tail probability.\cr
#' Note that \eqn{t} is the maximum of \eqn{t_{nonsup}} and \eqn{-t_{noninf}}.
#' This is equivalent to choosing the less
#' significant result in the two-one-sided-test (TOST) procedure.}
#' } %%%%%%%%%%%% end \describe{}
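#'
#' For instance (a numerical sketch): with \code{delta = 0.5},
#' \eqn{\theta_0 = 0}, an estimate \eqn{Q = 0.3}, \eqn{SE = 0.2}, and 20 d.f.,
#' the equivalence statistic is \eqn{t = (|0.3 - 0| - 0.5)/0.2 = -1}, and the
#' \eqn{p} value is \code{pt(-1, 20)}, about 0.165.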
#'
#'
#' @section Non-estimable cases:
#' When the model is rank-deficient, each row \code{x} of \code{object}'s
#' \code{linfct} slot is checked for estimability. If \code{sum(x*bhat)}
#' is found to be non-estimable, then the string \code{nonEst} is displayed for the
#' estimate, and associated statistics are set to \code{NA}.
#' The estimability check is performed
#' using the orthonormal basis \code{N} in the \code{nbasis} slot for the null
#' space of the rows of the model matrix. Estimability fails when
#' \eqn{||Nx||^2 / ||x||^2} exceeds \code{tol}, which by default is
#' \code{1e-8}. You may change it via \code{\link{emm_options}} by setting
#' \code{estble.tol} to the desired value.
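#' For example, \code{emm_options(estble.tol = 1e-6)} makes the check more
#' lenient than the default.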
#'
#' As noted in the section on P-value adjustments, non-estimable cases are
#' excluded when determining the family size for \emph{P}-value adjustments.
#'
#' @section Warning about potential misuse of P values:
#' Some in the statistical and scientific community argue that
#' the term \dQuote{statistical significance} should be completely abandoned,
#' and that criteria such as \dQuote{p < 0.05} should never be used to assess
#' the importance of an effect, because such practices are misleading and
#' prone to abuse.
#' See \href{../doc/basics.html#pvalues}{the \dQuote{basics} vignette} for more
#' discussion.
#'
#'
#' @note When a transformation and/or link is in force and tests are performed,
#' any \code{null} and/or \code{delta} values specified must always be on the
#' scale of the linear predictor, regardless of the setting for \code{type}. If
#' \code{type = "response"}, the null value displayed in the summary table
#' will be back-transformed from the value supplied by the user. But the
#' displayed \code{delta} will not be changed, because there (often) is
#' not a natural way to back-transform it.
#'
#' @note When we have \code{type = "response"}, and \code{bias.adj = TRUE},
#' the \code{null} value displayed in the output is both back-transformed
#' and bias-adjusted, leading to a rather non-intuitive-looking null value.
#' However, since the tests themselves are performed on the link scale,
#' this is the response value at which a *P* value of 1 would be obtained.
#'
#' @note The default \code{show} method for \code{emmGrid} objects (with the
#' exception of newly created reference grids) is \code{print(summary())}.
#' Thus, with ordinary usage of \code{\link{emmeans}} and such, it is
#' unnecessary to call \code{summary} unless there is a need to
#' specify other than its default options.
#'
#' @note If a data frame is needed, \code{summary}, \code{confint},
#' and \code{test} serve this need. \code{as.data.frame} routes to
#' \code{summary} by default; calling it with \code{destroy.annotations = TRUE}
#' discards the annotations and so is not recommended.
#' If you want to see more digits in the output, use
#' \code{print(summary(object), digits = ...)}; and if you \emph{always} want
#' to see more digits, use \code{emm_options(opt.digits = FALSE)}.
#' @seealso \code{\link{hpd.summary}}
#'
#' @method summary emmGrid
#' @export
#' @order 1
#'
#' @examples
#' warp.lm <- lm(breaks ~ wool * tension, data = warpbreaks)
#' warp.emm <- emmeans(warp.lm, ~ tension | wool)
#' warp.emm # implicitly runs 'summary'
#'
#' confint(warp.emm, by = NULL, level = .90)
#'
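#' # Prediction intervals (a sketch: this lm fit supplies the 'sigma' needed)
#' predict(warp.emm, interval = "prediction")
#'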
#' # --------------------------------------------------------------
#' pigs.lm <- lm(log(conc) ~ source + factor(percent), data = pigs)
#' pigs.emm <- emmeans(pigs.lm, "percent", type = "response")
#' summary(pigs.emm) # (inherits type = "response")
#' summary(pigs.emm, calc = c(n = ".wgt.")) # Show sample size
#'
#' # For which percents is EMM non-inferior to 35, based on a 10% threshold?
#' # Note the test is done on the log scale even though we have type = "response"
#' test(pigs.emm, null = log(35), delta = log(1.10), side = ">")
#'
#' con <- contrast(pigs.emm, "consec")
#' test(con)
#'
#' test(con, joint = TRUE)
#'
#' # default Scheffe adjustment - rank = 3
#' summary(con, infer = c(TRUE, TRUE), adjust = "scheffe")
#'
#' # Consider as some of many possible contrasts among the six cell means
#' summary(con, infer = c(TRUE, TRUE), adjust = "scheffe", scheffe.rank = 5)
#'
#' # Show estimates to more digits
#' print(test(con), digits = 7)
#'
#' # --------------------------------------------------------------
#' # Cross-adjusting P values
#' prs <- pairs(warp.emm) # pairwise comparisons of tension, by wool
#' test(prs, adjust = "tukey", cross.adjust = "bonferroni")
#'
#' # Same comparisons taken as one big family (more conservative)
#' test(prs, adjust = "bonferroni", by = NULL)
#'
summary.emmGrid <- function(object, infer, level, adjust, by,
cross.adjust = "none", type, df, calc,
null, delta, side, frequentist,
bias.adjust = get_emm_option("back.bias.adj"),
sigma, ...) {
if(missing(sigma))
sigma = object@misc$sigma
if(missing(frequentist) && !is.null(object@misc$frequentist))
frequentist = object@misc$frequentist
if(missing(bias.adjust)) {
if (!is.null(object@misc$bias.adjust))
bias.adjust = object@misc$bias.adjust
else
bias.adjust = get_emm_option("back.bias.adj")
}
if(!is.na(object@post.beta[1]) && (missing(frequentist) || !frequentist))
return (hpd.summary(object, prob = level, by = by, type = type, delta = delta,
bias.adjust = bias.adjust, sigma = sigma, ...))
# Any "summary" options override built-in
opt = get_emm_option("summary")
if(!is.null(opt)) {
opt$object = object
object = do.call("update.emmGrid", opt)
}
if(!missing(sigma))
object = update(object, sigma = sigma) # we'll keep sigma in misc
misc = object@misc
use.elts = .reconcile.elts(object)
grid = object@grid[use.elts, , drop = FALSE]
### For missing arguments, get from misc, else default
if(missing(infer))
infer = misc$infer
if(missing(level))
level = misc$level
if(missing(adjust))
adjust = misc$adjust
if(missing(cross.adjust) && !is.null(misc$cross.adjust))
cross.adjust = misc$cross.adjust
if(missing(by))
by = misc$by.vars
# Disable Tukey if by vars don't match those used in construction
    if((!is.null(misc$estType) && misc$estType == "pairs") && (paste(c("", by), collapse = ",") != misc$.pairby))
misc$estType = object@misc$estType = "contrast"
if (missing(type))
type = .get.predict.type(misc)
else
type = .validate.type(type)
# if there are two transformations and we want response, then we need to undo both
if ((type == "response") && (!is.null(misc$tran2))) {
tmp = match.call()
tmp$object = quote(object)
tmp$type = "unlink"
summ.unlink = eval(tmp) #this is summary with type = "unlink"
object = regrid(object, transform = "mu")
two.trans = TRUE
}
else
two.trans = FALSE
# Check if we have a log-ratio transformation. At least then we can guess the problem.
if ((type == "response") && is.character(object@misc$tran) && endsWith(object@misc$tran, "lr"))
warning("It looks like you have a multivariate transformation -- not handled by type = 'response'.\n",
"Perhaps instead you wanted to use 'mvregrid()'?")
if ((type %in% c("mu", "unlink")) && (!is.null(t2 <- misc$tran2))) {
if (!is.character(t2))
t2 = "tran"
object = update(object, inv.lbl = paste0(t2, "(resp)"))
}
if(missing(df))
df = misc$df
if(!is.null(df)) {
object@dffun = function(k, dfargs) df[1]
attr(object@dffun, "mesg") = "user-specified"
}
# for missing args that default to zero unless provided or in misc slot
.nul.eq.zero = function(val) {
if(is.null(val)) 0
else val
}
if(missing(null))
null = .nul.eq.zero(misc$null)
if(missing(delta))
delta = .nul.eq.zero(misc$delta)
if(missing(side))
side = .nul.eq.zero(misc$side)
# reconcile all the different ways we could specify the alternative
# ... and map each to one of the first 3 subscripts
side.opts = c("left","both","right","two-sided","noninferiority","nonsuperiority","equivalence","superiority","inferiority","0","2","-1","1","+1","<",">","!=","=")
side.map = c( 1, 2, 3, 2, 3, 1, 2, 3, 1, 2, 2, 1, 3, 3, 1, 3, 2, 2)
side = side.map[pmatch(side, side.opts, 2)[1]] - 2
delta = abs(delta)
result = .est.se.df(object, ...)
lblnms = setdiff(names(grid),
c(object@roles$responses, ".offset.", ".wgt."))
lbls = grid[lblnms]
zFlag = (all(is.na(result$df) | is.infinite(result$df)))
inv = (type %in% c("response", "mu", "unlink")) # flag to inverse-transform
link = attr(result, "link")
if (inv && is.null(link))
inv = FALSE
if ((length(infer) == 0) || !is.logical(infer))
infer = c(FALSE, FALSE)
    if(length(infer) == 1)
infer = c(infer,infer)
if(inv && !is.null(misc$tran)) {
if (!is.null(misc$inv.lbl)) {
names(result)[1] = misc$inv.lbl
if (!is.null(misc$log.contrast)) # contrast of logs - relabel as ratios
for (ell in seq_along(lbls)){
lbls[[ell]] = factor(lbls[[ell]])
levels(lbls[[ell]]) = gsub(" - ", " / ", levels(lbls[[ell]]))
}
}
else
names(result)[1] = "response"
}
attr(result, "link") = NULL
estName = names(result)[1]
mesg = misc$initMesg
# Look for "mesg" attribute in dffun
if (!is.null(dfm <- attr(object@dffun, "mesg")))
mesg = c(mesg, paste("Degrees-of-freedom method:", dfm))
### Add an annotation when we show results on lp scale and
### there is a transformation
if (!inv && !is.null(link)) {
mesg = c(mesg, paste("Results are given on the", link$name, "(not the response) scale."))
}
if (inv && !is.null(link$unknown)) {
mesg = c(mesg, paste0('Unknown transformation "', link$name, '": no transformation done'))
inv = FALSE
link = NULL
}
if(inv && bias.adjust && !is.null(link)) {
link = .make.bias.adj.link(link, sigma)
bias.adjust = attr(link, "bias.adjust") # disables later message if skipped
}
# et = 1 if a prediction, 2 if a contrast (or unmatched or NULL), 3 if pairs
et = pmatch(c(misc$estType, "c"), c("prediction", "contrast", "pairs"), nomatch = 2)[1]
by.size = nrow(grid)
by.rows = .find.by.rows(grid, by)
if (!is.null(by)) {
if (length(unique(sapply(by.rows, length))) > 1) {
by.size = misc$famSize = "(varies)"
}
else for (nm in by)
by.size = by.size / length(unique(object@levels[[nm]]))
}
fam.info = c(misc$famSize, by.size, et)
cnm = NULL
adjust = tolower(adjust)
if (adjust %in% c("bh", "by")) # special cases in p.adjust.methods
adjust = toupper(adjust)
# get vcov matrix only if needed (adjust == "mvt")
corrmat = sch.rank = NULL
if (adjust %.pin% "mvt") { ##(!is.na(pmatch(adjust, "mvt"))) {
vvv = vcov(object)
if(any(is.na(vvv))) {
warning("'mvt' adjustment not available due to missing values")
corrmat = .diag(rep(NA, nrow(vvv)))
}
else
corrmat = cov2cor(vvv)
attr(corrmat, "by.rows") = by.rows
}
else if (adjust %.pin% "scheffe") { ##(!is.na(pmatch(adjust, "scheffe"))) {
if(is.null(sch.rank <- .match.dots("scheffe.rank", ...)))
sch.rank = sapply(by.rows, function(.) qr(zapsmall(object@linfct[., , drop = FALSE]))$rank)
if(length(unique(sch.rank)) > 1)
fam.info[1] = "uneven" # This forces ragged.by = TRUE in .adj functions
}
# Add calculated columns
if(!missing(calc) || !is.null(calc <- misc$calc)) {
env = c(result, grid[setdiff(names(grid), names(result))])
for (v in names(calc)) {
elt = rev(as.character(calc[[v]]))[1] # pick out rhs if a formula
val = try(eval(parse(text = elt), envir = env), silent = TRUE)
if(!inherits(val, "try-error"))
result[[v]] = env[[v]] = val
else
warning("The column '", v, "' could not be calculated, ",
" so it is omitted")
}
}
# add linkname attribute
if (two.trans)
linkname = paste0(link$name, "[", attr(summ.unlink, "linkname"), "]")
else
linkname = link$name
if(infer[1]) { # add CIs
acv = .adj.critval(result[[1]], level, result$df, adjust, fam.info, side, corrmat, by.rows, sch.rank)
###adjust = acv$adjust # in older versions, I forced same adj method for tests
cv = acv$cv
cv = switch(side + 2, cbind(-Inf, cv), cbind(-cv, cv), cbind(-cv, Inf))
cnm = if (zFlag) c("asymp.LCL", "asymp.UCL") else c("lower.CL","upper.CL")
if(!is.null(misc$.predFlag)) {
cnm = c("lower.PL", "upper.PL")
sigma = misc$sigma
mesg = c(mesg, paste0(
"Prediction intervals and SEs are based on an error SD of ",
.fmt.sigma(sigma)))
estName = names(result)[1] = "prediction"
}
if (!two.trans) {
result[[cnm[1]]] = result[[1]] + cv[, 1]*result$SE
result[[cnm[2]]] = result[[1]] + cv[, 2]*result$SE
}
else {
result[cnm] = summ.unlink[cnm]
}
mesg = c(mesg, paste("Confidence level used:", level), acv$mesg)
if (inv) {
clims = with(link, cbind(linkinv(result[[cnm[1]]]), linkinv(result[[cnm[2]]])))
tmp = apply(clims, 1, function(x) {
z = diff(x); ifelse(is.na(z), 0, z) })
idx = if (all(tmp >= 0)) 1:2 else 2:1
result[[cnm[1]]] = clims[, idx[1]]
result[[cnm[2]]] = clims[, idx[2]]
mesg = c(mesg, paste("Intervals are back-transformed from the", linkname, "scale"))
}
}
if(infer[2]) { # add tests
tnm = ifelse (zFlag, "z.ratio", "t.ratio")
tail = ifelse(side == 0, -sign(abs(delta)), side)
if(!two.trans) {
result[["null"]] = null
if (inv && !is.null(link))
result[["null"]] = link$linkinv(null)
if (all(result$null == 0))
result[["null"]] = NULL
if (side == 0) {
if (delta == 0) # two-sided sig test
t.ratio = result[[tnm]] = (result[[1]] - null) / result$SE
else
t.ratio = result[[tnm]] = (abs(result[[1]] - null) - delta) / result$SE
}
else {
t.ratio = result[[tnm]] = (result[[1]] - null + side * delta) / result$SE
}
apv = .adj.p.value(t.ratio, result$df, adjust, fam.info, tail, corrmat, by.rows, sch.rank)
adjust = apv$adjust # in case it was abbreviated
result$p.value = apv$pval
mesg = c(mesg, apv$mesg)
}
else {
result$null = ifelse(is.null(summ.unlink$null), link$linkinv(0), link$linkinv(summ.unlink$null))
result[[tnm]] = summ.unlink[[tnm]]
result$p.value = summ.unlink$p.value
apv = .adj.p.value(0, result$df, adjust, fam.info, tail, corrmat, by.rows, sch.rank)
# we ignore everything about apv except the message
mesg = c(mesg, apv$mesg)
}
# Handle cross-adjustments
        if ( (length(by.rows) > 1) &&
             (length(len <- unique(sapply(by.rows, length))) == 1) &&
             is.na(pmatch(cross.adjust, "none")) ) {
val = c("sidak", p.adjust.methods)
w = pmatch(tolower(cross.adjust), tolower(val))
if (!is.na(w)) {
bridx = unlist(by.rows)
cross.adjust = val[w]
mat = matrix(result$p.value[bridx], nrow = len)
apv = apply(mat, 1, function(p) {
if (w > 1) p.adjust(p, cross.adjust)
else 1 - (1 - p)^ncol(mat)
})
result$p.value[bridx] = as.numeric(t(apv))
mesg = c(mesg, paste("Cross-group P-value adjustment:", cross.adjust))
}
else
message("Invalid cross-adjustment method: '", cross.adjust, "'")
}
if (delta > 0)
mesg = c(mesg, paste("Statistics are tests of", c("nonsuperiority","equivalence","noninferiority")[side+2],
"with a threshold of", signif(delta, 5)))
if(tail != 0)
mesg = c(mesg, paste("P values are ", ifelse(tail<0,"left-","right-"),"tailed", sep=""))
if (inv)
mesg = c(mesg, paste("Tests are performed on the", linkname, "scale"))
}
if (inv) {
result[["SE"]] = with(link, abs(mu.eta(result[[1]]) * result[["SE"]]))
result[[1]] = with(link, linkinv(result[[1]]))
if(bias.adjust)
mesg = c(mesg, paste("Bias adjustment applied based on sigma =",
.fmt.sigma(sigma)))
}
if (length(misc$avgd.over) > 0) {
qual = attr(misc$avgd.over, "qualifier")
if (is.null(qual)) qual = ""
mesg = c(paste0("Results are averaged over", qual, " the levels of: ",
paste(misc$avgd.over, collapse = ", ")), mesg)
}
summ = cbind(lbls, result)
attr(summ, "estName") = estName
attr(summ, "clNames") = cnm # will be NULL if infer[1] is FALSE
if (is.null(misc$pri.vars) || length(misc$pri.vars) == 0)
misc$pri.vars = names(object@levels)
attr(summ, "pri.vars") = setdiff(union(misc$pri.vars, misc$by.vars), c(by, ".wgt.", ".offset."))
attr(summ, "by.vars") = by
attr(summ, "adjust") = adjust
attr(summ, "side") = side
attr(summ, "delta") = delta
attr(summ, "type") = type
attr(summ, "mesg") = unique(mesg)
attr(summ, "linkname") = linkname
class(summ) = c("summary_emm", "data.frame")
summ
}
# S3 predict method
#' @rdname summary.emmGrid
#' @order 4
#' @method predict emmGrid
#' @param interval Type of interval desired (partial matching is allowed):
#' \code{"none"} for no intervals,
#' otherwise confidence or prediction intervals with given arguments,
#' via \code{\link{confint.emmGrid}}.
#' Note: prediction intervals are not available
#' unless the model family is \code{"gaussian"}.
#'
#' @export
#' @return \code{predict} returns a vector of predictions for each row of \code{object@grid}.
predict.emmGrid <- function(object, type,
interval = c("none", "confidence", "prediction"),
level = 0.95,
bias.adjust = get_emm_option("back.bias.adj"), sigma,
...)
{
# update with any "summary" options
opt = get_emm_option("summary")
if(!is.null(opt)) {
opt$object = object
object = do.call("update.emmGrid", opt)
}
interval = match.arg(interval)
if (interval %in% c( "confidence", "prediction")) {
if (interval == "prediction") {
ok = object@misc$.predFlag = .chk.predict(object)
if (!ok)
return(NULL)
}
return(confint.emmGrid(object, type = type, level = level,
bias.adjust = bias.adjust, sigma = sigma, ...))
}
if (missing(type))
type = .get.predict.type(object@misc)
else
type = .validate.type(type)
# if there are two transformations and we want response, then we need to undo both
if ((type == "response") && (!is.null(object@misc$tran2)))
object = regrid(object, transform = "mu")
pred = .est.se.df(object, do.se = FALSE, ...)
result = pred[[1]]
if (type %in% c("response", "mu", "unlink")) {
link = attr(pred, "link")
if (!is.null(link)) {
if (bias.adjust) {
if(missing(sigma))
sigma = object@misc$sigma
link = .make.bias.adj.link(link, sigma)
}
result = link$linkinv(result)
if (is.logical(link$unknown) && link$unknown)
warning("Unknown transformation: \"", link$name, "\" -- no transformation applied.")
}
}
result
}
# as.data.frame method
#' @rdname summary.emmGrid
#' @order 5
#' @param x object of the given class
#' @param destroy.annotations Logical value. If \code{FALSE}, an object of class
#' \code{summary_emm} is returned (which inherits from \code{data.frame}),
#' but if displayed, details like confidence levels, P-value adjustments,
#' transformations, etc. are also shown. But unlike the result
#' of \code{summary}, the number of digits displayed
#' is obtained from \code{getOption("digits")} rather than using the
#' optimal digits algorithm we usually use. Thus, it is formatted more like a
#' regular data frame, but with any annotations and groupings still intact.
#' If \code{TRUE} (not recommended), a \dQuote{plain vanilla} data frame is
#' returned, based on \code{row.names} and \code{check.names}.
#' @param row.names passed to \code{\link{as.data.frame}}
#' @param optional required argument, but ignored in \code{as.data.frame.emmGrid}
#' @param check.names passed to \code{\link{data.frame}}
#' @return The \code{as.data.frame} method returns an object that inherits
#' from \code{"data.frame"}.
#' @export
#' @method as.data.frame emmGrid
as.data.frame.emmGrid = function(x,
row.names = NULL, optional, check.names = TRUE,
destroy.annotations = FALSE, ...) {
rtn = summary.emmGrid(x, ...)
attr(rtn, "digits") = getOption("digits")
if(destroy.annotations)
rtn = as.data.frame.data.frame(rtn, row.names = row.names, check.names = check.names)
rtn
}
#' @rdname summary.emmGrid
#' @order 6
#' @method [ summary_emm
#' @param as.df Logical value. With \code{x[..., as.df = TRUE]}, the object is
#' coerced to a \code{\link{data.frame}} before the subscripting
#' is applied. With \code{as.df = FALSE}, the result is
#' returned as a \code{summary_emm} object when possible.
#' @export
"[.summary_emm" = function(x, ..., as.df = FALSE) {
attr(x, "by.vars") = NULL
rtn = as.data.frame.data.frame(x)[...]
if ((!as.df) && (!is.null(attr(rtn, "estName"))))
class(rtn) = c("summary_emm", "data.frame")
rtn
}
# Computes the quadratic form y'Xy after subsetting for the nonzero elements of y
.qf.non0 = function(X, y) {
ii = (zapsmall(y) != 0)
rtn = if (any(ii))
sum(y[ii] * (X[ii, ii, drop = FALSE] %*% y[ii]))
else 0
if (!is.na(rtn) && (rtn < 0)) {
warning("Negative variance estimate obtained!")
rtn = NaN
}
rtn
}
# Workhorse for getting estimates etc.
.est.se.df = function(object, do.se=TRUE, tol = get_emm_option("estble.tol"), ...) {
if (nrow(object@grid) == 0) {
result = data.frame(NA, NA, NA)
names(result) = c(object@misc$estName, "SE", "df")
return(result[-1, ])
}
misc = object@misc
use.elts = .reconcile.elts(object)
if (!is.null(hook <- misc$estHook)) {
if (is.character(hook)) hook = get(hook)
result = hook(object, do.se=do.se, tol=tol, ...)
}
else {
active = which(!is.na(object@bhat))
bhat = object@bhat[active]
result = t(apply(object@linfct[use.elts, , drop = FALSE], 1, function(x) {
if (!any(is.na(x)) && estimability::is.estble(x, object@nbasis, tol)) {
x = x[active]
est = sum(bhat * x)
if(do.se) {
se = sqrt(.qf.non0(object@V, x))
df = object@dffun(x, object@dfargs)
### Brute force: if (is.na(df)) df = Inf ### Added
}
else # if these unasked-for results are used, we're bound to get an error!
se = df = 0
c(est, se, df)
}
else c(NA,NA,NA)
}))
if (!is.null(object@grid$.offset.))
result[, 1] = result[, 1] + object@grid$.offset.[use.elts]
}
result[1] = as.numeric(result[1]) # silly bit of code to avoid getting a data.frame of logicals if all are NA
result = as.data.frame(result)
names(result) = c(misc$estName, "SE", "df")
if (!is.null(misc$.predFlag)) {
if (is.null(misc$sigma))
stop("No 'sigma' is available for obtaining Prediction intervals.",
call. = FALSE)
result$SE = sqrt(result$SE^2 + misc$sigma^2)
}
if (!is.null(misc$tran)) {
attr(result, "link") = .get.link(misc)
if(is.character(misc$tran) && (misc$tran == "none"))
attr(result, "link") = NULL
}
result
}
# workhorse for nailing down the link function
.get.link = function(misc) {
link = if(is.character(misc$tran))
.make.link(misc$tran)
else if (is.list(misc$tran))
misc$tran
else
NULL
    if (is.list(link)) { # See if a multiple of the link, or an offset, is involved
if (!is.null(misc$tran.mult) || !is.null(misc$tran.offset)) {
name = link$name
mult = link$mult = ifelse(is.null(misc$tran.mult), 1, misc$tran.mult)
off = link$offset = ifelse(is.null(misc$tran.offset), 0, misc$tran.offset)
link = with(link, list(
linkinv = function(eta) linkinv(eta / mult) - offset,
mu.eta = function(eta) mu.eta(eta / mult) / mult))
if(mult != 1)
name = paste0(round(mult, 3), "*", name)
if(off != 0)
name = paste0(name, "(mu + ", round(off, 3), ")")
link$name = name
}
}
if (!is.null(link) && is.null(link$name))
link$name = "linear-predictor"
link
}
# patch-in alternative back-transform stuff for bias adjustment
# Currently, we just use a 2nd-order approx for everybody:
# E(h(nu + E)) ~= h(nu) + 0.5*h"(nu)*var(E)
# We also return an attribute "bias.adjust" which is TRUE if ok, FALSE if we aborted
.make.bias.adj.link = function(link, sigma) {
###if (is.null(sigma) || (!is.null(sigma) && (is.na(sigma) || (sigma < 0)))) { ###old code
if(is.null(sigma) || (length(sigma) == 0) || is.na(sigma[1]) || (sigma[1] < 0)) {
warning("Bias adjustment skipped: No valid 'sigma' provided\n",
"(And perhaps bias.adjust should NOT be used; see ? summary.emmGrid)",
call. = FALSE)
attr(link, "bias.adjust") = FALSE
return(link)
}
link$inv = link$linkinv
link$der = link$mu.eta
link$sigma22 = sigma^2 / 2
link$der2 = function(eta) with(link, 1000 * (der(eta + .0005) - der(eta - .0005)))
link$linkinv = function(eta) with(link, inv(eta) + sigma22 * der2(eta))
link$mu.eta = function(eta) with(link, der(eta) +
1000 * sigma22 * (der2(eta + .0005) - der2(eta - .0005)))
attr(link, "bias.adjust") = TRUE
link
}
####!!!!! TODO: Re-think whether we are handling Scheffe adjustments correctly
####!!!!! if/when we shift around 'by' specs, etc.
### utility for changing adjustments
.chg.adjust = function(old, new, reason) {
message("Note: adjust = \"", old, "\" was changed to \"", new,
"\"\nbecause \"", old, "\" is ", reason)
new
}
# utility to compute an adjusted p value
# tail is -1, 0, 1 for left, two-sided, or right
# Note fam.info is c(famsize, ncontr, estTypeIndex)
# lsmeans >= 2.14: added corrmat arg, dunnettx & mvt adjustments
# emmeans > 1.3.4: we have sch.rank of same length as by.rows
# NOTE: corrmat is NULL unless adjust == "mvt"
.adj.p.value = function(t, DF, adjust, fam.info, tail, corrmat, by.rows, sch.rank) {
fam.size = fam.info[1]
n.contr = fam.info[2]
et = as.numeric(fam.info[3])
# Force no adjustment when just one test, unless we're using scheffe
if ((n.contr == 1) && !(adjust %.pin% "scheffe")) ##(pmatch(adjust, "scheffe", 0) != 1))
adjust = "none"
# check if we need to adjust adaptively for only estimable cases
adaptive = (adjust != "none") && any(is.na(t))
# do a pmatch of the adjust method
adj.meths = c("sidak", "tukey", "scheffe", "dunnettx", "mvt", p.adjust.methods)
k = pmatch(adjust, adj.meths)
if(is.na(k))
stop("Adjust method '", adjust, "' is not recognized or not valid")
adjust = adj.meths[k]
if ((tail != 0) && (adjust %in% c("tukey", "scheffe", "dunnettx"))) # meth not approp for 1-sided
adjust = .chg.adjust(adjust, "sidak", "not appropriate for one-sided inferences")
if ((et != 3) && adjust == "tukey") # not pairwise
adjust = .chg.adjust(adjust, "sidak", "only appropriate for one set of pairwise comparisons")
ragged.by = (is.character(fam.size) || adjust %in% c("mvt", setdiff(p.adjust.methods, "none"))) # flag that we need to do groups separately
ragged.by = ragged.by | adaptive # keep it ragged if we need adaptive adj
if (!ragged.by)
by.rows = list(seq_along(t)) # not ragged, we can do all as one by group
# asymptotic results when df is NA
DF[is.na(DF)] = Inf
# if estType is "prediction", use #contrasts + 1 as family size
# (produces right Scheffe CV; Tukey ones are a bit strange)
# deprecated - used to try to keep track of scheffe.adj = ifelse(et == 1, 0, - 1)
if (tail == 0)
p.unadj = 2*pt(abs(t), DF, lower.tail=FALSE)
else
p.unadj = pt(t, DF, lower.tail = (tail<0))
pval = numeric(length(t))
for(jj in seq_along(by.rows)) {
rows = by.rows[[jj]]
unadj.p = p.unadj[rows]
abst = abs(t[rows])
df = DF[rows]
if (ragged.by) {
n.contr = max(sum(!is.na(unadj.p)), 1)
fam.size = (1 + sqrt(1 + 8*n.contr)) / 2 # tukey family size - e.g., 6 pairs -> family of 4
}
if (adjust %in% p.adjust.methods) # simple now because we forced ragged.rows
pval[rows] = p.adjust(unadj.p, adjust)
else pval[rows] = switch(adjust,
sidak = 1 - (1 - unadj.p)^n.contr,
# NOTE: tukey, scheffe, dunnettx all assumed 2-sided!
tukey = ptukey(sqrt(2)*abst, fam.size, zapsmall(df), lower.tail=FALSE),
scheffe = pf(t[rows]^2 / (sch.rank[jj]), sch.rank[jj],
df, lower.tail = FALSE),
dunnettx = 1 - .pdunnx(abst, n.contr, df),
mvt = 1 - .my.pmvt(t[rows], df, corrmat[rows,rows,drop=FALSE], -tail) # tricky - reverse the tail because we're subtracting from 1
)
}
chk.adj = match(adjust, c("none", "tukey", "scheffe"), nomatch = 99)
if (ragged.by) {
nc = max(sapply(by.rows, length))
fs = (1 + sqrt(1 + 8*nc)) / 2
scheffe.dim = "(varies)"
}
else {
nc = n.contr
fs = fam.size
scheffe.dim = sch.rank[1]
}
do.msg = (chk.adj > 1) && !((fs < 3) && (chk.adj < 10)) ### WAS (chk.adj > 1) && (nc > 1) && !((fs < 3) && (chk.adj < 10))
if (do.msg) {
# xtra = if(chk.adj < 10) paste("a family of", fam.size, "tests")
# else paste(n.contr, "tests")
xtra = if (!adaptive || (length(by.rows) == 1)) switch(adjust,
tukey = paste("for comparing a family of", fam.size, "estimates"),
scheffe = paste("with rank", scheffe.dim),
paste("for", n.contr, "tests")
)
else switch(adjust,
tukey = "for varying family sizes",
scheffe = "with varying rank",
"for varying numbers of tests"
)
mesg = paste("P value adjustment:", adjust, "method", xtra)
}
else mesg = NULL
list(pval = pval, mesg = mesg, adjust = adjust)
}
# Code needed for an adjusted critical value
# returns a list similar to .adj.p.value
# lsmeans >= 2.14: Added tail & corrmat args, dunnettx & mvt adjustments
# emmeans > 1.3.4: Added sch.rank
# NOTE: corrmat is NULL unless adjust == "mvt"
.adj.critval = function(est, level, DF, adjust, fam.info, tail, corrmat, by.rows, sch.rank) {
mesg = NULL
fam.size = fam.info[1]
n.contr = fam.info[2]
et = as.numeric(fam.info[3])
ragged.by = (is.character(fam.size)) # flag that we need to do groups separately
if (!ragged.by && (n.contr == 1) && !(adjust %.pin% "scheffe")) ##(pmatch(adjust, "scheffe", 0) != 1)) # Force no adjustment when just one interval, unless using Scheffe
adjust = "none"
adj.meths = c("sidak", "tukey", "scheffe", "dunnettx", "mvt", "bonferroni", "none")
k = pmatch(adjust, adj.meths)
if(is.na(k))
k = which(adj.meths == "bonferroni")
adjust = adj.meths[k]
    # do we need to adapt for different by-group sizes?
adaptive = any(is.na(est))
ragged.by = ragged.by || adaptive || (adjust == "mvt")
if (!ragged.by && (length(unique(DF)) == 1))
by.rows = list(seq_along(DF)) # not ragged, we can do all as one by group
if ((tail != 0) && (adjust %in% c("tukey", "scheffe", "dunnettx"))) # meth not approp for 1-sided
adjust = .chg.adjust(adjust, "sidak", "not appropriate for one-sided inferences")
if ((et != 3) && adjust == "tukey") # not pairwise
adjust = .chg.adjust(adjust, "sidak", "only appropriate for one set of pairwise comparisons")
# asymptotic results when df is NA
DF[is.na(DF)] = Inf
#### No longer used scheffe.adj = ifelse(et == 1, 0, - 1)
chk.adj = match(adjust, c("none", "tukey", "scheffe"), nomatch = 99)
if (ragged.by) {
nc = max(sapply(by.rows, length))
fs = (1 + sqrt(1 + 8*nc)) / 2
scheffe.dim = "(varies)"
}
else {
nc = n.contr
fs = fam.size
scheffe.dim = sch.rank[1]
}
do.msg = (chk.adj > 1) && ### (nc > 1) &&
!((fs < 3) && (chk.adj < 10))
adiv = ifelse(tail == 0, 2, 1) # divisor for alpha where needed
###cvs = lapply(by.rows, function(rows) {
cv = numeric(sum(sapply(by.rows, length)))
for (jj in seq_along(by.rows)) { ####(rows in by.rows) {
rows = by.rows[[jj]]
df = DF[rows]
if (ragged.by) {
n.contr = max(1, sum(!is.na(est[rows])))
fam.size = (1 + sqrt(1 + 8*n.contr)) / 2 # tukey family size - e.g., 6 pairs -> family of 4
}
cv[rows] = switch(adjust,
none = -qt((1-level)/adiv, df),
sidak = -qt((1 - level^(1/n.contr))/adiv, df),
bonferroni = -qt((1-level)/n.contr/adiv, df),
tukey = qtukey(level, fam.size, df) / sqrt(2),
scheffe = sqrt((sch.rank[jj]) * qf(level, sch.rank[jj], df)),
dunnettx = .qdunnx(level, n.contr, df),
mvt = .my.qmvt(level, df, corrmat[rows, rows, drop = FALSE], tail)
)
}
if (do.msg) {
# xtra = if(chk.adj < 10) paste("a family of", fam.size, "estimates")
# else paste(n.contr, "estimates")
xtra = if(!adaptive || (length(by.rows) == 1))
switch(adjust,
tukey = paste("for comparing a family of", fam.size, "estimates"),
scheffe = paste("with rank", scheffe.dim),
paste("for", n.contr, "estimates"))
else switch(adjust,
tukey = "for varying family sizes",
scheffe = "with varying rank",
"for varying numbers of estimates")
mesg = paste("Conf-level adjustment:", adjust, "method", xtra)
}
list(cv = cv, mesg = mesg, adjust = adjust)
}
### My own functions to ease access to mvt functions
### These take one argument at a time and expand each (lower, upper) or p to a k-vector
### Use tailnum = -1, 0, or 1
### NOTE: corrmat needs "by.rows" attribute to tell which rows
### belong to which submatrix.
.my.pmvt = function(x, df, corrmat, tailnum) {
lower = switch(tailnum + 2, -Inf, -abs(x), x)
upper = switch(tailnum + 2, x, abs(x), Inf)
by.rows = attr(corrmat, "by.rows")
if (is.null(by.rows))
by.rows = list(seq_len(length(x)))
by.sel = numeric(length(x))
for (i in seq_along(by.rows))
by.sel[by.rows[[i]]] = i
df = .fix.df(df)
apply(cbind(lower, upper, df, by.sel), 1, function(z) {
idx = by.rows[[z[4]]]
k = length(idx)
pval = try(mvtnorm::pmvt(rep(z[1], k), rep(z[2], k),
df = as.integer(z[3]), corr = corrmat[idx, idx]),
silent = TRUE)
if (inherits(pval, "try-error")) NA
else pval
})
}
# Vectorized for df but needs p to be scalar
.my.qmvt = function(p, df, corrmat, tailnum) {
tail = c("lower.tail", "both.tails", "lower.tail")[tailnum + 2]
df = .fix.df(df)
by.rows = attr(corrmat, "by.rows")
if (is.null(by.rows))
by.rows = list(seq_len(length(df)))
by.sel = numeric(length(df))
for (i in seq_along(by.rows))
by.sel[by.rows[[i]]] = i
# If df all equal, compute just once for each by group
eq.df = (diff(range(df)) == 0)
i1 = if (eq.df) sapply(by.rows, function(r) r[1])
else seq_along(df)
result = apply(cbind(p, df[i1], by.sel[i1]), 1, function(z) {
idx = by.rows[[z[3]]]
cv = try(mvtnorm::qmvt(z[1], tail = tail,
df = as.integer(z[2]), corr = corrmat[idx, idx])$quantile,
silent = TRUE)
if (inherits(cv, "try-error")) NA
else cv
})
if (eq.df) {
res = result
result = numeric(length(df))
for(i in seq_along(by.rows))
result[by.rows[[i]]] = res[i]
}
result
}
# utility to get appropriate integer df
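# (mvtnorm interprets df = 0 as the multivariate normal case, so infinite or
#  very large df are mapped to 0 here)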
.fix.df = function(df) {
sapply(df, function(d) {
if (d > 0) d = max(1, d)
if (is.infinite(d) || (d > 9999)) d = 0
floor(d + .25) # tends to round down
})
}
### My approximate dunnett distribution
### - a mix of the Tukey cdf and Sidak-corrected t
.pdunnx = function(x, k, df, twt = (k - 1)/k) {
tukey = ptukey(sqrt(2)*x, (1 + sqrt(1 + 8*k))/2, df)
sidak = (pf(x^2, 1, df))^k
twt*tukey + (1 - twt)*sidak
}
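# e.g., .pdunnx(2, k = 3, df = 30) gives the Tukey cdf weight (k-1)/k = 2/3
# and the Sidak-corrected t cdf weight 1/k = 1/3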
# Uses linear interpolation to get quantile
.qdunnx = function(p, k, df, ...) {
if (k < 1.005)
return(qt(1 - .5*(1 - p), df))
xtuk = qtukey(p, (1 + sqrt(1 + 8*k))/2, df) / sqrt(2)
xsid = sqrt(qf(p^(1/k), 1, df))
fcn = function(x, d)
.pdunnx(x, k, d, ...) - p
apply(cbind(xtuk, xsid, df), 1, function(r) {
if (abs(diff(r[1:2])) < .0005)
return (r[1])
x = try(uniroot(fcn, r[1:2], tol = .0005, d = r[3]), silent = TRUE)
if (inherits(x, "try-error")) {
warning("Root-finding failed; using qtukey approximation for Dunnett quantile")
return(xtuk)
}
else
x$root
})
}
### Support for different prediction types ###
# Valid values for type arg or predict.type option
# "link", "lp", "linear" are all legal but equivalent
# "mu" and "response" are usually equivalent -- but in a GLM with a response transformation,
# "mu" (or "unlink") would back-transform the link only, "response" would do both
.valid.types = c("link","lp","linear", "response", "mu", "unlink")
# get "predict.type" option from misc, and make sure it's legal
.get.predict.type = function(misc) {
type = misc$predict.type
if (is.null(type))
.valid.types[1]
else
.validate.type(type)
}
# check a "type" arg to make it legal
# NOTE: if not matched, returns "link", i.e., no back-transformation will be done
.validate.type = function (type) {
type = .valid.types[pmatch(type, .valid.types, 1)]
if (length(type) > 1) {
type = type[1]
warning("You specified more than one prediction type. Only type = \"", type, "\" was used")
}
type
}
# left- or right-justify column labels for m depending on "L" or "R" in just
.just.labs = function(m, just) {
nm = dimnames(m)[[2]]
for (j in seq_len(length(nm))) {
if(just[nm[j]] == "L")
nm[j] = format(nm[j], width = nchar(m[1,j]), justify = "left")
}
dimnames(m) = list(rep("", nrow(m)), nm)
m
}
# Format a data.frame produced by summary.emmGrid
#' @method print summary_emm
#' @export
print.summary_emm = function(x, ..., digits=NULL, quote=FALSE, right=TRUE, export = FALSE) {
estn = attr(x, "estName")
if (is.null(estn)) # uh-oh, somebody messed it up - so Hail Mary
return(invisible(print(data.frame(x))))
if(!is.null(attr(x, "digits")))
digits = attr(x, "digits")
test.stat.names = c("t.ratio", "z.ratio", "F.ratio", "T.square") # format these w 3 dec places
x.save = x
if(export) x.save = list()
for(i in which(sapply(x, is.matrix)))
x[[i]] = NULL # hide matrices
for (i in seq_along(names(x))) # zapsmall the numeric columns
if (is.numeric(x[[i]]))
x[[i]] = zapsmall(x[[i]])
just = sapply(x, function(col) if(is.numeric(col)) "R" else "L") ### was later
if (!is.null(x$df)) x$df = round(x$df, 2)
for (nm in test.stat.names)
if(!is.null(x[[nm]]))
x[[nm]] = format(round(x[[nm]], 3), nsmall = 3, sci = FALSE)
if (!is.null(x$p.value)) {
fp = x$p.value = format(round(x$p.value, 4), nsmall = 4, sci = FALSE)
x$p.value[fp=="0.0000"] = "<.0001"
}
est = x[[estn]]
if (get_emm_option("opt.digits") && is.null(digits)) {
if (!is.null(x[["SE"]]))
tmp = est + x[["SE"]] * cbind(rep(-2, nrow(x)), 0, 2)
else if (!is.null(x[["lower.HPD"]]))
tmp = x[, c("lower.HPD", estn, "upper.HPD"), drop = FALSE]
else tmp = NULL
if (!is.null(tmp))
digits = max(apply(tmp, 1, .opt.dig))
}
if (any(is.na(est))) {
x[[estn]] = format(est, digits=digits)
x[[estn]][is.na(est)] = "nonEst"
}
xc = as.matrix(format.data.frame(x, digits=digits, na.encode=TRUE))
m = apply(rbind(just, names(x), xc), 2, function(x) {
w = max(sapply(x, nchar))
if (x[1] == "R") format(x[-seq_len(2)], width = w, justify="right")
else format(x[-seq_len(2)], width = w, justify="left")
})
if(!is.matrix(m)) m = t(as.matrix(m))
by.vars = attr(x, "by.vars")
if (is.null(by.vars)) {
m = .just.labs(m, just)
if (export)
x.save$summary = m
else {
print(m, quote=FALSE, right=TRUE)
cat("\n")
}
}
else { # separate listing for each by variable
m = .just.labs(m[, setdiff(names(x), by.vars), drop = FALSE], just)
if(export)
x.save$summary = list()
pargs = unname(as.list(x[, by.vars, drop=FALSE]))
# We want labels to come out in order though...
ord = do.call(order, rev(pargs))
m = m[ord, , drop = FALSE]
xc = xc[ord, , drop = FALSE]
pargs = unname(as.list(x[ord, by.vars, drop=FALSE]))
pargs$sep = ", "
lbls = do.call(paste, pargs)
for (lb in unique(lbls)) {
rows = which(lbls==lb)
levs = paste(by.vars, "=", xc[rows[1], by.vars])
levs = paste(levs, collapse=", ")
if(export)
x.save$summary[[levs]] = m[rows, , drop = FALSE]
else {
cat(paste(levs, ":\n", sep=""))
print(m[rows, , drop=FALSE], ..., quote=quote, right=right)
cat("\n")
}
}
}
msg = unique(attr(x, "mesg"))
if (!is.null(msg) && !export)
for (j in seq_len(length(msg))) cat(paste(msg[j], "\n"))
else (x.save$annotations = msg)
invisible(x.save)
}
#' @method update summary_emm
#' @export
#' @rdname update.emmGrid
#' @order 9
#' @param by.vars,mesg Attributes that can be altered in \code{update.summary_emm}
#' @section Method for \code{summary_emm} objects:
#' This method exists so that we can change the way a summary is displayed,
#' by changing the by variables or the annotations.
#'
#' @examples
#' ### Compactify results with a by variable
#' update(joint_tests(pigs.rg, by = "source"), by = NULL)
update.summary_emm = function(object, by.vars, mesg, ...) {
args = match.call()[-1]
args$object = NULL
for (nm in names(args))
attr(object, nm) = args[[nm]]
object
}
# determine optimum digits to display based on a conf or cred interval
# (but always at least 3)
.opt.dig = function(x) {
z = range(x) / max(abs(x))
dz = zapsmall(c(diff(z), z), digits = 8)[1]
zz = round(1.51 - log(dz, 10)) # approx 1 - log(diff(z/3))
zz[is.infinite(zz)] = 3 # we get z = Inf when SE is 0
max(zz, 3, na.rm = TRUE)
}
# Utility -- When misc$display present, reconcile which elements to use.
# Needed if we messed with previous nesting
.reconcile.elts = function(object) {
display = object@misc$display
nrows = nrow(object@grid)
use.elts = rep(TRUE, nrows)
if (!is.null(display) && (length(display) == nrows))
use.elts = display
use.elts
}