# generics::tidy as tibble -----------------------------------------------------
#' @title Model-fit summary or ANOVA
#'
#' @description \code{stat_fit_tb} fits a model and returns a "tidy" version of
#' the model's summary or ANOVA table, using '\code{tidy()} methods from
#' packages 'broom', 'broom.mixed', or other 'broom' extensions. The
#' annotation is added to the plots in tabular form.
#'
#' @param mapping The aesthetic mapping, usually constructed with
#' \code{\link[ggplot2]{aes}}. Only needs to be
#' set at the layer level if you are overriding the plot defaults.
#' @param data A layer specific dataset, only needed if you want to override the
#' plot defaults.
#' @param geom The geometric object to use display the data
#' @param position The position adjustment to use for overlapping points on this
#' layer
#' @param show.legend logical. Should this layer be included in the legends?
#' \code{NA}, the default, includes if any aesthetics are mapped. \code{FALSE}
#' never includes, and \code{TRUE} always includes.
#' @param inherit.aes If \code{FALSE}, overrides the default aesthetics, rather
#' than combining with them. This is most useful for helper functions that
#' define both data and aesthetics and shouldn't inherit behaviour from the
#' default plot specification, e.g. \code{\link[ggplot2]{borders}}.
#' @param ... other arguments passed on to \code{\link[ggplot2]{layer}}. This
#' can include aesthetics whose values you want to set, not map. See
#' \code{\link[ggplot2]{layer}} for more details.
#' @param na.rm a logical indicating whether NA values should be stripped before
#' the computation proceeds.
#' @param method character.
#' @param method.args,tidy.args lists of arguments to pass to \code{method} and
#' to \code{tidy()}.
#' @param n.min integer Minimum number of distinct values in the explanatory
#' variable (on the rhs of formula) for fitting to the attempted.
#' @param tb.type character One of "fit.summary", "fit.anova" or "fit.coefs".
#' @param digits integer indicating the number of significant digits to be used
#' for all numeric values in the table.
#' @param p.digits integer indicating the number of decimal places to round
#' p-values to, with those rounded to zero displayed as the next larger
#' possible value preceded by "<". If \code{p.digits} is outside the range
#' 1..22 no rounding takes place.
#' @param tb.vars,tb.params character or numeric vectors, optionally named, used
#' to select and/or rename the columns or the parameters in the table
#' returned.
#' @param label.x,label.y \code{numeric} Coordinates in data units or with range
#' 0..1, expressed in "normalized parent coordinates" or as character strings
#' depending on the geometry used. If too short they will be recycled. They set
#' the \code{x} and \code{y} coordinates at the \code{after_stat} stage.
#' @param table.theme NULL, list or function A 'gridExtra' \code{ttheme}
#' definition, or a constructor for a \code{ttheme} or NULL for default.
#' @param table.rownames,table.colnames logical flag to enable or disabling
#' printing of row names and column names.
#' @param table.hjust numeric Horizontal justification for the core and column
#' headings of the table.
#' @param parse If TRUE, the labels will be parsed into expressions and
#' displayed as described in \code{?plotmath}.
#'
#' @details \code{stat_fit_tb()} Applies a model fitting function per panel,
#' using the grouping factors from aesthetic mappings in the fitted model.
#' This is suitable, for example for analysis of variance used to test for
#' differences among groups.
#'
#' The argument to \code{method} can be any fit method for which a suitable
#' \code{tidy()} method is available, including non-linear regression. Fit
#' methods retain their default arguments unless overridden.
#'
#' A ggplot statistic receives as data a data frame that is not the one passed
#' as argument by the user, but instead a data frame with the variables mapped
#' to aesthetics. In other words, it respects the grammar of graphics and
#' consequently within arguments passed through \code{method.args} names of
#' aesthetics like \eqn{x} and \eqn{y} should be used instead of the original
#' variable names. The plot's default \code{data} is used by default, which
#' helps ensure that the model is fitted to the same data as plotted in other
#' layers.
#'
#' @section Computed variables: The output of \code{tidy()} is returned as a
#' single "cell" in a tibble (i.e., a tibble nested within a tibble). The
#' returned \code{data} object contains a single tibble, containing the result
#' from a single model fit to all data in a panel. If grouping is present, it
#' is ignored in the sense of returning a single table, but the grouping
#' aesthetic can be a term in the fitted model.
#'
#' @return A tibble with columns named \code{fm.tb} (a tibble returned by
#' \code{tidy()} with possibly renamed and subset columns and rows, within a
#' list), \code{fm.tb.type} (copy of argument passed to \code{tb.type}),
#' \code{fm.class} (the class of the fitted model object), \code{fm.method}
#' (the fit function's name), \code{fm.call} (the call if available), \code{x}
#' and \code{y}.
#'
#' To explore the values returned by this statistic, which vary depending on
#' the model fitting function and model formula we suggest the use of
#' \code{\link[gginnards]{geom_debug}}.
#'
#' @seealso \code{\link[broom]{broom}}, \code{broom.mixed}, and
#' \code{\link[broom]{tidy}} for details on how the tidying of the result of
#' model fits is done. See \code{\link[ggpp]{geom_table}} for details on how
#' inset tables respond to mapped aesthetics and table themes. For details on
#' predefined table themes see \code{\link[ggpp]{ttheme_gtdefault}}.
#'
#' @family ggplot statistics for model fits
#'
#' @export
#'
#' @examples
#' # Package 'broom' needs to be installed to run these examples.
#' # We check availability before running them to avoid errors.
#' broom.installed <- requireNamespace("broom", quietly = TRUE)
#'
#' if (broom.installed)
#' library(broom)
#'
#' # data for examples
#' x <- c(44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1)
#' covariate <- sqrt(x) + rnorm(9)
#' group <- factor(c(rep("A", 4), rep("B", 5)))
#' my.df <- data.frame(x, group, covariate)
#'
#' gginnards.installed <- requireNamespace("gginnards", quietly = TRUE)
#'
#' if (gginnards.installed)
#' library(gginnards)
#'
#' ## covariate is a numeric or continuous variable
#' # Linear regression fit summary, all defaults
#' if (broom.installed)
#' ggplot(my.df, aes(covariate, x)) +
#' geom_point() +
#' stat_fit_tb() +
#' expand_limits(y = 70)
#'
#' # we can use geom_debug() and str() to inspect the returned value
#' # and discover the variables that can be mapped to aesthetics with
#' # after_stat()
#' if (broom.installed && gginnards.installed)
#' ggplot(my.df, aes(covariate, x)) +
#' geom_point() +
#' stat_fit_tb(geom = "debug", summary.fun = str) +
#' expand_limits(y = 70)
#'
#' # Linear regression fit summary, with default formatting
#' if (broom.installed)
#' ggplot(my.df, aes(covariate, x)) +
#' geom_point() +
#' stat_fit_tb(tb.type = "fit.summary") +
#' expand_limits(y = 70)
#'
#' # Linear regression fit summary, with manual table formatting
#' if (broom.installed)
#' ggplot(my.df, aes(covariate, x)) +
#' geom_point() +
#' stat_fit_tb(digits = 2,
#' p.digits = 4,
#' tb.params = c("intercept" = 1, "covariate" = 2),
#' tb.vars = c(Term = 1, Estimate = 2,
#' "italic(s)" = 3, "italic(t)" = 4,
#' "italic(P)" = 5),
#' parse = TRUE) +
#' expand_limits(y = 70)
#'
#' # Linear regression ANOVA table, with default formatting
#' if (broom.installed)
#' ggplot(my.df, aes(covariate, x)) +
#' geom_point() +
#' stat_fit_tb(tb.type = "fit.anova") +
#' expand_limits(y = 70)
#'
#' # Linear regression ANOVA table, with manual table formatting
#' if (broom.installed)
#' ggplot(my.df, aes(covariate, x)) +
#' geom_point() +
#' stat_fit_tb(tb.type = "fit.anova",
#' tb.params = c("Covariate" = 1, 2),
#' tb.vars = c(Effect = 1, d.f. = 2,
#' "M.S." = 4, "italic(F)" = 5,
#' "italic(P)" = 6),
#' parse = TRUE) +
#' expand_limits(y = 67)
#'
#' # Linear regression fit coeficients, with default formatting
#' if (broom.installed)
#' ggplot(my.df, aes(covariate, x)) +
#' geom_point() +
#' stat_fit_tb(tb.type = "fit.coefs") +
#' expand_limits(y = 67)
#'
#' # Linear regression fit coeficients, with manual table formatting
#' if (broom.installed)
#' ggplot(my.df, aes(covariate, x)) +
#' geom_point() +
#' stat_fit_tb(tb.type = "fit.coefs",
#' tb.params = c(a = 1, b = 2),
#' tb.vars = c(Term = 1, Estimate = 2)) +
#' expand_limits(y = 67)
#'
#' ## x is also a numeric or continuous variable
#' # Polynomial regression, with default formatting
#' if (broom.installed)
#' ggplot(my.df, aes(covariate, x)) +
#' geom_point() +
#' stat_fit_tb(method.args = list(formula = y ~ poly(x, 2))) +
#' expand_limits(y = 70)
#'
#' # Polynomial regression, with manual table formatting
#' if (broom.installed)
#' ggplot(my.df, aes(covariate, x)) +
#' geom_point() +
#' stat_fit_tb(method.args = list(formula = y ~ poly(x, 2)),
#' tb.params = c("x^0" = 1, "x^1" = 2, "x^2" = 3),
#' tb.vars = c("Term" = 1, "Estimate" = 2, "S.E." = 3,
#' "italic(t)" = 4, "italic(P)" = 5),
#' parse = TRUE) +
#' expand_limits(y = 70)
#'
#' ## group is a factor or discrete variable
#' # ANOVA summary, with default formatting
#' if (broom.installed)
#' ggplot(my.df, aes(group, x)) +
#' geom_point() +
#' stat_fit_tb() +
#' expand_limits(y = 70)
#'
#' # ANOVA table, with default formatting
#' if (broom.installed)
#' ggplot(my.df, aes(group, x)) +
#' geom_point() +
#' stat_fit_tb(tb.type = "fit.anova") +
#' expand_limits(y = 70)
#'
#' # ANOVA table, with manual table formatting
#' if (broom.installed)
#' ggplot(my.df, aes(group, x)) +
#' geom_point() +
#' stat_fit_tb(tb.type = "fit.anova",
#' tb.vars = c(Effect = "term", "df", "italic(F)" = "statistic",
#' "italic(P)" = "p.value"),
#' tb.params = c(Group = 1, Error = 2),
#' parse = TRUE)
#'
#' # ANOVA table, with manual table formatting
#' # using column names with partial matching
#' if (broom.installed)
#' ggplot(my.df, aes(group, x)) +
#' geom_point() +
#' stat_fit_tb(tb.type = "fit.anova",
#' tb.vars = c(Effect = "term", "df", "italic(F)" = "stat",
#' "italic(P)" = "p"),
#' tb.params = c(Group = "x", Error = "Resid"),
#' parse = TRUE)
#'
#' # ANOVA summary, with default formatting
#' if (broom.installed)
#' ggplot(my.df, aes(group, x)) +
#' geom_point() +
#' stat_fit_tb() +
#' expand_limits(y = 70)
#'
#' ## covariate is a numeric variable and group is a factor
#' # ANCOVA (covariate not plotted) ANOVA table, with default formatting
#' if (broom.installed)
#' ggplot(my.df, aes(group, x, z = covariate)) +
#' geom_point() +
#' stat_fit_tb(tb.type = "fit.anova",
#' method.args = list(formula = y ~ x + z))
#'
#' # ANCOVA (covariate not plotted) ANOVA table, with manual table formatting
#' if (broom.installed)
#' ggplot(my.df, aes(group, x, z = covariate)) +
#' geom_point() +
#' stat_fit_tb(tb.type = "fit.anova",
#' method.args = list(formula = y ~ x + z),
#' tb.vars = c(Effect = 1, d.f. = 2,
#' "M.S." = 4, "italic(F)" = 5,
#' "italic(P)" = 6),
#' tb.params = c(Group = 1,
#' Covariate = 2,
#' Error = 3),
#' parse = TRUE)
#'
#' ## group is a factor or discrete variable
#' # t-test, minimal output, with manual table formatting
#' if (broom.installed)
#' ggplot(my.df, aes(group, x)) +
#' geom_point() +
#' stat_fit_tb(method = "t.test",
#' tb.vars = c("italic(t)" = "statistic", "italic(P)" = "p.value"),
#' parse = TRUE)
#'
#' # t-test, more detailed output, with manual table formatting
#' if (broom.installed)
#' ggplot(my.df, aes(group, x)) +
#' geom_point() +
#' stat_fit_tb(method = "t.test",
#' tb.vars = c("\"Delta \"*italic(x)" = "estimate",
#' "CI low" = "conf.low", "CI high" = "conf.high",
#' "italic(t)" = "statistic", "italic(P)" = "p.value"),
#' parse = TRUE) +
#' expand_limits(y = 67)
#'
#' # t-test (equal variances assumed), minimal output, with manual table formatting
#' if (broom.installed)
#' ggplot(my.df, aes(group, x)) +
#' geom_point() +
#' stat_fit_tb(method = "t.test",
#' method.args = list(formula = y ~ x, var.equal = TRUE),
#' tb.vars = c("italic(t)" = "statistic", "italic(P)" = "p.value"),
#' parse = TRUE)
#'
#' ## covariate is a numeric or continuous variable
#' # Linear regression using a table theme and non-default position
#' if (broom.installed)
#' ggplot(my.df, aes(covariate, x)) +
#' geom_point() +
#' stat_fit_tb(table.theme = ttheme_gtlight,
#' npcx = "left", npcy = "bottom") +
#' expand_limits(y = 35)
#'
stat_fit_tb <- function(mapping = NULL,
data = NULL,
geom = "table_npc",
position = "identity",
...,
method = "lm",
method.args = list(formula = y ~ x),
n.min = 2L,
tidy.args = list(),
tb.type = "fit.summary",
tb.vars = NULL,
tb.params = NULL,
digits = 3,
p.digits = digits,
label.x = "center",
label.y = "top",
table.theme = NULL,
table.rownames = FALSE,
table.colnames = TRUE,
table.hjust = 1,
parse = FALSE,
na.rm = FALSE,
show.legend = FALSE,
inherit.aes = TRUE) {
ggplot2::layer(
stat = StatFitTb, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params =
rlang::list2(method = method,
method.args = method.args,
n.min = n.min,
tidy.args = tidy.args,
tb.type = tb.type,
tb.vars = tb.vars,
tb.params = tb.params,
digits = digits,
p.digits = p.digits,
label.x = label.x,
label.y = label.y,
npc.used = grepl("_npc", geom),
table.theme = table.theme,
table.rownames = table.rownames,
table.colnames = table.colnames,
table.hjust = table.hjust,
parse = parse,
na.rm = na.rm,
...)
)
}
# Defined here to avoid a note in check --as-cran as the imports from 'broom'
# are not seen when the function is defined in-line in the ggproto object.
#' @rdname ggpmisc-ggproto
#'
#' @format NULL
#' @usage NULL
#'
fit_tb_compute_panel_fun <- function(data,
scales,
method = "lm",
method.args = list(formula = y ~ x),
n.min = 2L,
tidy.args = list(),
tb.type = "fit.summary",
tb.vars = NULL,
tb.params = NULL,
digits = 3,
p.digits = digits,
npc.used = TRUE,
label.x = "center",
label.y = "top") {
rlang::check_installed("broom", reason = "to use `stat_fit_tb()`")
force(data)
if (length(unique(data$x)) < n.min) {
# Not enough data to perform fit
return(data.frame())
}
# support setting of table position per panel
panel.idx <- as.integer(as.character(data$PANEL[1]))
if (length(label.x) >= panel.idx) {
label.x <- label.x[panel.idx]
} else if (length(label.x) > 0) {
label.x <- label.x[1]
}
if (length(label.y) >= panel.idx) {
label.y <- label.y[panel.idx]
} else if (length(label.y) > 0) {
label.y <- label.y[1]
}
if (is.character(method)) {
method.name <- method
method <- match.fun(method)
} else {
method.name <- deparse(substitute(method))
}
if ("data" %in% names(method.args)) {
message("External 'data' passed can be inconsistent with plot!\n",
"These data must be available at the time of printing!!!")
} else if (any(grepl("formula|fixed|random|model", names(method.args)))) {
# method.args <- c(method.args, list(data = quote(data))) works in most cases and avoids copying data
method.args <- c(method.args, list(data = data)) # cor.test() needs the actual data
} else {
if (method.name == "cor.test" ) {
warning("Only the 'formula' interface of methods is supported. No formula found, using '~ x + y'")
selector <- setdiff(names(method.args), c("x", "y"))
method.args <- c(method.args[selector], list(formula = ~ x + y, data = data)) # cor.test() needs the actual data
} else {
warning("Only the 'formula' interface of methods is supported. No formula found, using 'y ~ x' default")
method.args <- c(method.args, list(formula = y ~ x, data = data)) # cor.test() needs the actual data
}
}
fm <- do.call(method, method.args)
fm.class <- class(fm) # keep track of fitted model class
if (tolower(tb.type) %in% c("fit.anova", "anova")) {
tidy.args <- c(x = quote(stats::anova(fm)), tidy.args)
fm.tb <- do.call(generics::tidy, tidy.args)
} else if (tolower(tb.type) %in% c("fit.summary", "summary")) {
tidy.args <- c(x = quote(fm), tidy.args)
fm.tb <- do.call(generics::tidy, tidy.args)
} else if (tolower(tb.type) %in% c("fit.coefs", "coefs")) {
tidy.args <- c(x = quote(fm), tidy.args)
fm.tb <- do.call(generics::tidy, tidy.args)[c("term", "estimate")]
}
# reduce number of significant digits of all numeric columns
num.cols <- sapply(fm.tb, is.numeric)
fm.tb[num.cols] <- signif(fm.tb[num.cols], digits = digits)
# treat p.value as a special case
if ("p.value" %in% colnames(fm.tb) && p.digits > 0 && p.digits <= 22) {
fm.tb[["p.value"]] <- round(fm.tb[["p.value"]], digits = p.digits)
limit.text <- paste("<", format(1 * 10^-p.digits, nsmall = p.digits))
fm.tb[["p.value"]] <- ifelse(fm.tb[["p.value"]] > 0,
format(fm.tb[["p.value"]], nsmall = p.digits),
limit.text)
}
if (!is.null(tb.params) && !is.null(fm.tb)) {
if (is.character(tb.params)) {
idxs <- pmatch(tb.params, fm.tb[[1]])
if (length(idxs) < length(tb.params) || anyNA(idxs)) {
warning("Attempt to select nonexistent params")
idxs <- stats::na.omit(idxs)
# no renaming possible, as we do not know which name was not matched
tb.params <- unname(tb.params)
}
} else {
idxs <- unname(tb.params)
if (any(idxs > nrow(fm.tb))) {
warning("Attempt to select nonexistent params")
idxs <- idxs[idxs <= nrow(fm.tb)]
tb.params <- tb.params[idxs]
}
}
if (length(idxs) < nrow(fm.tb)) {
message("Dropping params/terms (rows) from table!")
}
if (is.character(tb.params)) {
idxs <- pmatch(tb.params, fm.tb[[1]])
} else {
idxs <- unname(tb.params)
}
if (length(idxs) < 1L) {
warning("No matching parameters(s).")
fm.tb <- NULL
} else {
fm.tb <- fm.tb[idxs, ]
if (!is.null(names(tb.params))) {
# support renaming of only some selected columns
selector <- names(tb.params) != ""
fm.tb[[1]][selector] <- names(tb.params)[selector]
}
}
}
if (!is.null(tb.vars)) {
if (is.character(tb.vars)) {
idxs <- pmatch(tb.vars, colnames(fm.tb))
if (length(idxs) < length(tb.vars) || anyNA(idxs)) {
warning("Attempt to select nonexistent columns by name")
idxs <- stats::na.omit(idxs)
# no renaming possible, as we do not know which name was not matched
tb.vars <- unname(tb.vars)
}
} else {
idxs <- unname(tb.vars)
if (any(idxs > ncol(fm.tb))) {
warning("Attempt to select nonexistent columns")
idxs <- idxs[idxs <= ncol(fm.tb)]
tb.vars <- tb.vars[idxs]
}
}
if (!(1L %in% idxs) && nrow(fm.tb > 1L)) {
# we warn only if params are two or more
message("Dropping param names from table!")
}
if (length(idxs) < 1L) {
warning("No matching column(s).")
fm.tb <- NULL
} else {
fm.tb <- fm.tb[ , idxs]
if (!is.null(names(tb.vars))) {
# support renaming of only some selected columns
selector <- names(tb.vars) != ""
colnames(fm.tb)[selector] <- names(tb.vars)[selector]
}
}
}
# enclose the tibble and the call in lists to make them acceptable as columns
formula.ls <- fail_safe_formula(fm, method.args)
z <- tibble::tibble(fm.tb = list(fm.tb),
fm.tb.type = tb.type,
fm.class = fm.class[1],
fm.method = method.name,
fm.formula = formula.ls,
fm.formula.chr = format(formula.ls))
if (npc.used) {
margin.npc <- 0.05
} else {
margin.npc <- 0
}
if (is.character(label.x)) {
label.x <- switch(label.x,
right = (1 - margin.npc),
center = 0.5,
centre = 0.5,
middle = 0.5,
left = (0 + margin.npc)
)
if (!npc.used) {
# we need to use scale limits as observations are not necessarily plotted
x.range <- scales$x$range$range
label.x <- label.x * diff(x.range) + x.range[1]
}
}
if (is.character(label.y)) {
label.y <- switch(label.y,
top = (1 - margin.npc),
center = 0.5,
centre = 0.5,
middle = 0.5,
bottom = (0 + margin.npc)
)
if (!npc.used) {
# we need to use scale limits as observations are not necessarily plotted
y.range <- scales$y$range$range
label.y <- label.y * diff(y.range) + y.range[1]
}
}
if (npc.used) {
z$npcx <- label.x
z$x <- NA_real_
z$npcy <- label.y
z$y <- NA_real_
} else {
z$x <- label.x
z$npcx <- NA_real_
z$y <- label.y
z$npcy <- NA_real_
}
z
}
#' @rdname ggpmisc-ggproto
#' @format NULL
#' @usage NULL
#' @export
StatFitTb <-
ggplot2::ggproto("StatFitTb", ggplot2::Stat,
compute_panel = fit_tb_compute_panel_fun,
dropped_aes = "weight",
default_aes =
ggplot2::aes(hjust = "inward",
vjust = "inward",
label = after_stat(fm.tb)),
required_aes = c("x", "y")
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.