#' @templateVar class lm
#' @template title_desc_tidy
#'
#' @param x An `lm` object created by [stats::lm()].
#' @template param_confint
#' @template param_exponentiate
#' @template param_unused_dots
#'
#' @evalRd return_tidy(regression = TRUE)
#'
#' @details If the linear model is an `mlm` object (multiple linear model),
#' there is an additional column `response`. See [tidy.mlm()].
#'
#' @examplesIf rlang::is_installed("ggplot2")
#'
#' library(ggplot2)
#' library(dplyr)
#'
#' mod <- lm(mpg ~ wt + qsec, data = mtcars)
#'
#' tidy(mod)
#' glance(mod)
#'
#' # coefficient plot
#' d <- tidy(mod, conf.int = TRUE)
#'
#' ggplot(d, aes(estimate, term, xmin = conf.low, xmax = conf.high, height = 0)) +
#' geom_point() +
#' geom_vline(xintercept = 0, lty = 4) +
#' geom_errorbarh()
#'
#' # aside: There are tidy() and glance() methods for lm.summary objects too.
#' # this can be useful when you want to conserve memory by converting large lm
#' # objects into their leaner summary.lm equivalents.
#' s <- summary(mod)
#' tidy(s, conf.int = TRUE)
#' glance(s)
#'
#' augment(mod)
#' augment(mod, mtcars, interval = "confidence")
#'
#' # predict on new data
#' newdata <- mtcars %>%
#' head(6) %>%
#' mutate(wt = wt + 1)
#' augment(mod, newdata = newdata)
#'
#' # ggplot2 example where we also construct 95% prediction interval
#'
#' # simpler bivariate model since we're plotting in 2D
#' mod2 <- lm(mpg ~ wt, data = mtcars)
#'
#' au <- augment(mod2, newdata = newdata, interval = "prediction")
#'
#' ggplot(au, aes(wt, mpg)) +
#' geom_point() +
#' geom_line(aes(y = .fitted)) +
#' geom_ribbon(aes(ymin = .lower, ymax = .upper), col = NA, alpha = 0.3)
#'
#' # predict on new data without outcome variable. Output does not include .resid
#' newdata <- newdata %>%
#' select(-mpg)
#'
#' augment(mod, newdata = newdata)
#'
#' au <- augment(mod, data = mtcars)
#'
#' ggplot(au, aes(.hat, .std.resid)) +
#' geom_vline(size = 2, colour = "white", xintercept = 0) +
#' geom_hline(size = 2, colour = "white", yintercept = 0) +
#' geom_point() +
#' geom_smooth(se = FALSE)
#'
#' plot(mod, which = 6)
#'
#' ggplot(au, aes(.hat, .cooksd)) +
#' geom_vline(xintercept = 0, colour = NA) +
#' geom_abline(slope = seq(0, 3, by = 0.5), colour = "white") +
#' geom_smooth(se = FALSE) +
#' geom_point()
#'
#' # column-wise models
#' a <- matrix(rnorm(20), nrow = 10)
#' b <- a + rnorm(length(a))
#' result <- lm(b ~ a)
#'
#' tidy(result)
#'
#' @aliases lm_tidiers
#' @export
#' @seealso [tidy()], [stats::summary.lm()]
#' @family lm tidiers
tidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95,
exponentiate = FALSE, ...) {
warn_on_subclass(x, "tidy")
ret <- as_tibble(summary(x)$coefficients, rownames = "term")
colnames(ret) <- c("term", "estimate", "std.error", "statistic", "p.value")
coefs <- stats::coef(x)
if (length(coefs) != nrow(ret)) {
# summary(x)$coefficients misses rank deficient rows (i.e. coefs that
# summary.lm() sets to NA), catch them here and add them back. This join is
# costly, so only do it when necessary.
coefs <- tibble::enframe(coefs, name = "term", value = "estimate")
ret <- left_join(coefs, ret, by = c("term", "estimate"))
}
if (conf.int) {
ci <- broom_confint_terms(x, level = conf.level)
ret <- dplyr::left_join(ret, ci, by = "term")
}
if (exponentiate) {
ret <- exponentiate(ret)
}
ret
}
#' @templateVar class lm
#' @template title_desc_augment
#'
#' @template augment_NAs
#'
#' @inherit tidy.lm params examples
#' @template param_data
#' @template param_newdata
#' @template param_se_fit
#' @template param_interval
#' @param conf.level The confidence level to use for the interval created if
#' `interval` is `"confidence"` or `"prediction"`. Must be strictly greater
#' than 0 and less than 1. Defaults to 0.95, which corresponds to a 95
#' percent confidence/prediction interval.
#' @template param_unused_dots
#'
#' @evalRd return_augment(
#' ".hat",
#' ".lower",
#' ".upper",
#' ".sigma",
#' ".cooksd",
#' ".se.fit",
#' ".std.resid"
#' )
#'
#' @details Some unusual `lm` objects, such as `rlm` from MASS, may omit
#' `.cooksd` and `.std.resid`. `gam` from mgcv omits `.sigma`.
#'
#' When `newdata` is supplied, only returns `.fitted`, `.resid` and
#' `.se.fit` columns.
#'
#' @export
#' @seealso [augment()], [stats::predict.lm()]
#' @family lm tidiers
augment.lm <- function(x, data = model.frame(x), newdata = NULL,
se_fit = FALSE, interval = c("none", "confidence", "prediction"),
conf.level = 0.95, ...) {
warn_on_subclass(x, "augment")
check_ellipses("level", "augment", "lm", ...)
interval <- match.arg(interval)
df <- augment_newdata(x, data, newdata, se_fit, interval, level = conf.level)
if (is.null(newdata)) {
tryCatch(
{
infl <- influence(x, do.coef = FALSE)
df <- add_hat_sigma_cols(df, x, infl)
},
error = data_error
)
}
df
}
#' @templateVar class lm
#' @template title_desc_glance
#'
#' @inherit tidy.lm params examples
#'
#' @evalRd return_glance(
#' "r.squared",
#' "adj.r.squared",
#' "sigma",
#' "statistic",
#' "p.value",
#' df = "The degrees for freedom from the numerator of the overall
#' F-statistic. This is new in broom 0.7.0. Previously, this reported
#' the rank of the design matrix, which is one more than the numerator
#' degrees of freedom of the overall F-statistic.",
#' "logLik",
#' "AIC",
#' "BIC",
#' "deviance",
#' "df.residual",
#' "nobs"
#' )
#'
#'
#' @export
#' @seealso [glance()], [glance.summary.lm()]
#' @family lm tidiers
glance.lm <- function(x, ...) {
warn_on_subclass(x, "glance")
# check whether the model was fitted with only an intercept, in which
# case drop the fstatistic related columns
int_only <- nrow(summary(x)$coefficients) == 1 & "(Intercept)" %in% row.names(summary(x)$coefficients)
with(
summary(x),
tibble(
r.squared = r.squared,
adj.r.squared = adj.r.squared,
sigma = sigma,
statistic = if (!int_only) {
fstatistic["value"]
} else {
NA_real_
},
p.value = if (!int_only) {
stats::pf(
fstatistic["value"],
fstatistic["numdf"],
fstatistic["dendf"],
lower.tail = FALSE
)
} else {
NA_real_
},
df = if (!int_only) {
fstatistic["numdf"]
} else {
NA_real_
},
logLik = as.numeric(stats::logLik(x)),
AIC = stats::AIC(x),
BIC = stats::BIC(x),
deviance = stats::deviance(x),
df.residual = df.residual(x),
nobs = stats::nobs(x)
)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.