inst/doc/linear-regression-diagnostics.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(regressinator)
library(ggplot2)
library(broom)

## -----------------------------------------------------------------------------
nonlinear_pop <- population(
  x1 = predictor(runif, min = 1, max = 8),
  x2 = predictor(runif, min = 4, max = 12),
  y = response(0.7 + 0.8 * x1**2 + 1.2 * x2,
               family = gaussian(), error_scale = 4.0)
)

nonlinear_data <- sample_x(nonlinear_pop, n = 100) |>
  sample_y()

fit <- lm(y ~ x1 + x2, data = nonlinear_data)

## -----------------------------------------------------------------------------
augment(fit) |>
  ggplot(aes(x = .fitted, y = .resid)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(x = "Fitted value", y = "Residual")

## -----------------------------------------------------------------------------
augment_longer(fit) |>
  ggplot(aes(x = .predictor_value, y = .resid)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  facet_wrap(vars(.predictor_name), scales = "free_x") +
  labs(x = "Predictor", y = "Residual")

## ----fig.height=8-------------------------------------------------------------
model_lineup(fit, fn = augment_longer, n = 5) |>
  ggplot(aes(x = .predictor_value, y = .resid)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  facet_grid(rows = vars(.sample), cols = vars(.predictor_name),
             scales = "free_x") +
  labs(x = "Predictor", y = "Residual")

## -----------------------------------------------------------------------------
partial_residuals(fit) |>
  ggplot(aes(x = .predictor_value, y = .partial_resid)) +
  geom_point() + # partial residuals
  geom_smooth(se = FALSE) + # smoothed residuals
  geom_line(aes(x = .predictor_value, y = .predictor_effect)) + # effects
  facet_wrap(vars(.predictor_name), scales = "free") +
  labs(x = "Predictor", y = "Partial residual")

## ----fig.height=8-------------------------------------------------------------
model_lineup(fit, partial_residuals, n = 5) |>
  ggplot(aes(x = .predictor_value, y = .partial_resid)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  geom_line(aes(x = .predictor_value, y = .predictor_effect)) +
  facet_grid(rows = vars(.sample), cols = vars(.predictor_name),
             scales = "free") +
  labs(x = "Predictor", y = "Partial residual")

## -----------------------------------------------------------------------------
intercepts <- c(
  "foo" = -0.3,
  "bar" = 1.7
)

slopes <- c(
  "foo" = 1.8,
  "bar" = -0.4
)

interact_pop <- population(
  x1 = predictor(runif, min = 1, max = 8),
  x2 = predictor(rfactor, levels = c("foo", "bar")),
  y = response(by_level(x2, intercepts) + by_level(x2, slopes) * x1,
               family = gaussian(), error_scale = 4.0)
)

interact_data <- sample_x(interact_pop, n = 100) |>
  sample_y()

no_interact_fit <- lm(y ~ x1 + x2, data = interact_data)

## -----------------------------------------------------------------------------
partial_residuals(no_interact_fit) |>
  ggplot(aes(x = .predictor_value, y = .partial_resid, color = x2)) +
  geom_point() + # partial residuals
  geom_smooth(se = FALSE) + # smoothed residuals
  geom_line(aes(x = .predictor_value, y = .predictor_effect, color = NULL)) + # effects
  facet_wrap(vars(.predictor_name), scales = "free") +
  labs(x = "Predictor", y = "Partial residual")

## -----------------------------------------------------------------------------
interact_fit <- lm(y ~ x1 * x2, data = interact_data)

partial_residuals(interact_fit) |>
  ggplot(aes(x = .predictor_value, y = .partial_resid, color = x2)) +
  geom_point() + # partial residuals
  geom_smooth(se = FALSE) + # smoothed residuals
  geom_line(aes(x = .predictor_value, y = .predictor_effect, color = NULL)) + # effects
  facet_wrap(vars(.predictor_name), scales = "free") +
  labs(x = "Predictor", y = "Partial residual")

## -----------------------------------------------------------------------------
augment(fit) |>
  ggplot(aes(x = seq_along(.cooksd), y = .cooksd)) +
  geom_col() +
  labs(x = "Row index", y = "Cook's distance")

## -----------------------------------------------------------------------------
augment(fit) |>
  ggplot(aes(sample = .std.resid)) +
  geom_qq() +
  geom_qq_line() +
  labs(title = "Normal Q-Q plot of standardized residuals",
       x = "Theoretical quantiles", y = "Observed quantiles")

## ----fig.height=8-------------------------------------------------------------
model_lineup(fit) |>
  ggplot(aes(sample = .std.resid)) +
  geom_qq() +
  geom_qq_line() +
  facet_wrap(vars(.sample)) +
  labs(title = "Normal Q-Q plot of standardized residuals",
       x = "Theoretical quantiles", y = "Observed quantiles")

## -----------------------------------------------------------------------------
library(palmerpenguins)

## -----------------------------------------------------------------------------
penguin_1 <- lm(bill_length_mm ~ flipper_length_mm + species,
                data = penguins)

## -----------------------------------------------------------------------------
partial_residuals(penguin_1, flipper_length_mm) |>
  ggplot(aes(x = .predictor_value, y = .partial_resid)) +
  geom_point(aes(color = species)) +
  geom_smooth(aes(color = species), se = FALSE) +
  geom_line(aes(y = .predictor_effect)) +
  labs(x = "Flipper length (mm)", y = "Partial residual",
       color = "Species")

## -----------------------------------------------------------------------------
penguin_2 <- lm(bill_depth_mm ~ flipper_length_mm * species,
                data = penguins)

## -----------------------------------------------------------------------------
partial_residuals(penguin_2, flipper_length_mm) |>
  ggplot(aes(x = .predictor_value, y = .partial_resid)) +
  geom_point(aes(color = species)) +
  geom_smooth(aes(color = species), se = FALSE) +
  geom_line(aes(y = .predictor_effect)) +
  labs(x = "Flipper length (mm)", y = "Partial residual",
       color = "Species")

## -----------------------------------------------------------------------------
augment(penguin_2) |>
  ggplot(aes(sample = .std.resid)) +
  geom_qq() +
  geom_qq_line() +
  facet_wrap(vars(species)) +
  labs(title = "Normal Q-Q plot of standardized residuals",
       x = "Theoretical quantiles", y = "Observed quantiles")

## -----------------------------------------------------------------------------
augment(penguin_2) |>
  ggplot(aes(x = seq_along(.cooksd), y = .cooksd)) +
  geom_col() +
  labs(x = "Row index", y = "Cook's distance")

Try the regressinator package in your browser

Any scripts or data that you put into this service are public.

regressinator documentation built on Sept. 11, 2024, 6:50 p.m.