inst/doc/moderndive.R

## ----include = FALSE------------------------------------------------
# knitr settings
knitr::opts_chunk$set(
  # Code output:
  warning = FALSE,
  message = FALSE,
  echo = TRUE,
  # Figure:
  out.width = "100%",
  fig.width = 16 / 2.5,
  fig.height = 9 / 2.5,
  fig.align = "center",
  fig.show = "hold",
  # Etc:
  collapse = TRUE,
  comment = "##"
  # tidy = FALSE
)

# Needed packages in vignette
library(moderndive)
library(ggplot2)
library(dplyr)
library(knitr)
library(broom)

# Needed packages internally
library(patchwork)

# Random number generator seed value
set.seed(76)

# Set ggplot defaults for rticles output:
if (!knitr::is_html_output()) {
  # Grey theme:
  theme_set(theme_light())

  scale_colour_discrete <- ggplot2::scale_colour_viridis_d
}


# Set output width for rticles:
options(width = 70)

## -------------------------------------------------------------------
library(moderndive)
library(ggplot2)
library(dplyr)
library(knitr)
library(broom)

## ----echo=FALSE-----------------------------------------------------
evals_sample <- evals %>%
  select(ID, prof_ID, score, age, bty_avg, gender, ethnicity, language, rank) %>%
  sample_n(5)

## ----random-sample-courses, echo=FALSE------------------------------
evals_sample %>%
  kable()

## -------------------------------------------------------------------
score_model <- lm(score ~ age, data = evals)

## -------------------------------------------------------------------
summary(score_model)

## -------------------------------------------------------------------
get_regression_table(score_model)

## -------------------------------------------------------------------
get_regression_points(score_model)

## -------------------------------------------------------------------
get_regression_summaries(score_model)

## ----interaction-model, fig.cap="Visualization of interaction model."----
# Code to visualize interaction model:
ggplot(evals, aes(x = age, y = score, color = ethnicity)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Age", y = "Teaching score", color = "Ethnicity")

## ----parallel-slopes-model, fig.cap="Visualization of parallel slopes model."----
# Code to visualize parallel slopes model:
ggplot(evals, aes(x = age, y = score, color = ethnicity)) +
  geom_point() +
  geom_parallel_slopes(se = FALSE) +
  labs(x = "Age", y = "Teaching score", color = "Ethnicity")

## -------------------------------------------------------------------
get_regression_table(score_model)

## -------------------------------------------------------------------
get_regression_table(score_model, conf.level = 0.99)

## -------------------------------------------------------------------
sqrt(diag(vcov(score_model)))

## -------------------------------------------------------------------
get_regression_table(score_model) %>%
  pull(std_error)

## -------------------------------------------------------------------
get_regression_table(score_model)$std_error

## -------------------------------------------------------------------
get_regression_table(score_model) %>%
  kable()

## ----eval=FALSE-----------------------------------------------------
#  fitted(score_model)

## ----echo=FALSE-----------------------------------------------------
fitted(score_model)[1:10]

## ----eval=FALSE-----------------------------------------------------
#  residuals(score_model)

## ----echo=FALSE-----------------------------------------------------
residuals(score_model)[1:10]

## ----eval=FALSE-----------------------------------------------------
#  score_model_points <- get_regression_points(score_model)
#  score_model_points

## ----echo=FALSE-----------------------------------------------------
score_model_points <- get_regression_points(score_model)
score_model_points %>%
  slice(1:10)

## ----residuals-1, fig.cap="Histogram visualizing distribution of residuals."----
# Code to visualize distribution of residuals:
ggplot(score_model_points, aes(x = residual)) +
  geom_histogram(bins = 20) +
  labs(x = "Residual", y = "Count")

## ----residuals-2, fig.cap="Partial residual residual plot over age."----
# Code to visualize partial residual plot over age:
ggplot(score_model_points, aes(x = age, y = residual)) +
  geom_point() +
  labs(x = "Age", y = "Residual")

## -------------------------------------------------------------------
new_prof <- tibble(age = c(39, 42))
get_regression_points(score_model, newdata = new_prof)

## ----kaggle-2, echo=FALSE, fig.cap="Resulting Kaggle RMSLE score."----
knitr::include_graphics("leaderboard_orig.jpeg")

## ----echo=FALSE-----------------------------------------------------
summary(score_model)

## -------------------------------------------------------------------
get_regression_summaries(score_model)

## ----interaction-and-parallel-slopes-model-1, echo=FALSE, fig.height = 9/3, fig.cap = "Interaction (left) and parallel slopes (right) models."----
p1 <- ggplot(evals, aes(x = age, y = score, color = ethnicity)) +
  geom_point() +
  labs(x = "Age", y = "Teaching score", color = "Ethnicity") +
  geom_smooth(method = "lm", se = FALSE) +
  theme(legend.position = "none")
p2 <- ggplot(evals, aes(x = age, y = score, color = ethnicity)) +
  geom_point() +
  labs(x = "Age", y = "Teaching score", color = "Ethnicity") +
  geom_parallel_slopes(se = FALSE) +
  theme(axis.title.y = element_blank())
p1 + p2

## ----eval=TRUE------------------------------------------------------
# Regression table for interaction model:
interaction_evals <- lm(score ~ age * ethnicity, data = evals)
get_regression_table(interaction_evals)

# Regression table for parallel slopes model:
parallel_slopes_evals <- lm(score ~ age + ethnicity, data = evals)
get_regression_table(parallel_slopes_evals)

## ----eval=FALSE-----------------------------------------------------
#  # Code to plot interaction and parallel slopes models for MA_schools
#  ggplot(
#    MA_schools,
#    aes(x = perc_disadvan, y = average_sat_math, color = size)
#  ) +
#    geom_point(alpha = 0.25) +
#    labs(
#      x = "% economically disadvantaged",
#      y = "Math SAT Score",
#      color = "School size"
#    ) +
#    geom_smooth(method = "lm", se = FALSE)
#  
#  ggplot(
#    MA_schools,
#    aes(x = perc_disadvan, y = average_sat_math, color = size)
#  ) +
#    geom_point(alpha = 0.25) +
#    labs(
#      x = "% economically disadvantaged",
#      y = "Math SAT Score",
#      color = "School size"
#    ) +
#    geom_parallel_slopes(se = FALSE)

## ----interaction-and-parallel-slopes-model-2, echo=FALSE, fig.height = 9/3, fig.cap = "Interaction (left) and parallel slopes (right) models."----
p1 <- ggplot(MA_schools, aes(x = perc_disadvan, y = average_sat_math, color = size)) +
  geom_point(alpha = 0.25) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "% economically disadvantaged",
    y = "Math SAT Score",
    color = "School size"
  ) +
  theme(legend.position = "none")
p2 <- ggplot(MA_schools, aes(x = perc_disadvan, y = average_sat_math, color = size)) +
  geom_point(alpha = 0.25) +
  geom_parallel_slopes(se = FALSE) +
  labs(
    x = "% economically disadvantaged",
    y = "Math SAT Score",
    color = "School size"
  ) +
  theme(axis.title.y = element_blank())
p1 + p2

## -------------------------------------------------------------------
# Regression table for interaction model:
interaction_MA <-
  lm(average_sat_math ~ perc_disadvan * size, data = MA_schools)
get_regression_table(interaction_MA)

# Regression table for parallel slopes model:
parallel_slopes_MA <-
  lm(average_sat_math ~ perc_disadvan + size, data = MA_schools)
get_regression_table(parallel_slopes_MA)

## -------------------------------------------------------------------
get_regression_points(score_model)
broom::augment(score_model)

Try the moderndive package in your browser

Any scripts or data that you put into this service are public.

moderndive documentation built on June 30, 2024, 9:06 a.m.