Nothing
## ----include = FALSE------------------------------------------------
# knitr settings
knitr::opts_chunk$set(
# Code output:
warning = FALSE,
message = FALSE,
echo = TRUE,
# Figure:
out.width = "100%",
fig.width = 16 / 2.5,
fig.height = 9 / 2.5,
fig.align = "center",
fig.show = "hold",
# Etc:
collapse = TRUE,
comment = "##"
# tidy = FALSE
)
# Needed packages in vignette
library(moderndive)
library(ggplot2)
library(dplyr)
library(knitr)
library(broom)
# Needed packages internally
library(patchwork)
# Random number generator seed value
set.seed(76)
# Set ggplot defaults for rticles output:
if (!knitr::is_html_output()) {
# Grey theme:
theme_set(theme_light())
scale_colour_discrete <- ggplot2::scale_colour_viridis_d
}
# Set output width for rticles:
options(width = 70)
## -------------------------------------------------------------------
library(moderndive)
library(ggplot2)
library(dplyr)
library(knitr)
library(broom)
## ----echo=FALSE-----------------------------------------------------
evals_sample <- evals %>%
select(ID, prof_ID, score, age, bty_avg, gender, ethnicity, language, rank) %>%
sample_n(5)
## ----random-sample-courses, echo=FALSE------------------------------
evals_sample %>%
kable()
## -------------------------------------------------------------------
score_model <- lm(score ~ age, data = evals)
## -------------------------------------------------------------------
summary(score_model)
## -------------------------------------------------------------------
get_regression_table(score_model)
## -------------------------------------------------------------------
get_regression_points(score_model)
## -------------------------------------------------------------------
get_regression_summaries(score_model)
## ----interaction-model, fig.cap="Visualization of interaction model."----
# Code to visualize interaction model:
ggplot(evals, aes(x = age, y = score, color = ethnicity)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(x = "Age", y = "Teaching score", color = "Ethnicity")
## ----parallel-slopes-model, fig.cap="Visualization of parallel slopes model."----
# Code to visualize parallel slopes model:
ggplot(evals, aes(x = age, y = score, color = ethnicity)) +
geom_point() +
geom_parallel_slopes(se = FALSE) +
labs(x = "Age", y = "Teaching score", color = "Ethnicity")
## -------------------------------------------------------------------
get_regression_table(score_model)
## -------------------------------------------------------------------
get_regression_table(score_model, conf.level = 0.99)
## -------------------------------------------------------------------
sqrt(diag(vcov(score_model)))
## -------------------------------------------------------------------
get_regression_table(score_model) %>%
pull(std_error)
## -------------------------------------------------------------------
get_regression_table(score_model)$std_error
## -------------------------------------------------------------------
get_regression_table(score_model) %>%
kable()
## ----eval=FALSE-----------------------------------------------------
# fitted(score_model)
## ----echo=FALSE-----------------------------------------------------
fitted(score_model)[1:10]
## ----eval=FALSE-----------------------------------------------------
# residuals(score_model)
## ----echo=FALSE-----------------------------------------------------
residuals(score_model)[1:10]
## ----eval=FALSE-----------------------------------------------------
# score_model_points <- get_regression_points(score_model)
# score_model_points
## ----echo=FALSE-----------------------------------------------------
score_model_points <- get_regression_points(score_model)
score_model_points %>%
slice(1:10)
## ----residuals-1, fig.cap="Histogram visualizing distribution of residuals."----
# Code to visualize distribution of residuals:
ggplot(score_model_points, aes(x = residual)) +
geom_histogram(bins = 20) +
labs(x = "Residual", y = "Count")
## ----residuals-2, fig.cap="Partial residual residual plot over age."----
# Code to visualize partial residual plot over age:
ggplot(score_model_points, aes(x = age, y = residual)) +
geom_point() +
labs(x = "Age", y = "Residual")
## -------------------------------------------------------------------
new_prof <- tibble(age = c(39, 42))
get_regression_points(score_model, newdata = new_prof)
## ----kaggle-2, echo=FALSE, fig.cap="Resulting Kaggle RMSLE score."----
knitr::include_graphics("leaderboard_orig.jpeg")
## ----echo=FALSE-----------------------------------------------------
summary(score_model)
## -------------------------------------------------------------------
get_regression_summaries(score_model)
## ----interaction-and-parallel-slopes-model-1, echo=FALSE, fig.height = 9/3, fig.cap = "Interaction (left) and parallel slopes (right) models."----
p1 <- ggplot(evals, aes(x = age, y = score, color = ethnicity)) +
geom_point() +
labs(x = "Age", y = "Teaching score", color = "Ethnicity") +
geom_smooth(method = "lm", se = FALSE) +
theme(legend.position = "none")
p2 <- ggplot(evals, aes(x = age, y = score, color = ethnicity)) +
geom_point() +
labs(x = "Age", y = "Teaching score", color = "Ethnicity") +
geom_parallel_slopes(se = FALSE) +
theme(axis.title.y = element_blank())
p1 + p2
## ----eval=TRUE------------------------------------------------------
# Regression table for interaction model:
interaction_evals <- lm(score ~ age * ethnicity, data = evals)
get_regression_table(interaction_evals)
# Regression table for parallel slopes model:
parallel_slopes_evals <- lm(score ~ age + ethnicity, data = evals)
get_regression_table(parallel_slopes_evals)
## ----eval=FALSE-----------------------------------------------------
# # Code to plot interaction and parallel slopes models for MA_schools
# ggplot(
# MA_schools,
# aes(x = perc_disadvan, y = average_sat_math, color = size)
# ) +
# geom_point(alpha = 0.25) +
# labs(
# x = "% economically disadvantaged",
# y = "Math SAT Score",
# color = "School size"
# ) +
# geom_smooth(method = "lm", se = FALSE)
#
# ggplot(
# MA_schools,
# aes(x = perc_disadvan, y = average_sat_math, color = size)
# ) +
# geom_point(alpha = 0.25) +
# labs(
# x = "% economically disadvantaged",
# y = "Math SAT Score",
# color = "School size"
# ) +
# geom_parallel_slopes(se = FALSE)
## ----interaction-and-parallel-slopes-model-2, echo=FALSE, fig.height = 9/3, fig.cap = "Interaction (left) and parallel slopes (right) models."----
p1 <- ggplot(MA_schools, aes(x = perc_disadvan, y = average_sat_math, color = size)) +
geom_point(alpha = 0.25) +
geom_smooth(method = "lm", se = FALSE) +
labs(
x = "% economically disadvantaged",
y = "Math SAT Score",
color = "School size"
) +
theme(legend.position = "none")
p2 <- ggplot(MA_schools, aes(x = perc_disadvan, y = average_sat_math, color = size)) +
geom_point(alpha = 0.25) +
geom_parallel_slopes(se = FALSE) +
labs(
x = "% economically disadvantaged",
y = "Math SAT Score",
color = "School size"
) +
theme(axis.title.y = element_blank())
p1 + p2
## -------------------------------------------------------------------
# Regression table for interaction model:
interaction_MA <-
lm(average_sat_math ~ perc_disadvan * size, data = MA_schools)
get_regression_table(interaction_MA)
# Regression table for parallel slopes model:
parallel_slopes_MA <-
lm(average_sat_math ~ perc_disadvan + size, data = MA_schools)
get_regression_table(parallel_slopes_MA)
## -------------------------------------------------------------------
get_regression_points(score_model)
broom::augment(score_model)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.