Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
warning = FALSE,
message = FALSE,
comment = "#>"
)
## ----eval=FALSE---------------------------------------------------------------
# install.packages("tntpr")
## ----load_packages------------------------------------------------------------
# NOTE: We are not simply loading `tidyverse` in this vignette due to how vignettes
# are built to create the documentation website. In other contexts, however, we
# would simply use `library(tidyverse)` instead of loading many of the packages individually.
library(tntpr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(purrr)
library(stringr)
library(forcats)
library(ggalt)
library(ggridges)
## -----------------------------------------------------------------------------
# base plot we will reuse in this section
plt <- ggplot(ggplot2::mpg, aes(displ, hwy)) +
geom_point() +
labs(
title = "Cars with higher displacement\nhave a lower MPG",
subtitle = "Displacement vs. MPG",
caption = "Data from ggplot's mpg dataset",
x = "Engine Displ.",
y = "MPG"
)
plt +
tntp_style()
## -----------------------------------------------------------------------------
plt +
tntp_style(show_axis_titles = TRUE, family = "sans", base_size = 20) +
theme(plot.subtitle = ggplot2::element_text(family = "serif", size = 25))
## -----------------------------------------------------------------------------
plt +
tntp_style(show_axis_titles = TRUE) +
theme(panel.border = element_rect(color = "black", fill = NA))
## -----------------------------------------------------------------------------
tntp_colors("green", "mint")
## -----------------------------------------------------------------------------
show_tntp_colors("green", "moss", "mint")
## -----------------------------------------------------------------------------
# Note: the cex_label parameter is used to adjust the relative font size
show_tntp_colors(cex_label = 0.7)
## -----------------------------------------------------------------------------
tntp_palette("likert_6")
## -----------------------------------------------------------------------------
show_tntp_palette("likert_6", reverse = TRUE)
## -----------------------------------------------------------------------------
show_tntp_palette()
## ----message = FALSE----------------------------------------------------------
# load fake data into global environment
# remove all salaries of 0
county_data <- tntpr::fake_county |>
filter(t_salary > 0)
avg_salary <- county_data |>
filter(t_salary != 0) |>
group_by(school_year) |>
summarize(avg_salary = mean(t_salary, na.rm = TRUE), .groups = "drop")
## -----------------------------------------------------------------------------
base_font_size <- 16
## ----message = FALSE----------------------------------------------------------
#Make plot
ggplot(avg_salary, aes(x = school_year, y = avg_salary)) +
geom_line(colour = tntp_colors("green"), linewidth = 1) +
scale_y_continuous(labels = scales::dollar, limits = c(0, 5000)) +
labs(
title="Average Teacher Salaries",
subtitle = "Teacher salaries remained constant between 2012 and 2015"
) +
tntp_style(base_size = base_font_size)
## -----------------------------------------------------------------------------
#Prepare data
school_salary <- county_data |>
filter(t_salary != 0) |>
group_by(school_year, school_name) |>
summarize(avg_salary = mean(t_salary, na.rm = TRUE), .groups = "drop")
# create list of school names so we can easily filter data set for the number of schools we want
school_names <- unique(school_salary$school_name)
# only plot two schools
line_plot_schools <- school_salary |>
filter(school_name %in% school_names[1:3])
## -----------------------------------------------------------------------------
ggplot(line_plot_schools, aes(x = school_year, y = avg_salary, color = school_name)) +
geom_line(linewidth = 1) +
scale_y_continuous(labels = scales::dollar, limits = c(0, 5000)) +
scale_colour_manual(values = tntp_palette("colorful")) +
labs(
title="Average Teacher Salaries",
subtitle = "Teacher salaries remained constant between 2012 and 2015"
) +
tntp_style(base_size = base_font_size)
## -----------------------------------------------------------------------------
#Prepare data
bar_df <- school_salary |>
filter(
school_year == 2015,
school_name %in% school_names[1:5]
) |>
# add line breaks for better plotting
mutate(school_name = str_wrap(school_name, 7))
ggplot(bar_df, aes(x = school_name, y = avg_salary)) +
geom_bar(stat="identity",
position="identity",
fill= tntp_colors("gold")) +
scale_y_continuous(labels = scales::dollar, limits = c(0, 5000)) +
labs(
title="Acacia had higher average salaries in 2015",
subtitle = "Average teacher salaries in 2015 by school"
) +
tntp_style(base_size = base_font_size)
## -----------------------------------------------------------------------------
#prepare data
stacked_df <- county_data |>
filter(
school_year == 2015,
school_name %in% school_names[1:5]
) |>
mutate(t_salary_cut = cut(t_salary,
breaks = c(0, 2500, 3500, 4500, 10000),
labels = c("under $2.5k", "$2.5k-$3.5k", "$3.5k-$4.5k", "$4.5k+"))) |>
group_by(school_name, t_salary_cut) |>
summarise(n_cut_school = n(), .groups = "drop_last") |>
mutate(
n_cut_salary = sum(n_cut_school, na.rm = TRUE),
perc_in_each_cut = n_cut_school / n_cut_salary
) |>
# add line breaks for better plotting
mutate(school_name = str_wrap(school_name, 7))
#set order of stacks by changing factor levels
stacked_df$t_salary_cut = factor(stacked_df$t_salary_cut, levels = rev(levels(stacked_df$t_salary_cut)))
## -----------------------------------------------------------------------------
ggplot(
data = stacked_df,
aes(x = school_name, y = perc_in_each_cut, fill = t_salary_cut)
) +
geom_bar(stat = "identity", position = "stack") +
scale_y_continuous(labels = scales::percent, limits = c(0,1)) +
scale_fill_manual(values = tntp_palette("greens")) +
labs(title = "Most teachers earn between $2.5K and $4.5K",
subtitle = "Percentage of teachers by salary range") +
theme(legend.position = "top",
legend.justification = "left") +
guides(fill = guide_legend(reverse = TRUE)) +
tntp_style(base_size = base_font_size)
## -----------------------------------------------------------------------------
# only plot the lowest and highest earning groups
earnings_to_keep <- levels(stacked_df$t_salary_cut)[c(4,1)]
# map bar colors to values
colors_to_use <- tntp_colors("yellow", "green") |>
set_names(earnings_to_keep)
dodged_df <- stacked_df |>
# only plot the lowest and highest earning groups
filter(t_salary_cut %in% !!earnings_to_keep) |>
# switch order of factors so that the lowest earnings plot first
mutate(t_salary_cut = factor(t_salary_cut, levels = earnings_to_keep))
ggplot(dodged_df, aes(x = school_name, y = perc_in_each_cut, fill = t_salary_cut)) +
geom_bar(stat = "identity", position = position_dodge2(preserve = "single")) +
scale_y_continuous(labels = scales::percent, limits = c(0,1)) +
scale_fill_manual(values = colors_to_use) +
labs(title = "More teachers earn over $4.5k than under $2.5k",
subtitle = "Percentage of teachers by salary range") +
tntp_style(base_size = base_font_size)
## -----------------------------------------------------------------------------
# create data set showing school test scores and average school salaries
scores_salary <- county_data |>
group_by(school_name) |>
summarize(
avg_test_score = mean(sch_ela_avg + sch_math_avg, na.rm=FALSE),
avg_salary = mean(t_salary, na.rm = FALSE),
enrollment = mean(sch_enroll_2015, na.rm = TRUE),
.groups = "drop"
) |>
mutate(perc_rank_test_score = percent_rank(avg_test_score)) |>
drop_na()
## -----------------------------------------------------------------------------
ggplot(scores_salary, aes(avg_salary, perc_rank_test_score)) +
geom_point(color = tntp_colors("tangerine"), alpha = 0.9, size = 3) +
labs(
title = "Schools with higher salaries do not have higher test scores",
subtitle = "Relationship between school test scores and salaries",
x = "Average school salary",
y = "Percentile rank test score"
) +
scale_x_continuous(labels = scales::dollar) +
scale_y_continuous(labels = scales::percent) +
tntp_style(base_size = base_font_size, show_axis_titles = TRUE)
## -----------------------------------------------------------------------------
dumbbell_df <- dodged_df |>
pivot_wider(id_cols = "school_name", names_from = "t_salary_cut", values_from = "n_cut_school") |>
mutate(across(where(is.numeric), ~replace_na(.x, 0)))
ggplot(dumbbell_df, aes(x = `under $2.5k`, xend = `$4.5k+`, y = fct_reorder(school_name, `under $2.5k`)), group = school_names) +
geom_dumbbell(
color = tntp_colors("light_grey"),
size = 3,
colour_x = tntp_colors("gold"),
colour_xend = tntp_colors("green"),
show.legend = TRUE
) +
labs(title = "More teachers earn over $4.5k than under $2.5k",
subtitle = "Number of teachers by salary range",
x = "Number of teachers earnign a given salary") +
tntp_style(base_size = base_font_size)
## ----message = FALSE----------------------------------------------------------
# number of teachers per school
number_teachers_school <- county_data |>
count(school_year, school_name)
ggplot(number_teachers_school, aes(n)) +
geom_histogram(binwidth = 5, colour = "white", fill = tntp_colors('navy')) +
labs(
title = "Schools have a wide distribution in the number of teachers",
subtitle = "Total number of teachers per school",
x = "Number of teacher in school",
y = "Count"
) +
tntp_style(base_size = base_font_size)
## -----------------------------------------------------------------------------
ggplot(county_data, aes(x = t_salary, y = school_year, group = school_year, fill = factor(school_year))) +
geom_density_ridges(quantile_lines = TRUE, quantiles = 4) +
scale_x_continuous(labels = scales::dollar) +
labs(title = "Income distrubutions for teachers has remained constant",
subtitle = "Income distrubution and quantiles for teachers") +
tntp_style(base_size = base_font_size) +
theme(legend.position = "none") +
scale_fill_manual(values = tntp_palette("colorful"))
## -----------------------------------------------------------------------------
teacher_survey <- tntpr::teacher_survey
# the y-axis will contain text of an entire survey question
# we want to place line breaks in this text so plots look better
axis_line_breaks <- 40
# scales in HE questions, in order starting with the strongest
agree_disagree_scale <- rev(c("Strongly Agree", "Agree", "Somewhat Agree", "Somewhat Disagree", "Disagree", "Strongly Disagree"))
# put survey into long form and clean up question names
teacher_survey_he <- teacher_survey |>
select(-timing) |>
pivot_longer(cols = everything(), names_to = "question", values_to = "response")
# calculate percentage of responses to each high expectations question
teacher_survey_he_perc <- teacher_survey_he |>
drop_na("response") |>
# calculate the number of responses for each response option
count(question, response, name = "n_response") |>
# calculate the number of responses for each question
group_by(question) |>
mutate(n_question = sum(n_response)) |>
ungroup() |>
# calculate percentages
mutate(
# calculate percentages
percent = n_response / n_question,
# make a column that is text of the percent for plotting
percent_pretty = scales::percent(percent, accuracy = 1)
)
# calculate percentage of strongly agree and agree
teacher_survey_he_perc <- teacher_survey_he_perc |>
mutate(scale_strength = ifelse(response %in% !!agree_disagree_scale[c(5,6)], "Strong response", "Weak response")) |>
group_by(question, scale_strength) |>
mutate(strong_response_percent = sum(percent)) |>
ungroup() |>
mutate(
strong_response_percent = ifelse(response == "Agree", strong_response_percent, NA),
# create line breaks for questions ,which will make plots look better
question = str_wrap(question, axis_line_breaks),
response = factor(response, levels = agree_disagree_scale)
)
## -----------------------------------------------------------------------------
ggplot(teacher_survey_he_perc, aes(percent, question, fill = response)) +
geom_col() +
geom_text(
aes(label = scales::percent(strong_response_percent, accuracy = 1), x = strong_response_percent),
color = "white", fontface = "bold", family = "Halyard Display", size = 5, hjust = 1.05
) +
scale_x_continuous(labels = scales::percent, limits = c(0,1)) +
scale_fill_manual(values = tntp_palette("top2_6"), drop = FALSE) +
guides(fill=guide_legend(nrow=2, byrow=TRUE, reverse = TRUE)) +
labs(title = "High Expectations Survey Responses") +
tntp_style(base_size = base_font_size)
## ----eval=FALSE---------------------------------------------------------------
# geom_col(position = "diverge")
## -----------------------------------------------------------------------------
ggplot(teacher_survey_he_perc, aes(x = percent, y = question, fill = fct_rev(response))) +
geom_col(position = position_diverge()) +
scale_fill_manual(
values = tntp_palette("likert_6"),
drop = FALSE,
breaks = agree_disagree_scale,
labels = agree_disagree_scale
) +
geom_vline(aes(xintercept = 0), linetype = 1, linewidth = 1.2, alpha = .7) +
scale_x_continuous(limits = c(-1, 1), breaks = seq(-1, 1, .25), labels = \(x) scales::percent(abs(x))) +
labs(title = "High Expectations Survey Responses") +
tntp_style(base_size = base_font_size)
## -----------------------------------------------------------------------------
ggplot(teacher_survey_he_perc, aes(x = percent, y = question, fill = fct_rev(response))) +
geom_col(position = position_diverge(break_after = "Agree")) +
geom_text(aes(label = ifelse(percent > 0.1, percent_pretty, "")),
position = position_diverge(break_after = "Agree", vjust = 0.5),
family = "Halyard Display", size = 3) +
scale_fill_manual(
values = tntp_palette("likert_6"),
drop = FALSE,
breaks = agree_disagree_scale,
labels = agree_disagree_scale
) +
geom_vline(aes(xintercept = 0), linetype = 1, linewidth = 1.2, alpha = .7) +
scale_x_continuous(limits = c(-1, 1), breaks = seq(-1, 1, .25), labels = \(x) scales::percent(abs(x))) +
labs(title = "High Expectations Survey Responses") +
tntp_style(base_size = base_font_size)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.