#' @title quealy_subgroups
#'
#' @description kevin quealy of the nytimes did a nice job capturing change
#' in the general population vs change in specific subgroups: http://nyti.ms/1tQrOIl
#' re: obamacare.
#' let's do the same thing for change in RIT score.
#'
#' @param mapvizieR_obj mapvizieR object
#' @param studentids target students
#' @param measurementscale target subject
#' @param subgroup_cols what subgroups in mapvizier roster do you want to cut by? default
#' is starting_quartile
#' @param pretty_names nicely formatted names for the column cuts used above.
#' @param magic_subgroups generated by mapvizieR. possible values include:
#' 'starting_quartile'
#' @param start_fws one academic season (if known); pass vector of two and quealy subgroups will pick
#' @param start_year_offset 0 if start season is same, -1 if start is prior year.
#' @param end_fws ending season
#' @param end_academic_year ending academic year
#' @param start_fws_prefer which term is preferred? not required if only one start_fws is passed
#' @param report_title text grob to put on the report tile
#' @param complete_obsv if TRUE, limit only to students who have BOTH a start
#' and end score. default is TRUE.
#' @param drop_NA_groups should we ignore subgroups with value NA? default is true
#' @param include_all should the output have plot at the top showing the TOTAL
#' variation? not recommended for data spanning multiple grade levels.
#' @param small_n_cutoff drop a subgroup if less than x% (as decimal) of the total pop?
#' (useful for cutting off the long tail of a group). applies to all subgroups in
#' subgroup_cols. does not apply to magic subgroups.
#' @param join_by_measurementscale boolean, passed to roster_to_growth_df. TRUE if
#' the subgroup col is per-subject.
#' @param school_growth_norms passed through to calc_cgp. determines what norm study
#' to use. default is 2015 (most recent study).
#'
#' @return a grob composed of multiple ggplots
#'
#' @export
quealy_subgroups <- function(
mapvizieR_obj,
studentids,
measurementscale,
subgroup_cols = c('starting_quartile'),
pretty_names = c('Starting Quartile'),
magic_subgroups = TRUE,
start_fws,
start_year_offset,
end_fws,
end_academic_year,
start_fws_prefer = NA,
report_title = NA,
complete_obsv = TRUE,
drop_NA_groups = TRUE,
include_all = TRUE,
small_n_cutoff = -1,
join_by_measurementscale = FALSE,
school_growth_norms = 2015
) {
#1. validation
mv_opening_checks(mapvizieR_obj, studentids, 1)
assertthat::assert_that(length(subgroup_cols) == length(pretty_names))
#2. limit to kids, endpoint
df <- mv_limit_growth(mapvizieR_obj, studentids, measurementscale) %>%
dplyr::filter(
end_map_year_academic == end_academic_year,
end_fallwinterspring == end_fws
)
if (complete_obsv) {
df <- df %>% dplyr::filter(complete_obsv == TRUE)
}
#if there's no students, raise an informative error
df %>%
ensurer::ensure_that(
nrow(.) > 0 ~ "no matching students for the specified subject/terms."
)
#3. put SUBGROUPS values from roster onto df
df <- roster_to_growth_df(
target_df = df,
mapvizieR_obj = mapvizieR_obj,
roster_cols = subgroup_cols,
by_measurementscale = join_by_measurementscale,
join_by = 'end'
)
#put rownames back on the df
df$persistent_names <- rownames(df)
#4. for each SUBGROUP permutation
all_sub <- subgroup_cols
if (include_all | !is.logical(magic_subgroups)) {
#add all_students to df
df$all_students <- 'All Students'
#include in subgroups
all_sub <- c('all_students', all_sub)
}
#df to hold the result
window_df <- data.frame(
subgroup = character(0), perm = character(0),
start_fws = character(0), start_year = integer(0),
#for global limits/sizes
min_x = numeric(0), max_x = numeric(0),
n = integer(0), persist_row_names = character(0),
stringsAsFactors = FALSE
)
counter <- 1
group_stats <- list()
#...find the GROWTH WINDOW
#also calc group stats, so we don't have to do it later.
for (i in all_sub) {
#first apply small n filter
df_filtered <- min_subgroup_filter(df, i, small_n_cutoff)
#find permutations for this group
perms <- df_filtered[, i] %>% unique() %>% sort()
if (drop_NA_groups == TRUE) {perms <- perms[!is.na(perms)]}
for (j in perms) {
#matching sub/perm students
mask <- df_filtered[, i] == j
#get the windows
if (length(start_fws) > 1) {
#from the data
auto_windows <- auto_growth_window(
mapvizieR_obj = mapvizieR_obj,
studentids = df_filtered[mask, 'studentid'],
measurementscale = measurementscale,
end_fws = end_fws,
end_academic_year = end_academic_year,
candidate_start_fws = start_fws,
candidate_year_offsets = start_year_offset,
candidate_prefer = start_fws_prefer,
window_tolerance = 0.66
)
inferred_start_fws <- auto_windows[[1]]
inferred_start_academic_year <- auto_windows[[2]]
} else {
inferred_start_fws <- start_fws
inferred_start_academic_year <- end_academic_year + start_year_offset
}
#limit by windows
this_stu <- df_filtered %>% dplyr::filter(
studentid %in% df_filtered[mask, 'studentid'] &
start_fallwinterspring == inferred_start_fws &
start_map_year_academic == inferred_start_academic_year
)
if(nrow(this_stu) == 0) {
next
}
#calc subgroup stats
perm_stats <- quealy_permutation_stats(this_stu, i, school_growth_norms)
perm_stats %>% ensurer::ensure_that(
nrow(.) == 1 ~ 'there should only be one group!')
#give the group name a consistent variable name
names(perm_stats)[names(perm_stats) == i] <- 'facet_me'
#put the stats on the list for use below
group_stats[[paste0(i, '@', j)]] <- perm_stats
window_df[counter, ]$subgroup <- i
window_df[counter, ]$perm <- j
window_df[counter, ]$start_fws <- inferred_start_fws
window_df[counter, ]$start_year <- inferred_start_academic_year
window_df[counter, ]$min_x <- min(perm_stats$start_rit, perm_stats$end_rit)
window_df[counter, ]$max_x <- max(perm_stats$start_rit, perm_stats$end_rit)
window_df[counter, ]$n <- perm_stats$n
window_df[counter, ]$persist_row_names <- paste(this_stu$persistent_names, collapse = ',')
counter <- counter + 1
}
}
#5. magic subgroups, if any
if (!is.logical(magic_subgroups)) {
if ('starting_quartile' %in% magic_subgroups) {
#add to list of subgroups
subgroup_cols <- c(subgroup_cols, 'starting_quartile')
#and pretty names
pretty_names <- c(pretty_names, 'Starting Quartile')
#pull the all_students row
#we'll use this for the start/ends
all_stu <- window_df[window_df$subgroup == 'all_students', ]
quart_fws <- all_stu[1, ]$start_fws
quart_year <- all_stu[1, ]$start_year
#get start/end from ALL students
start_quartile_data <- df %>%
dplyr::filter(
start_fallwinterspring == quart_fws &
start_map_year_academic == quart_year
)
#limit for join
start_quartile_join <- start_quartile_data %>%
dplyr::select(
studentid, start_testquartile
)
names(start_quartile_join) <- c('studentid', 'starting_quartile')
#put back on the df as a demographic variable
df <- df %>%
dplyr::left_join(
start_quartile_join, by = 'studentid'
)
#iterate over the unique subgroups and calc stats
for (i in unique(start_quartile_data$start_testquartile) %>% as.numeric() %>% sort()) {
this_start_quartile <- start_quartile_data %>%
dplyr::filter(as.numeric(start_testquartile) == i)
perm_stats <- quealy_permutation_stats(
this_start_quartile, 'start_testquartile', school_growth_norms
)
perm_stats %>% ensurer::ensure_that(
nrow(.) == 1 ~ 'there should only be one group!')
#give the group name a consistent variable name
names(perm_stats)[names(perm_stats) == i] <- 'facet_me'
#put the stats on the list for use below
group_stats[[paste0('starting_quartile', '@', i)]] <- perm_stats
window_df[counter, ]$subgroup <- 'starting_quartile'
window_df[counter, ]$perm <- i
window_df[counter, ]$start_fws <- all_stu[1, ]$start_fws
window_df[counter, ]$start_year <- all_stu[1, ]$start_year
window_df[counter, ]$min_x <- min(perm_stats$start_rit, perm_stats$end_rit)
window_df[counter, ]$max_x <- max(perm_stats$start_rit, perm_stats$end_rit)
window_df[counter, ]$n <- perm_stats$n
window_df[counter, ]$persist_row_names <- paste(this_start_quartile$persistent_names, collapse = ',')
counter <- counter + 1
#end perms of starting quartiles
}
#end starting quartile magic subgroup
}
#end magic subgroups
}
#6. MAKE PLOTS
#global limits
min_x <- min(window_df$min_x, na.rm = TRUE)
max_x <- max(window_df$max_x, na.rm = TRUE)
plot_lims <- c(
round_to_any(min_x - 1, 5, f = floor),
round_to_any(max_x + 1, 5, f = ceiling)
)
n_range <- c(
min(window_df$n, na.rm = TRUE),
max(window_df$n, na.rm = TRUE)
)
plot_counter <- 1
plot_list <- list()
nrow_list <- list()
if (include_all) {
ref_line_range <- c(
group_stats[['all_students@All Students']]$start_rit,
group_stats[['all_students@All Students']]$end_rit
)
#all students
p_all <- quealy_facet_one_subgroup(
sum_df = group_stats[['all_students@All Students']],
subgroup = 'All Students',
xlims = plot_lims,
n_range = n_range,
ref_lines = ref_line_range
)
plot_list[[plot_counter]] <- p_all
nrow_list[[plot_counter]] <- 1.5
plot_counter <- plot_counter + 1
} else {
ref_line_range <- NA
}
#rest of the subgroups
for (i in 1:length(subgroup_cols)) {
#the matching permutations
this_perms <- window_df[window_df$subgroup == subgroup_cols[i], ]
#recover the logic of which rows based on auto growth windows
#per perm
all_rownames <- this_perms$persist_row_names %>%
paste(collapse = ',') %>% strsplit(split = ',') %>% unlist()
mask <- df$persistent_names %in% all_rownames
#calc group stats on those stu
this_sum <- quealy_permutation_stats(
df[mask, ], subgroup_cols[i], school_growth_norms
)
names(this_sum)[names(this_sum) == subgroup_cols[i]] <- 'facet_me'
plot_list[[plot_counter]] <- quealy_facet_one_subgroup(
sum_df = this_sum,
subgroup = pretty_names[i],
xlims = plot_lims,
n_range = n_range,
ref_lines = ref_line_range
)
nrow_list[[plot_counter]] <- ifelse(
nrow(this_sum) == 1, 1.5, nrow(this_sum)
)
plot_counter <- plot_counter + 1
}
#add named args to plot list for do call
plot_list[['nrow']] <- length(plot_list)
plot_list[['heights']] <- unlist(nrow_list)
final <- do.call(
what = "arrangeGrob",
args = plot_list,
)
if (!is.na(report_title)) {
title <- h_var(report_title, 16)
final <- gridExtra::arrangeGrob(
title, final, nrow = 2, heights = c(1, 19)
)
}
return(final)
}
#' @title quealy_permutation_stats
#'
#' @description calculates group stats for all the permutations of a subroup. used
#' to be internal to quealy_subgroups, has been extracted.
#'
#' @param df a growth data frame
#' @param subgroup the subgroup to group and calculate summary stats for
#' @param norms school growth norms to use. 2012 or 2015.
#'
#' @return a data frame
#'
#' @export
quealy_permutation_stats <- function(df, subgroup, norms = 2015) {
results <- df %>%
dplyr::group_by_(
subgroup, quote(measurementscale),
quote(start_fallwinterspring), quote(end_fallwinterspring)
) %>%
dplyr::summarize(
approximate_grade = round(mean(end_grade, na.rm = TRUE), 0),
start_rit = mean(start_testritscore, na.rm = TRUE),
end_rit = mean(end_testritscore, na.rm = TRUE),
rit_change = mean(rit_growth, na.rm = TRUE),
start_npr = mean(start_consistent_percentile, na.rm = TRUE),
end_npr = mean(end_consistent_percentile, na.rm = TRUE),
npr_change = mean(end_consistent_percentile - start_consistent_percentile, na.rm = TRUE),
n = n()
) %>%
as.data.frame()
#add cgp
results$cgp <- NA
for (i in 1:nrow(results)) {
results[i, ]$cgp <- calc_cgp(
measurementscale = results[i, ]$measurementscale,
end_grade = results[i, ]$approximate_grade,
growth_window = paste(results[i, ]$start_fallwinterspring,
'to', results[i, ]$end_fallwinterspring),
baseline_avg_rit = results[i, ]$start_rit,
ending_avg_rit = results[i, ]$end_rit,
norms = norms
)[['results']]
}
return(results)
}
#' @title quealy_facet_one_subgroup
#'
#' @description the plot called by quealy subgroups for each subgroup. used
#' to be internal to the function, has been extracted.
#'
#' @param sum_df output of quealy_permutation_stats. needs to have a header
#' called facet_me. look at quealy_subgroups to see example use.
#' @param subgroup the subgroup to plot. quealy_subgroups calls this plot
#' once per element in subgroup_cols (and once for all students)
#' @param xlims the global xlims for the plot.
#' @param n_range the global range of n values. used to set the width of
#' the lines
#' @param ref_lines if using with all students, the reference lines
#' showing change in all_students
#'
#' @return a data frame
#'
#' @export
quealy_facet_one_subgroup <- function(
sum_df, subgroup, xlims, n_range, ref_lines = NA
) {
if (nrow(sum_df) == 0) {
stop("your feature/facet df is zero rows long. check your inputs?")
}
#add newline breaks to the facet text
sum_df$facet_format <- lapply(sum_df$facet_me, force_string_breaks, 30) %>%
unlist()
#get the arrow size on a universal scale
min_width <- 0.2
max_width <- 0.5
pct_of_range <- ((sum_df$n - n_range[1]) / (n_range[2] - n_range[1]))
sum_df$size_scaled <- min_width + (pct_of_range * (max_width - min_width))
#if identical, NaN
sum_df$size_scaled <- ifelse(is.nan(sum_df$size_scaled), 1, sum_df$size_scaled)
all_na_test <- all(is.na(sum_df$cgp))
#cgp labeler
cgp_labeler <- function(n, cgp) {
if (
(unique(sum_df$start_fallwinterspring) == 'Spring' & unique(sum_df$end_fallwinterspring) == 'Winter') |
all(is.na(sum_df$cgp))
) {
return(paste(n, 'stu'))
}
if (n < 10) {
return(paste(n, 'stu'))
} else {
return(paste(n, 'stu', '| CGP:', round(cgp, 0)))
}
}
e <- new.env()
e$xlims <- xlims
sum_df <- sum_df %>%
dplyr::rowwise() %>%
dplyr::mutate(
cgp_label = cgp_labeler(n, cgp)
) %>%
as.data.frame()
#make
p <- ggplot(
data = sum_df,
aes(
x = start_rit,
xend = end_rit,
y = 1,
yend = 1
),
environment = e
)
if (class(ref_lines) == "numeric") {
p <- p + annotate(
geom = 'rect',
xmin = ref_lines[1], xmax = ref_lines[2], ymin = -1, ymax = 3,
fill = 'dodgerblue',
alpha = 0.15,
size = 1.25
)
}
#labels
p <- p + geom_text(
aes(
x = start_rit + 0.5 * (end_rit - start_rit),
y = 0.75,
label = facet_format
),
inherit.aes = FALSE,
size = 9,
alpha = 0.4,
color = 'hotpink'
) +
geom_segment(
aes(
size = size_scaled
),
arrow = grid::arrow(length = grid::unit(0.2 + (0.075 * sum_df$size_scaled), "cm"))
) +
#start rit
geom_text(
aes(
x = start_rit,
y = 0.7,
label = paste0(round(start_rit, 1), ' (', substr(start_fallwinterspring, 1, 1), ')')
),
inherit.aes = FALSE,
size = 3,
color = 'gray40'
) +
#end rit
geom_text(
aes(
x = end_rit,
y = 0.7,
label = paste0(round(end_rit, 1), ' (', substr(end_fallwinterspring, 1, 1), ')')
),
inherit.aes = FALSE,
size = 3,
color = 'gray40'
) +
#n stu and CGP
geom_text(
aes(
x = start_rit + 0.5 * (end_rit - start_rit),
y = 1.35,
label = cgp_label
),
fontface = 'italic',
color = 'gray40',
size = 4
) +
coord_cartesian(
xlim = c(xlims[1] - 0.5, xlims[2] + 0.5),
ylim = c(0, 2)
) +
facet_grid(
facet_format ~ .
) +
theme_bw() +
theme(
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.border = element_blank(),
panel.margin = grid::unit(0, "lines"),
plot.margin = grid::unit(c(1,1,1,1), "mm")
) +
labs(x = 'RIT') +
scale_size_identity()
#title
p_title <- grob_justifier(
grid::textGrob(
subgroup, gp = grid::gpar(fontsize = 18, fontface = 'bold')
),
"center", "center"
)
first_row <- if (nrow(sum_df) <= 2) {1.5} else {1}
#arrange and return
gridExtra::arrangeGrob(
p_title, p,
nrow = 2, heights = c(first_row, 9)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.