#' @title Becca Vichniac's Quartile (Floating Bar) Chart
#'
#' @description
#' \code{becca_plot} returns a ggplot object binned quaritle performonce
#'
#' @details
#' This function builds and prints a bar graph with 4 bins per bar show MAP data
#' binned by quartile (National Percentile Rank). Bars are centered at 50th percentile
#' horizonatally
#'
#' @param .data the data frame in TEAM canoncical style (long data forms)
#' @param school_name_column column in \code{.data} with school names
#' @param cohort_name_column column in \code{.data} with cohornt names
#' @param grade_level_season_column column in \code{.data} with numeric indicating grade season (e.g., Fall 4th
#' = 3.3, Winter 4th = 3.7, Spring 4th = 4.0).
#' @param measurement_scale_column column in \code{.data} with subject
#' @param test_percentile_column = column in \code{.data} with NPR.
#' @param first_and_spring_only indicator for showing shoing Fall-to-Spring rather than Spring-to-Spring
#' @param justify_widths width justification indicator
#' @param justify_min
#' @param justify_max
#' @param entry_grades = c(-0.7, 4.3) GradeSeasons that should use Fall and Spring (i.e., Kinder =-.3 and 5th = 4.3)
#' @param color_scheme only 'KIPP Report Card'
#' @param facets = FALSE
#' @param facet_opts = FALSE
#' @param title_text = FALSE
#' @param small_n_cutoff drop a grade_level_season if less than x% of the max?
#' (useful when dealing with weird cohort histories)
#'
#' @return prints a ggplot object
#' @export
becca_plot <- function(
.data
#for munging data
,school_name_column = 'sch_abbr'
,cohort_name_column = 'cohort'
,grade_level_season_column = 'grade_level_season'
,measurement_scale_column = 'measurementscale'
,test_percentile_column = 'percentile_2011_norms'
#controls if 'extra' data points (fall, winter) get dropped
,first_and_spring_only = TRUE
,entry_grades = c(-0.8, 4.2)
,auto_justify_x = TRUE
,justify_widths = FALSE
,justify_min = NA
,justify_max = NA
,color_scheme = 'KIPP Report Card'
,facets = FALSE
,facet_opts = FALSE
,title_text = FALSE
,small_n_cutoff = .001
) {
require(dplyr)
# Changed passed dataframes' column names to those used throughout
# function
colnames(.data)[colnames(.data) == school_name_column] <- 'SCH_ABBREV'
colnames(.data)[colnames(.data) == cohort_name_column] <- 'COHORT'
colnames(.data)[colnames(.data) == grade_level_season_column] <- 'GRADE_LEVEL_SEASON'
colnames(.data)[colnames(.data) == measurement_scale_column] <- 'MEASUREMENTSCALE'
colnames(.data)[colnames(.data) == test_percentile_column] <- 'PERCENTILE_2011_NORMS'
#TRANSFORMATION 1 - TRIM
#trim down the C.data - we don't need all the columns
require(data.table)
d1 <- as.data.table(as.data.frame(.data[,c(
'SCH_ABBREV'
,'COHORT'
,'GRADE_LEVEL_SEASON'
,'MEASUREMENTSCALE'
,'PERCENTILE_2011_NORMS')]))
#all terms or first & spring only?
if (first_and_spring_only) {
#possible entry grades controlled by entry_grades parameter
#default is Fall K, Fall 5 (aka -0.7, 4.3) - only change if you need to
#add an additional entry grade (perhaps 9th?) or to take away 5th
#(eg for a fully grown KIPP school?)
d1 <- d1[with(d1, round(GRADE_LEVEL_SEASON, 1) %in% round(entry_grades,1) | GRADE_LEVEL_SEASON %% 1 == 0), ]
}
#drop small N time periods
by_grade_season <- group_by(.data, GRADE_LEVEL_SEASON)
grade_season_counts <- dplyr::summarize(
by_grade_season
,n=n()
)
biggest <- max(grade_season_counts$n)
grade_season_counts$include <- grade_season_counts$n >= small_n_cutoff * biggest
use_these <- grade_season_counts[grade_season_counts$include==TRUE, 'GRADE_LEVEL_SEASON']
d1 <- d1[d1$GRADE_LEVEL_SEASON %in% use_these$GRADE_LEVEL_SEASON, ]
#calculate quartile from test percentile
d1[,QUARTILE:=floor((PERCENTILE_2011_NORMS/25) + 1)]
#TRANSFORMATION 2 - COUNT
#calculate group level averages. Our final data set should have
#SCHOOL COHORT YEAR SUBJECT QUARTILE PCT
#There is definitely a more elegant way to do this that doesn't
#require 2 ddply calls, but this works for now
d2<-d1[,list(N_Qrtl=.N),
keyby=list(SCH_ABBREV,
COHORT,
GRADE_LEVEL_SEASON,
MEASUREMENTSCALE,
QUARTILE)][
d1[,list(.N),
keyby=list(SCH_ABBREV,
COHORT,
GRADE_LEVEL_SEASON,
MEASUREMENTSCALE)]][,PCT:=round(N_Qrtl/N*100,1)]
#add a column that indicates above/below grade level
#this simplifies bar chart creation
#set flags for above and below
d2[QUARTILE<=2, AT_GRADE_LEVEL_DUMMY:='NO']
d2[QUARTILE>=3, AT_GRADE_LEVEL_DUMMY:='YES']
#TRANSFORMATION 4 - CUSTOM ORDERING
#this was tricky (and important!) -- thanks Mike H.
d2[,ORDER:=QUARTILE]
#stage_3$ORDER <- stage_3$QUARTILE
#2 becomes placeholder
d2[QUARTILE==2, ORDER:=99]
#stage_3[stage_3$QUARTILE == 2, 'ORDER'] <- 'placeholder'
#1 becomes 2
d2[QUARTILE==1, ORDER:=2]
#stage_3[stage_3$QUARTILE == 1, 'ORDER'] <- 2
#placeholder becomes 1
d2[ORDER==99,ORDER:=1]
#stage_3[stage_3$ORDER == 'placeholder', 'ORDER'] <- 1
#finally sort by new order (so midpoint calculation works properly)
final_data <- copy(d2[order(MEASUREMENTSCALE,
SCH_ABBREV,
COHORT,
GRADE_LEVEL_SEASON,
ORDER)])
#TRANSFORMATION 5 - TWO .datas FOR CHART
#super helpful advice from: http://stackoverflow.com/questions/13734368/ggplot2-and-a-stacked-bar-chart-with-negative-values
#above
npr_above <- final_data[AT_GRADE_LEVEL_DUMMY == 'YES']
#below
npr_below <- final_data[AT_GRADE_LEVEL_DUMMY == 'NO']
#flip the sign
npr_below[, PCT:= PCT * -1]
#TRANSFORMATION 5 - CALCULATE MIDPOINTS (for chart labels)
#one df for the two quartiles above the national average...
npr_above <- npr_above[,list(N,
PCT,
AT_GRADE_LEVEL_DUMMY,
ORDER,
QUARTILE,
MIDPOINT=cumsum(PCT) - 0.5*PCT),
by=list(SCH_ABBREV,
COHORT,
GRADE_LEVEL_SEASON,
MEASUREMENTSCALE)]
#...and another for those below.
npr_below <- npr_below[,list(N,
PCT,
AT_GRADE_LEVEL_DUMMY,
ORDER,
QUARTILE,
MIDPOINT=cumsum(PCT) - 0.5*PCT),
by=list(SCH_ABBREV,
COHORT,
GRADE_LEVEL_SEASON,
MEASUREMENTSCALE)]
npr_below[,QUARTILE:=ordered(QUARTILE, levels = names(sort(-table(QUARTILE))))]
#FORMAT X AXIS LABELS
becca_x_breaks <- sort(unique(final_data$GRADE_LEVEL_SEASON))
becca_x_labels <- unlist(lapply(becca_x_breaks, fall_spring_me))
if (auto_justify_x == TRUE) {
becca_x_breaks <- c(min(becca_x_breaks) - 0.35, becca_x_breaks, max(becca_x_breaks) + 0.35)
becca_x_labels <- c('', becca_x_labels, '')
}
if (justify_widths == TRUE) {
becca_x_breaks <- c(justify_min, becca_x_breaks, justify_max)
becca_x_labels <- c('', becca_x_labels, '')
}
#PLOT PLOT PLOT PLOT
p <- ggplot() +
#top half of NPR plots
geom_bar(
data = npr_above
,aes(
x = GRADE_LEVEL_SEASON
,y = PCT
,fill = factor(QUARTILE)
,order = ORDER
)
,stat = "identity"
) +
#bottom half of NPR plots
geom_bar(
data = npr_below
,aes(
x = GRADE_LEVEL_SEASON
,y = PCT
,fill = factor(QUARTILE)
,order = ORDER
)
,stat = "identity"
) +
#labels above
geom_text(
data = npr_above
,aes(
x = GRADE_LEVEL_SEASON
,y = MIDPOINT
,label = round(PCT,0)
)
,size = 4
) +
#labels below
geom_text(
data = npr_below
,aes(
x = GRADE_LEVEL_SEASON
,y = MIDPOINT
,label = abs(round(PCT, 0))
)
,size = 4
) +
#axis labels
labs(
x = 'Grade Level'
,y = 'Percentage of Cohort'
) +
#clean out some default ggplot formatting elements
theme(
#zero out cetain formatting
panel.background = element_blank()
,plot.background = element_blank()
,panel.grid.major = element_blank()
,panel.grid.minor = element_blank()
,axis.ticks.y = element_blank()
#title and axis sizes
,title = element_text(size = rel(0.9))
,axis.title.x = element_text(size = rel(0.9))
,axis.text.y = element_blank()
,plot.margin = rep(unit(0,"null"),4)
) +
scale_x_continuous(
breaks = becca_x_breaks
,labels = becca_x_labels
) +
coord_cartesian(
xlim=c(min(becca_x_breaks),max(becca_x_breaks))
)
legend_labels = c('1st', '2nd', '3rd', '4th')
#color style?
if(color_scheme == 'KIPP Report Card') {
p <- p +
#dark gray, light gray, light orange, dark orange
scale_fill_manual(
values = c(
rgb(207, 204, 193, max = 255)
,rgb(230, 230, 230, max = 255)
,rgb(254, 188, 17, max = 255)
,rgb(247, 148, 30, max = 255)
)
,name = 'Quartiles'
,labels = legend_labels
)
} else if (color_scheme == 'Sequential Blues') {
p <- p + scale_fill_brewer(
type = "seq"
,palette = 1
)
} else {
p <- p + scale_fill_manual(
values = color_scheme
,labels = legend_labels
)
}
#title?
if (title_text != FALSE) {
p <- p +
labs(
title = title_text
)
}
#facet specified AND facet opts
if (facets != FALSE & facet_opts != FALSE) {
p <- p + eval(facet_grid(as.formula(facets), facet_opts))
#facet specified WITHOUT facet opts
} else if (facets != FALSE & facet_opts == FALSE) {
p <- p + facet_grid(as.formula(facets))
}
#no facet specified = no need to do anything (implicit)
#legend formatting stuff
p <- p + guides(
fill = guide_legend(
#make it a little smaller
title.theme = element_text(
angle = 0
,size = rel(8)
)
,keywidth = .5
,keyheight = .5
#flips the order
,reverse = TRUE)
)
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.