#' Identify charter host districts
#'
#' Left-joins the packaged `charter_city` lookup (minus its `district_name`
#' column) onto `df`. Rows without a match in the lookup are kept and get
#' `NA` in the joined columns.
#'
#' @param df dataframe of NJ school data containing either a `district_id`
#' or a `district_code` column. When both are present, `district_id` is used
#' as the join key.
#'
#' @return df with host district id and name for every matching record; the
#' number of rows is the same as in the input
#' @export
id_charter_hosts <- function(df) {
  ensure_that(
    df, 'district_id' %in% names(.) || 'district_code' %in% names(.) ~
      "supplied dataframe must contain 'district_id' or 'district_code'"
  )

  charter_city_slim <- charter_city %>% select(-district_name)

  if ('district_id' %in% names(df)) {
    df_new <- df %>% left_join(charter_city_slim, by = 'district_id')
  } else {
    # match the lookup's `district_id` against `district_code` by name, so
    # the join does not depend on the column order of `charter_city`
    df_new <- df %>%
      left_join(charter_city_slim, by = c('district_code' = 'district_id'))
  }

  # a left join must not fan out; extra rows mean the lookup has duplicate keys
  ensure_that(
    df, nrow(.) == nrow(df_new) ~ 'joining to the charter hosts data set changed the size of your input dataframe. this could be an issue with the `charter_city` dataframe included in this package.'
  )

  df_new
}
#' Put aggregate enrollment columns into their standard order
#'
#' Selects the fixed set of aggregate enrollment columns, in a fixed order.
#' Columns of `df` that are not listed here are dropped from the result.
#'
#' @param df aggregate enrollment dataframe
#'
#' @return data.frame
#' @export
agg_enr_column_order <- function(df) {
  select(
    df,
    # identifiers
    end_year,
    CDS_Code,
    county_id,
    county_name,
    district_id,
    district_name,
    school_id,
    school_name,
    # what is being counted
    program_code,
    program_name,
    grade_level,
    subgroup,
    # measures
    n_students,
    pct_total_enr,
    n_schools,
    # level-of-aggregation flags
    is_state,
    is_county,
    is_district,
    is_charter_sector,
    is_allpublic,
    is_school,
    is_subprogram
  )
}
#' Helper function to support calculating pct_total on aggregate enr dataframes
#'
#' Takes the `total_enrollment` row for each
#' `end_year` / `district_id` / `program_code` combination as the denominator
#' and divides every row's `n_students` by it.
#'
#' @param df aggregate enrollment dataframe
#'
#' @return data.frame: `df` with `pct_total_enr` set to
#' `n_students / total enrollment`; the number of rows is unchanged
#' @export
agg_enr_pct_total <- function(df) {
  df_totals <- df %>%
    filter(subgroup == 'total_enrollment') %>%
    select(end_year, district_id, program_code, n_students) %>%
    rename(row_total = n_students)

  # this calculation shouldn't generate duplicate rows
  nrow_before <- nrow(df)

  # the join below can only add rows when the totals table holds more than
  # one row for a key, so offenders are looked up there (the full frame
  # repeats every key once per subgroup by design)
  total_keys <- df_totals %>%
    mutate(
      hash = paste(end_year, district_id, program_code, sep = '_')
    ) %>%
    pull(hash)
  nonuniques <- unique(total_keys[duplicated(total_keys)])

  # join totals back to enable calculation of percentages
  df <- df %>%
    left_join(df_totals, by = c('end_year', 'district_id', 'program_code')) %>%
    mutate(pct_total_enr = n_students / row_total) %>%
    select(-row_total)

  # the message is collapsed to one string so it is always length one
  ensure_that(
    df, nrow(.) == nrow_before ~
      paste0(
        'calculating percent of total changed the size of the sector_aggs dataframe.\n',
        'this suggests duplicate total_enrollment rows per year/district_id/program_code.\n',
        'offending year_district_program: ', paste(nonuniques, collapse = ', ')
      )
  )

  df
}
#' Calculate Charter Sector Enrollment Aggregates
#'
#' Tags rows with their host district via `id_charter_hosts()`, keeps the
#' charter rows, and sums `n_students` per year, host, program, grade level
#' and subgroup. The result carries pseudo ids (`<host>C` / `999C`) and
#' `is_charter_sector = TRUE`.
#'
#' @param df a tidied enrollment dataframe, eg output of `fetch_enr`
#'
#' @return dataframe with charter sector aggregates per host city
#' @export
charter_sector_enr_aggs <- function(df) {
  with_hosts <- id_charter_hosts(df)

  # charters are reported twice, one per school one per district
  # take the district level only, in the hopes that NJ will
  # someday fix this and report charter campuses
  recent_rows <- filter(
    with_hosts,
    end_year >= 2010,
    county_id == 80,
    district_id != "9999",
    school_id == "999"
  )
  # before 2010 there is no county filter; rows are picked by
  # district_id >= 6000 instead
  legacy_rows <- filter(
    with_hosts,
    end_year < 2010,
    district_id != "9999",
    school_id == "999",
    as.numeric(district_id) >= 6000
  )

  # roll up to the host city
  sector <- bind_rows(recent_rows, legacy_rows) %>%
    group_by(
      end_year,
      host_county_id, host_county_name,
      host_district_id, host_district_name,
      program_code, program_name, grade_level,
      subgroup
    ) %>%
    summarize(
      n_students = sum(n_students, na.rm = TRUE),
      n_schools = n()
    ) %>%
    ungroup()

  # pseudo district names / codes, then the level-of-aggregation flags
  labelled <- sector %>%
    rename(
      county_id = host_county_id,
      county_name = host_county_name
    ) %>%
    mutate(
      CDS_Code = NA_character_,
      district_id = paste0(host_district_id, "C"),
      district_name = paste0(host_district_name, " Charters"),
      school_id = "999C",
      school_name = "Charter Sector Total"
    ) %>%
    mutate(
      is_state = FALSE,
      is_county = FALSE,
      is_citywide = FALSE,
      is_district = FALSE,
      is_charter_sector = TRUE,
      is_allpublic = FALSE,
      is_school = FALSE,
      is_subprogram = program_code != "55"
    ) %>%
    select(-host_district_id, -host_district_name)

  # percent of total, then standard column order
  agg_enr_column_order(agg_enr_pct_total(labelled))
}
#' Calculate All Public Enrollment Aggregates
#'
#' Re-homes charter rows onto their host district (as identified by
#' `id_charter_hosts()`), then sums district-level enrollment per year,
#' county, district, program, grade level and subgroup. Only districts with
#' at least one charter row in the group are kept.
#'
#' @param df a tidied enrollment dataframe, eg output of `fetch_enr`
#'
#' @return dataframe with all public school option aggregates for any
#' charter host city
#' @export
allpublic_enr_aggs <- function(df) {
  # if charter, the host's county / district ids replace the charter's own
  rehomed <- df %>%
    ungroup() %>%
    id_charter_hosts() %>%
    mutate(
      is_charter = !is.na(host_district_id),
      county_id = ifelse(!is.na(host_county_id), host_county_id, county_id),
      district_id = ifelse(
        !is.na(host_district_id), host_district_id, district_id
      )
    )

  # district level rows only (not school), summed on the re-homed ids;
  # hosts with no charters are out of scope for this calc
  citywide <- rehomed %>%
    filter(is_district) %>%
    group_by(
      end_year,
      county_id, district_id,
      program_code, program_name, grade_level,
      subgroup
    ) %>%
    summarize(
      n_students = sum(n_students, na.rm = TRUE),
      n_schools = n(),
      n_charter = sum(is_charter, na.rm = TRUE)
    ) %>%
    ungroup() %>%
    filter(n_charter > 0)

  # county_name / district_name come from the charter_city lookup
  host_names <- unique(
    rename(
      select(
        charter_city,
        host_district_id, host_district_name, host_county_name
      ),
      district_id = host_district_id,
      district_name = host_district_name,
      county_name = host_county_name
    )
  )

  # pseudo district names / codes, then the level-of-aggregation flags
  labelled <- citywide %>%
    left_join(host_names, by = "district_id") %>%
    mutate(
      CDS_Code = NA_character_,
      district_id = paste0(district_id, "A"),
      district_name = paste0(district_name, " All Public"),
      school_id = "999A",
      school_name = "All Public Total"
    ) %>%
    mutate(
      is_state = FALSE,
      is_county = FALSE,
      is_citywide = FALSE,
      is_district = FALSE,
      is_charter_sector = FALSE,
      is_allpublic = TRUE,
      is_school = FALSE,
      is_subprogram = program_code != "55"
    )

  # percent of total, then standard column order
  agg_enr_column_order(agg_enr_pct_total(labelled))
}
#' Calculate Charter Sector PARCC aggregates
#'
#' Tags rows with their host district via `id_charter_hosts()`, keeps the
#' charter rows, groups them by host and test, and summarizes with
#' `parcc_aggregate_calcs()`. The result carries pseudo ids
#' (`<host>C` / `999C`) and `is_charter_sector = TRUE`.
#'
#' @param df tidied PARCC dataframe, eg output of fetch_parcc
#'
#' @return df containing charter sector aggregate PARCC performance
#' @export
charter_sector_parcc_aggs <- function(df) {
  # charters are reported twice, one per school one per district
  # take the district level only, in the hopes that NJ will
  # someday fix this and report charter campuses
  charter_rows <- df %>%
    id_charter_hosts() %>%
    mutate(is_charter = !is.na(host_district_id)) %>%
    filter(
      county_id == "80",
      district_id != "9999",
      is.na(school_id)
    ) %>%
    # dfg isn't particularly meaningful and some charters are in the
    # ND not determined bucket
    mutate(dfg = NA)

  # roll up to the host city
  sector <- charter_rows %>%
    group_by(
      testing_year,
      assess_name, test_name,
      host_county_id, host_county_name,
      host_district_id, host_district_name,
      dfg,
      grade,
      subgroup, subgroup_type
    ) %>%
    parcc_aggregate_calcs() %>%
    ungroup()

  # pseudo district names / codes, then the level-of-aggregation flags
  labelled <- sector %>%
    rename(
      county_id = host_county_id,
      county_name = host_county_name
    ) %>%
    mutate(
      district_id = paste0(host_district_id, "C"),
      district_name = paste0(host_district_name, " Charters"),
      school_id = "999C",
      school_name = "Charter Sector Total"
    ) %>%
    mutate(
      is_state = FALSE,
      is_dfg = FALSE,
      is_district = FALSE,
      is_charter = FALSE,
      is_school = FALSE,
      is_charter_sector = TRUE,
      is_allpublic = FALSE
    ) %>%
    select(-host_district_id, -host_district_name)

  parcc_column_order(labelled)
}
#' Calculate All Public Options PARCC Aggregates
#'
#' Re-homes charter rows onto their host district (as identified by
#' `id_charter_hosts()`), groups district-level rows by district and test,
#' and summarizes with `parcc_aggregate_calcs()`. Only groups with
#' `n_charter_rows > 0` are kept.
#'
#' @param df tidied PARCC dataframe, eg output of fetch_parcc
#'
#' @return df containing all public option aggregates by district
#' @export
allpublic_parcc_aggs <- function(df) {
  # if charter, the host's county / district ids replace the charter's own
  rehomed <- df %>%
    id_charter_hosts() %>%
    mutate(
      county_id = ifelse(!is.na(host_county_id), host_county_id, county_id),
      district_id = ifelse(
        !is.na(host_district_id), host_district_id, district_id
      )
    ) %>%
    # only district rows
    filter(is_district) %>%
    # dfg isn't particularly meaningful and some charters are in the
    # ND not determined bucket
    mutate(dfg = NA)

  # summarize on the re-homed ids; hosts with no charters are out of scope
  citywide <- rehomed %>%
    group_by(
      testing_year,
      assess_name, test_name,
      county_id,
      district_id,
      dfg,
      grade,
      subgroup, subgroup_type
    ) %>%
    parcc_aggregate_calcs() %>%
    ungroup() %>%
    filter(n_charter_rows > 0)

  # county_name / district_name come from the charter_city lookup
  host_names <- unique(
    rename(
      select(
        charter_city,
        host_district_id, host_district_name, host_county_name
      ),
      district_id = host_district_id,
      district_name = host_district_name,
      county_name = host_county_name
    )
  )

  # pseudo district names / codes, then the level-of-aggregation flags
  labelled <- citywide %>%
    left_join(host_names, by = "district_id") %>%
    mutate(
      district_id = paste0(district_id, "A"),
      district_name = paste0(district_name, " All Public"),
      school_id = "999A",
      school_name = "All Public Total"
    ) %>%
    mutate(
      is_state = FALSE,
      is_dfg = FALSE,
      is_district = FALSE,
      is_charter = FALSE,
      is_school = FALSE,
      is_charter_sector = FALSE,
      is_allpublic = TRUE
    )

  parcc_column_order(labelled)
}
#' Calculate Charter Sector Grad Rate aggregates
#'
#' Tags rows with their host district via `id_charter_hosts()`, keeps the
#' charter rows, groups them by year, host, subgroup and methodology, and
#' summarizes with `grate_aggregate_calcs()`. The result carries pseudo ids
#' (`<host>C` / `999C`) and `is_charter_sector = TRUE`.
#'
#' @param df tidied grate dataframe, eg output of fetch_grate
#'
#' @return df containing charter sector aggregate grad rate
#' @export
charter_sector_grate_aggs <- function(df) {
  # charters are reported twice, one per school one per district
  # take the district level only, in the hopes that NJ will
  # someday fix this and report charter campuses
  charter_rows <- df %>%
    id_charter_hosts() %>%
    filter(
      county_id == "80",
      district_id != "9999",
      school_id == "999"
    )

  # roll up to the host city
  sector <- charter_rows %>%
    group_by(
      end_year,
      host_county_id, host_county_name,
      host_district_id, host_district_name,
      subgroup, methodology
    ) %>%
    grate_aggregate_calcs() %>%
    ungroup()

  # pseudo district names / codes, then the level-of-aggregation flags
  labelled <- sector %>%
    rename(
      county_id = host_county_id,
      county_name = host_county_name
    ) %>%
    mutate(
      district_id = paste0(host_district_id, "C"),
      district_name = paste0(host_district_name, " Charters"),
      school_id = "999C",
      school_name = "Charter Sector Total"
    ) %>%
    mutate(
      is_state = FALSE,
      is_district = FALSE,
      is_charter = FALSE,
      is_school = FALSE,
      is_charter_sector = TRUE,
      is_allpublic = FALSE
    ) %>%
    select(-host_district_id, -host_district_name)

  grate_column_order(labelled)
}
#' Calculate All Public Grad Rate aggregates
#'
#' Re-homes charter rows onto their host district (as identified by
#' `id_charter_hosts()`), groups district-level rows by year, county,
#' district, subgroup and methodology, and summarizes with
#' `grate_aggregate_calcs()`. Only groups with `n_charter_rows > 0` are kept.
#'
#' @param df tidied grate dataframe, eg output of fetch_grate
#'
#' @return df containing allpublic aggregate grad rate
#' @export
allpublic_grate_aggs <- function(df) {
  # if charter, the host's county / district ids replace the charter's own
  rehomed <- df %>%
    id_charter_hosts() %>%
    mutate(
      county_id = ifelse(!is.na(host_county_id), host_county_id, county_id),
      district_id = ifelse(
        !is.na(host_district_id), host_district_id, district_id
      )
    ) %>%
    # only district rows
    filter(is_district)

  # summarize on the re-homed ids; hosts with no charters are out of scope
  citywide <- rehomed %>%
    group_by(
      end_year,
      county_id,
      district_id,
      subgroup,
      methodology
    ) %>%
    grate_aggregate_calcs() %>%
    ungroup() %>%
    filter(n_charter_rows > 0)

  # county_name / district_name come from the charter_city lookup
  host_names <- unique(
    rename(
      select(
        charter_city,
        host_district_id, host_district_name, host_county_name
      ),
      district_id = host_district_id,
      district_name = host_district_name,
      county_name = host_county_name
    )
  )

  # pseudo district names / codes, then the level-of-aggregation flags
  labelled <- citywide %>%
    left_join(host_names, by = "district_id") %>%
    mutate(
      district_id = paste0(district_id, "A"),
      district_name = paste0(district_name, " All Public"),
      school_id = "999A",
      school_name = "All Public Total"
    ) %>%
    mutate(
      is_state = FALSE,
      is_dfg = FALSE,
      is_district = FALSE,
      is_charter = FALSE,
      is_school = FALSE,
      is_charter_sector = FALSE,
      is_allpublic = TRUE
    )

  grate_column_order(labelled)
}
#' Calculate Charter Sector Grad Count aggregates
#'
#' Tags rows with their host district via `id_charter_hosts()`, keeps the
#' charter rows, groups them by year, host and subgroup, and summarizes with
#' `gcount_aggregate_calcs()`. The result carries pseudo ids
#' (`<host>C` / `999C`) and `is_charter_sector = TRUE`.
#'
#' @param df tidied grad count dataframe, eg output of fetch_grad_count
#'
#' @return df containing charter sector aggregate grad count
#' @export
charter_sector_gcount_aggs <- function(df) {
  # charters are reported twice, one per school one per district
  # take the district level only, in the hopes that NJ will
  # someday fix this and report charter campuses
  charter_rows <- df %>%
    id_charter_hosts() %>%
    filter(
      county_id == "80",
      district_id != "9999",
      school_id == "999"
    )

  # roll up to the host city
  sector <- charter_rows %>%
    group_by(
      end_year,
      host_county_id, host_county_name,
      host_district_id, host_district_name,
      subgroup
    ) %>%
    gcount_aggregate_calcs() %>%
    ungroup()

  # pseudo district names / codes, then the level-of-aggregation flags
  labelled <- sector %>%
    rename(
      county_id = host_county_id,
      county_name = host_county_name
    ) %>%
    mutate(
      district_id = paste0(host_district_id, "C"),
      district_name = paste0(host_district_name, " Charters"),
      school_id = "999C",
      school_name = "Charter Sector Total"
    ) %>%
    mutate(
      is_state = FALSE,
      is_district = FALSE,
      is_charter = FALSE,
      is_school = FALSE,
      is_charter_sector = TRUE,
      is_allpublic = FALSE
    ) %>%
    select(-host_district_id, -host_district_name)

  gcount_column_order(labelled)
}
#' Calculate All Public Grad Count aggregates
#'
#' Re-homes charter rows onto their host district (as identified by
#' `id_charter_hosts()`), groups district-level rows by year, county,
#' district and subgroup, and summarizes with `gcount_aggregate_calcs()`.
#' Only groups with `n_charter_rows > 0` are kept.
#'
#' @param df tidied grad count dataframe, eg output of fetch_grad_count
#'
#' @return df containing all public aggregate grad count
#' @export
allpublic_gcount_aggs <- function(df) {
  # if charter, the host's county / district ids replace the charter's own
  rehomed <- df %>%
    ungroup() %>%
    id_charter_hosts() %>%
    mutate(
      county_id = ifelse(!is.na(host_county_id), host_county_id, county_id),
      district_id = ifelse(
        !is.na(host_district_id), host_district_id, district_id
      )
    ) %>%
    # only district rows
    filter(is_district)

  # summarize on the re-homed ids; hosts with no charters are out of scope
  citywide <- rehomed %>%
    group_by(
      end_year,
      county_id,
      district_id,
      subgroup
    ) %>%
    gcount_aggregate_calcs() %>%
    ungroup() %>%
    filter(n_charter_rows > 0)

  # county_name / district_name come from the charter_city lookup
  host_names <- unique(
    rename(
      select(
        charter_city,
        host_district_id, host_district_name, host_county_name
      ),
      district_id = host_district_id,
      district_name = host_district_name,
      county_name = host_county_name
    )
  )

  # pseudo district names / codes, then the level-of-aggregation flags
  labelled <- citywide %>%
    left_join(host_names, by = "district_id") %>%
    mutate(
      district_id = paste0(district_id, "A"),
      district_name = paste0(district_name, " All Public"),
      school_id = "999A",
      school_name = "All Public Total"
    ) %>%
    mutate(
      is_state = FALSE,
      is_dfg = FALSE,
      is_district = FALSE,
      is_charter = FALSE,
      is_school = FALSE,
      is_charter_sector = FALSE,
      is_allpublic = TRUE
    )

  gcount_column_order(labelled)
}
#' Calculate Charter Sector Special Populations Aggregates
#'
#' Tags rows with their host district via `id_charter_hosts()`, drops rows
#' with `school_id == '999'`, groups by year, host and subgroup, and
#' summarizes with `spec_pop_aggregate_calcs()`. The result carries pseudo
#' ids (`<host>C` / `999C`) and `is_charter_sector = TRUE`.
#'
#' @param df a tidied special populations dataframe, eg output of
#' `fetch_reportcard_special_pop`
#'
#' @return dataframe with charter sector aggregates per host city
#' @export
charter_sector_spec_pop_aggs <- function(df) {
  df <- df %>%
    id_charter_hosts()

  df <- df %>%
    mutate(is_charter = !is.na(host_district_id)) %>%
    filter(school_id != '999') %>%
    group_by(
      end_year,
      host_county_id, host_county_name,
      host_district_id, host_district_name,
      subgroup
    ) %>%
    spec_pop_aggregate_calcs() %>%
    # clear any grouping left by the aggregation: the host_* columns are
    # grouping variables, and select() below could not drop them from a
    # grouped frame. this matches the other *_aggs functions
    ungroup()

  # give psuedo district names and codes and create appropriate boolean flags
  df <- df %>%
    rename(
      county_id = host_county_id,
      county_name = host_county_name
    ) %>%
    mutate(
      district_id = paste0(host_district_id, 'C'),
      district_name = paste0(host_district_name, " Charters"),
      school_id = '999C',
      school_name = 'Charter Sector Total',
      is_district = FALSE,
      is_charter_sector = TRUE,
      is_allpublic = FALSE,
      is_school = FALSE
    ) %>%
    select(-host_district_id, -host_district_name)

  # column order and return
  agg_spec_pop_column_order(df)
}
#' Calculate All City Special Populations aggregates
#'
#' Re-homes charter rows onto their host district (as identified by
#' `id_charter_hosts()`), groups by year, district and subgroup, and
#' summarizes with `spec_pop_aggregate_calcs()`. Only groups with
#' `n_charter_rows > 0` are kept.
#'
#' @param df tidied special populations dataframe, eg output of
#' `fetch_reportcard_special_pop`
#'
#' @return df with all city aggregates per host city
#' @export
allpublic_spec_pop_aggs <- function(df) {
  # if charter, the host's district id replaces the charter's own
  rehomed <- df %>%
    id_charter_hosts() %>%
    mutate(
      is_charter = !is.na(host_district_id),
      district_id = ifelse(
        !is.na(host_district_id), host_district_id, district_id
      )
    )

  # summarize on the re-homed district_id; hosts with no charters are out
  # of scope for this calc
  citywide <- rehomed %>%
    # NOTE(review): the charter-sector counterpart filters on
    # school_id != '999'; confirm district_id is the intended column here
    filter(district_id != "999") %>%
    group_by(
      end_year,
      district_id,
      subgroup
    ) %>%
    spec_pop_aggregate_calcs() %>%
    ungroup() %>%
    filter(n_charter_rows > 0)

  # county_name / district_name come from the charter_city lookup
  host_names <- unique(
    rename(
      select(
        charter_city,
        host_district_id, host_district_name, host_county_name
      ),
      district_id = host_district_id,
      district_name = host_district_name,
      county_name = host_county_name
    )
  )

  # pseudo district names / codes, then the level-of-aggregation flags
  labelled <- citywide %>%
    left_join(host_names, by = "district_id") %>%
    mutate(
      district_id = paste0(district_id, "A"),
      district_name = paste0(district_name, " All Public"),
      school_id = "999A",
      school_name = "All Public Total"
    ) %>%
    mutate(
      is_district = FALSE,
      is_charter_sector = FALSE,
      is_allpublic = TRUE,
      is_school = FALSE
    )

  agg_spec_pop_column_order(labelled)
}
#' Calculate Charter Sector SPED Aggregates
#'
#' Tags rows with their host district via `id_charter_hosts()`, groups by
#' year and host, and summarizes with `sped_aggregate_calcs()`. The result
#' carries pseudo ids (`<host>C` / `999C`) and `is_charter_sector = TRUE`.
#'
#' @param df a tidied district special education dataframe,
#' e.g. output of `fetch_sped`
#'
#' @return dataframe with charter sector aggregates per host city
#' @export
charter_sector_sped_aggs <- function(df) {
  # roll up to the host city
  sector <- df %>%
    id_charter_hosts() %>%
    mutate(is_charter = !is.na(host_district_id)) %>%
    group_by(
      end_year,
      host_county_id, host_county_name,
      host_district_id, host_district_name
    ) %>%
    sped_aggregate_calcs() %>%
    ungroup()

  # pseudo district names / codes, then the level-of-aggregation flags
  labelled <- sector %>%
    rename(
      county_id = host_county_id,
      county_name = host_county_name
    ) %>%
    mutate(
      district_id = paste0(host_district_id, "C"),
      district_name = paste0(host_district_name, " Charters"),
      school_id = "999C",
      school_name = "Charter Sector Total"
    ) %>%
    mutate(
      is_district = FALSE,
      is_charter_sector = TRUE,
      is_allpublic = FALSE,
      is_school = FALSE
    ) %>%
    select(-host_district_id, -host_district_name)

  agg_sped_column_order(labelled)
}
#' Calculate All Public SPED aggregates
#'
#' Re-homes charter rows onto their host district (as identified by
#' `id_charter_hosts()`), groups by year and district, and summarizes with
#' `sped_aggregate_calcs()`. Only groups with `n_charter_rows > 0` are kept.
#'
#' @param df tidied special education dataframe, eg output of fetch_sped
#'
#' @return df containing all public aggregate sped
#' @export
allpublic_sped_aggs <- function(df) {
  # if charter, the host's district id replaces the charter's own
  rehomed <- df %>%
    ungroup() %>%
    id_charter_hosts() %>%
    mutate(
      is_charter = !is.na(host_district_id),
      district_id = ifelse(
        !is.na(host_district_id), host_district_id, district_id
      )
    )

  # summarize on the re-homed district_id; hosts with no charters are out
  # of scope for this calc
  citywide <- rehomed %>%
    group_by(
      end_year,
      district_id
    ) %>%
    sped_aggregate_calcs() %>%
    ungroup() %>%
    filter(n_charter_rows > 0)

  # county_name / district_name come from the charter_city lookup
  host_names <- unique(
    rename(
      select(
        charter_city,
        host_district_id, host_district_name, host_county_name
      ),
      district_id = host_district_id,
      district_name = host_district_name,
      county_name = host_county_name
    )
  )

  # pseudo district names / codes, then the level-of-aggregation flags
  labelled <- citywide %>%
    left_join(host_names, by = "district_id") %>%
    mutate(
      district_id = paste0(district_id, "A"),
      district_name = paste0(district_name, " All Public"),
      school_id = "999A",
      school_name = "All Public Total"
    ) %>%
    mutate(
      is_district = FALSE,
      is_charter_sector = FALSE,
      is_allpublic = TRUE,
      is_school = FALSE
    )

  agg_sped_column_order(labelled)
}
#' Calculate Charter Sector postsecondary matriculation aggregates
#'
#' Tags rows with their host district via `id_charter_hosts()`, keeps the
#' charter rows that have a non-missing `enroll_any`, groups them by year,
#' host, `is_16mo` and subgroup, and summarizes with
#' `matric_aggregate_calcs()`. The result carries pseudo ids
#' (`<host>C` / `999C`) and `is_charter_sector = TRUE`.
#'
#' @param df tidied matriculation dataframe, eg output of
#' enrich_matric_counts
#'
#' @return df containing charter sector matriculation aggregates
#' @export
charter_sector_matric_aggs <- function(df) {
  # charters are reported twice, one per school one per district
  # take the district level only, in the hopes that NJ will
  # someday fix this and report charter campuses
  charter_rows <- df %>%
    id_charter_hosts() %>%
    filter(
      county_id == "80",
      district_id != "9999",
      school_id == "999"
    )

  # roll up to the host city
  sector <- charter_rows %>%
    # 0s are reported as 0 -- distinct from NA
    # these are then schools w/ grad counts but no matric rates
    filter(!is.na(enroll_any)) %>%
    group_by(
      end_year,
      host_county_id, host_county_name,
      host_district_id, host_district_name,
      is_16mo, subgroup
    ) %>%
    matric_aggregate_calcs() %>%
    ungroup()

  # pseudo district names / codes, then the level-of-aggregation flags
  labelled <- sector %>%
    rename(
      county_id = host_county_id,
      county_name = host_county_name
    ) %>%
    mutate(
      district_id = paste0(host_district_id, "C"),
      district_name = paste0(host_district_name, " Charters"),
      school_id = "999C",
      school_name = "Charter Sector Total"
    ) %>%
    mutate(
      is_state = FALSE,
      is_district = FALSE,
      is_charter = FALSE,
      is_school = FALSE,
      is_charter_sector = TRUE,
      is_allpublic = FALSE
    ) %>%
    select(-host_district_id, -host_district_name)

  matric_column_order(labelled)
}
#' Calculate All Public matriculation aggregates
#'
#' Re-homes charter rows onto their host district (as identified by
#' `id_charter_hosts()`), keeps district-level rows with a non-missing
#' `enroll_any`, groups them by year, county, district, `is_16mo` and
#' subgroup, and summarizes with `matric_aggregate_calcs()`. Only groups with
#' `n_charter_rows > 0` are kept.
#'
#' @param df tidied postsecondary matriculation dataframe, eg output
#' of enrich_matric_counts()
#'
#' @return df containing all-public sector postsecondary matriculation rates
#' @export
allpublic_matric_aggs <- function(df) {
  # if charter, the host's county / district ids replace the charter's own
  rehomed <- df %>%
    ungroup() %>%
    id_charter_hosts() %>%
    mutate(
      county_id = ifelse(!is.na(host_county_id), host_county_id, county_id),
      district_id = ifelse(
        !is.na(host_district_id), host_district_id, district_id
      )
    ) %>%
    # only district rows
    filter(is_district)

  # summarize on the re-homed ids; hosts with no charters are out of scope
  citywide <- rehomed %>%
    # 0s are reported as 0 -- distinct from NA
    filter(!is.na(enroll_any)) %>%
    group_by(
      end_year,
      county_id,
      district_id,
      is_16mo, subgroup
    ) %>%
    matric_aggregate_calcs() %>%
    ungroup() %>%
    filter(n_charter_rows > 0)

  # county_name / district_name come from the charter_city lookup
  host_names <- unique(
    rename(
      select(
        charter_city,
        host_district_id, host_district_name, host_county_name
      ),
      district_id = host_district_id,
      district_name = host_district_name,
      county_name = host_county_name
    )
  )

  # pseudo district names / codes, then the level-of-aggregation flags
  labelled <- citywide %>%
    left_join(host_names, by = "district_id") %>%
    mutate(
      district_id = paste0(district_id, "A"),
      district_name = paste0(district_name, " All Public"),
      school_id = "999A",
      school_name = "All Public Total"
    ) %>%
    mutate(
      is_state = FALSE,
      is_dfg = FALSE,
      is_district = FALSE,
      is_charter = FALSE,
      is_school = FALSE,
      is_charter_sector = FALSE,
      is_allpublic = TRUE
    )

  matric_column_order(labelled)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.