R/dese_get.R

Defines functions dese_get dese_attendance dese_enrollmentbyracegender dese_selectedpopulations dese_attrition

Documented in dese_attendance dese_attrition dese_enrollmentbyracegender dese_get dese_selectedpopulations

#' Get data from Massachusetts DESE
#'
#' Get public school information from Massachusetts \href{http://profiles.doe.mass.edu/state_report/}{State Reports}.
#'
#' A wrapper function that execute a GET to http://profiles.doe.mass.edu/state_report/
#' and parses the results.
#'
#' @param report the name of the report you are getting
#' @param query any additional query needed to complete the url
#' @return data.frame
#' @import httr
#' @import rvest
#' @export
#' @seealso \code{\link{GET}} and \code{\link{rvest}} which this function wraps
#' @examples
#' dese_get('enrollmentbyracegender', query=list('mode'='school', 'year'='2016'))
dese_get <- function(report, query=NULL){

  baseurl <- 'http://profiles.doe.mass.edu'
  ua <- user_agent("https://github.com/rgknight/madese")

  url <- modify_url(baseurl,
                    path=paste0('state_report/', report, '.aspx'),
                    query=query)

  print(url)

  resp <- GET(url, ua)

  parsed <- content(resp, as="text")

  parsed <- gsub(" ", "", parsed) # Remove non-standard character

  if (grepl("404 - Page Not Found", parsed)) {
    stop(
      sprintf(
        "Request failed; report not found [%s]\n%s",
        status_code(resp),
        resp$url
      ),
      call. = FALSE
    )
  }

  parsed
}

#' Attendance and Retention Report
#'
#' Get the \href{http://profiles.doe.mass.edu/state_report/indicators.aspx}{Attendance and Retention} Report from Massachusetts DESE
#'
#' @param year a character string with the year (e.g., school year 2015-2016 is "2016")
#' @param mode district or school
#' @return data.frame
#' @seealso \code{\link{dese_get}} which this function wraps
#' @import rvest
#' @export
#' @examples
#' dese_attendance("2016")
dese_attendance <- function(year, mode='school'){
  query <- list(
    'mode'=mode,
    'year'=as.character(year),
    'export_excel'='yes'
  )

  parsed <- dese_get('indicators', query)

  df <- read_html(parsed) %>%
    html_node('table') %>%
    html_table(header = T, fill=T)

  # Data has extra columns in some years
  df <- df[colSums(!is.na(df)) > 0]

  # Data has two header lines,
  # with actual header a combination of the lines
  extra_names <- df[1, ]
  df <- df[-1, ]

  names(df) <- paste(names(df), extra_names, sep="_")

  # Take out extra dots in names
  names(df) <- gsub("\\.[1-4]", "", names(df))

  # Fix school and org code
  names(df)[1] <- "school"
  names(df)[2] <- "org_code"

  df
}

#' Enrollment by Race and Gender
#'
#' Get the \href{http://profiles.doe.mass.edu/state_report/enrollmentbyracegender.aspx}{Enrollemnt by Race/Gender} Report from Massachusetts DESE
#'
#' @param year a character string with the numeric year (e.g., school year 2015-2016 is "2016")
#' @param mode district or school
#' @return data.frame
#' @seealso \code{\link{dese_get}} which this function wraps
#' @import rvest
#' @export
#' @examples
#' dese_enrollmentbyracegender("2016")
dese_enrollmentbyracegender <- function(year, mode='school'){
  query <- list(
    'mode'=mode,
    'year'=as.character(year),
    'Continue'='View+Report',
    'export_excel'='yes'
  )

  parsed <- dese_get('enrollmentbyracegender', query)

  read_html(parsed) %>%
    html_node('table') %>%
    html_table(header = T, fill=T)


}

#' Enrollment by Selected Populations
#'
#' Get the \href{http://profiles.doe.mass.edu/state_report/selectedpopulations.aspx}{Enrollemnt by Selected Populations} Report from Massachusetts DESE
#' (first language not english, english language learners, students with disabilities, etc)
#'
#' @param year a character string with the numeric year (e.g., school year 2015-2016 is "2016")
#' @param mode district or school
#' @return data.frame
#' @seealso \code{\link{dese_get}} which this function wraps
#' @import rvest
#' @export
#' @examples
#' dese_selectedpopulations("2016")
dese_selectedpopulations <- function(year, mode='school'){
  query <- list(
    'mode'=mode,
    'year'=as.character(year),
    'Continue'='View+Report',
    'export_excel'='yes'
  )

  data <- dese_get('selectedpopulations', query)

  data <- read_html(data) %>%
    html_node('table') %>%
    html_table(header = T, fill=T)

  # data has two header rows; remove extra row
  extra_names <- data[1, ]
  data <- data[-1, ]
  cols <- ncol(data)
  names(data)[3:cols] <- paste0(names(data)[3:cols], "_", extra_names[3:cols])

  data
}

#' Attrition
#'
#' Get the \href{http://profiles.doe.mass.edu/state_report/attrition.aspx}{Attrition}
#' Report from Massachusetts DESE website.
#' Attrition is students who left the school over the summer.
#'
#' @param year the year of the report
#' @param mode district or school
#' @param group get the report for a specific student group. Defaults to all students.
#' @seealso \code{\link{dese_get}} which this function wraps
#' @import rvest
#' @import dplyr
#' @importFrom magrittr extract2
#' @importFrom xml2 read_html
#' @export
#' @examples
#' dese_attrition("2016")
dese_attrition <- function(year, mode='SCHOOL', group='ALL'){
  query <- list(
    'ctl00$ContentPlaceHolder1$reportType'=toupper(mode),
    'ctl00$ContentPlaceHolder1$fycode'=as.character(year),
    'ctl00$ContentPlaceHolder1$studentGroup'=toupper(group),
    'export_excel'='yes'
  )

  parsed <- dese_get('attrition', query)

  result <- read_html(parsed) %>%
    html_nodes('table') %>%
    extract2(2) %>%
    html_table(header = T, fill=T)

  result <- result %>%
    mutate_at(vars(3:15),funs(gsub(" ", "", .)))

  result
}


#' Mobility Rates
#'
#' Get the \href{http://profiles.doe.mass.edu/state_report/mobilityrates.aspx}{Mobility Rates}
#' Report from Massachusetts DESE website.
#' Mobility rates has various metrics on how many students move through the school in a year
#'
#' @param year the year of the report
#' @param mode district or school
#' @param group get the report for a specific student group. Defaults to all students.
#' @seealso \code{\link{dese_get}} which this function wraps
#' @import rvest
#' @importFrom magrittr extract2
#' @importFrom xml2 read_html
#' @export
#' @examples
#' dese_mobilityrates(2016)
dese_mobilityrates <- function(year, mode='SCHOOL', group='ALL'){
  query <- list(
    'ctl00$ContentPlaceHolder1$reportType'=toupper(mode),
    'ctl00$ContentPlaceHolder1$cohortYear'=as.character(year),
    'ctl00$ContentPlaceHolder1$studentGroup'=toupper(group),
    'ctl00$ContentPlaceHolder1$rateType'='4-Year:REG',
    'export_excel'='yes'
  )

  parsed <- dese_get('mobilityrates', query)

  result <- read_html(parsed) %>%
    html_nodes('table') %>%
    extract2(2) %>%
    html_table(header = T, fill=T)

  result
}

#' dese_ssdr_days_missed
#'
#' Get SSDR-based reports from Massachusetts DESE website.
#'
#' @param year the year of the report
#' @param mode district or school
#' @param group get the report for a specific student group. Defaults to all students. Can be
#' - ALL = All students
#' - SPED = Students with disabilities
#' - ECODIS = Economically disadvantaged students
#' - F = Female students
#' - M = Male students
#' - AI = American Indian or Alaskan Native students
#' - AS = Asian students
#' - BL = Black / African American students
#' - HS = Hispanic / Latino students
#' - MR = Multiracial, non-hispanic / Latino students
#' - HP = Native Hawaiian / Pacific Islander students
#' - WH = White students
#' @param offense Defaults to all offenses.
#' @seealso \code{\link{dese_get}} which this function wraps
#' @import rvest
#' @importFrom magrittr extract2
#' @importFrom xml2 read_html
#' @export
dese_ssdr_days_missed <- function(year, mode='SCHOOL', group='ALL', offense='All'){
  query <- list(
    'ctl00$ContentPlaceHolder1$reportType'=toupper(mode),
    'ctl00$ContentPlaceHolder1$fycode'=as.character(year),
    'ctl00$ContentPlaceHolder1$studentGroup'=toupper(group),
    'ctl00$ContentPlaceHolder1$subjectCode'=toupper(offense),
    'export_excel'='yes'
  )

  parsed <- dese_get('ssdr_days_missed', query)

  result <- read_html(parsed) %>%
    html_nodes('table') %>%
    extract2(2) %>%
    html_table(header = T, fill=T)

  # Take out unicode pad character
  # Unfortunately due to https://github.com/hadley/dplyr/issues/1978
  # we can't just mutate_if(is.character)

  result <- result %>%
    mutate_all(funs(gsub(" ", "", .)))

  result
}

#' @rdname dese_ssdr_days_missed
#' @import rvest
#' @importFrom magrittr extract2
#' @importFrom xml2 read_html
#' @export
dese_ssdr <- function(year, mode='SCHOOL', group='ALL', offense='All'){
  query <- list(
    'ctl00$ContentPlaceHolder1$reportType'=toupper(mode),
    'ctl00$ContentPlaceHolder1$fycode'=as.character(year),
    'ctl00$ContentPlaceHolder1$studentGroup'=toupper(group),
    'ctl00$ContentPlaceHolder1$subjectCode'=toupper(offense),
    'export_excel'='yes'
  )

  parsed <- dese_get('ssdr', query)

  if (grepl("No Data Exists for this search criteria", parsed)){
    warning(sprintf("No Data Exists for %s %s %s %s", as.character(year), mode, group, offense))
    return(1)
  }

  result <- read_html(parsed) %>%
    html_nodes('table') %>%
    extract2(2) %>%
    html_table(header = T, fill=T)

  result <- result %>%
    mutate_all(funs(gsub(" ", "", .)))

  result
}

# Not working yet...
# #' MCAS Achievement Results
# #'
# #' Get \href{http://profiles.doe.mass.edu/state_report/mcas.aspx}{MCAS} Reports from Massachusetts DESE
# #'
# #'
# #' @param year a character string with the numeric year (e.g., school year 2015-2016 is "2016")
# #' @param mode district or school
# #' @param SchoolType All, Elementary, Middle, etc
# #' @param studentGroup 'AL' for all,
# #' @param grade 'AL' for all, 10, 08, etc
# #' @return data.frame
# #' @seealso \code{\link{dese_get}} which this function wraps
# #' @examples
# #' dese_mcas("2014")
# dese_mcas <- function(year, SchoolType='All', studentGroup='AL', grade='AL',mode='school'){
#
#     query <- list(
#     'reportType'=mode,
#     'grade'=grade,
#     'year'=as.character(year),
#     'apply2006'='Y',
#     'studentGroup'='AL',
#     'SchoolType'='All'
#   )
#
#   dese_get('mcas', query)
# }
#
rgknight/madese documentation built on May 27, 2019, 7:23 a.m.