# R/odcr_scrape_open.R
#
# Defines function: odcr_scrape_open

#' Scrape open cases from On Demand Court Records
#'
#' For every combination of court, case type, and filing year, looks up the
#' cases recorded as "OPEN" in the OJO database summary tables and passes
#' their case sequence numbers to `odcr_scrape()` so those cases are scraped
#' again.
#'
#' @param courts A character vector of the courts to scrape, for example, "TULSA" or c("TULSA", "ROGERS")
#' @param casetypes A character vector of the case types to scrape, for example, "CF" or c("CF", "CM")
#' @param file_years A numeric vector of years to scrape, for example, 2016 or 2015:2017
#' @param cutoff_date A string in "MM-DD" form. Within each filing year, only
#'   cases filed on or after this month and day are scraped. Defaults to
#'   "01-01", i.e. the whole year.
#' @return Called for its side effects; returns `NULL` invisibly.
#' @examples
#' \dontrun{
#' odcr_scrape_open("TULSA", "CF", 2020)
#' odcr_scrape_open(c("TULSA", "ROGERS"), c("CF", "CM"), 2018:2020, cutoff_date = "07-01")
#' }
#' @seealso odcr_scrape(), oscn_scrape_all(), oscn_scrape_remaining()

odcr_scrape_open <- function(courts, casetypes, file_years, cutoff_date = "01-01") {

  for (court_tmp in courts) {
    for (casetype_tmp in casetypes) {

      # The summary table depends only on the case type, so pick it once per
      # case type. This is a scalar decision, hence plain if/else rather than
      # the vectorised if_else().
      sum_tbl <- if (casetype_tmp %in% c("CF", "CM", "TR")) {
        "ojo_crim_cases"
      } else {
        "ojo_civ_cases"
      }

      for (file_year_tmp in file_years) {

        connect_ojo()

        # `finally` guarantees the connection is released even if the query
        # or the local post-processing fails.
        open_cases <- tryCatch(
          tbl(ojo_db, sum_tbl) %>%
            filter(court == court_tmp,
                   casetype == casetype_tmp,
                   file_year == file_year_tmp,
                   disp_case == "OPEN") %>%
            select(court, casenum, disp_case, file_date) %>%
            collect() %>%
            # Characters 9-13 of the case number are used as the sequence
            # number handed to odcr_scrape() below.
            mutate(case_seq = str_sub(casenum, 9, 13)) %>%
            distinct() %>%
            filter(file_date >= ymd(paste0(file_year_tmp, "-", cutoff_date))),
          finally = disconnect_ojo()
        )

        message(paste(
          "Collecting data for", nrow(open_cases), casetype_tmp, "cases filed in", court_tmp
        ))

        # Nothing to re-scrape for this combination; avoid handing
        # odcr_scrape() a zero-length vector of sequence numbers.
        if (nrow(open_cases) == 0) {
          next
        }

        # NOTE(review): update_freq is forwarded as-is; presumably it controls
        # how often odcr_scrape() writes to the database -- confirm there.
        odcr_scrape(court_tmp, casetype_tmp, file_year_tmp, as.numeric(open_cases$case_seq), update_freq = 20)

      }
    }
  }

  invisible(NULL)
}
# openjusticeok/ojo documentation built on Feb. 2, 2021, 5:47 a.m.