R/oscn_scrape.R

Defines functions oscn_scrape

Documented in oscn_scrape

#' Scrape case data from Oklahoma State Courts Network
#'
#' Scrapes data stored in HTML tables on OSCN.net, accumulates the results in
#' several data frames, and (optionally) updates the OJO database with newly
#' scraped data.
#'
#' @details
#' One request is made for every combination of court, case type, year and
#' sequence number. The parsed tables are not returned; they are appended with
#' `<<-` to accumulator objects (`caseinfo`, `partynames`, `atts`, `events`,
#' `crim_disps`, `civ_disps`, `mins`, `disp_mins`, `parties`, `party`,
#' `pprofile`, `party_address`, `updates`, `lastupdate`), which are created on
#' first use. Cases whose page could not be fetched are recorded in `queued`,
#' and the sequence number of the last successfully scraped case is stored in
#' `last_scraped`.
#'
#' Warnings are suppressed while the function runs; the caller's `warn` option
#' is restored when it exits.
#'
#' @param courts A character vector of the courts to scrape, for example, "TULSA" or c("TULSA", "ROGERS")
#' @param casetypes A character vector of the case types to scrape, for example, "CF" or c("CF", "CM")
#' @param years A numeric vector of years to scrape, for example, 2016 or 2015:2017
#' @param case_seqs Sequential case numbers to scrape (i.e., the "45" in "CF-2015-45"), for example, 1 or 1:7000
#' @param updatedb Logical, whether to upload data to the OJO database
#' @param update_freq Integer, how many cases to scrape before updating the database and clearing local data.
#' @return Called for its side effects: several tables with data from scraped
#'   cases are accumulated (see Details), depending on the type of case and
#'   availability of data. The value of the final `ojo_disconnect_all()` call
#'   is returned.
#' @examples
#' \dontrun{
#' oscn_scrape("TULSA", "CF", 2020, 1)
#' oscn_scrape(oscn_counties, c("CF", "CM"), 2008:2015, 1:1000, updatedb = TRUE, update_freq = 100)
#' }
#' @seealso oscn_scrape_all(), oscn_scrape_remaining()

oscn_scrape <- function(courts, casetypes, years, case_seqs, updatedb = TRUE, update_freq = 20) {
  
  if (updatedb == TRUE) {
    oscn_reset()
  }
  
  # Silence warnings only for the duration of the scrape: the previous value of
  # the option is put back on exit instead of leaking into the caller's session.
  old_opts <- options(warn = -1)
  on.exit(options(old_opts), add = TRUE)
  
  for (court_in in courts) {
    court_tmp <- str_to_upper(court_in)
    
    for (casetype_in in casetypes) {
      casetype_tmp <- str_to_upper(casetype_in)
      
      for (year_in in years) {
        caseyear_tmp <- year_in
        
        for (seq_in in case_seqs) {
          
          start <- Sys.time()
          caseseq_tmp <- seq_in
          # Case numbers look like "CF-2015-00045": type, year, zero-padded sequence
          casenum_tmp <- paste(casetype_tmp, caseyear_tmp,
                               str_pad(caseseq_tmp, side = "left", width = 5, pad = "0"),
                               sep = "-")
          
          url <- paste0("http://www.oscn.net/dockets/GetCaseInformation.aspx?db=", court_tmp, "&number=", casenum_tmp)
          
          ht <- try(read_html(httr::GET(url,
                                        config = httr::config(ssl_verifypeer = FALSE))))
          
          # inherits() rather than class(ht) == "try-error": a parsed document
          # carries more than one class, which would make the comparison a
          # length > 1 condition.
          if (inherits(ht, "try-error")) {
            
            #### Request failed: remember the case so it can be retried ####
            queued_tmp <<- tibble(court = str_to_upper(court_tmp),
                                  casenum = casenum_tmp)
            
            if (exists("queued")) {
              queued <<- bind_rows(queued, queued_tmp)
            } else {
              queued <<- queued_tmp
            }
          } else {
            d <- ht %>%
              html_nodes("table") %>%
              html_table()
            
            #### Skip scraping if case record is empty, record that it's unavailable in oscn_updates ####
            if (length(d) < 2) {
              updates_tmp <- tibble(oscn_update_id = paste0(courtlist[[str_to_upper(court_tmp)]], casenum_tmp, "-", Sys.time()),
                                    court = str_to_upper(court_tmp),
                                    casenum = casenum_tmp,
                                    casetype = casetype_tmp,
                                    file_year = caseyear_tmp,
                                    updated = Sys.time(),
                                    available = FALSE)
              
              lastupdate_tmp <- updates_tmp %>%
                mutate(oscn_lastupdate_id = str_sub(oscn_update_id, 1, 17)) %>%
                select(oscn_lastupdate_id, court, casenum, casetype, file_year, last_update = updated, available)
              
              if (exists("updates")) {
                updates <<- bind_rows(updates, updates_tmp)
              } else {
                updates <<- updates_tmp
              }
              
              if (exists("lastupdate")) {
                lastupdate <<- bind_rows(lastupdate, lastupdate_tmp)
              } else {
                lastupdate <<- lastupdate_tmp
              }
              
              # Explicit units so the number always matches the word "seconds"
              elapsed <- round(as.numeric(difftime(Sys.time(), start, units = "secs")), 1)
              message(paste(court_tmp, casenum_tmp, "scraped in", elapsed, "seconds (data not available)."))
              
            } else {
              for (i in seq_along(d)) {
                
                # Tables with an unnamed column are only consumed as the
                # companion (d[[i + 1]]) of a count/issue table, never on their own.
                if ("" %in% names(d[[i]])) {
                  
                } else {
                  t <- as.data.frame(d[[i]]) %>%
                    mutate(court = str_to_upper(court_tmp),
                           court_code = courtlist[[court_tmp]],
                           casenum = casenum_tmp,
                           casetype = casetype_tmp,
                           file_year = caseyear_tmp)
                  
                  #### oscn_caseinfo table ####
                  if ("X1" %in% names(t) && isTRUE(str_detect(t[1, 2], "Judge:"))) {
                    caseinfo_tmp <- t %>%
                      mutate(file_date = str_extract(X2, "(?<=Filed: )\\d{1,2}/\\d{1,2}/\\d{1,4}") %>%
                               mdy,
                             close_date = str_extract(X2, "(?<=Closed: )\\d{1,2}/\\d{1,2}/\\d{1,4}") %>%
                               mdy,
                             judge = str_extract(X2, "(?<=Judge: )(\\w|\\s).*") %>%
                               str_squish %>%
                               str_to_upper,
                             oscn_ci_id = paste0(court_code, casenum)) %>%
                      select(oscn_ci_id, court, casenum, casetype, file_year, file_date, close_date, judge)
                    
                    if (exists("caseinfo")) {
                      caseinfo <<- bind_rows(caseinfo, caseinfo_tmp)
                    } else {
                      caseinfo <<- caseinfo_tmp
                    }
                    
                    # The first cell holds "<plaintiffs> v. <defendants>", one party per line
                    partynames_tmp <- t %>%
                      select(court, casenum, casetype, court_code, file_year, X1) %>%
                      separate(X1, sep = "\\sv\\.\\s", into = c("party_plaint", "party_def")) %>%
                      mutate(party_plaint = str_remove_all(party_plaint, "Plaintiff,|\\sand\\s"),
                             party_def = str_remove_all(party_def, "(Defendant(,|\\.)|\\sand\\s)")) %>%
                      separate(party_plaint, sep = "\n", into = paste0("party_plaint", 1:10)) %>%
                      separate(party_def, sep = "\n", into = paste0("party_def", 1:10)) %>%
                      gather(party_type, party, contains("party")) %>%
                      mutate(party = party %>%
                               str_to_upper %>%
                               str_squish %>%
                               str_remove(",$"),
                             party_type = case_when(str_detect(party_type, "plaint") ~ "Plaintiff",
                                                    str_detect(party_type, "def") ~ "Defendant",
                                                    TRUE ~ "Other")) %>%
                      filter(!is.na(party), party != "") %>%
                      mutate(oscn_pn_id = paste0(court_code, casenum, "-",
                                                 str_pad(row_number(), side = "left", width = 2, pad = "0"))) %>%
                      select(oscn_pn_id, court, casenum, casetype, file_year, party_type, party)
                    
                    if (exists("partynames")) {
                      partynames <<- bind_rows(partynames, partynames_tmp)
                    } else {
                      partynames <<- partynames_tmp
                    }
                  }
                  #### oscn_atts table ####
                  else if ("Attorney" %in% names(t)) {
                    
                    # Work on the raw HTML so that <br> line breaks inside a
                    # cell survive as "|" separators.
                    d2 <- ht %>%
                      html_nodes("table") %>%
                      gsub(pattern = '<br>', replacement = "|")
                    
                    rows <- str_extract_all(d2[i], "(?<=\\<td)(.|\r|\n)*?(?=\\<\\/td)") %>%
                      unlist
                    
                    # Nothing to record when the table has no data cells
                    if (length(rows) > 0) {
                      # Cells alternate: odd = attorney, even = represented parties
                      is_att_cell <- seq_along(rows) %% 2 == 1
                      att_cells <- rows[is_att_cell]
                      rep_cells <- rows[!is_att_cell]
                      # Pad with NA if the last attorney cell has no partner cell
                      length(rep_cells) <- length(att_cells)
                      
                      atts_tmp <- tibble(att = att_cells, rep_party = rep_cells) %>%
                        mutate(court = str_to_upper(court_tmp),
                               court_code = courtlist[[court_tmp]],
                               casenum = casenum_tmp,
                               casetype = casetype_tmp,
                               file_year = caseyear_tmp) %>%
                        mutate(att_name = str_extract(att, "(?<=\\>).*?(?=\\(Bar )") %>%
                                 str_squish %>%
                                 str_to_upper,
                               att_barno = str_extract(att, "(?<=Bar #).*?(?=\\))"),
                               att_address = str_extract(att, "(?<=\\)).*") %>%
                                 str_squish %>%
                                 str_to_upper %>%
                                 str_remove_all("^\\||\\|$") %>%
                                 str_replace_all("\\|", ", "),
                               rep_party = rep_party %>%
                                 str_replace(".*?>", "|") %>%
                                 str_extract_all("(?<=\\|).*?(?=\\|)")) %>%
                        unnest(rep_party) %>%
                        mutate(rep_party = rep_party %>%
                                 str_squish %>%
                                 str_remove(",$")) %>%
                        select(-att) %>%
                        mutate(oscn_att_id = paste0(court_code, casenum, "-",
                                                    str_pad(row_number(), width = 2, side = "left", pad = "0"))) %>%
                        select(oscn_att_id, court, casenum, casetype, file_year,
                               att_name, att_barno, att_address, rep_party)
                      
                      if (exists("atts")) {
                        atts <<- bind_rows(atts, atts_tmp)
                      } else {
                        atts <<- atts_tmp
                      }
                    }
                  }
                  #### oscn_events table ####
                  else if ("Event" %in% names(t)) {
                    events_tmp <- t %>%
                      mutate(event_date = str_extract(Event, "(?<=day, ).*?(?= at )") %>%
                               mdy,
                             event_weekday = str_extract(Event, "^\\w*?(?=,)") %>%
                               str_to_upper,
                             event_time = str_extract(Event, "(?<= at )\\d{1,2}:\\d{1,2} (AM|PM)"),
                             event_desc = str_extract(Event, "(?<=\\d{1,2}:\\d{1,2} (AM|PM))(.|\\s)*") %>%
                               str_squish %>%
                               str_to_upper,
                             defname = Party %>%
                               str_to_upper %>%
                               str_squish,
                             oscn_ev_id = paste0(court_code, casenum, "-",
                                                 str_pad(row_number(), width = 3, side = "left", pad = "0"))) %>%
                      select(oscn_ev_id, court, casenum, casetype, file_year,
                             defname, event_date, event_weekday, event_time, event_desc)
                    
                    if (exists("events")) {
                      events <<- bind_rows(events, events_tmp)
                    } else {
                      events <<- events_tmp
                    }
                  }
                  #### oscn_crim_disps table ####
                  # A "Count #" table describes the charge; the table right
                  # after it (d[[i + 1]]) holds the dispositions, so one must exist.
                  else if ("X1" %in% names(t) && i < length(d) &&
                           isTRUE(str_detect(t[1, 1], "Count #"))) {
                    names(d[[i + 1]])[1] <- "ignore"
                    crim_disps_tmp <- d[[i + 1]] %>%
                      mutate(court = str_to_upper(court_tmp),
                             casenum = casenum_tmp,
                             casetype = str_sub(casenum, 1, 2),
                             file_year = str_sub(casenum, 4, 7) %>% as.numeric,
                             court_code = courtlist[[court_tmp]],
                             X1 = as.character(d[[i]][1]),
                             X2 = as.character(d[[i]][2])) %>%
                      select(-ignore) %>%
                      mutate(ct_no = str_extract(X1, "\\d{1,2}"),
                             ct_code = str_extract(X2, "(?<=as Filed:)\\s+\\w{2,10}(?=,)") %>%
                               str_squish %>%
                               str_to_upper,
                             ct_desc = str_extract(X2, "(?<=, ).*?(?=,\\s)") %>%
                               str_to_upper,
                             ct_stat = str_extract(X2, "(?<=violation of).*?(?=Date)"),
                             off_date = str_extract(X2, "(?<=Offense: )\\d{1,2}/\\d{1,2}/\\d{1,4}") %>%
                               mdy) %>%
                      rename(txt = `Disposition Information`) %>%
                      mutate(disp = str_extract(txt, "(?<=Disposed:  ).*?(?=,)") %>%
                               str_to_upper,
                             disp_date = str_extract(txt, "\\d{1,2}/\\d{1,2}/\\d{1,4}") %>%
                               mdy,
                             disp_type = str_extract(txt, "(?<=\\d{4}\\.)(.|\\s)*?(?=Count|$)") %>%
                               str_squish %>%
                               str_to_upper,
                             disp_desc = str_extract(txt, "(?<=Count as Disposed:)(.|\\s)*?(?=(Viol))") %>%
                               str_squish %>%
                               str_to_upper,
                             disp_code = str_extract(disp_desc, "(?<=\\()\\w{2,12}(?=\\)$)") %>%
                               str_to_upper,
                             disp_stat = str_extract(txt, "(?<=Violation of)(.|\\s)*") %>%
                               str_squish,
                             defname = `Party Name` %>% str_to_upper) %>%
                      mutate(oscn_crimdisp_id = paste0(court_code, casenum, "-",
                                                       str_pad(ct_no, width = 3, side = "left", pad = "0"), "-",
                                                       str_pad(row_number(), width = 3, side = "left", pad = "0"), "-",
                                                       str_sub(disp, 1, 3))) %>%
                      select(oscn_crimdisp_id, court, casenum, casetype, file_year,
                             ct_no, defname, off_date, ct_code, ct_desc, ct_stat,
                             disp, disp_type, disp_date, disp_code, disp_desc, disp_stat)
                    
                    if (exists("crim_disps")) {
                      crim_disps <<- bind_rows(crim_disps, crim_disps_tmp)
                    } else {
                      crim_disps <<- crim_disps_tmp
                    }
                  }
                  
                  ##### oscn_civ_disps table ####
                  # Same layout as the criminal case: "Issue #" table followed
                  # by its disposition table.
                  else if ("X1" %in% names(t) && i < length(d) &&
                           isTRUE(str_detect(t[1, 1], "Issue #"))) {
                    civ_disps_tmp <- d[[i + 1]][2:3] %>%
                      mutate(court = str_to_upper(court_tmp),
                             casenum = casenum_tmp,
                             casetype = str_sub(casenum, 1, 2),
                             file_year = str_sub(casenum, 4, 7) %>% as.numeric,
                             court_code = courtlist[[court_tmp]],
                             X1 = as.character(d[[i]][1]),
                             X2 = as.character(d[[i]][2])) %>%
                      mutate(iss_no = str_extract(X1, "\\d{1,2}"),
                             iss_desc = str_extract(X2, "(?<=Issue: )(.|\\s)*?(?=(\\(|Filed))") %>%
                               str_trim %>%
                               str_to_upper,
                             iss_plaint = str_extract(X2, "(?<=Filed By:)(.|\\s)*?(?=File)") %>%
                               str_trim %>%
                               str_to_upper,
                             file_date = str_extract(X2, "(?<=Filed Date:)(.|\\s)*?$") %>% str_trim %>% mdy) %>%
                      rename(txt = `Disposition Information`) %>%
                      mutate(disp = str_extract(txt, "(?<=Disposed:  ).*?(?=,)"),
                             disp_type = str_extract(txt, "(?<=\\d{4}\\.)(.|\\s)*") %>%
                               str_trim %>%
                               str_to_upper,
                             disp_date = str_extract(txt, "\\d{1,2}/\\d{1,2}/\\d{1,4}") %>%
                               mdy,
                             defname = `Party Name` %>%
                               str_remove("Defendant:") %>%
                               str_to_upper %>%
                               str_squish) %>%
                      mutate(oscn_civdisp_id = paste0(court_code, casenum, "-",
                                                      str_pad(iss_no, width = 3, side = "left", pad = "0"), "-",
                                                      str_pad(row_number(), width = 3, side = "left", pad = "0"),"-",
                                                      str_sub(disp, 1, 3))) %>%
                      select(oscn_civdisp_id, court, casenum, casetype, file_year, file_date, defname,
                             iss_no, iss_desc, iss_plaint,
                             disp_date, disp, disp_type)
                    
                    if (exists("civ_disps")) {
                      civ_disps <<- bind_rows(civ_disps, civ_disps_tmp)
                    } else {
                      civ_disps <<- civ_disps_tmp
                    }
                  }
                  #### oscn_mins table (docket minutes) ####
                  else if ("Code" %in% names(t)) {
                    # Default id: position of the minute within the docket.
                    # NOTE(review): an earlier grouped ifelse() with a scalar
                    # test recycled one id across each (date, code) group for
                    # years <= 2018; every row now keeps its own id. Confirm
                    # against ids already stored in the database.
                    mins_tmp <- t %>%
                      mutate(fee_amt = str_remove_all(Amount, "(\\s|\\$|,)") %>%
                               as.numeric,
                             min_date = mdy(Date),
                             min_row = row_number(),
                             defname = str_to_upper(Party)) %>%
                      mutate(oscn_min_id = paste0(court_code, casenum, "-",
                                                  str_pad(row_number(), width = 3, side = "left", pad = "0"))) %>%
                      select(oscn_min_id, court, casenum, casetype, file_year,
                             defname, min_date, min_row, min_code = Code, min_desc = Description, fee_amt, ct_no = Count)
                    
                    # Cases filed after 2018 use an id built from code and date,
                    # numbered within each (code, date) group.
                    if (caseyear_tmp > 2018) {
                      mins_tmp <- mins_tmp %>%
                        group_by(min_code, min_date) %>%
                        mutate(oscn_min_id = paste0(str_sub(oscn_min_id, 1, 4),
                                                    casenum, "-",
                                                    min_code, "-",
                                                    min_date, "-",
                                                    str_pad(as.character(row_number()), width = 3, side = "left", pad = "0"))) %>%
                        ungroup() %>%
                        select(oscn_min_id, court, casenum, casetype, file_year,
                               defname, min_date, min_row, min_code, min_desc, fee_amt, ct_no)
                    }
                    
                    if (exists("mins")) {
                      mins <<- bind_rows(mins, mins_tmp)
                    } else {
                      mins <<- mins_tmp
                    }
                    
                    # Minutes that record a disposition are additionally kept on their own
                    disp_mins_tmp <- mins_tmp %>%
                      select(oscn_min_id, court, casenum, casetype, file_year,
                             defname, min_date, min_row, min_code, min_desc, fee_amt, ct_no) %>%
                      filter(min_code %in% c("CONVICTED", "DEFERRED"))
                    
                    if (nrow(disp_mins_tmp) > 0) {
                      if (exists("disp_mins")) {
                        disp_mins <<- bind_rows(disp_mins, disp_mins_tmp)
                      } else {
                        disp_mins <<- disp_mins_tmp
                      }
                    }
                    
                  }
                }
              }
              
              # Citation scraping is currently disabled; kept for reference.
              # if (!str_detect(ht, "Citation Information")) { }
              # else {
              #   c <- ht %>%
              #     html_nodes("blockquote") %>%
              #     html_text() %>%
              #     as.tibble %>%
              #     separate(value, into = paste0("col", 1:30), sep = "\\s{3,}") %>%
              #     gather(cno, c, contains("col")) %>%
              #     separate(c, into = c("varname", "value"), sep = ":") %>%
              #     select(-cno) %>%
              #     filter(!is.na(varname), varname != "") %>%
              #     spread(varname, value) %>%
              #     mutate(court = str_to_upper(court_tmp),
              #            casenum = casenum_tmp,
              #            casetype = str_sub(casenum, 1, 2),
              #            file_year = str_sub(casenum, 4, 7) %>% as.numeric,
              #            court_code = courtlist[[court_tmp]]) %>%
              #     mutate(oscn_cit_id = paste0(court_code, casenum, "-",
              #                                 str_pad(row_number(), width = 3, side = "left", pad = "0")))
              #   
              #   if ("Personal Injury" %in% names(c)) {
              #     c <- select(oscn_cit_id, court, casenum, casetype, file_year,
              #                 arr_agency = `Arresting Agency`,
              #                 loc_off = `Location of Offense`,
              #                 county = `County`,
              #                 viol_type = `Violation Type`,
              #                 veh_make = `Vehicle Make`,
              #                 veh_model = `Vehicle Model`,
              #                 veh_tag = `Vehicle Tag`,
              #                 veh_tag_yr = `Vehicle Tag Year`,
              #                 veh_tag_issuer = `Vehicle Tag Issuer`,
              #                 accident = `Accident`,
              #                 acc_inj = `Personal Injury`,
              #                 acc_prop = `Property Damage`,
              #                 bond_amt = `Bond Amount`
              #     ) %>%
              #       mutate_if(is.character, str_to_upper)
              #   }
              #   
              #   if (exists("citations")) {citations <<- bind_rows(citations, c)}
              #   else {citations <<- c}
              # }
              
              ### Party names list
              
              pn <- ht %>%
                html_nodes("p")
              
              # The second <p> of the page is taken as the party list
              pn <- pn[2] %>%
                enframe %>%
                mutate(value = as.character(value) %>%
                         str_remove_all("<(/|)p>") %>%
                         str_squish)
              
              if (str_detect(pn[1, "value"], "href")) {
                # Parties are hyperlinked: the id comes from the link, the
                # type from the text after the closing tag.
                parties_tmp <- pn %>%
                  separate(value, into = paste0("party", 1:20), sep = "<br>") %>%
                  pivot_longer(cols = contains("party"), names_to = "pnum", values_to = "text") %>%
                  filter(text != "", !is.na(text)) %>%
                  mutate(court = str_to_upper(court_tmp),
                         casenum = casenum_tmp,
                         casetype = str_sub(casenum, 1, 2),
                         file_year = str_sub(casenum, 4, 7) %>% as.numeric,
                         party_id = str_extract(text, "(?<=id=)\\d{5,20}"),
                         party_name = str_extract(text, "(?<=\\d{5}[^\\d]{2}).*(?=\\<)") %>%
                           str_to_upper %>%
                           str_trim,
                         party_type = text %>%
                           str_extract("(?<=\\>, ).*?$") %>%
                           str_to_upper %>%
                           str_trim) %>%
                  select(court, casenum, casetype, file_year, party_id, party_name, party_type)
                
              } else {
                # Plain-text party list: "<name>, <type>" per line
                parties_tmp <- pn %>%
                  separate(value, into = paste0("party", 1:20), sep = "<br>") %>%
                  pivot_longer(cols = contains("party"), names_to = "pnum", values_to = "party_name") %>%
                  filter(party_name != "", !is.na(party_name)) %>%
                  separate(party_name, into = c("party_name", "party_type"),
                           sep = ", (?=[[:alpha:]]*$)") %>%
                  mutate(court = str_to_upper(court_tmp),
                         casenum = casenum_tmp,
                         casetype = str_sub(casenum, 1, 2),
                         file_year = str_sub(casenum, 4, 7) %>% as.numeric,
                         party_name = str_to_upper(party_name)) %>%
                  select(court, casenum, casetype, file_year, party_name, party_type)
              }
              
              if (exists("parties")) {
                parties <<- bind_rows(parties, parties_tmp)
              } else {
                parties <<- parties_tmp
              }
              
              ##### Navigate to party urls and extract information ####
              ### Get list of party links
              party_links <- ht %>%
                html_nodes("p a") %>%
                html_attrs() %>%
                unlist
              
              # Keep party-record links only; links ending in id=32000 are skipped
              party_links <- party_links[str_detect(party_links, "GetParty") &
                                           !str_detect(party_links, "id=32000$")]
              
              ### Loop through party links (no iterations when there are none)
              for (link in party_links) {
                party_page <- try(read_html(
                  httr::GET(paste0("http://www.oscn.net/dockets/", link),
                            config = httr::config(ssl_verifypeer = FALSE)
                  )
                ))
                
                # A party page that cannot be fetched is skipped
                if (!inherits(party_page, "try-error")) {
                  
                  def_id_tmp <- str_extract(link, "(?<=id=).*")
                  
                  party_tables <- party_page %>%
                    html_nodes("table") %>%
                    html_table
                  
                  # Each table on the party page is recognised by a column name
                  for (ptab in party_tables) {
                    #### oscn_party table ####
                    if ("Requested Party" %in% names(ptab)) {
                      party_tmp <- ptab %>%
                        mutate(defname = `Requested Party` %>%
                                 str_to_upper,
                               alias = `Alias or Alternate Names` %>%
                                 str_to_upper) %>%
                        mutate(def_id = def_id_tmp) %>%
                        mutate(court = str_to_upper(court_tmp),
                               casenum = casenum_tmp,
                               casetype = casetype_tmp,
                               file_year = caseyear_tmp,
                               court_code = courtlist[[court_tmp]]) %>%
                        mutate(oscn_par_id = paste0(court_code, casenum, "-",
                                                    def_id)) %>%
                        select(oscn_par_id, court, casenum, casetype, file_year, def_id, defname, def_alias = alias)
                      if (exists("party")) {
                        party <<- bind_rows(party, party_tmp)
                      } else {
                        party <<- party_tmp
                      }
                      #### oscn_party_profile table ####
                    } else if ("Marital Status" %in% names(ptab)) {
                      profile_tmp <- ptab %>%
                        mutate(rec_date = mdy(`Record Date`),
                               # Only month and year are published; use the 1st of the month
                               def_mob = paste0("1/", `Birth Month and Year`) %>% dmy,
                               def_id = def_id_tmp) %>%
                        mutate(court = str_to_upper(court_tmp),
                               casenum = casenum_tmp,
                               casetype = casetype_tmp,
                               file_year = caseyear_tmp,
                               court_code = courtlist[[court_tmp]],
                               oscn_pp_id = paste0(court_code, casenum, "-",
                                                   def_id, "-",
                                                   str_pad(row_number(), width = 2, side = "left", pad = "0"))) %>%
                        select(oscn_pp_id, court, casenum, casetype, file_year, def_id, rec_date, def_mob)
                      if (exists("pprofile")) {
                        pprofile <<- bind_rows(pprofile, profile_tmp)
                      } else {
                        pprofile <<- profile_tmp
                      }
                      #### oscn_party_address table ####
                    } else if ("Address" %in% names(ptab)) {
                      paddr_tmp <- ptab %>%
                        mutate(rec_date = mdy(`Record Date`),
                               def_address = as.character(Address) %>%
                                 str_to_upper,
                               def_zip = str_extract(Address, "\\d{5}"),
                               def_id = def_id_tmp,
                               court = str_to_upper(court_tmp),
                               casenum = casenum_tmp,
                               casetype = casetype_tmp,
                               court_code = courtlist[[court_tmp]],
                               file_year = caseyear_tmp,
                               oscn_pad_id = paste0(court_code, casenum, "-",
                                                    def_id, "-",
                                                    str_pad(row_number(), width = 2, side = "left", pad = "0"))) %>%
                        select(oscn_pad_id, court, casenum, casetype, file_year,
                               def_id, rec_date, def_address, def_zip)
                      if (exists("party_address")) {
                        party_address <<- bind_rows(party_address, paddr_tmp)
                      } else {
                        party_address <<- paddr_tmp
                      }
                    }
                  }
                }
              }
              
              elapsed <- round(as.numeric(difftime(Sys.time(), start, units = "secs")), 1)
              message(paste(court_tmp, casenum_tmp, "scraped in", elapsed, "seconds."))
              
              #### Record that the case was scraped ####
              updates_tmp <- tibble(oscn_update_id = paste0(courtlist[[str_to_upper(court_tmp)]],
                                                            casenum_tmp, "-",
                                                            Sys.time()),
                                    court = str_to_upper(court_tmp),
                                    casenum = casenum_tmp,
                                    casetype = casetype_tmp,
                                    file_year = caseyear_tmp,
                                    updated = Sys.time(),
                                    # this branch is only reached when length(d) >= 2
                                    available = TRUE)
              
              lastupdate_tmp <- updates_tmp %>%
                mutate(oscn_lastupdate_id = str_sub(oscn_update_id, 1, 17)) %>%
                select(oscn_lastupdate_id, court, casenum, casetype, file_year, last_update = updated, available)
              
              if (exists("updates")) {
                updates <<- bind_rows(updates, updates_tmp)
              } else {
                updates <<- updates_tmp
              }
              
              if (exists("lastupdate")) {
                lastupdate <<- bind_rows(lastupdate, lastupdate_tmp)
              } else {
                lastupdate <<- lastupdate_tmp
              }
              
              last_scraped <<- caseseq_tmp
              
              # Flush to the database and clear local data every update_freq cases
              if (updatedb == TRUE && nrow(updates) >= update_freq) {
                oscn_updatedb()
                oscn_reset()
              }
            }
          }
        }
      }
      # Upload whatever is left once all years of this case type are done
      if (updatedb == TRUE) {
        oscn_updatedb()
      } else {
        message("Database not updated. Run oscn_updatedb or data will be lost.")
      }
    }
  }
  ojo_disconnect_all()
}
openjusticeok/ojo documentation built on Feb. 2, 2021, 5:47 a.m.