R/oscn_scrape_parties.R

Defines functions oscn_scrape_parties

# Scrape party information for a range of OSCN cases.
#
# For every combination of court, case type, year and sequence number the
# case page is downloaded from oscn.net, every "GetParty" link on that page
# is followed, and each HTML table on the party page is routed by its column
# names:
#   * a table with a "Requested Party" column is appended to the global
#     `party`;
#   * a table with a "Marital Status" column is appended to the global
#     `pprofile`;
#   * a table with an "Address" column is appended to the global
#     `party_address`.
# Tables matching none of these are ignored. The globals are created on first
# use, and the global `last_scraped` is set to the sequence number of the case
# that was just processed.
#
# Arguments:
#   courts     Values placed in the `db=` query parameter of the case URL.
#   casetypes  Case type codes; first component of the case number.
#   years      Case years; second component of the case number.
#   case_seqs  Case sequence numbers; third component of the case number.
#   updatedb   If TRUE (the default), oscn_updatedb() and oscn_dedup() are
#              called once all cases have been processed.
#
# The function is called for its side effects (global data frames, progress
# output, optional database update).
#
# NOTE(review): read_html/html_nodes/html_table, the dplyr verbs, the stringr
# helpers, mdy/dmy and `%>%` are assumed to be attached or imported elsewhere
# in the package -- confirm.
oscn_scrape_parties <- function(courts, casetypes, years, case_seqs, updatedb = TRUE) {

  base_url <- "http://www.oscn.net/dockets/"

  for (court_tmp in courts) {
    for (casetype_tmp in casetypes) {
      for (caseyear_tmp in years) {
        for (caseseq_tmp in case_seqs) {

          start <- Sys.time()
          casenum_tmp <- paste(casetype_tmp, caseyear_tmp, caseseq_tmp, sep = "-")

          # All attribute values of the anchors inside paragraphs of the case
          # page; as.character() turns a NULL (no anchors at all) into
          # character(0) so the filtering below is always well defined.
          links <- read_html(paste0(base_url, "GetCaseInformation.aspx?db=",
                                    court_tmp, "&number=", casenum_tmp)) %>%
            html_nodes("p a") %>%
            html_attrs() %>%
            unlist() %>%
            as.character()

          # Keep party links only, dropping those whose id is exactly 32000.
          party_links <- links[str_detect(links, "GetParty") &
                                 !str_detect(links, "id=32000$")]

          ### Loop through party links (no iterations when there are none)
          for (party_link in party_links) {

            party_tables <- read_html(paste0(base_url, party_link)) %>%
              html_nodes("table") %>%
              html_table()

            # Everything after "id=" in the link identifies the party.
            def_id_tmp <- str_extract(party_link, "(?<=id=).*")

            # Iterating over the list itself is safe when the page has no
            # tables; `1:length(x)` would have yielded c(1, 0) and failed.
            for (party_table in party_tables) {
              table_cols <- names(party_table)

              if ("Requested Party" %in% table_cols) {
                party_tmp <- party_table %>%
                  mutate(defname = str_to_upper(`Requested Party`),
                         alias = str_to_upper(`Alias or Alternate Names`),
                         def_id = def_id_tmp,
                         court = str_to_upper(court_tmp),
                         casenum = str_to_upper(casenum_tmp)) %>%
                  select(court, casenum, def_id, defname, def_alias = alias)

                # exists() looks through the whole search path, so the
                # accumulator is found wherever `<<-` previously created it.
                if (exists("party")) {
                  party <<- bind_rows(party, party_tmp)
                } else {
                  party <<- party_tmp
                }

              } else if ("Marital Status" %in% table_cols) {
                profile_tmp <- party_table %>%
                  mutate(rec_date = mdy(`Record Date`),
                         # Only month and year are published; anchor the date
                         # to the first day of that month.
                         def_mob = dmy(paste0("1/", `Birth Month and Year`)),
                         def_id = def_id_tmp,
                         court = str_to_upper(court_tmp),
                         casenum = str_to_upper(casenum_tmp)) %>%
                  select(court, casenum, def_id, rec_date, def_mob)

                if (exists("pprofile")) {
                  pprofile <<- bind_rows(pprofile, profile_tmp)
                } else {
                  pprofile <<- profile_tmp
                }

              } else if ("Address" %in% table_cols) {
                paddr_tmp <- party_table %>%
                  mutate(rec_date = mdy(`Record Date`),
                         def_address = str_to_upper(as.character(Address)),
                         # First run of five digits in the address.
                         # NOTE(review): this can also match a five-digit
                         # street number -- confirm this is acceptable.
                         def_zip = str_extract(Address, "\\d{5}"),
                         def_id = def_id_tmp,
                         court = str_to_upper(court_tmp),
                         casenum = str_to_upper(casenum_tmp)) %>%
                  select(court, casenum, def_id, rec_date, def_address, def_zip)

                if (exists("party_address")) {
                  party_address <<- bind_rows(party_address, paddr_tmp)
                } else {
                  party_address <<- paddr_tmp
                }
              }
            }
          }

          # Force the units to seconds: a bare `Sys.time() - start` switches
          # to minutes for slow cases while the message still says "seconds".
          elapsed_secs <- as.numeric(difftime(Sys.time(), start, units = "secs"))
          print(paste(court_tmp, casenum_tmp, "parties scraped in", elapsed_secs, "seconds."))
          last_scraped <<- caseseq_tmp
        }
      }
    }
  }

  if (updatedb == TRUE) {
    oscn_updatedb()
    oscn_dedup()
    print("Database updated.")
  }
}
openjusticeok/ojo documentation built on Feb. 2, 2021, 5:47 a.m.