# dev/parse.R

# Build one row per entry of `agency_details` and row-bind them into
# `df_agencies`.
#
# Columns of each row:
#   nameAgency          trimmed text of the entry's first `.pagency` node;
#                       NA when the entry has no such node
#   nameAgencyDetailed  first distinct, non-empty text line of the entry
#   nameOffice          second distinct, non-empty text line of the entry
#   idRow               index of the entry within `agency_details`
#
# NOTE(review): `agency_details` is defined outside this chunk. It is used
# here both as string input (str_replace) and as rvest input (html_node), so
# it is presumably an xml_nodeset -- confirm.
df_agencies <-
  seq_along(agency_details) %>%
  future_map_dfr(function(x) {
    detail <- agency_details[x]

    # Turn the first <br> into a newline so the text on either side of it
    # survives as separate lines once the markup is stripped, then keep the
    # distinct non-empty lines.
    # NOTE(review): only the FIRST <br> is replaced. Entries with several
    # <br> tags keep the remaining text fused together -- confirm that this
    # is intended before switching to str_replace_all().
    values <-
      detail %>%
      str_replace("<br>", "\n") %>%
      read_html() %>%
      html_text() %>%
      str_split("\n") %>%
      flatten_chr() %>%
      str_trim() %>%
      discard(~ .x == "") %>%
      unique()

    items <- c("nameAgencyDetailed", "nameOffice")

    # tibble() pairs the names with the values positionally: two values map
    # one-to-one, a single value is recycled into both columns, and any other
    # count makes tibble() raise an error.
    data <-
      tibble(items, values) %>%
      spread(items, values)

    # Look the `.pagency` node up once. No existence check is needed:
    # html_node() yields a missing node when nothing matches and html_text()
    # turns that into NA, so the column is present for every row and simply
    # holds NA where the entry has no agency node.
    nameAgency <-
      detail %>%
      html_node(".pagency") %>%
      html_text() %>%
      str_trim()

    data %>%
      mutate(nameAgency = nameAgency) %>%
      select(nameAgency, everything()) %>%
      mutate(idRow = x)
  })
# abresler/govtrackR documentation built on July 11, 2020, 12:30 a.m.