# Parses data-raw/chronology-page.html
# to extract data on representatives.
# Outputs data-raw/chronology-raw.csv
library(rvest)
library(tidyverse)
library(here)

# Parse the locally saved copy of the chronology page.
reps <- read_html(here("data-raw", "chronology-page.html"))

# The data is in a <table> ...
tables_on_page <- html_nodes(reps, css = "table")

# ... whose `summary` attribute has a particular value. Tables without a
# `summary` attribute yield NA from html_attr(), which which() ignores.
rep_table <- which(
  html_attr(tables_on_page, "summary") == "LegislatorsChronological"
)

# Fail with a clear message, rather than an obscure `[[` subscript error,
# when the expected table is missing from the page or appears more than once.
if (length(rep_table) != 1L) {
  stop(
    "Expected exactly one table with summary \"LegislatorsChronological\", ",
    "found ", length(rep_table), ".",
    call. = FALSE
  )
}

# Convert the matching <table> node to a data frame.
rep_df_raw <- tables_on_page[[rep_table]] %>%
  html_table(fill = TRUE)
# Clean up NAs, split by session and remove session id rows:
# Drop columns whose header is NA. `drop = FALSE` keeps the result a data
# frame even if only a single column survives.
rep_df <- rep_df_raw[, !is.na(names(rep_df_raw)), drop = FALSE]

# Remove rows without a legislator, normalise column names, and flag the
# rows that are session headers (their legislator cell contains "Session :").
# The running count of header rows gives every row the id of the session
# header that most recently preceded it (0 for rows before the first header).
rep_df <- rep_df %>%
  drop_na(Legislator) %>%
  rename_with(tolower) %>%
  rename(desk_number = `desk number`) %>%
  mutate(
    session_line = str_detect(legislator, "Session :"),
    session_id = cumsum(session_line)
  )

# Copy each session header's text onto the rows that follow it, then remove
# the header rows themselves and the helper columns.
rep_with_session <- rep_df %>%
  group_by(session_id) %>%
  mutate(
    # A group only starts with a header row when session_id > 0; rows that
    # precede the first header have no session, so they get NA instead of
    # borrowing the first legislator's name.
    session = if (first(session_line)) first(legislator) else NA_character_
  ) %>%
  ungroup() %>%
  # Filter on the flag rather than dropping the first row of every group, so
  # a real legislator row is never discarded when a group has no header.
  filter(!session_line) %>%
  select(-session_id, -session_line)

write_csv(rep_with_session, here("data-raw", "chronology-raw.csv"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.