## census search results...
census_table <- function(doc, trim_parish = TRUE){
# 1. Name (get first node after a/)
name <- html_nodes(doc, xpath="//div[@class='name']/a/node()[1]") %>% html_text(trim=TRUE)
## last names have space after \t, patronyms have NO space ???
# "Christian\t\t\t\t\t\t Lund" vs "Christian\t\t\t\t\t\tOlsen"
first_name <- gsub("([^\t]+).*", "\\1", name)
name2 <- gsub("[^\t]+\t+(.*)", "\\1", name)
if(any(grepl("\t", name2))) message("Warning: tabs in last name")
patronymic <- ifelse( grepl("^ ", name2), "", name2)
last_name <- ifelse( grepl("^ ", name2), trimws(name2), "")
name <- gsub("\t+ ?", " ", name)
#2. gender (venus = female, mars = male)
gender <- html_nodes(doc, xpath="//div[@class='gender']/span/i") %>% html_attr("class")
gender <- ifelse(gender == "fa fa-mars", "male", "female")
#3 Birth date
b1 <- html_nodes(doc, xpath="//div[@class='born']") %>% html_text
birth_date <- as.integer(gsub("[^0-9]", "", b1))
#4. Place of birth ??
# 5 Pos./Status
status <- html_nodes(doc, xpath="//div[@class='stilling-stand']") %>% html_text(trim=TRUE)
status <- gsub("Pos./Status: ", "", status)
# 6 Residence
residence <- html_nodes(doc, xpath="//div[@class='place']") %>% html_text(trim=TRUE)
residence <- gsub("Domicile: ", "", residence)
## Trim parish IF searching by parish??
if(trim_parish) residence <- gsub("[^:]+: ", "", residence)
#7 Event date <div class="action">
#8 Role <div class="role">
#9 Source <div class="source">
if(!all.equal( length(name), length(gender), length(birth_date), length(status), length(residence) )){
stop("Parsing error: Missing one or more nodes")
}
x <- tibble( name, residence, gender, birth_date, status, first_name, patronymic, last_name)
x
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.