R/get_baptisms.R

#' Get baptism page
#'
#' Get all baptisms from church book in Norway Digital Archives
#'
#' @param id  church book id
#' @param pages total number of pages (from Last)
#' @param hits total number of results
#'
#' @return A tibble
#'
#' @author Chris Stubben
#'
#' @examples
#' \dontrun{
#' # Church book from Orkdal local parish 1715-1742
#'  b1 <- get_baptisms("16196", 27, 1322)
#' }
#' @export

get_baptisms <- function( id, pages, hits){
  b1 <- vector("list", hits)
  names(b1) <- 1:hits
  for (i in 1:pages){
    url <- paste0("https://www.digitalarkivet.no/en/search/76/", id, "?rolle=far&page=", i)
    doc <- read_html( url)
    p1 <- html_nodes(doc, xpath='//div[@class="generic"]/a')  %>% html_attr("href")
    p1 <- gsub(".*/", "", p1)
    message("Downloading ", length(p1), " records from page ", i)
    for(j in 1:length(p1)){
      x <-  baptism_page( p1[j])
      n <- (i-1)*50 + j
       message("  ", n)
      b1[[n]] <- x
      # Sys.sleep(sample(1:2, 1))
    }
  }
  bind_rows(b1, .id = "id")
}


#' @describeIn get_baptisms Format baptism page
#' @param pid baptism page id
#' @export
baptism_page <- function(pid){
   url <- paste0("https://www.digitalarkivet.no/en/view/255/", pid)
   doc <- read_html( url)
   x <- html_nodes(doc, xpath='//div[@class="col-xs-12 col-md-6 ssp-semibold"]') %>% html_text(trim=TRUE)
   x1 <- matrix(x, ncol=6, byrow=TRUE)
   ## Baptism date?
   x2 <- html_nodes(doc, xpath='//div[@class="col-xs-6 col-sm-3 ssp-semibold"]') %>% html_text(trim=TRUE)
   ## combine birth year and place (almost always empty?)
   tibble(role = x1[,1], name = x1[,2], residence = x1[,6], position = x1[,3],
     birth = apply(x1[,4:5], 1, function(x) paste(x[x!="-"], collapse="; ")),
     page = x2[1], year = x2[4], date = x2[6], pid = pid)
}
cstubben/aRkivet documentation built on May 14, 2019, 12:25 p.m.