R/get_records.R

Defines functions get_records

Documented in get_records

#' Get marriage and baptism records in the Norway Digital Archives
#'
#' Loop through advanced search results and download the person table of
#' every baptism and marriage record from the Norway Digital Archives.
#'
#' In baptism records the residence of the child (roll \code{"barn"}) and of
#' the mother (roll \code{"mor"}) is often left empty; when the record lists a
#' father (roll \code{"far"}), his residence is used to fill those gaps.
#' The function pauses 1 to 3 seconds between consecutive requests.
#'
#' @param x Advanced search results from \code{\link{advanced_search}}; must
#'   contain the columns \code{type}, \code{event} and \code{url}.
#'
#' @return a tibble with event, no, name, residence, date, role, position,
#'   birth, place and full (name and residence pasted together)
#'
#' @author Chris Stubben
#'
#' @examples
#' \dontrun{
#'   s1 <- advanced_search(1638, first="Siv*", last="Lars*", residence="Stub*")
#'   sivert <- get_records(s1)
#' }
#' @export
get_records <- function(x) {
  # Fail early with a clear message instead of an obscure error (or a silent
  # lookup of `type` in the calling environment) further down.
  required <- c("type", "event", "url")
  if (!is.data.frame(x) || !all(required %in% names(x))) {
    stop("x must be a data.frame with columns: ",
         paste(required, collapse = ", "), call. = FALSE)
  }

  x <- x[x$type %in% c("baptism", "marriage"), , drop = FALSE]
  n <- nrow(x)
  if (n == 0) stop("No baptism or marriage records found in search results")
  message("Found ", n, " baptism or marriage records")

  records <- vector("list", n)
  for (i in seq_len(n)) {
    message(" Getting row ", i)
    doctype <- as.character(x$type[i])
    edate <- x$event[i]
    year <- substr(edate, 1, 4)

    # suppressWarnings() hides the warnings readLines() raises while
    # downloading the page.
    page <- suppressWarnings(readLines(as.character(x$url[i])))
    doc <- XML::htmlParse(page)
    # The second table on the page is the one holding the persons.
    x1 <- XML::readHTMLTable(doc, which = 2, stringsAsFactors = FALSE)

    ## Marriage header: "Serial no., Roll, Name, Position, Birth year,
    ## Birth place, Residence".  Roll and Name are reversed in baptism
    ## records, so columns are always addressed by name, never by position.
    names(x1) <- tolower(gsub("[ .]", "", names(x1)))

    if (doctype == "baptism") {
      x1 <- fill_missing_residence(x1)
      x1$event <- paste(x1$name[1], year, doctype)
    } else if (doctype == "marriage") {
      x1$event <- paste(x1$name[x1$roll == "brudgom"], "and",
                        x1$name[x1$roll == "brur"], year, doctype)
    } else {
      # Not reachable after the type filter above; kept as a safe default.
      x1$event <- paste(year, doctype)
    }
    x1$date <- edate
    records[[i]] <- x1

    # Be polite to the server: wait a random 1-3 seconds between requests.
    # sample(1:3, 1) draws ONE value (sample(1:3) would return a permutation
    # of all three); no pause is needed after the last request.
    if (i < n) Sys.sleep(sample(1:3, 1))
  }

  out <- dplyr::bind_rows(records)
  # Combined name and residence, handy for searching and de-duplicating.
  out$full <- paste(out$name, out$residence)

  out <- out[, c("event", "serialno", "name", "residence", "date", "roll",
                 "position", "birthyear", "birthplace", "full")]
  names(out) <- c("event", "no", "name", "residence", "date", "role",
                  "position", "birth", "place", "full")
  dplyr::as_tibble(out)
}

# Fill the empty residence of the child ("barn") and mother ("mor") rows of a
# baptism table with the father's ("far") residence.  The table is returned
# unchanged when it has no father row; every matching row is handled, so
# records with several children do not break the lookup.
fill_missing_residence <- function(tbl) {
  father <- tbl$residence[tbl$roll %in% "far"]
  if (length(father) == 0) {
    return(tbl)
  }
  blank <- tbl$roll %in% c("barn", "mor") &
    (is.na(tbl$residence) | tbl$residence == "")
  tbl$residence[blank] <- father[1]
  tbl
}
cstubben/norwayr documentation built on May 14, 2019, 12:25 p.m.