#' Advanced search in the Norway Digital Archives
#'
#' Formats the Advanced Person Search page at
#' \url{https://www.digitalarkivet.no/en/search/persons/advanced}
#'
#' @param municipals a vector of municipal codes or names (and \code{municipal} dataset used to lookup codes)
#' @param county a vector county codes, from the first two digits of the municipal code
#' @param region a vector of regions, 01=
#' @param first First name
#' @param last Last name
#' @param gender Gender, m or f
#' @param role Role like barn, far, mor, fadder, brudgom, brur
#' @param domicile Domicile, residence or farm name
#' @param event Event date in yyyy-mm-dd format, split into yyyy and mm-dd fields in url
#' @param birth Birth date in yyyy-mm-dd format
#' @param place Place of birth
#' @param family Family position, marital status or occupation
#' @param county County code
#' @param all Default search gets first page with 50 records, set all=TRUE to download all results
#' @return A data.frame with ame, residence, year, type, role, event, birth, place, family position, source, url
#' @note Municipal codes in code{\link{municipal}}. Use wildcard searches * since
#' most names and places have alternate spellings like Michel or Mikkel, Larsen or Larssen, etc
#' @seealso \code{\link{get_records}} to loop through search results and get indvidual records
#' @author Chris Stubben
#' @examples
#' \dontrun{
#' # search for Sivert Larsen Stubben
#' advanced_search(1638, first="Siv*", last="Lars*", residence="Stub*")
#' # Siverts in 1799
#' advanced_search(1638, first="Siv*", event=1799)
#' advanced_search(1638, event="1776-07-04")
#' # Mikkel or Michel Stubben (but not Mildri or Mimi)
#' advanced_search(1638, first="Mi*", residence="Stub*", gender="m")
#' }
#' @export
advanced_search <- function(municipals=NULL, county=NULL, region=NULL,
event = NULL, birth=NULL, period = NULL,
firstname=NULL, lastname=NULL,
gender = NULL, role = NULL,
date_event= NULL, date_birth=NULL, place_birth=NULL, domicile=NULL,
position=NULL, disable_name_variants = TRUE,
all=FALSE){
url1 <- "https://www.digitalarkivet.no/en/search/persons/advanced"
municode <- municipals
## lookup names in municipal
if(!all( grepl("^[0-9]+$", municode))){
#data(municipal)
municode <- municipal$code[ municipal$name %in% municode ]
}
l1 <- list(`m[]` = municode , `c[]` = county, `r[]` = region, `gender[]` = gender, `role[]` = role)
# compose_query requires values with length 1
if( any( sapply(l1, length)>1)) l1 <- as.list(setNames(unlist(l1),rep(names(l1), lengths(l1))))
## use "1" for TRUE , NULL for anything else
if(!is.null(disable_name_variants)) disable_name_variants <- switch(disable_name_variants==TRUE, "1")
l2 <- list(firstname=firstname, lastname=lastname,
event_year_from= event[1], event_year_to=event[length(event)], event_date= date_event,
from = period[1], to = period[length(period)],
birth_year_from=birth[1], birth_year_to=birth[length(birth)], birth_date=date_birth,
birth_place=place_birth,
domicile=domicile, position=position, disable_name_variants =disable_name_variants)
n <- sapply(l2, length)
if(any(n > 1)) stop("Vectors are not allow in ", names(n[n>1]))
url2 <- httr:::compose_query( c( l1, l2) )
url <- paste0( url1, "?", url2)
url
message("Getting ", url)
doc <- XML::htmlParse( suppressWarnings( readLines(url) ) )
results <- XML::xpathSApply(doc, "//p[@class='comment standalone']", XML::xmlValue)
n <- as.numeric( gsub(".* ([0-9]+) hit.*", "\\1", results))
message(results)
x <- XML::readHTMLTable(doc , stringsAsFactors=FALSE , which=1)
# lower-case first word for column name
names(x) <- tolower(gsub("([^ ]+).*", "\\1", names(x) ))
x$url <- XML::xpathSApply(doc, "//td/a", XML::xmlGetAttr, "href")
if(all){
n <- ceiling(n/50)
y <- vector("list", n)
y[[1]] <- x
for( i in 2: n){
message(" Getting page ", i)
url2 <- paste0(url, "&page=", i)
doc <- XML::htmlParse(suppressWarnings( readLines(url2) ) )
x <- XML::readHTMLTable(doc , stringsAsFactors=FALSE , which=1)
names(x) <- tolower(gsub("([^ ]+).*", "\\1", names(x) ))
x$url <- XML::xpathSApply(doc, "//td/a", XML::xmlGetAttr, "href")
y[[i]] <- x
Sys.sleep(sample(1:3))
}
x <- do.call("rbind", y)
}else{
if(n > 50) message("Only downloading first 50 records. Set all=TRUE to download all ", n, " results")
}
## change dd.mm.yyyy to yyyy.mm.dd for sorting
x$event <- sapply(strsplit(x$event, "\\."), function(y) paste(rev(y), collapse="."))
x <- x[order(x$event),]
rownames(x) <- NULL
## add record type by parsing url string
y <- gsub( ".*/en-gb/([^/]+/[^/]+).*", "\\1", x$url)
utype <- c("ft/person", "gen/vis", "kb/df", "kb/dp", "kb/gr", "kb/in",
"kb/kf", "kb/pa", "kb/uf", "kb/va", "kb/vi")
rtype <- c("census", "tax", "stillbirth", "baptism", "death", "introduced", "confirmation",
"absolutions", "migration", "vaccination", "marriage")
n1 <- match( y, utype)
if(any(is.na(n1))) message("Warning: missing record type")
x$type <- rtype[n1]
## add year, if missing, check source
n1 <- x$event == ""
if(sum(n1)>0) x$event[n1] <- gsub(".*?([0-9]{4}).*", "\\1", x$source[n1])
# some vaccinations with ,00
x$event <- gsub(",00$", "", x$event)
x$year <- as.numeric(substr( x$event, 1,4))
## change date to birth
names(x)[2] <- "birth"
x <- x[, c(1, 5, 11, 10, 7,6, 2:4, 8:9)]
dplyr::tbl_df(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.