#' Search census records at the Norway Digital Archives
#'
#' Formats the Search for Persons in the 1801 census at
#' \url{https://digitalarkivet.arkivverket.no/en-gb/ft/sok/1801}.
#' Change the year to 1865, 1875, 1900, or 1910 to search other censuses
#'
#' @param year Census year, 1801, 1865, 1875, 1900, or 1910 (other years are partial censuses)
#' @param municipal Municipal code, one or more codes like 1638 for Orkdal or c(1638, 1636) for Orkdal and Meldal
#' @param county County code, first two letters of municipal code for all parishes in county
#' @param region Region code, 1 to 5 for ...
#' @param first_name First name
#' @param last_name Last name or patronym
#' @param gender Gender, m or f
#' @param family_position Family position
#' @param marital_status Marital status
#' @param occupation Occupation or job
#' @param date_of_birth Date of birth in yyyy-mm-dd format, use 175* for anyone born in the 1750s
#' @param age estimated Age at census
#' @param notes Notes
#' @param residence Residence or domicile
#' @param district Census district
#' @param house House number
#' @param all Default search gets first page with 50 records, set all=TRUE to download all results
#' @param \dots Other options like \code{trim_parish} passed to \code{\link{census_table}}
#' @return A data.frame with name, residence, gender, birth_date, status, first_name, patronymic,
#' last_name. The last three columns are parsed from name in the html file (first is before a
#' string of tabs, patronym is right after the tabs or last is after tabs AND a space)
#' @note Municipal codes in code{\link{municipal}}.
#' @author Chris Stubben
#' @examples
#' \dontrun{
#' # set all=TRUE to get all 68 Siverts in Orkdal, default is first page only
#' census_search(1801, 1638, first="Siv*", all=TRUE)
#' # search in Meldal and Orkdal. Set trim_parish=FALSE to keep parish in residence
#' census_search(1801, municipal=c(1636, 1638), first="Lars", last="Siv*", trim_parish=FALSE)
#' }
#' @export
census_search <- function( year, municipal=NULL, county=NULL, region=NULL,
first_name=NULL, last_name=NULL, gender=NULL, family_position=NULL,
marital_status=NULL, occupation=NULL, date_of_birth=NULL, age=NULL,
notes=NULL, residence=NULL, district=NULL, house=NULL, all=FALSE, ...){
url1 <- paste0("https://digitalarkivet.no/en/census/search/", year)
l1 <- list(`m[]` = municipal, `c[]` = county, `r[]` = region)
x <- unlist(l1)
if( any( !grepl("^[0-9]+$", x))) stop("Locations should be numeric codes")
## make new list if more than one location code (vectors not allowed in compose_query)
if( any( sapply(l1, length)>1)){
l1 <- as.list(x)
names(l1) <- names(x)
}
# skip ny_husholdning=new_household, (1 for 1st person or x for Lodger)
l2 <- list(fornavn=first_name, etternavn=last_name, kjonn=gender, familiestilling=family_position,
sivilstand=marital_status, yrke=occupation, fodselsaar=date_of_birth, alder=age,
merknader=notes, bosted=residence, bydel=district, gaardsnummer=house)
n <- sapply(l2, length)
if(any(n > 1)) stop("Vectors are not allow in search fields")
url2 <- httr:::compose_query( c( l1, l2) )
url <- paste0( url1, "?", url2)
message("Getting ", url)
doc <- read_html(url)
results <- html_nodes(doc, xpath="//span[@data-hits]") %>% html_text
n <- as.numeric( gsub(".* ([0-9]+) hit.*", "\\1", results))
message(results)
if(n > 0){
n1 <- ceiling(n/50)
y <- vector("list", n1)
y[[1]] <- census_table(doc, ...)
if(all){
if(n1 > 40) stop("Too many results to download. Try narrowing search")
if(n1 > 1 ){
for( i in 2: n1){
message(" Getting page ", i)
url2 <- paste0(url, "&page=", i)
doc2 <- read_html(url2)
y[[i]] <- census_table(doc2, ...)
Sys.sleep(sample(1:3))
}
}
}else{
if(n > 50) message("Only downloading first 50 records. Set all=TRUE to download all ", n, " results")
}
x <- bind_rows(y)
x
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.