#' Search items from the Digital Public Library of America (DPLA).
#'
#' @export
#'
#' @param q (character) Query terms.
#' @param description (character) Object description.
#' @param title (character) Object title.
#' @param subject (character) Subject area
#' @param creator (character) Creator name
#' @param type (character) Type of object
#' @param publisher (character) Publisher
#' @param format (character) Format
#' @param rights (character) Rights
#' @param contributor (character) Contributor
#' @param provider (character) Provider
#' @param sp (character) Query all spatial fields.
#' @param sp_coordinates (character) Query only coordinates. Of the form
#' <latitude,longitude>
#' @param sp_city (character) Query by city name.
#' @param sp_county (character) Query by county name.
#' @param sp_distance (character) Distance from point defined in the
#' \code{sp_coordinates} field
#' @param sp_country (character) Query by location country
#' @param sp_code (character) Query by ISO 3166-2 country code. Codes are
#' included in this package, see [country_codes]. Find out more at
#' <https://www.iso.org/obp/ui/#search>
#' @param sp_name (character) Location name.
#' @param sp_region (character) Name of a region, e.g., "Upstate New York"
#' (literal)
#' @param sp_state (character) ISO 3166-2 code for a U.S. state or territory
#' @param language (character) One of a name of a language, or an ISO-639 code.
#' Codes are included in this package, see [language_codes].
#' Find out more at <http://www-01.sil.org/iso639-3/default.asp>
#' @param sort_by (character) The default sort order is ascending. Most, but
#' not all fields can be sorted on. Attempts to sort on an un-sortable field
#' will return the standard error structure with a HTTP 400 status code.
#' @param fields (character) A vector of the fields to return in the output.
#' The default is all fields. See [dpla_fields] for options.
#' @param date (character) A date
#' @param date_before (character) Date before
#' @param date_after (character) Date after
#' @param page_size (integer) Number of items to return, defaults to 100.
#' Max of 500.
#' @param page (integer) Page number to return, defaults to NULL.
#' @param facets (character) Fields to facet on.
#' @param facet_size (integer) Default to 100, maximum 2000.
#' @param key (character) Your DPLA API key. See [dpla_get_key()]
#' @param ... Curl options passed on to [crul::HttpClient()]
#' @param what (character) One of list or table (data.frame). (Default: table)
#'
#' @return A list of length three:
#' \itemize{
#' \item meta - metadata for the call, with one row and three columns:
#' \itemize{
#' \item found - number of records found matching criteria
#' \item start - offset from record 1
#' \item returned - number of records returned
#' }
#' \item data - tibble (data.frame) of results
#' \item facets - list of same length as number of facets requested,
#' each with a list of length two with a meta tibble/data.frame, and
#' a data tibble/data.frame
#' }
#'
#' @details Note that parsing of results right now can lead to multiple
#' rows per record because sometimes an array of length > 1 for a result
#' makes a data.frame of more than one row per record. Thus, you will
#' get duplicated values in the `id` column of the results.
#'
#' @examples \dontrun{
#' # Basic search, "fruit" in any fields
#' dpla_items(q="fruit")
#'
#' # Limit records returned
#' dpla_items(q="fruit", page_size=2)
#'
#' # Return certain fields
#' dpla_items(q="fruit", fields=c("id","publisher","format"))
#' dpla_items(q="fruit", fields="subject")
#'
#' # Max is 500 per call, but you can use combo of page_size and page params
#' dpla_items(q="fruit", fields="id", page_size=500)$meta$returned
#' lapply(1:2, function(x) {
#' dpla_items(q="fruit", fields="id", page_size=500, page=x)$meta$returned
#' })
#' out <- lapply(1:2, function(x) dpla_items(q="fruit", fields="id",
#' page_size=500, page=x))
#' lapply(out, function(y) head(y$data))
#'
#' # Search by date
#' out <- dpla_items(q="science", date_before=1900, page_size=200)
#' out$data
#'
#' # Search by various fields
#' dpla_items(description="obituaries", page_size=2, fields="description")
#' dpla_items(title="obituaries", page_size=2, fields="title")
#' dpla_items(subject="yodeling", page_size=2, fields="subject")
#' dpla_items(creator="Holst-Van der Schalk", page_size=2, fields="creator")
#' dpla_items(type="text", page_size=2, fields="type")
#' dpla_items(publisher="Leningrad", page_size=2, fields="publisher")
#' dpla_items(rights="unrestricted", page_size=2, fields="rights")
#' dpla_items(provider="HathiTrust", page_size=2, fields="provider")
#'
#' ## don't seem to work
#' # dpla_items(contributor="Smithsonian", page_size=2, fields="contributor")
#' # dpla_items(format="Electronic resource", page_size=2, fields="format")
#'
#' # Spatial search
#' ## sp searches all spatial fields, or search on specific fields, see those
#' ## params with sp_*
#' dpla_items(sp='Boston', page_size=2)
#' dpla_items(sp_state='Hawaii', page_size=2)
#' dpla_items(sp_state='Massachusetts OR Hawaii', page_size=2)
#' dpla_items(sp_coordinates='40,-100', page_size=2)
#' dpla_items(sp_country='Canada', page_size=2)
#' dpla_items(sp_county='Sacramento', page_size=2)
#'
#' # Language search
#' dpla_items(language='Russian')$meta
#' dpla_items(language='rus')$meta
#' dpla_items(language='English')$meta
#'
#' # Sorting
#' dpla_items(fields=c("id","title"), page_size=10)
#' dpla_items(fields=c("id","title"), page_size=10,
#' sort_by="sourceResource.title")
#'
#' # Faceting
#' dpla_items(facets="sourceResource.format", page_size=0)
#' dpla_items(facets="sourceResource.format", page_size=0, facet_size=5)
#' ids <- c("sourceResource.spatial.state","sourceResource.spatial.country")
#' dpla_items(facets=ids, page_size=0)
#' dpla_items(facets="sourceResource.type", page_size=0)
#' #dpla_items(facets="sourceResource.spatial.coordinates:42.3:-71",
#' # page_size=0)
#' #dpla_items(facets="sourceResource.temporal.begin", page_size=0)
#' dpla_items(facets="provider.name", page_size=0)
#' dpla_items(facets="isPartOf", page_size=0)
#' dpla_items(facets="hasView", page_size=0)
#' }
dpla_items <- function(q=NULL, description=NULL, title=NULL, subject=NULL,
creator=NULL, type=NULL, publisher=NULL, format=NULL, rights=NULL,
contributor=NULL, provider=NULL, sp=NULL, sp_coordinates=NULL, sp_city=NULL,
sp_county=NULL, sp_distance=NULL, sp_country=NULL, sp_code=NULL,
sp_name=NULL, sp_region=NULL, sp_state=NULL, fields=NULL, sort_by=NULL,
date=NULL, date_before=NULL, date_after=NULL, language=NULL,
page_size=100, page=NULL, facets=NULL, facet_size=100, key=NULL,
what="table", ...) {
if (!what %in% c('table', 'list')) {
stop("what must be one of 'table' or 'list'", call. = FALSE)
}
fields2 <- fields
tmp <- dpla_items_(
key, q, page_size, page, fields, provider, description, title, subject,
creator, type, publisher, format, rights, contributor, sp, sp_coordinates,
sp_city, sp_county, sp_distance, sp_country, sp_code, sp_name,
sp_region, sp_state, language, date, date_before, date_after, facets,
facet_size, sort_by, ...)
hi <- stats::setNames(
tibble::as_data_frame(tmp[c('count','start','limit')]),
c('found','start','returned')
)
dat <- tmp$docs
fac <- tmp$facets
if (what == "list") {
structure(list(meta = hi, data = dat, facets = fac))
} else {
facdat <- proc_fac(fac)
output <- tibble::as_data_frame(
data.table::rbindlist(
lapply(dat, getdata, flds = fields),
use.names = TRUE, fill = TRUE
)
)
if (is.null(fields)) {
list(meta = hi, data = output, facets = facdat)
} else {
output2 <- output[,names(output) %in% fields2]
# convert one column factor string to data.frame
# (happens when only one field is requested)
if ("factor" %in% class(output2)) {
output3 <- tibble::as_data_frame(output2)
names(output3) <- fields2
list(meta = hi, data = output3, facets = facdat)
} else {
list(meta = hi, data = output2, facets = facdat)
}
}
}
}
dpla_items_ <- function(key=NULL, q=NULL, page_size=100, page=NULL,
fields=NULL, provider=NULL, description=NULL, title=NULL, subject=NULL,
creator=NULL, type=NULL, publisher=NULL, format=NULL, rights=NULL,
contributor=NULL, sp=NULL, sp_coordinates=NULL, sp_city=NULL, sp_county=NULL,
sp_distance=NULL, sp_country=NULL, sp_code=NULL, sp_name=NULL, sp_region=NULL,
sp_state=NULL, language=NULL, date=NULL, date_before=NULL, date_after=NULL,
facets=NULL, facet_size=100, sort_by=NULL, ...) {
fields <- filter_fields(fields)
args <- dcomp(list(
api_key = key_check(key), q = q, page_size = page_size,
page = page, fields = fields,
provider = provider,
sourceResource.description = description,
sourceResource.title = title,
sourceResource.subject = subject,
sourceResource.creator = creator,
sourceResource.type = type,
sourceResource.publisher = publisher,
sourceResource.format = format,
sourceResource.rights = rights,
sourceResource.contributor = contributor,
sourceResource.spatial = sp,
sourceResource.spatial.coordinates = sp_coordinates,
sourceResource.spatial.city = sp_city,
sourceResource.spatial.county = sp_county,
sourceResource.spatial.distance = sp_distance,
sourceResource.spatial.country = sp_country,
`sourceResource.spatial.iso3166-2` = sp_code,
sourceResource.spatial.name = sp_name,
sourceResource.spatial.region = sp_region,
sourceResource.spatial.state = sp_state,
sourceResource.language = language,
sourceResource.date = date,
sourceResource.date.before = date_before,
sourceResource.date.after = date_after,
facets = coll(facets), facet_size = facet_size,
sort_by = sort_by
))
dpla_GET(path = "items", args, ...)
#dpla_GET(url = paste0(dpbase(), "items"), args, ...)
}
proc_fac <- function(fac){
lapply(fac, function(x){
hitmp <- x[c('_type','total','missing','other')]
hitmp[vapply(hitmp, is.null, logical(1))] <- NA
hitmp <- tibble::as_data_frame(hitmp)
fac_hi <- stats::setNames(hitmp, c('type','total','missing','other'))
getterm <- names(x)[names(x) %in% c('terms','ranges','entries')]
dat <- tibble::as_data_frame(data.table::rbindlist(x[[getterm]],
use.names = TRUE, fill = TRUE))
list(meta = fac_hi, data = dat)
})
}
# function to process data for each element
getdata <- function(y, flds){
if (is.null(flds)) {
id <- y$id
provider <- stats::setNames(tibble::as_data_frame(y$provider),
c("provider_url","provider_name"))
score <- y$score
url <- y$isShownAt
sourceResource <- y$sourceResource
sourceResource_df <- process_res(sourceResource)
sourceResource_df <-
sourceResource_df[,!names(sourceResource_df) %in% c("id","provider")]
other <- process_other(y)
df <- data.frame(id, sourceResource_df, provider, url,
stringsAsFactors = FALSE)
if (!is.null(score)) df$score <- score
tibble::as_data_frame(cbind(df, other))
} else {
names(y) <- gsub("sourceResource.", "", names(y))
if (length(y) == 1) {
onetemp <- list(y[[1]])
onename <- names(y)
names(onetemp) <- eval(onename)
process_res(onetemp)
} else {
process_res(y)
}
}
}
process_res <- function(x){
id <- reduce1(x$identifier)
if (is.null(id)) id <- x$id
title <- reduce1(x$title)
description <- reduce1(x$description)
subject <- if (length(x$subject) > 1) {
paste(as.character(unlist(x$subject)), collapse = ";")
} else {
if (length(x$subject) == 0) NULL else x$subject[[1]][["name"]]
}
language <- x$language[[1]][["name"]]
format <- reduce1(x$format)
collection <- if (any(names(x$collection) %in% "name")) {
x$collection[["name"]]
} else {
"no collection name"
}
type <- reduce1(x$type)
date <- x$date[[1]]
publisher <- reduce1(x$publisher)
provider <- x$provider[["name"]]
creator <- reduce1(x$creator)
rights <- reduce1(x$rights)
replacenull <-
function(y) if (is.null(y) || length(y) == 0) "no content" else y
ents <- list(id,title,description,subject,language,format,
collection,type,provider,publisher,creator,rights,date)
names(ents) <- c("id","title","description","subject","language","format",
"collection","type","provider","publisher","creator",
"rights","date")
ents <- lapply(ents, replacenull)
tibble::as_data_frame(ents)
}
process_other <- function(x){
# FIXME
## Still need to give back fields: @context, originalRecord
get <- c('dataProvider','@type','object','ingestionSequence',
'ingestDate','_rev', 'aggregatedCHO','_id','ingestType','@id')
have <- x[ names(x) %in% get ]
have[vapply(have, class, "") == "NULL"] <- NA_character_
df <- tibble::as_data_frame(have)
stats::setNames(df, names(have))
}
reduce1 <- function(x){
x <- unlist(x)
if (length(x) > 1) paste(as.character(x), collapse = ";") else x
}
filter_fields <- function(fields){
if (!is.null(fields)) {
fieldsfunc <- function(x) {
if (x %in% c("title","description","subject","creator","type",
"publisher", "format","rights","contributor","date",
"spatial","spatial.coordinates","spatial.city",
"spatial.county","spatial.distance","spatial.country",
"spatial.iso3166-2",
"spatial.name","spatial.region",
"spatial.state","language")) {
paste("sourceResource.", x, sep = "")
} else {
x
}
}
paste(vapply(fields, fieldsfunc, "", USE.NAMES = FALSE), collapse = ",")
} else {
NULL
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.