Nothing
#' @title Search Cristin API
#' @description Use search parameters for the Cristin API and return the results
#' as a tibble. See \code{\link{CristinWrangler}}
#' @param id The Cristin id of the result, Default: NULL
#' @param doi DOI name (e.g. doi=10.1000/123456), Default: NULL
#' @param title The title of the result, Default: NULL
#' @param contributor Author's name or Cristin person id, Default: NULL
#' @param issn The issn of the result, Default: NULL
#' @param unit Id (e.g. unit=185.53.18.10), Default: NULL
#' @param institution Id (one number, e.g. institution=185), name or acronym of
#' the institution the contributors belong to, Default: NULL
#' @param user A person's username in Cristin together with the institution id
#' separated by colon (e.g., `askeladd:185`), Default: NULL
#' @param category Categories. See
#' \href{https://api.cristin.no/v2/doc/index.html#}{category codes}, Default:
#' NULL
#' @param published_since Results published since and inclusive the given year,
#' (yyyy), Default: NULL
#' @param published_before Results published before and inclusive the given
#' year, (yyyy), Default: NULL
#' @param created_since Results created since and inclusive the given date,
#' (yyyy-mm-dd), Default: NULL
#' @param created_before Results created before and inclusive the given date,
#' (yyyy-mm-dd), Default: NULL
#' @param modified_since Results modified since and inclusive the given date,
#' (yyyy-mm-dd), Default: NULL
#' @param modified_before Results modified before and inclusive the given date,
#' (yyyy-mm-dd), Default: NULL
#' @param year_reported The year a result was reported, Default: NULL
#' @param project_code Project code is the internal reference number used by
#' funding source, Default: NULL
#' @param funding_source Funding source code e.g: NFR, Default: NULL
#' @param funding Funding source code e.g: NFR, and project_code together
#' separated by colon (e.g., NFR:1234), Default: NULL
#' @param lang Two letter
#' \href{https://api.cristin.no/v2/doc/index.html#lang}{language code},
#' Default: NULL
#' @param page Page number. See
#' \href{https://api.cristin.no/v2/doc/index.html#pagination}{pagination},
#' Default: 1
#' @param per_page Number of items per page (1000 is max). See
#' \href{https://api.cristin.no/v2/doc/index.html#pagination}{pagination},
#' Default: 1000
#' @param max.results Do you need a limit?, Default: NULL
#' @param no.results Do you need only the number of results?, Default: FALSE
#' @param sort Sorts on `category` and/or `year_published`. See
#' \href{https://api.cristin.no/v2/doc/index.html#search}{search and sort}.
#' Default sort order is on `cristin_result_id` in ascending order, Default:
#' NULL
#' @param filter Vector of cateogries to include in results. See
#' \href{https://api.cristin.no/v2/results/categories}{category code},
#' Default: NULL
#' @param fields `fields = all` gives a list of result objects with all
#' available fields. If this parameter is omitted, a list of result summaries
#' with fewer fields will be returned, Default: all
#' @param open.query Define your own query terms, Default: NULL
#' @param all.results Find all results in query, Default: TRUE
#' @param force Force is seldom wise, but sometimes..., Default: FALSE
#' @param remove.duplicates Remove duplicates if TRUE, Default: TRUE
#' @param silent c2z is noisy, tell it to be quiet, Default: FALSE
#' @param base.url The base url for the Cristin API, Default:
#' https://api.cristin.no/v2/results
#' @param custom.url Define your own Cristin API url, Default: NULL
#' @param use.identifiers Use if ISBN/DOI identifiers if enabled, Default: TRUE
#' @param crossref.search Query Crossref database based on title, authors, and
#' date if enabled, Default: FALSE
#' @param autosearch Results could be automatically evaluated (based on some
#' logic) or you could inspect them manually if set to FALSE, Default: TRUE
#' @param zotero A list with information on the specified Zotero library (e.g.,
#' id, API key, collections, and items), Default: NULL
#' @param zotero.import Use \code{\link{CristinWrangler}} to wrangle metadata
#' into an acceptable format for Zotero, Default: TRUE
#' @param zotero.check Check for Cristin references already stored in Zotero,
#' Default: TRUE
#' @param remove.na Cristin contains many, more or less, obscure categories, and
#' not all are (yet) supported. By default these are removed, however, if this
#' option is set to FALSE unsupported categories are treated according to
#' replace.na, Default: TRUE
#' @param replace.na May the odds be in your favor and replace unsupported
#' categories with a predefined itemType if remove.na is set to false,
#' Default: 'book'
#' @param force.type Force all items to a predefined itemType, Default: NULL
#' @param override Put your faith in the algorithms and the identifiers (i.e.,
#' DOI/ISBN) and override what is reported in Cristin, Default: FALSE
#' @param polite Please store you email in `.Renviron` to query Crossref,
#' Default: TRUE
#' @param log A list for storing log elements, Default: list()
#' @return A list with (exported) items from Cristin
#' @details Please see
#' #' \href{https://oeysan.github.io/c2z/}{https://oeysan.github.io/c2z/}
#' @examples
#' \donttest{
#' # Simple `Cristin` search by id
#' example <- Cristin(id = "840998")
#'
#' # Print index using `ZoteroIndex`
#' if (any(nrow(example$results))) {
#' ZoteroIndex(example$results) |>
#' dplyr::select(name) |>
#' print(width = 80)
#' }
#' }
#' @seealso
#' \code{\link[httr]{http_error}}, \code{\link[httr]{GET}},
#' \code{\link[httr]{RETRY}}
#' \code{\link[utils]{tail}}, \code{\link[utils]{head}}
#' \code{\link[dplyr]{slice}}
#' @rdname Cristin
#' @export
Cristin <- function (id = NULL,
doi = NULL,
title = NULL,
contributor = NULL,
issn = NULL,
unit = NULL,
institution = NULL,
user = NULL,
category = NULL,
published_since = NULL,
published_before = NULL,
created_since = NULL,
created_before = NULL,
modified_since = NULL,
modified_before = NULL,
year_reported = NULL,
project_code = NULL,
funding_source = NULL,
funding = NULL,
lang = NULL,
page = 1,
per_page = 1000,
max.results = NULL,
no.results = FALSE,
sort = NULL,
filter = NULL,
fields = "all",
open.query = NULL,
all.results = TRUE,
force = FALSE,
remove.duplicates = TRUE,
silent = FALSE,
base.url = "https://api.cristin.no/v2/results",
custom.url = NULL,
use.identifiers = TRUE,
crossref.search = FALSE,
autosearch = TRUE,
zotero = NULL,
zotero.import = TRUE,
zotero.check = TRUE,
remove.na = TRUE,
replace.na = "book",
force.type = NULL,
override = FALSE,
polite = TRUE,
log = list()) {
# Visible bindings
export <- n.filter <- unsupported.id <- missing.id <- date_published <-
year_published <- firstName <- creatorType <- lastName <- filter.name <-
external.data <- results <- name <- key <- log.eta <- NULL
# Trim arguments
args <- as.list(environment())
for (i in seq_along(args)) {
arg <- if (is.character(args[[i]])) Trim(args[[i]]) else args[[i]]
assign(names(args[i]), arg)
}
# Set per_page to Cristin limit (1000) if per_page > 1000
if (per_page > 1000) per_page <- 1000
# Set per_page to 1 if no.results is set to TRUE
if (no.results) per_page <- 1
# Run if max.results is defined
if (any(per_page > max.results)) {
# Set limit as max.results if limit > max.results (and no filters)
per_page <- max.results
all.results <- FALSE
} else if (!all.results) {
max.results <- per_page
}
# Create query list
query.list <- if (!is.null(open.query)) {
open.query
} else {
list(id = id,
doi = doi,
title = title,
contributor = contributor,
issn = issn,
unit = unit,
institution = institution,
user = user,
category = category,
published_since = published_since,
published_before = published_before,
created_since = created_since,
created_before = created_before,
modified_since = modified_since,
modified_before = modified_before,
year_reported = year_reported,
project_code = project_code,
funding_source = funding_source,
funding = funding,
lang = lang,
page = page,
per_page = per_page,
sort = sort,
fields = fields)
}
# Remove empty elements from query
query.list <- (query.list[lengths(query.list) != 0])
# API query
if (is.null(custom.url)) {
httr.get <- Online(
httr::RETRY("GET", base.url, query = query.list, quiet = TRUE),
message = "Cristin URL",
silent = TRUE
)
} else {
httr.get <- Online(
httr::RETRY("GET", custom.url, quiet = TRUE),
message = "Zotero URL",
silent = TRUE
)
}
# Add to log
log <- append(log, httr.get$log)
# Log and return error if status code != 200
if (httr.get$error) {
return (log)
}
# Number of results
total.results <- n.results <- GoFish(
as.numeric(httr.get$data$headers[["x-total-count"]])
)
if (is.na(total.results)) total.results <- n.results <- 1
if (any(max.results < total.results)) total.results <- max.results
# Log number of results
log <- LogCat(
sprintf(
"Found %s", Pluralis(total.results, "result")
),
silent = silent,
log = log
)
# Return only number of results if no.results is set to TRUE
if (no.results | n.results == 0) {
return (
list(
n.results = n.results,
log = log
)
)
}
# Format data
results <- JsonToTibble(httr.get$data)
# Filter out articles not present in `filter` if defined.
if (!is.null(filter)) {
filter.results <- results |>
dplyr::filter(category$code %in% filter)
n.filter <- nrow(results) - nrow(filter.results)
results <- filter.results
}
# Check if data is supported if zotero.import = TRUE
if (zotero.import & nrow(results)) {
supported.data <- CristinSupported(
results,
zotero.check = zotero.check,
zotero = zotero,
remove.na = remove.na,
replace.na = replace.na,
force.type = force.type,
remove.duplicates = remove.duplicates,
silent = silent,
log = log
)
unsupported.id = supported.data$unsupported.id
missing.id = supported.data$missing.id
log <- supported.data$log
results <- supported.data$data
}
# Find number of pages given query parameters
pages <- seq_len(ceiling(total.results / per_page))
# Stop function if maximum number of pages exceeds total pages
if (page > max(pages)) {
log <- LogCat(
sprintf(
"The current query contains only %s",
Pluralis(max(pages), "page")
),
silent = silent,
fatal = TRUE,
log = log
)
}
# Number of results per page
limits <- c(
rep(per_page, length(pages) - 1),
per_page - (per_page * length(pages) - total.results)
)
# Set total.results to max.results if defined and not filter
if (all(is.null(filter), !zotero.import)) {
total.results <- max(0, min(max.results, total.results))
}
# Set total.results to maximum page size if start page == last page
if (page == max(pages)) {
total.results <- max(0, min(
total.results, utils::tail(limits, 1)
))
}
# Run if total.results > limit and all.results set to TRUE
if (total.results > per_page & all.results) {
# Find remaining pages given the query parameters
remaining.pages <- seq_len(ceiling(total.results / per_page) - 1 ) + page
# Set remaining pages to the maximum allowed by constraints
remaining.pages <- remaining.pages[remaining.pages <= max(pages)]
# Number of remaining pages
k.remaining.pages <- length(remaining.pages)
# Remaining results per pages
limits <- limits[match(remaining.pages, pages)]
# Calculate maximum results
total.results <- max(0,min(total.results, per_page + sum(limits)))
# Results per remaining pages
limits <- c(
utils::head(limits, -1), # Remove last page value
min( # min value of page value and remaining value
total.results - (per_page * k.remaining.pages), # remaining value
utils::tail(limits, 1) # Last page value
)
)
}
# Log number of remaining results if total.results < total.results
if (any(max.results < n.results, n.results > per_page & !all.results)) {
log <- LogCat(
sprintf(
"The provided query is limited to a maximum of %s" ,
Pluralis(max.results, "result")
),
silent = silent,
log = log
)
}
# Fetch remaining results if total.results > limit
# and all.results set to TRUE
if (total.results > per_page & all.results) {
# Stop function if 10 or more queries are needed and force is FALSE
if (k.remaining.pages >= 10 & !force) {
log <- LogCat(
"10 or more queries to the API is needed to complete the
request. Are you sure you want do this? Set force to TRUE",
fatal = TRUE,
silent = silent,
log = log
)
}
log <- LogCat(
sprintf(
"Conducting remaining %s" ,
Pluralis(k.remaining.pages, "query", "queries")
),
silent = silent,
log = log
)
# Start time for query
query.start <- Sys.time()
# Cycle through remaining queries
## Should perhaps vectorize, but for loop seems more informative tbh
for (i in seq_len(k.remaining.pages)) {
# Change start
query.list$page <- remaining.pages[i]
# Fetch remaining data
httr.get <- Online(
httr::RETRY("GET", base.url, query = query.list, quiet = TRUE),
silent = TRUE,
message = "Cristin URL"
)
# Add to log
log <- append(log, httr.get$log)
# Log and skip skip iteration upon error
if (httr.get$error) {
next
}
# Append data to list
add.results <- JsonToTibble(httr.get$data)
# Filter out articles not present in `filter` if defined.
if (!is.null(filter)) {
filter.results <- add.results |>
dplyr::filter(category$code %in% filter)
n.filter <- (nrow(add.results) - nrow(filter.results)) + n.filter
add.results <- filter.results
}
# Check if data is supported if zotero.import = TRUE
if (zotero.import & nrow(add.results)) {
supported.data <- CristinSupported(
add.results,
zotero.check = zotero.check,
zotero = zotero,
remove.na = remove.na,
replace.na = replace.na,
force.type = force.type,
remove.duplicates = remove.duplicates,
silent = TRUE,
log = log
)
unsupported.id = supported.data$unsupported.id
missing.id = supported.data$missing.id
log <- supported.data$log
add.results <- supported.data$data
}
# Append data to list
results <- AddAppend(add.results, results)
# Check if number of results > max.results if defined
if (!is.null(max.results)) {
if (any(nrow(results) > max.results)) {
# Remove rows not wanted from results
results <- results |>
dplyr::slice_head(n = max.results)
# Complete loop
i <- k.remaining.pages
}
}
# Estimate time of arrival
log.eta <- LogCat(
Eta(query.start, i, k.remaining.pages),
silent = silent,
flush = TRUE,
log = log,
append.log = FALSE
)
# Stop loop if i = k
if (i == k.remaining.pages) {
log <- LogCat(
"The maximum number of results is reached.",
silent = silent,
log = log
)
break
}
}
# Add to log
log <- append(log, log.eta)
}
# Log number of filtered out results
if (any(n.filter > 0)) {
log <- LogCat(
sprintf(
"Filtered out %s." ,
Pluralis(n.filter, "result")
),
silent = silent,
log = log
)
}
# Log number of unsupported results
if (any(length(unsupported.id) > 0)) {
log <- LogCat(
sprintf(
"Filtered out %s. See `$unsupported.id`." ,
Pluralis(length(unsupported.id), "result")
),
silent = silent,
log = log
)
}
# Log number of results with missing data
if (any(length(missing.id) > 0)) {
log <- LogCat(
sprintf(
"Filtered out %s. See `$missing.id`." ,
Pluralis(length(missing.id), "result")
),
silent = silent,
log = log
)
}
# Convert to Zotero-format if zotero.import is TRUE
if (zotero.import & nrow(results)) {
zotero.data <- CristinWrangler(
data = results,
use.identifiers = use.identifiers,
crossref.search = crossref.search,
autosearch = autosearch,
override = override,
silent = silent,
polite = polite,
log = log
)
results <- zotero.data$results
log <- zotero.data$log
}
return (
list(
data.cache = httr.get$data,
results = results,
n.results = n.results,
n.filter = n.filter,
unsupported.id = unsupported.id,
missing.id = missing.id,
log = log[!duplicated(log)]
)
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.