R/util_functions.R

Defines functions fctChkFlds dfOrTibble dbCTRUpdateQueryHistory dbCTRAnnotateQueryRecords dbCTRLoadJSONFiles initTranformers ctrDocsDownload ctrTempDir ctrMultiDownload addMetaData typeField ctrDb ctrCache ctgovClassicToCurrent ctgovVersion

Documented in ctrDb

### ctrdata package
### utility functions

#### variable definitions ####

# prototype return structure
emptyReturn <- list(n = 0L, success = NULL, failed = NULL)
#
# EUCTR definitions
countriesEUCTR <- c(
  "AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", "FI", "FR",
  "DE", "GR", "HU", "IE", "IT", "LV", "LT", "LU", "MT", "NL",
  "PL", "PT", "RO", "SK", "SE", "SI", "ES", "GB", "IS", "LI",
  "NO", "3RD")
countriesActive <- c(
  "AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", "FI", "FR",
  "DE", "GR", "HU", "IE", "IT", "LV", "LT", "LU", "MT", "NL",
  "PL", "PT", "RO", "SK", "SE", "SI", "ES",       "IS", "LI",
  "NO", "3RD")
countriesPreferred <- c(
  "BE", "ES", "DE", "FR", "IT", "NL", "DK", "AT", "PL", "PT")
#
# regexpr
# - queryterm and urls
regQueryterm <- "[^-.a-zA-Z0-9=?+&#%_:\"/, {}\\(\\)]"
# - EudraCT e.g. 2010-022945-52
regEuctr <- "[0-9]{4}-[0-9]{6}-[0-9]{2}"
# - CTGOV
regCtgov <- "NCT[0-9]{8}"
# - CTGOV2
regCtgov2 <- regCtgov
# - regIsrctn
regIsrctn <- "[0-9][0-9]{7}"
# - CTIS e.g. 2022-501549-57-00
regCtis <- "[0-9]{4}-[0-9]{6}-[0-9]{2}-[0-9]{2}"
#
# register list, order important
registerList <- c("EUCTR", "CTGOV", "CTGOV2", "ISRCTN", "CTIS")
#
# user agent for all server requests
ctrdataUseragent <- paste0(
  "ctrdata/", utils::packageVersion("ctrdata"),
  " (https://cran.r-project.org/package=ctrdata)"
)

#### functions ####

#' ctgovVersion
#'
#' Checks for mismatch between label CTGOV and CTGOV2
#' and tries to guess the correct label
#'
#' @param url URL or data frame with query term
#' @param register any of the register names
#' @param verbose print detected API version
#'
#' @returns string
#'
#' @keywords internal
#' @noRd
#'
#' @examples
#'
#' ctgovVersion("https://www.clinicaltrials.gov/ct2/show/NCT02703272", "")
#' ctgovVersion("https://classic.clinicaltrials.gov/ct2/results?cond=&term=NCT02703272&cntry=", "")
#' ctgovVersion("https://clinicaltrials.gov/ct2/results?cond=&term=NCT02703272&cntry=", "")
#' ctgovVersion("https://classic.clinicaltrials.gov/ct2/show/NCT02703272?term=NCT02703272&draw=2&rank=1")
#' ctgovVersion("https://clinicaltrials.gov/ct2/results?cond=", "")
#'
#' ctgovVersion("https://www.clinicaltrials.gov/search?term=NCT04412252,%20NCT04368728", "")
#' ctgovVersion("term=NCT04412252,%20NCT04368728", "CTGOV2")
#' ctgovVersion("https://www.clinicaltrials.gov/search?distance=50&cond=Cancer", "")
#'
ctgovVersion <- function(url, register, verbose = FALSE) {

  # in case the input is from dbQueryHistory
  if (!is.atomic(url)) try(url <- url[["query-term"]], silent = TRUE)
  if (inherits(url, "try-error") || is.null(url)) return(register)

  # logic 1
  if (grepl(paste0(
    "clinicaltrials[.]gov/ct2/|",
    # these are classic-specific
    "[?&]rsub=|[?&]type=|[?&]rslt=|[?&]gndr=|[?&]recrs=|[?&]phase=|",
    "[?&]age=|[?&]cntry=|[?&][a-z]+_[a-z]+="), url)) { # e.g. strd_s
    if (verbose) message("* Appears specific for CTGOV Classic website")
    return("CTGOV")
  }

  # logic 2
  if (grepl(paste0(
    # clear identifiers of CTGOV2
    "aggFilters|clinicaltrials[.]gov/(search|study)[/?]|",
    "[?&]country=|[:][^/]|%3[aA]"), url)) {
    if (verbose) message("* Appears specific for CTGOV REST API 2.0")
    return("CTGOV2")
  }

  # default return
  if (verbose) message("Not overruling register label ", register)
  return(register)

}


#' ctgovClassicToCurrent
#'
#' Fully translates a user's search query URL from the classic website
#' into a query for the current website, with all search parameters.
#' added to accomodate classic website retirement as of 2024-06-25.
#' Note this function only handles search queries, but not display
#' URLs such as https://clinicaltrials.gov/ct2/show/NCT02703272.
#' The function is to be called by ctrGetQueryUrl(), which turns
#' search and display URLs into queries. See also
#' ./inst/tinytest/more_test_ctrdata_param_checks.R
#'
#' @param url url intended for a search in the classic CTGOV website
#'
#' @returns string url suitable for a search current CTGOV website
#'
#' @keywords internal
#' @noRd
#'
#' @importFrom utils URLdecode
#'
#' @examples
#'
#' ctgovClassicToCurrent("https://www.clinicaltrials.gov/search?term=NCT04412252,%20NCT04368728")
#' ctgovClassicToCurrent("https://classic.clinicaltrials.gov/ct2/results?cond=&term=NCT02703272&cntry=")
#' ctgovClassicToCurrent("https://clinicaltrials.gov/ct2/results?cond=&term=NCT02703272&cntry=")
#' ctgovClassicToCurrent("https://www.clinicaltrials.gov/search?distance=50&cond=Cancer")
#' ctgovClassicToCurrent("https://classic.clinicaltrials.gov/ct2/results?term=AREA[MaximumAge]+RANGE[0+days,+28+days]")
#'
ctgovClassicToCurrent <- function(url, verbose = TRUE) {

  # apiParams is a kind of dictionary for
  # mapping classic to current params
  #
  # - not matched:
  #   CTGOV2 studyComp
  #   CTGOV dist
  #   CTGOV rsub
  #
  apiParams <- list(
    #
    # start aggFilters
    #
    "ages:" = list(
      "extract" = c(
        "age=0(&|$)",
        "age=1(&|$)",
        "age=2(&|$)"
      ),
      "replace" = c(
        "child",
        "adult",
        "older"
      ),
      "collapse" = " ",
      "out" = character()
    ),
    #
    "phase:" = list(
      "extract" = c(
        "phase=4(&|$)",
        "phase=0(&|$)",
        "phase=1(&|$)",
        "phase=2(&|$)",
        "phase=3(&|$)"),
      "replace" = c(
        "0",
        "1",
        "2",
        "3",
        "4"),
      "collapse" = " ",
      "out" = character()
    ),
    #
    "docs:" = list(
      "extract" = c(
        "u_prot=Y(&|$)",
        "u_sap=Y(&|$)",
        "u_icf=Y(&|$)"),
      "replace" = c(
        "prot",
        "sap",
        "icf"),
      "collapse" = " ",
      "out" = character()
    ),
    #
    "results:" = list(
      "extract" = c(
        "rslt=With(&|$)",
        "rslt=Without(&|$)"),
      "replace" = c(
        "with",
        "without"),
      "collapse" = " ",
      "out" = character()
    ),
    #
    "funderType:" = list(
      "extract" = c(
        "fund=[013]*[2][013]*(&|$)",
        "fund=[123]*[0][123]*(&|$)",
        "fund=[023]*[1][023]*(&|$)",
        "fund=[012]*[3][012]*(&|$)"),
      "replace" = c(
        "industry", # 2
        "nih",      # 0
        "fed",      # 1
        "other"),   # 3
      "collapse" = " ",
      "out" = character()
    ),
    #
    "studyType:" = list(
      "extract" = c(
        "type=Intr",
        "type=Obsr",
        "type=PReg",
        "type=Expn",
        "ea_tmt=Yes",
        "ea_idv=Yes",
        "ea_int=Yes"
      ),
      "replace" = c(
        "int", # Interventional
        "obs", # Observational
        "obs_patreg", # Patient registries
        "exp",        # Expanded access
        "exp_treat",  # Treatment IND/Protocol
        "exp_indiv",  # Individual patients
        "exp_inter"   # Intermediate-size population
      ),
      "collapse" = " ",
      "out" = character()
    ),
    #
    "sex:" = list(
      "extract" = c(
        "gndr=Female",
        "gndr=Male"
      ),
      "replace" = c(
        "f",
        "m"
      ),
      "collapse" = " ",
      "out" = character()
    ),
    #
    "healthy:" = list(
      "extract" = "hlth=Y",
      "replace" = "y",
      "collapse" = " ",
      "out" = character()
    ),
    #
    "violation:" = list(
      "extract" = "f801=Yes",
      "replace" = "y",
      "collapse" = " ",
      "out" = character()
    ),
    #
    "status:" = list(
      "extract" = c(
        "recrs=a",
        "recrs=d",
        "recrs=b",
        "recrs=e",
        "recrs=h",
        "recrs=f",
        "recrs=g",
        "recrs=i",
        "recrs=m",
        "recrs=c",
        "recrs=j",
        "recrs=k",
        "recrs=l"
      ),
      "replace" = c(
        "rec", # Recruiting
        "act", # Active, not recruiting
        "not", # Not yet recruiting
        "com", # Completed
        "ter", # Terminated
        "enr", # Enrolling by invitation
        "sus", # Suspended
        "wit", # Withdrawn
        "unk", # Unknown
        "ava", # Available
        "nla", # No longer available
        "tna", # Temporarily not available
        "afm"), # Approved for marketing
      "collapse" = " ",
      "out" = character()
    ),
    #
    # end aggFilters
    #
    # dates
    "dates" = list(
      "extract" = list(
        "strd_s=([0-9]{2})/([0-9]{2})/([0-9]{4})(&|$)",
        "strd_e=([0-9]{2})/([0-9]{2})/([0-9]{4})(&|$)",
        "prcd_s=([0-9]{2})/([0-9]{2})/([0-9]{4})(&|$)",
        "prcd_e=([0-9]{2})/([0-9]{2})/([0-9]{4})(&|$)",
        "sfpd_s=([0-9]{2})/([0-9]{2})/([0-9]{4})(&|$)",
        "sfpd_e=([0-9]{2})/([0-9]{2})/([0-9]{4})(&|$)",
        "rfpd_s=([0-9]{2})/([0-9]{2})/([0-9]{4})(&|$)",
        "rfpd_e=([0-9]{2})/([0-9]{2})/([0-9]{4})(&|$)",
        "lupd_s=([0-9]{2})/([0-9]{2})/([0-9]{4})(&|$)",
        "lupd_e=([0-9]{2})/([0-9]{2})/([0-9]{4})(&|$)"
      ),
      "replace" = list(
        "start=\\3-\\1-\\2_",
        "start=_\\3-\\1-\\2",
        "primComp=\\3-\\1-\\2_",
        "primComp=_\\3-\\1-\\2",
        "firstPost=\\3-\\1-\\2_",
        "firstPost=_\\3-\\1-\\2",
        "resFirstPost=\\3-\\1-\\2_",
        "resFirstPost=_\\3-\\1-\\2",
        "lastUpdPost=\\3-\\1-\\2_",
        "lastUpdPost=_\\3-\\1-\\2"
      ),
      "collapse" = "@",
      "out" = list()
    ),
    #
    # translate simple terms
    list(
      "extract" = c(
        "(cond|city|id|intr|lead|locn|outc|spons|state|titles|term)=(.+)(&|$)",
        "(cntry)=(.+)(&|$)"
      ),
      "replace" = c(
        "&\\1=\\2",
        "&country=\\2"
      ),
      "collapse" = "",
      "out" = character()
    )
    #
  ) # apiParams

  ## now operate on the input

  # mangle input
  queryterm <- utils::URLdecode(url)
  queryterm <- gsub("[+]", " ", queryterm)

  # some specifics found by chance
  queryterm <- sub("[?&]recr=Open", "&recrs=b&recrs=a&recrs=c", queryterm)
  queryterm <- sub("[?&]recr=Closed", "&recrs=f&recrs=d&recrs=g&recrs=h&recrs=e&recrs=i&recrs=m&recrs=j&recrs=k&recrs=l", queryterm)

  # split and focus on parameters
  queryterm <- strsplit(queryterm, split = "[&?]")[[1]]
  queryterm <- queryterm[!grepl("^https://", queryterm)]
  queryterm <- queryterm[queryterm != ""]

  # iterate over API terms
  for (t in seq_along(queryterm)) {
    for (a in seq_along(apiParams)) {
      for (i in seq_along(apiParams[[a]][["extract"]])) {
        if (grepl(apiParams[[a]][["extract"]][[i]], queryterm[t])) {
          item <-
            sub(apiParams[[a]][["extract"]][[i]],
                apiParams[[a]][["replace"]][[i]],
                queryterm[t]
            )
          apiParams[[a]][["out"]] <-
            paste0(
              c(apiParams[[a]][["out"]], item),
              collapse = apiParams[[a]][["collapse"]]
            )
        } # if extract
      } # extract
    } # apiParams
  } # queryterm

  # merge
  apiParams <- sapply(apiParams, "[[", "out")
  apiParams <- apiParams[lapply(apiParams, length) > 0L]

  # handle two dates parameters into one
  if (length(apiParams[["dates"]])) {
    tmpSplit <- strsplit(apiParams[["dates"]], "@", fixed = TRUE)[[1]]
    apiParams[["dates"]] <- ""
    for (t in unique(sub("(.+)=.+", "\\1", tmpSplit))) {
      apiParams[["dates"]] <- paste0(c(
        apiParams[["dates"]], paste0(
          t, "=", sub(
            "_+", "_",
            paste0(
              sub(".+=(.+)", "\\1", tmpSplit[grepl(t, tmpSplit)]),
              collapse = "_")),
          collapse = "")),
        collapse = "&")
    }}

  # handle parts within aggFilter
  for (t in seq_along(apiParams)) {
    if (grepl(":", names(apiParams[t]))) apiParams[t] <- paste0(
      names(apiParams[t]), paste0(
        unique(strsplit(apiParams[[t]], " ")[[1]]), collapse = " ")
    )
  }

  # merge other and aggFilter parts
  apiParams <- paste0(
    "https://clinicaltrials.gov/search?",
    paste0(
      unique(apiParams[!grepl(":", names(apiParams))]),
      collapse = ""),
    "&aggFilters=",
    paste0(
      unique(apiParams[grepl(":", names(apiParams))]),
      collapse = ",")
  )

  # handle country
  if (grepl("[?&]country=[^$&]", apiParams)) {

    countryCode <- sub(".+([?&]country=)([A-Z]+)([$&]).*", "\\2", apiParams)

    # translate from 2 letter ISO to English name
    if (countryCode != apiParams) apiParams <-
        sub("([?&]country=)([A-Z]+)([$&])",
            paste0("\\1", countryTable[which(
              countryCode == countryTable[, "A2"]
            ), "ISO3166name", drop = TRUE][1], "\\3"),
            apiParams)
  }

  # prettify
  apiParams <- gsub("&&", "&", apiParams)
  apiParams <- gsub("&aggFilters=$", "", apiParams)
  apiParams <- gsub("search[?]&", "search?", apiParams)

  ## inform user

  # inform user
  if (verbose) message(
    "Since 2024-06-25, the classic CTGOV servers are no longer available. ",
    "Package ctrdata has translated the classic CTGOV query URL from this ",
    "call of function ctrLoadQueryIntoDb(queryterm = ...) into a query URL ",
    "that works with the current CTGOV2. This is printed below and is also ",
    "part of the return value of this function, ctrLoadQueryIntoDb(...)$url. ",
    "This URL can be used with ctrdata functions. Note that the fields and ",
    "data schema of trials differ between CTGOV and CTGOV2. "
  )

  # inform user
  message(
    "\nReplace this URL:\n\n", url,
    "\n\nwith this URL:\n\n", apiParams, "\n")

  # return
  return(apiParams)

} # end ctgovClassicToCurrent


#' Check, write, read cache object for ctrdata
#'
#' @param xname name of variable to read or write
#'
#' @param xvalue value of variable to write
#'
#' @param verbose If \code{TRUE}, prints additional information
#' (default \code{FALSE}).
#'
#' @keywords internal
#' @noRd
#'
#' @returns value of variable or `NULL` if variable does not exist
#'
ctrCache <- function(xname, xvalue = NULL, verbose = FALSE) {

  # hidden environment .ctrdataenv created in zzz.R

  # write or overwrite and exit early
  if (!is.null(xvalue)) {
    assign(x = xname, value = xvalue, envir = .ctrdataenv)
    if (verbose) message("- Wrote ", xname, " to cache ")
    return(xvalue)
  }

  # check and read any value for xname variable
  if (verbose) message("- Checking cache...")
  if (exists(x = xname, envir = .ctrdataenv)) {
    val <- try(get(x = xname, envir = .ctrdataenv), silent = TRUE)
    if (inherits(val, "try-error")) return(NULL)
    if (verbose) message("- Returning ", xname, " ")
    return(val)
  }

  # default
  return(NULL)
}


#' Check and prepare nodbi connection object for ctrdata
#'
#' @param con A database connection object, created with
#' \code{nodbi}. See section `1 - Database connection` in
#' \link{ctrdata}.
#'
#' @keywords internal
#'
#' @importFrom nodbi src_sqlite src_duckdb docdb_list
#' @importFrom utils capture.output
#'
#' @returns Connection object as list, with collection
#'  element under root
#'
ctrDb <- function(con) {

  ## ensure requirements
  minV <- sub(
    ".*nodbi[(<>=[:space:]]+([.0-9]+?)\\).*", "\\1",
    utils::packageDescription("ctrdata", fields = "Imports")
  )
  if (!(utils::packageVersion("nodbi") >=
        package_version(minV))) {
    stop("Update package 'nodbi' to version ",
         minV, " or later to use this function.",
         call. = FALSE)
  }

  ## check constructor
  if (!inherits(con, "docdb_src")) {
    stop(
      "Database connection object 'con' was not, ",
      "but should be created with nodbi.",
      call. = FALSE)
  }

  ## postgres
  if (inherits(con, "src_postgres")) {

    if (is.null(con$collection)) {
      stop(
        "Specify attribute 'collection' with a table name, using ",
        "<nodbi src_postgres object>[[\"collection\"]] <- \"test\"), ",
        "for package ctrdata to work.",
        call. = FALSE)
    }

    # add database as element under root
    con <- c(
      con,
      "db" = con$dbname,
      "ctrDb" = TRUE)

    ## return
    return(structure(
      con,
      class = c("src_postgres", "docdb_src")))
  }

  ## sqlite
  if (inherits(con, "src_sqlite")) {

    if (is.null(con$collection)) {
      stop(
        "Specify parameter 'collection' with a table name, ",
        "such as nodbi::src_sqlite(collection = 'test'), ",
        "for package ctrdata to work.",
        call. = FALSE)
    }

    # check
    if (inherits(try(nodbi::docdb_list(con), silent = TRUE), "try-error")) {
      con <- nodbi::src_sqlite(
        dbname = con$dbname,
        collection = con$collection)
    }

    # add database as element under root
    con <- c(
      con,
      "db" = con$dbname,
      "ctrDb" = TRUE)

    # print warning
    if (grepl(":memory:", con$dbname)) {
      warning(
        "Database not persisting",
        call. = FALSE, noBreaks. = FALSE)
    }

    ## return
    return(structure(
      con,
      class = c("src_sqlite", "docdb_src")))
  }

  ## mongo
  if (inherits(con, "src_mongo")) {

    # rights may be insufficient to call info(),
    # hence this workaround that should always
    # work and be stable to retrieve name of
    # collection in the mongo connection
    # suppress... for reconnect info from mongolite
    coll <- suppressMessages(utils::capture.output(con$con)[1])
    coll <- sub("^.*'(.*)'.*$", "\\1", coll)

    # add collection as element under root
    con <- c(
      con,
      "collection" = coll,
      "ctrDb" = TRUE)

    ## return
    return(structure(
      con,
      class = c("src_mongo", "docdb_src")))
  }

  ## duckdb
  if (inherits(con, "src_duckdb")) {

    if (is.null(con$collection)) {
      stop(
        "Specify parameter 'collection' with a table name, ",
        "such as nodbi::src_duckdb(collection = 'test'), ",
        "for package ctrdata to work.",
        call. = FALSE)
    }

    # check
    if (inherits(try(nodbi::docdb_list(con), silent = TRUE), "try-error")) {
      con <- nodbi::src_duckdb(
        dbdir = attr(attr(con$con, "driver"), "dbdir"),
        collection = con$collection)
    }

    # add database as element under root
    con <- c(
      con,
      "db" = attr(attr(con$con, "driver"), "dbdir"),
      "ctrDb" = TRUE)

    # print warning about nodbi::src_duckdb()
    if (grepl(":memory:", attr(attr(con$con, "driver"), "dbdir"))) {
      warning(
        "Database not persisting\n",
        call. = FALSE, noBreaks. = FALSE)

    }

    ## return
    return(structure(
      con,
      class = c("src_duckdb", "docdb_src")))

  }

  ## unprepared for other nodbi adapters so far
  stop(
    "Please specify in parameter 'con' a database connection ",
    "created with nodbi::src_...() functions. ctrdata currently ",
    "supports src_mongo(), src_sqlite(), src_postgres() and src_duckdb().",
    call. = FALSE)

} # end ctrDb



#' Change type of field based on name of field
#'
#' @param dv a vector of character strings
#'
#' @param fn a field name
#'
#' @returns a typed vector, same length as dv
#'
#' @importFrom lubridate duration ymd_hms dyears dmonths ddays
#' @importFrom rvest html_text read_html
#'
#' @keywords internal
#' @noRd
#'
typeField <- function(dv, fn) {

  # get function name
  ft <- typeVars[[fn]]

  # expand to function
  if (!is.null(ft)) ft <- switch(
    typeVars[[fn]],
    "ctrFactor" = "as.factor(x = x)",
    "ctrInt" = "as.integer(x = x)",
    "ctrIntList" = 'sapply(x, function(i) {i[i == "NA"] <- NA; as.integer(i)}, USE.NAMES = FALSE)',
    "ctrYesNo" = 'sapply(x, function(i) if (is.na(i)) NA else
       switch(i, "Yes" = TRUE, "No" = FALSE, NA), simplify = TRUE, USE.NAMES = FALSE)',
    "ctrFalseTrue" = 'if (is.numeric(x)) as.logical(x) else
       sapply(x, function(i) switch(tolower(i), "true" = TRUE, "false" = FALSE, NA), USE.NAMES = FALSE)',
    "ctrDate" = 'as.Date(x, tryFormats =
       c("%Y-%m-%d", "%Y-%m", "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S",
    "%d/%m/%Y", "%Y-%m-%dT%H:%M:%S%z"))',
    "ctrDateUs" = 'as.Date(x, tryFormats = c("%b %e, %Y", "%Y-%m-%d", "%Y-%m"))',
    "ctrDateTime" = "lubridate::ymd_hms(x)",
    "ctrDifftime" = 'as.difftime(as.numeric(lubridate::duration(
       tolower(x)), units = "days"), units = "days")',
    "ctrDifftimeDays" = "lubridate::ddays(x = as.numeric(x))",
    "ctrDifftimeMonths" = "lubridate::dmonths(x = as.numeric(x))",
    "ctrDifftimeYears" = "lubridate::dyears(x = as.numeric(x))",
    NULL
  )

  # clean up text
  if (is.null(ft)) {

    # - if NA or similar is a string, change to NA
    if (typeof(dv) == "character") dv[grepl("^N/?A$|^ND$", dv)] <- NA_character_

    # - check if any html entities
    htmlEnt <- grepl("&[#a-zA-Z]+;", dv)

    # - convert html entities to text and symbols
    if (any(htmlEnt) && all(sapply(dv, typeof) == "character")) {
      dv[htmlEnt] <-
        lapply(dv[htmlEnt], function(i) {
          sapply(i, function(ii) {
            rvest::html_text(rvest::read_html(charToRaw(i)))
          }, USE.NAMES = FALSE)
        })
    }

    # - check if conversion to numeric works
    if ((typeof(dv) == "character") && any(!is.na(dv))) {
      dvn <- suppressWarnings(as.numeric(gsub(",", "", dv)))
      if (identical(is.na(dv), is.na(dvn))) return(dvn)
    }

    # - collapse unless list structure is heterogenous
    rowN1 <- sapply(dv, function(i) is.null(names(i)))
    rowN2 <- sapply(names(rowN1), function(i) is.null(i))
    rowType <- sapply(dv, function(i) typeof(unlist(i, recursive = FALSE)))
    #
    if (all(rowN1) &&
        all(rowN2) &&
        length(unique(rowN1)) <= 1L &&
        any(rowType == "character")) {
      #
      dv <- sapply(dv, function(i) {
        i <- gsub("\r", "\n", i)
        i <- sub("^Information not present in EudraCT", "", i)
        if (length(i) > 1L) {
          rowI <- paste0(i[!is.na(i)], collapse = " / ")
          if (nchar(rowI)) rowI else NA_character_
        } else {
          if (length(i) && !is.na(i)) i else NA_character_
        }
      })
    }

    # early return
    return(dv)

  }

  # early exit if already date or logical
  if (all(sapply(dv, class) %in%
          c("logical", "Date", "POSIXct", "POSIXt"))) return(dv)

  # record length of input dv for NULL handling
  lenDv <- length(dv)

  # apply typing function, returning
  # if possible a vector over list
  tryCatch(
    expr = {
      dv <- lapply(dv, function(x) {
        # - text mangling
        x <- ifelse(grepl("Information not present in EudraCT", x), NA, x)
        # - give Month Year a Day to allow conversion
        if (grepl("date", fn, ignore.case = TRUE)) {
          x <- sub("^ClinicalTrials.gov processed this data on ", "", x)
          x <- sub("^([a-zA-Z]+) ([0-9]{4})$", "\\1 15, \\2", x)
          x <- sub("^([0-9]{4}-[0-9]{2})$", "\\1-15", x)
        }
        # - apply function to x
        eval(parse(text = ft))
      })
    },
    error = function(e) {
      message(fn, ": returning untyped values, as ",
              ft, " raised an error when applied to ",
              paste0(unlist(dv), collapse = " / "))
      return(dv)
    },
    warning = function(w) {
      message(fn, ": returning untyped values, as ",
              ft, " raised a warning when applied to ",
              paste0(unlist(dv), collapse = " / "))
      return(dv)
    }
  )

  # exceptional case inform user
  if (is.null(dv)) {
    warning(paste0(
      fn, " could not be typed, please report here: ",
      "https://github.com/rfhb/ctrdata/issues"))
    dv <- rep_len(NA, lenDv)
  }

  # make original classes (e.g., Date) reappear
  if (!is.list(dv)) dv <- as.list(dv)
  if (all(sapply(dv, length) <= 1L)) {
    return(do.call("c", dv))
  }

  # return
  return(dv)

} # end typeField



#' Annotate ctrdata function return values
#'
#' @param x object to be annotated
#'
#' @inheritParams ctrDb
#'
#' @keywords internal
#' @noRd
#'
addMetaData <- function(x, con) {

  # add metadata
  attr(x, "ctrdata-dbname")         <- con$db
  attr(x, "ctrdata-collection")     <- con$collection
  attr(x, "ctrdata-dbqueryhistory") <- dbQueryHistory(con)

  # return annotated object
  return(x)

} # end addMetaData



#' ctrMultiDownload
#'
#' @param urls Vector of URLs to be downloaded
#' @param destfiles Vector of local file names into which to download
#' @param progress Set to \code{FALSE} to not print progress bar
#' @param data JSON string
#' @param verbose If \code{TRUE}, print additional information
#' (default \code{FALSE}).
#'
#' @keywords internal
#' @noRd
#'
#' @returns Data frame with columns such as status_code etc
#'
#' @importFrom utils URLencode
#' @importFrom jsonlite fromJSON toJSON validate
#' @importFrom httr2 request req_url req_headers req_throttle req_perform_parallel req_body_json req_user_agent req_perform req_retry req_options resp_status
#' @importFrom dplyr rows_update
#'
ctrMultiDownload <- function(
    urls,
    destfiles,
    data = NULL,
    progress = TRUE,
    verbose = TRUE) {

  # check params
  stopifnot(length(urls) == length(destfiles))
  if (!length(urls)) return(data.frame())

  # helper
  body2json <- function(x) {
    if (is.null(x)) return(NA_character_)
    as.character(jsonlite::toJSON(x, auto_unbox = TRUE))
  }

  # master data frame from all input
  downloadSet <- data.frame(
    "url" = utils::URLencode(urls),
    "destfile" = destfiles,
    "data" = rep.int(NA_character_, length(urls)),
    "toDo" = rep.int(TRUE, times = length(urls)),
    "success" = rep.int(TRUE, times = length(urls)),
    "status_code" = rep.int(NA_integer_, length(urls)),
    "content_type" = rep.int(NA_character_, length(urls))
  )

  # do not again download files that already exist
  # or that do not have an (arbitrary) minimal size.
  # nchar("Request failed.") is 15L
  downloadSet$toDo[
    file.exists(destfiles) &
      (is.na(file.size(destfiles)) |
         file.size(destfiles) > 20L)] <- FALSE

  # mangle data and prepare for request body
  if (!is.null(data)) downloadSet$data <- sapply(
    data, function(i) ifelse(
      is.null(i) || is.na(i) || !jsonlite::validate(i), NA,
      jsonlite::toJSON(jsonlite::fromJSON(
        i, simplifyVector = FALSE), auto_unbox = TRUE)),
    USE.NAMES = FALSE)

  # early exit
  if (!any(downloadSet$toDo)) return(
    downloadSet[!downloadSet$toDo, , drop = FALSE])

  # remove any duplicates
  downloadSet <- unique(downloadSet)

  # construct requests to do
  reqs <- mapply(
    function(u, d, f) {

      r <- httr2::request(
        # start with basic request
        base_url = u) |>
        # add user agent
        httr2::req_user_agent(ctrdataUseragent) |>
        # keep important option 2L for euctr
        httr2::req_options(http_version = 2) |>
        # hard-coded throttling
        httr2::req_throttle(
          # ensures that function never makes more
          # than capacity requests in fill_time_s
          capacity = 40L,
          fill_time_s = 2L
        ) |>
        # include retries in request
        httr2::req_retry(
          max_tries = 10L,
          max_seconds = NULL,
          retry_on_failure = FALSE,
          # adapt to return codes found with some registers
          is_transient = function(resp) httr2::resp_status(
            resp) %in% c(403L, 429L, 503L),
          failure_timeout = 60L
        )

      # conditionally add body
      if (!is.na(d)) r <-
        httr2::req_body_json(req = r, jsonlite::fromJSON(
          d, simplifyVector = FALSE))

      # adding file path
      r$fp <- f

      # return
      return(r)
    },
    u = downloadSet$url[downloadSet$toDo],
    d = downloadSet$data[downloadSet$toDo],
    f = downloadSet$destfile[downloadSet$toDo],
    SIMPLIFY = FALSE,
    USE.NAMES = FALSE
  )

  # randomise
  reqs <- reqs[sample.int(length(reqs))]

  # using a httr2 mock function to handle CDN
  cdnResolve <- function(req) {

    # check specifically for CTIS links
    # to be adjusted for other registers
    if (!grepl("/download$", req$url)) return(NULL)

    # execute request
    message(". ", appendLF = FALSE)
    resp <- httr2::req_perform(req, path = req$fp)

    # inspect response, exit early
    if (resp$status_code != 200L |
        !(!grepl("[.]json$", req$fp) &
          resp$headers$`Content-Type` == "application/json")
    ) return(resp)

    # extract url from downloaded json
    cdnUrl <- gsub('"', "", as.character(
      jqr::jq(file(req$fp), " .url ")))
    unlink(req$fp)
    req <- httr2::req_url(req = req, url = cdnUrl)

    # do download of resolved url and return
    httr2::req_perform(req = req, path = req$fp)

  }

  # parallel requests
  res <- httr2::req_perform_parallel(
    reqs,
    paths = sapply(reqs, "[[", "fp"),
    on_error = "continue",
    progress = TRUE,
    max_active = 10L,
    mock = cdnResolve
  )

  # mangle results data
  res <- lapply(
    res,
    function(r) {
      if (inherits(r, "httr2_failure")) return(
        data.frame(
          "url" = r$request$url,
          "destfile" = NA_character_, # TODO
          "data" = body2json(r$request$body$data),
          "toDo" = TRUE,
          "success" = FALSE,
          "status_code" = NA_integer_,
          "content_type" = NA_character_
        )
      )
      if (inherits(r, "httr2_error")) return(
        data.frame(
          "url" = r$request$url,
          "destfile" = as.character(r$resp$body),
          "data" = body2json(r$request$body$data),
          "toDo" = TRUE,
          "success" = FALSE,
          "status_code" = r$status,
          "content_type" = NA_character_
        )
      ) # else
      return(
        data.frame(
          "url" = r[["url"]],
          "destfile" = as.character(r[["body"]]),
          "data" = body2json(r$request$body$data),
          "toDo" = FALSE,
          "success" = TRUE,
          "status_code" = r[["status_code"]],
          "content_type" = r[["headers"]]$`content-type`
        )
      )})
  res <- as.data.frame(do.call(rbind, res))

  # update input, mind row order
  downloadSet <- dplyr::rows_update(
    downloadSet,
    res[, c("url", "destfile", "data", "toDo", "success",
            "status_code", "content_type"), drop = FALSE],
    by = "destfile"
  )

  # user info and stop
  if (inherits(downloadSet, "try-error")) {
    stop("Download failed; last error: ", class(downloadSet), call. = FALSE)
  }

  # only check success because this is filled initially by !toDo;
  # check status_code where it is NA as this is the latest action;
  # status_code is NA where this iteration determined it as isCdn
  downloadSet$toDo <- is.na(downloadSet$success) |
    # OK, Partial Content, Not Found, Range Not Satisfiable
    !(downloadSet$status_code %in% c(NA, 200L, 206L, 404L, 416L))

  # remove any files from failed downloads
  unlink(downloadSet$destfile[downloadSet$status_code %in% c(
    403L, 404L, 416L, 429L)])

  # finalise
  if (any(downloadSet$toDo)) {

    # remove any files from failed downloads
    unlink(downloadSet$destfile[downloadSet$toDo])

    message("Download failed for: status code / url(s):")
    apply(downloadSet[downloadSet$toDo, c("status_code", "url"), drop = FALSE],
          1, function(r) message(r[1], " / ", r[2], "\n", appendLF = FALSE))

  }

  # if previously downloaded, success may not reflect on disk;
  # thus safeguard by unsetting success if file does not exist
  downloadSet$success <- file.exists(downloadSet$destfile)

  # return
  return(downloadSet[!downloadSet$toDo, , drop = FALSE])

} # end ctrMultiDownload



#' ctrTempDir
#'
#' create empty temporary directory on localhost for
#' downloading from register into temporary directory
#'
#' @returns path to existing directory
#'
#' @keywords internal
#' @noRd
#'
ctrTempDir <- function(verbose = FALSE) {

  # verbose has the purpose to persist the temporary folder
  # beyond sessions, e.g. for debugging or for keeping the
  # originally downloaded files.

  # from ctrdata 1.16.0.9000, only downloaded files are kept;
  # calculated files are deleted after ctrLoadQueryIntoDb()

  # create temporary folder, unless
  # a folder is specified as option
  tempDir <- getOption(
    "ctrdata.tempdir",
    default = tempfile(pattern = "ctrDATA"))

  dir.create(tempDir, showWarnings = FALSE, recursive = TRUE)

  tempDir <- normalizePath(tempDir, mustWork = TRUE)
  keepDir <- file.path(tempDir, ".keepDir")

  options(ctrdata.tempdir = tempDir)

  # empty file to indicate if to keep or not folder
  if (verbose) file.create(keepDir) else unlink(keepDir)

  # see zzz.R for reg.finalizer()

  # inform user
  if (verbose) message(
    "\nDEBUG: ", tempDir,
    "\nUsing any previously downloaded files of the ",
    length(dir(path = tempDir, pattern = "^[.]")),
    " files existing in this folder.\n")

  # return
  return(tempDir)

}



#' ctrDocsDownload
#'
#' download documents
#'
#' @param dlFiles data frame with columns _id, filename, url
#' @param documents.path parameter from parent call
#' @param documents.regexp parameter from parent call
#' @param multiplex use http/2 or not
#' @param verbose print parameter from parent call
#'
#' @returns number of documents
#'
#' @keywords internal
#' @noRd
#'
ctrDocsDownload <- function(
    dlFiles,
    documents.path,
    documents.regexp,
    multiplex = TRUE,
    verbose) {

  # check and create directory
  createdDir <- try(
    dir.create(documents.path, recursive = TRUE, showWarnings = FALSE),
    silent = TRUE)

  # early return
  if (inherits(createdDir, "try-errror")) {

    warning("Directory could not be created for 'documents.path' ",
            documents.path, ", cannot download files", call. = FALSE)

    return(0L)
  }

  # continue after if
  message("- Downloading documents into 'documents.path' = ", documents.path)

  # canonical directory path
  documents.path <- normalizePath(documents.path, mustWork = TRUE)
  if (createdDir) message("- Created directory ", documents.path)

  # add destination file directory path
  dlFiles$filepath <- file.path(documents.path, dlFiles$`_id`)

  # create full filepathname
  dlFiles$filepathname <- file.path(dlFiles$filepath, dlFiles$filename)

  # check if destination document exists
  dlFiles$fileexists <- file.exists(dlFiles$filepathname) &
    file.size(dlFiles$filepathname) > 20L

  # placeholder or files
  if (is.null(documents.regexp)) {

    message("- Creating empty document placeholders (max. ", nrow(dlFiles), ")")

    # create subdirectories by trial
    sapply(
      unique(dlFiles$filepath), function(i) if (!dir.exists(i))
        dir.create(i, showWarnings = FALSE, recursive = TRUE)
    )

    # create empty files
    filesCount <-
      sapply(
        dlFiles$filepathname,
        function(i) if (!file.exists(i))
          file.create(i, showWarnings = TRUE),
        USE.NAMES = FALSE)

    filesCount <- sum(unlist(filesCount), na.rm = TRUE)

  } else {

    # inform
    message("- Applying 'documents.regexp' to ",
            nrow(dlFiles), " missing documents")

    # apply regexp
    dlFiles <- dlFiles[
      grepl(documents.regexp, dlFiles$filename, ignore.case = TRUE), ,
      drop = FALSE]

    # documents download
    message("- Creating subfolder for each trial")

    # create subdirectories by trial
    sapply(
      unique(dlFiles$filepath), function(i) if (!dir.exists(i))
        dir.create(i, showWarnings = FALSE, recursive = TRUE)
    )

    # inform
    message("- Downloading ",
            nrow(dlFiles[!dlFiles$fileexists, , drop = FALSE]),
            " missing documents " , appendLF = FALSE)

    # check and remove duplicate filepathname rows
    duplicateUrls <- duplicated(tolower(dlFiles$url))
    if (any(duplicateUrls)) {
      message(
        "(excluding ", sum(duplicateUrls), " documents ",
        "from duplicate URLs, e.g. for trial ids ",
        paste0(
          sample(
            dlFiles$`_id`[duplicateUrls],
            min(length(dlFiles$`_id`[duplicateUrls]), 3L)),
          collapse = ", "),
        ") ", appendLF = FALSE)
      dlFiles <- dlFiles[!duplicateUrls, , drop = FALSE]
    }

    duplicateFiles <- duplicated(tolower(dlFiles$filepathname))
    if (any(duplicateFiles)) {
      message(
        "(excluding ", sum(duplicateFiles), " documents ",
        "with duplicate names, e.g. ",
        paste0(
          sample(
            dlFiles$filepathname[duplicateFiles],
            min(length(dlFiles$filepathname[duplicateFiles]), 3L)),
          collapse = ", "),
        ") ", appendLF = FALSE)
      dlFiles <- dlFiles[!duplicateFiles, , drop = FALSE]
    }

    # do download
    filesCount <- ctrMultiDownload(
      urls = dlFiles$url[!dlFiles$fileexists],
      destfiles = dlFiles$filepathname[!dlFiles$fileexists],
      verbose = verbose
    )
    message("\n\r", appendLF = FALSE)

    # check results
    filesCount <- ifelse(
      !nrow(filesCount), 0L,
      sum(filesCount$success, na.rm = TRUE))

  } # is.null(documents.regexp)

  # inform user
  message(sprintf(paste0(
    "= Newly saved %i ",
    ifelse(is.null(documents.regexp), "placeholder ", ""),
    "document(s) for %i trial(s); ",
    "%i of such document(s) for %i trial(s) already existed in %s"),
    filesCount,
    length(unique(dlFiles$`_id`)),
    sum(dlFiles$fileexists),
    length(unique(dlFiles$`_id`[dlFiles$fileexists])),
    documents.path
  ))

  # return
  return(filesCount)

} # end ctrDocsDownload



#' initTranformers
#'
#' https://cran.r-project.org/web/packages/V8/vignettes/npm.html
#'
#' @importFrom V8 v8 JS
#' @importFrom readr read_file
#'
#' @keywords internal
#' @noRd
#'
initTranformers <- function() {

  # prepare V8, see ./inst/js/
  ct <- V8::v8()

  # get javascript for xml to ndjson
  ct$source(system.file("js/bundle.js", package = "ctrdata"))

  # function for xml to ndjson conversion
  ct$assign(
    "parsexml",
    # https://www.npmjs.com/package/xml2js#options
    V8::JS("function(xml, opts) {injs.parseString(xml, opts, function (err, result)
           { out = result; }); return JSON.stringify(out); }"))

  # native javascript function for euctr txt to ndjson conversion
  ct$eval(readr::read_file(system.file("js/euctr2ndjson.js", package = "ctrdata")))

  # assign into package private environment, see zzz.R
  assign("ct", ct, envir = .ctrdataenv)

}



#' dbCTRLoadJSONFiles
#'
#' @param dir Path to local directory with JSON files
#' from downloading and converting
#'
#' @importFrom jsonlite validate
#' @importFrom nodbi docdb_create
#' @importFrom stats na.omit
#' @importFrom jqr jq
#'
#' @inheritParams ctrDb
#'
#' @inheritParams ctrLoadQueryIntoDb
#'
#' @returns List with elements n (number of imported trials),
#' _id's of successfully imported trials and
#' _id's of trials that failed to import
#'
#' @keywords internal
#' @noRd
#'
dbCTRLoadJSONFiles <- function(dir, con, verbose) {

  # find files
  tempFiles <- dir(
    path = dir,
    pattern = "^.+_trials_.*.ndjson$",
    full.names = TRUE)

  # check
  if (!length(tempFiles)) stop(
    "no .+_trials_.*.ndjson files found in ", dir)

  # initialise counters
  fc <- length(tempFiles)

  ## iterate ndjson files -----------------------------------------------------------------

  retimp <- lapply(
    X = seq_along(tempFiles),
    function(tempFile) {

      ## initialise output
      idSuccess <- NULL
      idFailed <- NULL
      idAnnotation <- NULL
      nImported <- 0
      ids <- NULL

      ## get _id's

      # inform user
      message(
        "JSON file #: ", tempFile, " / ", fc,
        "                               \r",
        appendLF = FALSE)

      # get all ids using jq, safet than regex
      ids <- gsub("\"", "", as.vector(
        jqr::jq(file(tempFiles[tempFile]), " ._id ")))

      ## existing data -------------------------------------------------

      # get ids
      dbIds <- try({
        nodbi::docdb_query(
          src = con,
          key = con$collection,
          query = "{}",
          fields = '{"_id": 1}')
      }, silent = TRUE)

      # get annotations
      annoDf <- try({
        nodbi::docdb_query(
          src = con,
          key = con$collection,
          query = paste0(
            '{"_id": {"$in": [',
            paste0('"', ids, '"', collapse = ","), "]}}"),
          fields = '{"_id": 1, "annotation": 1}')
      }, silent = TRUE)

      if (!inherits(annoDf, "try-error") && length(annoDf[["_id"]])) {
        annoDf <- merge(
          data.frame("_id" = ids, check.names = FALSE, stringsAsFactors = FALSE),
          annoDf, all.x = TRUE) # only need input ids, do not need all.y
      } else {
        annoDf <-
          data.frame("_id" = ids, check.names = FALSE, stringsAsFactors = FALSE)
      }

      if (is.null(annoDf[["annotation"]]))
        annoDf[["annotation"]] <- rep(NA, length(ids))

      ## import ----------------------------------------------------

      # separation into delete and create, or update
      # because update may be a frequent use case and
      # using this may accelerate and may avoid table
      # records that remain being marked for deletion

      if (verbose) message("DBUG: ", tempFiles[tempFile])

      if (inherits(dbIds, "try-error") ||
          is.null(dbIds) ||
          length(setdiff(ids, dbIds[["_id"]])) > 0L) {

        # only if relevant records exist
        if (!inherits(dbIds, "try-error") &&
            !is.null(dbIds) &&
            length(intersect(ids, dbIds[["_id"]])) > 0L) {

          # delete
          res <- try({
            nodbi::docdb_delete(
              src = con,
              key = con$collection,
              query = paste0(
                '{"_id": {"$in": [',
                paste0('"', intersect(ids, dbIds[["_id"]]),
                       '"', collapse = ","), ']}}'))
          }, silent = TRUE)

          # early exit
          if (inherits(res, "try-error") &&
              grepl("read.?only", res)) stop(
                "Database is read-only, cannot load trial records.\n",
                "Change database connection in parameter 'con = ...'",
                call. = FALSE
              )
        }

        # create
        res <- try({
          suppressWarnings(
            suppressMessages(
              nodbi::docdb_create(
                src = con,
                key = con$collection,
                value = tempFiles[tempFile]
              )))}, silent = TRUE)

      } else {

        # update
        res <- try({
          suppressWarnings(
            suppressMessages(
              nodbi::docdb_update(
                src = con,
                key = con$collection,
                query = "{}",
                value = tempFiles[tempFile]
              )))}, silent = TRUE)

      }

      # early exit
      if (inherits(res, "try-error") &&
          grepl("read.?only", res)) stop(
            "Database is read-only, cannot load trial records.\n",
            "Change database connection in parameter 'con = ...'",
            call. = FALSE
          )

      # handle res and generate return values
      if (inherits(res, "try-error") ||
          res == 0L ||
          res != nrow(annoDf)) {

        # if res failed, step into line by line mode
        fdLines <- file(tempFiles[tempFile], open = "rt", blocking = TRUE)

        while (TRUE) {

          tmpOneLine <- readLines(con = fdLines, n = 1L, warn = FALSE)
          if (length(tmpOneLine) == 0L || !nchar(tmpOneLine)) break
          id <- sub(".*\"_id\":[ ]*\"(.*?)\".*", "\\1", tmpOneLine)
          res <- suppressWarnings(suppressMessages(nodbi::docdb_create(
            src = con, key = con$collection, value = paste0("[", tmpOneLine, "]"))))

          nImported <- nImported + res
          if (res) idSuccess <- c(idSuccess, id)
          if (!res) idFailed <- c(idFailed, id)
          if (!res) warning("Failed to load: ", id, call. = FALSE)
          if (res) idAnnotation <- c(idAnnotation, annoDf[
            annoDf[["_id"]] == id, "annotation", drop = TRUE][1])

        }
        close(fdLines)

      } else {

        nImported <- nImported + res
        idSuccess <- c(idSuccess, annoDf[, "_id", drop = TRUE])
        idAnnotation <- c(idAnnotation, annoDf[, "annotation", drop = TRUE])

      }

      # return values for lapply
      list(
        success = idSuccess,
        failed = idFailed,
        n = nImported,
        annotations = idAnnotation)

    }) # sapply tempFiles

  # prepare return values, n is successful only
  n <- sum(sapply(retimp, "[[", "n"), na.rm = TRUE)
  success <- as.vector(unlist(sapply(retimp, "[[", "success")))
  failed <- as.vector(unlist(sapply(retimp, "[[", "failed")))
  annotations <- as.vector(unlist(sapply(retimp, "[[", "annotations")))

  # return
  return(list(
    n = n,
    success = success,
    failed = failed,
    annotations = annotations))

} # end dbCTRLoadJSONFiles


#' dbQueryAnnotateRecords
#'
#' @inheritParams ctrLoadQueryIntoDb
#'
#' @keywords internal
#' @noRd
#'
#' @importFrom jsonlite toJSON
#' @importFrom nodbi docdb_update
#'
dbCTRAnnotateQueryRecords <- function(
    recordnumbers,
    recordannotations,
    annotation.text,
    annotation.mode,
    con,
    verbose) {

  # debug
  if (verbose) message("Annotating records...")
  if (verbose) message(recordnumbers)
  if (verbose) message(annotation.mode)

  # df from existing annotations
  if (is.null(recordannotations)) recordannotations <- ""
  annotations <- data.frame(
    "_id" = recordnumbers,
    "annotation" = recordannotations,
    stringsAsFactors = FALSE,
    check.names = FALSE)

  # check if dataframe is as expected: columns _id and annotation
  # dataframe could be empty if _ids not yet imported
  if (nrow(annotations) == 0) {
    annotations <- data.frame("_id" = recordnumbers,
                              "annotation" = "",
                              stringsAsFactors = FALSE,
                              check.names = FALSE)
  }

  # modify the annotations
  annotations[["annotation"]] <- trimws(
    switch(
      annotation.mode,
      "replace" = paste0(annotation.text),
      "prepend" = paste0(annotation.text, " ", ifelse(
        is.na(annotations[["annotation"]]), "", annotations[["annotation"]])),
      paste0(ifelse(is.na(annotations[["annotation"]]), "", annotations[["annotation"]]),
             " ", annotation.text)
    ))

  # ensure columns including order
  annotations <- annotations[, c("_id", "annotation"), drop = FALSE]

  # debug
  if (verbose) message(annotations)

  # update the database
  result <- nodbi::docdb_update(
    src = con,
    key = con$collection,
    value = annotations,
    query = "{}")

  # inform user
  message("= Annotated retrieved records (", result, " records)")

} # end dbCTRAnnotateQueryRecords


#' dbCTRUpdateQueryHistory
#'
#' @inheritParams ctrLoadQueryIntoDb
#'
#' @keywords internal
#' @noRd
#'
#' @importFrom jsonlite toJSON
#' @importFrom nodbi docdb_delete docdb_create docdb_update
#'
dbCTRUpdateQueryHistory <- function(
    register,
    queryterm,
    recordnumber,
    con,
    verbose) {

  ## check database connection
  con <- ctrDb(con)

  # debug
  if (verbose) message("Running dbCTRUpdateQueryHistory...")

  # compose history entry from current search
  # default for format methods is "%Y-%m-%d %H:%M:%S"
  newHist <- data.frame(
    "query-timestamp" = format(Sys.time(), "%Y-%m-%d %H:%M:%S"),
    "query-register"  = register,
    "query-records"   = recordnumber,
    "query-term"      = queryterm,
    check.names = FALSE,
    stringsAsFactors = FALSE)

  # retrieve existing history data
  hist <- dbQueryHistory(con, verbose)

  # append current search
  # default for format methods is "%Y-%m-%d %H:%M:%S"
  if (!is.null(hist) &&
      nrow(hist)) {

    newHist <- rbind(hist, newHist)
    newHist <- list("queries" = newHist)

    res <- suppressMessages(
      nodbi::docdb_update(
        src = con,
        key = con$collection,
        value = newHist,
        query = '{"_id": "meta-info"}'
      ))

  } else {

    # to list
    newHist <- list(list(
      "_id" = "meta-info",
      "queries" = newHist))

    # write new document
    res <- suppressMessages(
      nodbi::docdb_create(
        src = con,
        key = con$collection,
        value = newHist
      ))
  }

  # inform user
  if (res == 1L) {
    message('Updated history ("meta-info" in "', con$collection, '")')
  } else {
    warning('Could not update history ("meta-info" in "', con$collection,
            '")', call. = FALSE, immediate. = FALSE)
  }
} # end dbCTRUpdateQueryHistory


#' dfOrTibble
#'
#' @returns tibble or data frame, depending on loaded packages
#'
#' @param df data frame input
#'
#' @keywords internal
#' @noRd
#'
dfOrTibble <- function(df) {

  if (any(sapply(
    .packages(), function(i)
      any(i == c("tibble", "magrittr", "tidyr", "dplyr")))
  )) {

    return(tibble::as_tibble(df))

  } else {

    return(df)

  }

} # end dfOrTibble


#' fctChkFlds
#'
#' Calls for its side effect to stop if arguments
#' are not conforming to expectations
#'
#' @returns df with flds columns
#'
#' @param df data frame
#' @param flds fields needed for a function
#'
#' @keywords internal
#' @noRd
#'
fctChkFlds <- function(df, flds) {

  flds <- unique(unlist(flds, use.names = FALSE))

  nms <- names(df)

  fldsM <- flds[!sapply(flds, function(i) any(i == nms))]

  if (length(fldsM)) stop(
    "Fields missing in 'df':\n", paste0(fldsM, "\n"),
    call. = FALSE)

  return(df[, c("_id", flds), drop = FALSE])

}

Try the ctrdata package in your browser

Any scripts or data that you put into this service are public.

ctrdata documentation built on Jan. 18, 2026, 9:07 a.m.