### ctrdata package
#' Find names of fields in the database collection
#'
#' Given part of the name of a field of interest to the user, this
#' function returns the full field names used in records that were
#' previously loaded into a collection
#' (using \link{ctrLoadQueryIntoDb}). Only names of fields that have
#' a value in the collection can be returned.
#' Set \code{sample = FALSE} to force screening all records in the
#' collection for field names, see below.
#'
#' The full names of child fields are returned in dot notation (e.g.,
#' \code{clinical_results.outcome_list.outcome.measure.class_list.class.title})
#' In addition, names of parent fields (e.g.,
#' \code{clinical_results}) are returned.
#' Data in parent fields is typically complex (nested), see
#' \link{dfTrials2Long} for easily handling it.
#' For field definitions of the registers, see
#' "Definition" in \link{ctrdata-registers}.
#' Note: When \code{dbFindFields} is first called after
#' \link{ctrLoadQueryIntoDb}, it will take a moment.
#'
#' @param namepart A character string (can be a regular expression,
#' including Perl-style) to be searched among all field names (keys)
#' in the collection, case-insensitive. The default `".*"` lists all fields.
#'
#' @param sample If \code{TRUE} (default), uses a sample of only 5 trial
#' records per register to identify fields, to rapidly return a possibly
#' incomplete set of field names.
#' If \code{FALSE}, uses all trial records in the collection, which will
#' take more time with more trials but ensures to returns all names of
#' all fields in the collection.
#'
#' @param verbose If \code{TRUE}, prints additional information
#' (default \code{FALSE}).
#'
#' @importFrom nodbi docdb_query
#'
#' @inheritParams ctrDb
#'
#' @return Vector of strings with full names of field(s) found,
#' ordered by register and alphabet, see examples. Names of the vector
#' are the names of the register holding the respective fields. The field
#' names can be fed into \link{dbGetFieldsIntoDf} to extract the
#' data for the field(s) from the collection into a data frame.
#'
#' @export
#'
#' @examples
#'
#' dbc <- nodbi::src_sqlite(
#' dbname = system.file("extdata", "demo.sqlite", package = "ctrdata"),
#' collection = "my_trials"
#' )
#'
#' dbFindFields(namepart = "date", con = dbc)[1:5]
#'
#' # view all 3350+ fields from all registers:
#'
#' allFields <- dbFindFields(con = dbc, sample = FALSE)
#'
#' if (interactive()) View(data.frame(
#' register = names(allFields),
#' field = allFields))
#'
dbFindFields <- function(namepart = ".*",
con,
sample = TRUE,
verbose = FALSE) {
## sanity checks
if (!is.atomic(namepart)) stop("'namepart' should be atomic.", call. = FALSE)
if (length(namepart) > 1) stop("'namepart' should have one element.", call. = FALSE)
if (namepart == "") stop("Empty 'namepart' parameter.", call. = FALSE)
## check database connection
if (is.null(con$ctrDb)) con <- ctrDb(con = con)
## check if cache environment has entry for the database
keyslist <- ctrCache(
xname = paste0("keyslist_", con$db, "/", con$collection, "_timestamp")
)
## get sample reference value
cacheSmpl <- ctrCache(
xname = paste0("keyslist_", con$db, "/", con$collection, "_sample")
)
## get cache reference value
cacheRef <- as.character(rev(unlist(try(nodbi::docdb_query(
src = con, key = con$collection, query = '{"_id": "meta-info"}',
fields = '{"queries.query-timestamp": 1, "_id": 0}'
), silent = TRUE)))[1])
## invalidate cache
cacheOutdated <- is.null(keyslist) || (cacheRef != keyslist) || (!sample & cacheSmpl)
## get keyslist
if (cacheOutdated) {
# inform user
message("Finding fields in database collection", appendLF = FALSE)
queries <- paste0('{"ctrname": "', sort(registerList), '"}')
names(queries) <- sort(registerList)
# queries by sample
if (sample) {
# adding query for EUCTR results data
queries <- c('{"trialInformation": {"$regex": ".+"}}',
'{"clinical_results": {"$regex": ".+"}}', queries)
names(queries)[1:2] <- c("EUCTR", "CTGOV")
n <- 5L
message(" (sampling ", n, " trial records) ", appendLF = FALSE)
} else {
message(" (may take some time) ", appendLF = FALSE)
n <- -1L
}
# get names
keyslist <- NULL
# iterate over registers
for (i in seq_along(queries)) {
message(" . ", appendLF = FALSE)
# iterate over query items
# get fields from register
keysAdd <- nodbi::docdb_query(
src = con,
key = con$collection,
query = queries[i],
listfields = TRUE,
limit = n
)
# give keys name of register
if (!is.null(keysAdd)) names(keysAdd) <- rep(names(queries)[i], length(keysAdd))
# accumulate keys
keyslist <- c(keyslist, keysAdd)
}
message()
# clean empty entries and exclude _id for consistency
keyslist <- keyslist[keyslist != "_id" & keyslist != ""]
## store keyslist to environment (cache)
if (length(keyslist) > 1) {
ctrCache(
xname = paste0("keyslist_", con$db, "/", con$collection),
xvalue = keyslist
)
ctrCache(
xname = paste0("keyslist_", con$db, "/", con$collection, "_timestamp"),
xvalue = cacheRef
)
ctrCache(
xname = paste0("keyslist_", con$db, "/", con$collection, "_sample"),
xvalue = sample
)
message("Field names cached for this session.")
}
} else {
message("Using cache of fields. ")
if (cacheSmpl) message(
"Sample used, specify 'sample = FALSE' to digest all records.", "")
keyslist <- ctrCache(
xname = paste0("keyslist_", con$db, "/", con$collection),
verbose = verbose
)
} # generate keyslist
## inform user of unexpected situation
if ((length(keyslist) == 0) || all(keyslist == "")) {
warning("No keys could be extracted, please check database ",
"and collection: ", con$db, "/", con$collection,
call. = FALSE
)
}
## now do the actual search and find for key name parts
fields <- keyslist[grepl(
pattern = namepart, x = keyslist,
ignore.case = TRUE, perl = TRUE
)]
## to remove duplicates
fieldsDf <- unique(data.frame(
register = names(fields),
field = fields,
stringsAsFactors = FALSE))
fieldsTbl <- table(fieldsDf[["register"]])
# user info
if (verbose) message(
paste0(names(fieldsTbl), collapse = " / "), ": ",
paste0(fieldsTbl, collapse = " / "))
fields <- fieldsDf[["field"]]
names(fields) <- fieldsDf[["register"]]
# user info
if (verbose) message("Found ", length(fields), " fields.")
# return value if no fields found
if (!length(fields)) fields <- ""
# return the match(es)
return(fields)
} # end dbFindFields
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.