Nothing
### ctrdata package
#' Create data frame of specified fields from database collection
#'
#' Fields in the collection are retrieved from all records into a data
#' frame (or tibble). Within a given trial record, a fields can be
#' hierarchical and structured, that is, nested.
#' Th function uses the field names to appropriately type the values
#' that it returns, harmonising original values (e.g. "Information not present
#' in EudraCT" to `NA`, "Yes" to `TRUE`, "false" to `FALSE`,
#' date strings to dates or time differences, number strings to numbers).
#' The function simplifies the structure of nested data and
#' may concatenate multiple strings in a field using " / " (see example)
#' and may have widened the returned data frame with additional columns that
#' were recursively expanded from simply nested data (e.g., "externalRefs"
#' to columns "externalRefs.doi", "externalRefs.eudraCTNumber" etc.).
#' For an alternative way for handling the complex nested data, see
#' \link{dfTrials2Long} followed by \link{dfName2Value} for extracting
#' the sought variable(s).
#'
#' @param fields Vector of one or more strings, with names of sought fields.
#' See function \link{dbFindFields} for how to find names of fields.
#' Dot path notation ("field.subfield") without indices is supported.
#' If compatibility with `nodbi::src_postgres()` is needed, specify fewer
#' than 50 fields, consider also using parent fields e.g., `"a.b"` instead
#' of `c("a.b.c.d", "a.b.c.e")`, accessing sought fields with
#' \link{dfTrials2Long} followed by \link{dfName2Value} or other R functions.
#'
#' @param verbose Printing additional information if set to \code{TRUE};
#' (default \code{FALSE}).
#'
#' @inheritParams ctrDb
#'
#' @param ... Do not use (captures deprecated parameter \code{stopifnodata})
#'
#' @return A data frame (or tibble, if \code{tibble} is loaded)
#' with columns corresponding to the sought fields.
#' A column for the records' `_id` will always be included.
#' The maximum number of rows of the returned data frame is equal to,
#' or less than the number of trial records in the database collection.
#'
#' @importFrom nodbi docdb_query
#' @importFrom stats na.omit
#' @importFrom tibble as_tibble
#'
#' @export
#'
#' @examples
#'
#' dbc <- nodbi::src_sqlite(
#' dbname = system.file("extdata", "demo.sqlite", package = "ctrdata"),
#' collection = "my_trials")
#'
#' # get fields that are nested within another field
#' # and can have multiple values with the nested field
#' dbGetFieldsIntoDf(
#' fields = "b1_sponsor.b31_and_b32_status_of_the_sponsor",
#' con = dbc)
#'
#' # fields that are lists of string values are
#' # returned by concatenating values with a slash
#' dbGetFieldsIntoDf(
#' fields = "keyword",
#' con = dbc)
#'
dbGetFieldsIntoDf <- function(fields = "",
con, verbose = FALSE, ...) {
# check fields
if (!is.vector(fields) ||
!all(class(fields) %in% "character")) {
stop("Input should be a vector of strings of field names.", call. = FALSE)
}
# deprecated
params <- list(...)
if (!is.null(params[["stopifnodata"]])) warning(
'Parameter "stopifnodata" is deprecated.'
)
# remove NA, NULL if included in fields
fields <- fields[!is.null(fields) & !is.na(fields)]
# remove _id if included in fields
fields <- unique(fields["_id" != fields])
# check if valid fields
if (any(fields == "") || (length(fields) == 0)) {
stop("'fields' contains empty elements; ",
"please provide a vector of strings of field names. ",
"Function dbFindFields() can be used to find field names. ",
call. = FALSE)
}
if (length(fields) > 49L) {
message(
"If compatibility with nodbi::src_postgres() is needed, specify fewer ",
"than 50 (was: ", length(fields), ") fields; parent fields, ",
'e.g., "a.b" instead of c("a.b.c.d", "a.b.c.e"), can also be used ',
"and sought fields can be extracted with dfTrials2Long() followed by ",
"dfName2Value() or other R functions.")
}
# check database connection
if (is.null(con$ctrDb)) con <- ctrDb(con = con)
# get the data
dfi <- nodbi::docdb_query(
src = con,
key = con$collection,
query = '{}',
fields = paste0('{"', paste0(fields, collapse = '": 1, "'), '": 1}')
)
# early exit
if (is.null(dfi) || !ncol(dfi)) stop(
"No records with values for any specified field."
)
# user info
if (verbose) message("Retrieved ", nrow(dfi), " rows of data.")
# warn
notFound <- setdiff(fields, names(dfi))
if (length(notFound)) warning("No data could be extracted for '",
paste0(notFound, collapse = "', '"), "'.")
# recursively widen results if a column is a data frame
# by adding the data frame's columns as result columns.
# this is a convenience function and historically users
# may expect such behaviour from package ctrdata.
doneNmnt <- NULL
while (TRUE) {
nm <- names(dfi)
nt <- sapply(dfi, is.data.frame)
if (!any(nt)) break
doNmnt <- nm[nt]
if (!length(doNmnt) || setequal(doNmnt, doneNmnt)) break
for (colN in doNmnt) {
expDf <- dfi[[colN]]
if (!is.data.frame(expDf) || !ncol(expDf) || !nrow(expDf)) next
names(expDf) <- paste0(colN, ".", names(expDf))
# handle any data frames within expDf
colK <- sapply(expDf, function(c) ifelse(is.null(ncol(c)) || ncol(c) > 0L, 1L, 0L))
colK <- names(colK)[sapply(colK, function(c) !is.integer(c) || c > 0L)]
if (length(colK)) expDf <- expDf[, colK, drop = FALSE]
# merge with dfi
colD <- na.omit(match(c(colN, names(expDf)), names(dfi)))
dfi <- cbind(dfi[, -colD, drop = FALSE], expDf)
}
doneNmnt <- doNmnt
} # while
# for date time conversion
lct <- Sys.getlocale("LC_TIME")
on.exit(Sys.setlocale("LC_TIME", lct), add = TRUE)
Sys.setlocale("LC_TIME", "C")
# iterates over columns for typing
nm <- names(dfi)
for (i in seq_len(ncol(dfi))) {
if (nm[i] == "_id") next
if (inherits(dfi[[i]], "data.frame") && !ncol(dfi[[i]])) next
# simplify and replace NULL with NA
dfi[[i]][!sapply(dfi[[i]], length)] <- NA
# type column (NOTE this should not be a data frame because
# the extraction would result in a data frame and this would
# break the typing approach, thus the recursion above)
dfi[[i]] <- typeField(dfi[[i]], nm[i])
if (verbose) message(nm[i], " of type ", typeof(dfi[[i]]), ", ", appendLF = FALSE)
}
# remove rows with only NAs
naout <- is.na(dfi)
nc <- length(setdiff(attr(naout, "dimnames")[[2]], "_id"))
dfi <- dfi[rowSums(naout) < nc, , drop = FALSE]
# sort, add meta data
dfi <- addMetaData(
dfi[order(dfi[["_id"]]), , drop = FALSE],
con = con)
# return
if (any("tibble" == .packages())) return(tibble::as_tibble(dfi))
return(dfi)
} # end dbGetFieldsIntoDf
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.