Nothing
# Copyright 2023 DARWIN EU (C)
#
# This file is part of omopgenerics
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' 'summarised_results' object constructor
#'
#' @param x Table.
#' @param settings Settings for the summarised_result object.
#'
#' @return A `summarised_result` object
#'
#' @export
#'
#' @examples
#' library(dplyr)
#' library(omopgenerics)
#'
#' x <- tibble(
#' "result_id" = 1L,
#' "cdm_name" = "cprd",
#' "group_name" = "cohort_name",
#' "group_level" = "acetaminophen",
#' "strata_name" = "sex &&& age_group",
#' "strata_level" = c("male &&& <40", "male &&& >=40"),
#' "variable_name" = "number_subjects",
#' "variable_level" = NA_character_,
#' "estimate_name" = "count",
#' "estimate_type" = "integer",
#' "estimate_value" = c("5", "15"),
#' "additional_name" = "overall",
#' "additional_level" = "overall"
#' ) |>
#' newSummarisedResult()
#'
#' x
#' settings(x)
#' summary(x)
#'
#' x <- tibble(
#' "result_id" = 1L,
#' "cdm_name" = "cprd",
#' "group_name" = "cohort_name",
#' "group_level" = "acetaminophen",
#' "strata_name" = "sex &&& age_group",
#' "strata_level" = c("male &&& <40", "male &&& >=40"),
#' "variable_name" = "number_subjects",
#' "variable_level" = NA_character_,
#' "estimate_name" = "count",
#' "estimate_type" = "integer",
#' "estimate_value" = c("5", "15"),
#' "additional_name" = "overall",
#' "additional_level" = "overall"
#' ) |>
#' newSummarisedResult(settings = tibble(
#' result_id = 1L, result_type = "custom_summary", mock = TRUE, value = 5
#' ))
#'
#' x
#' settings(x)
#' summary(x)
#'
newSummarisedResult <- function(x, settings = attr(x, "settings")) {
# inital input check
assertTable(x = x, class = "data.frame", columns = resultColumns("summarised_result"), allowExtraColumns = TRUE)
assertTable(x = settings, class = "data.frame", null = TRUE, columns = "result_id", allowExtraColumns = TRUE)
# constructor
x <- constructSummarisedResult(x, settings)
# validate
x <- validateSummarisedResult(x)
return(x)
}
constructSummarisedResult <- function(x, settings) {
x <- dplyr::as_tibble(x) |>
dplyr::mutate(result_id = as.integer(.data$result_id))
settings <- createSettings(x, settings) |>
dplyr::arrange(.data$result_id)
x <- x |>
dplyr::select(dplyr::all_of(resultColumns(table = "summarised_result"))) |>
dplyr::filter(.data$variable_name != "settings") |>
dplyr::arrange(.data$result_id)
structure(.Data = x, settings = settings) |>
addClass(c("summarised_result", "omop_result"))
}
validateSummarisedResult <- function(x,
call = parent.frame()) {
# settings
validateResultSettings(attr(x, "settings"), call = call)
# sr
validateSummarisedResultTable(x, call = call)
}
createSettings <- function(x, settings) {
set <- list()
# provided settings
set$provided <- dplyr::as_tibble(settings)
# present in result
set$present <- x |>
dplyr::select("result_id") |>
dplyr::distinct()
# long settings
notCharacter <- character()
setLong <- x |>
dplyr::filter(.data$variable_name == "settings") |>
dplyr::select(
"result_id", "estimate_name", "estimate_type", "estimate_value"
)
if (nrow(setLong) > 0) {
if (any(unique(setLong$estimate_type) != "character")) {
notCharacter <- setLong$estimate_name[setLong$estimate_type != "character"]
}
set$long <- setLong |>
dplyr::select(!"estimate_type") |>
dplyr::distinct() |>
tidyr::pivot_wider(
names_from = "estimate_name", values_from = "estimate_value"
)
}
# extra columns
cols <- colnames(x)
cols <- cols[!cols %in% resultColumns("summarised_result")]
if (length(cols) > 0) {
# to consider to give an error in future release
cli::cli_inform(c(
"!" = "{.var {cols}} moved to settings. This is not recommended as settings should be explicitly provided.",
"i" = "NOTE that this can cause problems with settings."
))
set$extra_columns <- x |>
dplyr::select("result_id", dplyr::all_of(cols)) |>
dplyr::distinct()
}
# merge settings
set <- set |>
purrr::compact() |>
purrr::map(\(x) dplyr::mutate(x, result_id = as.integer(.data$result_id))) |>
purrr::reduce(dplyr::full_join, by = "result_id")
# missing settings
compulsory <- c("result_type", "package_name", "package_version")
colsMissing <- compulsory[!compulsory %in% colnames(set)]
if (length(colsMissing) > 0) {
cli::cli_inform("{.var {colsMissing}} added to {.pkg settings}.")
for (col in colsMissing) {
set <- set |>
dplyr::mutate(!!col := "")
}
}
set <- set |>
dplyr::mutate(dplyr::across(
.cols = dplyr::all_of(compulsory), \(x) dplyr::coalesce(x, "")
))
# group, strata and additional
set <- set |>
addLabels(x, "group") |>
addLabels(x, "strata") |>
addLabels(x, "additional")
# all settings must be character
types <- variableTypes(set)
notCharacter <- c(
notCharacter,
types$variable_name[types$variable_type != "character" & types$variable_name != "result_id"]
)
if (length(notCharacter) > 0) {
cli::cli_inform("{.var {notCharacter}} casted to character.")
set <- set |>
dplyr::mutate(dplyr::across(
dplyr::all_of(notCharacter), \(x) as.character(x)
))
}
# min_cell_count
if (!"min_cell_count" %in% colnames(set)) {
set <- set |>
dplyr::mutate(min_cell_count = "0")
} else {
set <- set |>
dplyr::mutate(
min_cell_count = dplyr::coalesce(.data$min_cell_count, "0"),
min_cell_count = dplyr::if_else(
.data$min_cell_count == "1", "0", .data$min_cell_count
)
)
}
# remove NA
colsRemove <- set |>
purrr::keep(\(x) all(is.na(x))) |>
names()
colsRemove <- colsRemove[!colsRemove %in% c(
"result_id", "result_type", "package_name", "package_version", "group",
"strata", "additional", "min_cell_count"
)]
if (length(colsRemove) > 0) {
cli::cli_inform("{.var {colsRemove}} eliminated from settings as all elements are NA.")
set <- set |>
dplyr::select(!dplyr::all_of(colsRemove))
}
# order variables
initialCols <- c(
"result_id", "result_type", "package_name", "package_version", "group",
"strata", "additional", "min_cell_count"
)
otherCols <- sort(colnames(set)[!colnames(set) %in% initialCols])
set <- set |>
dplyr::select(dplyr::all_of(c(initialCols, otherCols)))
return(set)
}
addLabels <- function(set, x, prefix) {
if (!prefix %in% colnames(set)) {
if (nrow(x) == 0) {
set <- set |>
dplyr::mutate(!!prefix := "")
} else {
set <- set |>
dplyr::left_join(
x |>
dplyr::group_by(.data$result_id) |>
dplyr::group_split() |>
purrr::map(\(x) {
resId <- x$result_id[1]
lab <- x |>
dplyr::select(dplyr::all_of(paste0(prefix, "_name"))) |>
dplyr::distinct() |>
dplyr::pull() |>
stringr::str_split(pattern = " &&& ") |>
unlist() |>
unique()
lab <- paste0(lab[lab != "overall"], collapse = " &&& ")
dplyr::tibble(result_id = resId, !!prefix := lab)
}) |>
dplyr::bind_rows(),
by = "result_id"
)
}
}
set |>
dplyr::mutate(!!prefix := dplyr::coalesce(.data[[prefix]], ""))
}
validateResultSettings <- function(set, call) {
if (is.null(set)) {
"{.cls summarised_result} object does not have settings attribute." |>
cli::cli_abort(call = call)
}
if (!"result_id" %in% colnames(set)) {
"{.var result_id} must be part of settings attribute." |>
cli::cli_abort(call = call)
}
types <- variableTypes(set)
if (types$variable_type[types$variable_name == "result_id"] != "integer") {
"{.var result_id} must be {.cls integer} in settings attribute." |>
cli::cli_abort(call = call)
}
types <- types |>
dplyr::filter(.data$variable_name != "result_id")
notCharacter <- types$variable_name[types$variable_type != "character"]
if (length(notCharacter) > 0) {
"{.var {notCharacter}} must be {.cls character} in settings attribute." |>
cli::cli_abort(call = call)
}
if (length(set$result_id) != length(unique(set$result_id))) {
"{.var result_id} must be unique in settings attribute." |>
cli::cli_abort(call = call)
}
if (nrow(set) != nrow(dplyr::distinct(dplyr::select(set, !"result_id")))) {
"Each {.var result_id} must be unique and contain a unique set of {.pkg settings}." |>
cli::cli_abort(call = call)
}
# tidy names
tidyGroup <- extractColumns(set, "group")
tidyStrata <- extractColumns(set, "strata")
tidyAdditional <- extractColumns(set, "additional")
reportOverlap(tidyGroup, tidyStrata, "group", "strata", call)
reportOverlap(tidyGroup, tidyAdditional, "group", "additional", call)
reportOverlap(tidyStrata, tidyAdditional, "strata", "additional", call)
invisible()
}
getLabels <- function(x) {
stringr::str_split(string = x, pattern = " &&& ") |>
purrr::map(\(x) x[!x %in% c("", "overall")])
}
extractColumns <- function(x, col) {
x[[col]] |>
as.list() |>
rlang::set_names(as.character(x$result_id)) |>
purrr::map(\(x) {
x |>
getLabels() |>
purrr::flatten_chr() |>
unique()
})
}
reportOverlap <- function(tidy1, tidy2, group1, group2, call) {
x <- purrr::map2(tidy1, tidy2, intersect) |>
purrr::compact()
if (length(x) == 0) {
return(invisible())
}
message <- x |>
purrr::imap_chr(\(x, nm) {
paste0(
"In result_id = ", nm, ": `", paste0(x, collapse = "`, `"),
"` present in both {.pkg ", group1, "} and {.pkg ", group2, "}."
)
}) |>
unname()
cli::cli_abort(message = message, call = call)
}
validateSummarisedResultTable <- function(x,
duplicates = TRUE,
pairs = TRUE,
duplicateEstimates = TRUE,
suppressPossibility = TRUE,
call) {
# all columns
columns <- resultColumns(table = "summarised_result")
notPresent <- columns[!columns %in% colnames(x)]
if (length(notPresent) > 0) {
"{.var {notPresent}} not present in {.cls summarised_result} object." |>
cli::cli_abort(call = call)
}
# extra columns
extraColumns <- colnames(x)[!colnames(x) %in% columns]
if (length(extraColumns) > 0) {
"extra columns ({.var {extraColumns}}) not allowed in {.cls summarised_result} object." |>
cli::cli_abort(call = call)
}
# correct type
x <- checkColumnsFormat(x = x, "summarised_result")
# Cannot contain NA columns
checkNA(x = x, "summarised_result")
# estimate type
estimateType <- unique(x$estimate_type)
notValidEstimateTypes <- estimateType[!estimateType %in% estimateTypeChoices()]
if (length(notValidEstimateTypes) > 0) {
"{.var {notValidEstimateTypes}} {?is/are} not valid estimate_type values." |>
cli::cli_abort(call = call)
}
# all ids in result must be in settings
idsResult <- unique(x$result_id)
idsSettings <- unique(attr(x, "settings")$result_id)
notPresent <- idsResult[!idsResult %in% idsSettings]
if (length(notPresent) > 0) {
cli::cli_abort("result_id: {.var {notPresent}} not present in {.pkg settings} but present in data.")
}
# duplicates
if (duplicates) {
nr <- nrow(x)
x <- x |>
dplyr::distinct()
eliminated <- nr - nrow(x)
if (eliminated > 0) {
cli::cli_inform(c("!" = "{eliminated} duplicated row{?s} eliminated."))
}
}
# columPairs
if (pairs) {
validateNameLevel(x = x, prefix = "group", validation = "warning")
validateNameLevel(x = x, prefix = "strata", validation = "warning")
validateNameLevel(x = x, prefix = "additional", validation = "warning")
}
# no duplicated estimates
if (duplicateEstimates) {
checkDuplicated(x, validation = "error")
}
# suppress availability
if (suppressPossibility) {
checkGroupCount(x)
}
return(x)
}
checkColumns <- function(x, resultName, call = parent.frame()) {
cols <- resultColumns(table = resultName)
notPresent <- cols[!cols %in% colnames(x)]
if (length(notPresent) > 0) {
cli::cli_abort(
"{paste0(notPresent, collapse = ', ')} must be present in a {.cls {resultName}}
object."
)
}
x |> dplyr::relocate(dplyr::all_of(cols))
}
checkNA <- function(x, type, call = parent.frame()) {
cols <- fieldsResults$result_field_name[
fieldsResults$result == type & fieldsResults$na_allowed == FALSE
]
for (col in cols) {
if (any(is.na(unique(x[[col]])))) {
cli::cli_abort("`{col}` must not contain NA.", call = call)
}
}
invisible(NULL)
}
checkColumnsFormat <- function(x, resultName) {
cols <- resultColumns(resultName)
expectedFormat <- fieldsResults$datatype[fieldsResults$result == resultName]
formats <- purrr::map_chr(x, typeof)
id <- formats != expectedFormat
cols <- cols[id]
formats <- formats[id]
expectedFormat <- expectedFormat[id]
if (length(cols) > 0) {
err <- character()
for (k in seq_along(cols)) {
res <- tryCatch(
expr = {
x <- x |>
dplyr::mutate(!!cols[k] := giveType(.data[[cols[k]]], expectedFormat[k]))
list(x = x, err = character())
},
error = function(e) {
list(x = x, err = cols[k])
}
)
x <- res$x
err <- c(err, res$err)
}
if (length(err) > 0) {
err <- paste0(err, ": format=", formats, " (expected=", expectedFormat, ")")
names(err) <- rep("*", length(err))
cli::cli_abort(c("The following colum does not have a correct format", err))
} else {
err <- paste0(cols, ": from ", formats, " to ", expectedFormat)
names(err) <- rep("*", length(err))
cli::cli_inform(c("!" = "The following column type were changed:", err))
}
}
invisible(x)
}
checkGroupCount <- function(x, validation = "error", call = parent.frame()) {
grouping <- c(
"result_id", "cdm_name", "group_name", "group_level", "strata_name",
"strata_level", "additional_name", "additional_level"
)
obsLabels <- x |>
dplyr::pull("variable_name") |>
unique()
obsLabelsL <- obsLabels |>
stringr::str_replace_all(pattern = "_", replacement = " ") |>
tolower()
res <- character()
n <- 0
for (gcount in groupCount) {
if (n < 5) {
ol <- obsLabels[obsLabelsL %in% gcount]
xx <- x |>
dplyr::filter(
.data$variable_name %in% ol &
stringr::str_detect(.data$estimate_name, "count")
) |>
dplyr::select(dplyr::all_of(c(grouping, "variable_name"))) |>
dplyr::group_by(dplyr::across(dplyr::all_of(grouping))) |>
dplyr::filter(dplyr::n() > 1) |>
dplyr::group_split() |>
as.list()
for (k in seq_along(xx)) {
if (n < 5) {
res <- c(res, "*" = glue::glue("{nrow(xx[[k]])} '{gcount}' in variable_name for: {getGrouping(xx[[k]])}."))
n <- n + 1
}
}
}
}
if (length(res) > 0) {
res <- c(
"Each grouping (unique combination of: {grouping}) can not contain repeated group identifiers ({groupCount}).",
"First {n} combination{?s}:",
res
)
cli::cli_abort(res)
}
return(invisible(NULL))
}
getGrouping <- function(x) {
x <- x |>
dplyr::select(-dplyr::any_of("variable_name")) |>
dplyr::distinct() |>
as.list()
lapply(seq_along(x), function(kk) {
paste0(names(x)[kk], ": ", x[[kk]])
}) |>
unlist() |>
paste0(collapse = ", ")
}
#' Validate if two columns are valid Name-Level pair.
#'
#' @param x A tibble.
#' @param prefix Prefix for the name-level pair, e.g. 'strata' for
#' strata_name-strata_level pair.
#' @param sep Separation pattern.
#' @param validation Either 'error', 'warning' or 'message'.
#' @param call Will be used by cli to report errors.
#'
#' @export
#'
validateNameLevel <- function(x,
prefix,
sep = " &&& ",
validation = "error",
call = parent.frame()) {
# inital checks
assertCharacter(prefix, length = 1)
nameColumn <- paste0(prefix, "_name")
levelColumn <- paste0(prefix, "_level")
assertTable(x, columns = c(nameColumn, levelColumn))
assertCharacter(sep)
assertValidation(validation)
# distinct pairs
distinctPairs <- x |>
dplyr::select(
"name" = dplyr::all_of(nameColumn), "level" = dplyr::all_of(levelColumn)
) |>
dplyr::distinct() |>
dplyr::mutate(dplyr::across(
c("name", "level"),
list(elements = ~ stringr::str_split(.x, pattern = sep))
)) |>
dplyr::mutate(dplyr::across(
dplyr::ends_with("elements"),
list(length = ~ lengths(.x))
))
# pairs that dont match
notMatch <- distinctPairs |>
dplyr::filter(
.data$name_elements_length != .data$level_elements_length
)
# error / warning
if (nrow(notMatch) > 0) {
unmatch <- notMatch |>
dplyr::select("name", "level") |>
dplyr::mutate("name_and_level" = paste0(
.env$nameColumn, ": ", .data$name, "; ", .env$levelColumn, ": ",
.data$level
)) |>
dplyr::pull("name_and_level")
num <- length(unmatch)
nun <- min(num, 5)
unmatch <- unmatch[1:nun]
names(unmatch) <- rep("*", nun)
mes <- "name: `{nameColumn}` and level: `{levelColumn}` does not match in
number of arguments ({num} unmatch), first {nun} unmatch:"
# report
report(c(mes, unmatch), validation = validation, call = call)
}
# check case
nameCase <- distinctPairs[["name_elements"]] |>
unlist() |>
unique()
notSnake <- nameCase[!isCase(nameCase, "snake")]
if (length(notSnake) > 0) {
"{length(notSnake)} element{?s} in {nameColumn} {?is/are} not snake_case." |>
report(validation = validation, call = call)
}
return(invisible(x))
}
isCase <- function(x, case) {
if (length(x) == 0) {
return(logical())
}
flag <- switch(case,
"snake" = isSnakeCase(x),
"sentence" = isSentenceCase(x),
"NA" = rep(TRUE, length(x)),
rep(NA, length(x))
)
return(flag)
}
isSentenceCase <- function(x) {
if (length(x) > 0) {
x == snakecase::to_sentence_case(x)
} else {
x
}
}
isSnakeCase <- function(x) {
if (length(x) > 0) {
x == toSnakeCase(x)
} else {
x
}
}
checkColumnContent <- function(x, col, content) {
if (!all(x[[col]] %in% content)) {
notType <- x[[col]][!x[[col]] %in% content] |> unique()
len <- length(notType)
notType <- notType[1:min(5, len)]
cli::cli_abort(c(
"{col} contains incorrect values, possible values:
{paste0(content, collapse = ', ')}. Observed values:
{paste0(notType, collapse = ', ')}{ifelse(len>5, '...', '.')}"
))
}
return(invisible(TRUE))
}
checkDuplicated <- function(x, validation, call = parent.frame()) {
nraw <- nrow(x)
ndist <- x |>
dplyr::select(!"estimate_value") |>
dplyr::distinct() |>
nrow()
dup <- nraw - ndist
if (dup > 0) {
report(
message = c(
"{dup} duplicated results with different estimate values found.",
"i" = "Run the following to see which are",
"data |>",
" " = "dplyr::group_by(dplyr::across(!'estimate_value')) |>",
" " = "dplyr::tally() |>",
" " = "dplyr::filter(n > 1)"
),
validation = validation,
call = call
)
}
return(invisible(TRUE))
}
giveType <- function(x, type) {
switch(type,
"integer" = as.integer(x),
"double" = as.double(x),
"character" = as.character(x),
"logical" = as.logical(x),
x
)
}
validateTidyNames <- function(result, call = parent.frame()) {
# setting columns
colsSettings <- colnames(settings(result))
colsSettings <- colsSettings[colsSettings != "result_id"]
# group columns
colsGroup <- uniqueCols(result$group_name)
# strata columns
colsStrata <- uniqueCols(result$strata_name)
# additional columns
colsAdditional <- uniqueCols(result$additional_name)
# default columns
colsSummarisedResult <- resultColumns("summarised_result")
cols <- list(
settings = colsSettings,
group = colsGroup,
strata = colsStrata,
additional = colsAdditional,
summarised_result = colsSummarisedResult
)
# compare each pair
len <- length(cols)
nms <- names(cols)
for (k in 1:(len - 1)) {
for (i in (k + 1):len) {
both <- intersect(cols[[k]], cols[[i]])
if (length(both) > 0) {
"{.var {both}} {?is/are} present in both '{nms[k]}' and '{nms[i]}'. This will be an error in the next release." |>
cli::cli_warn() # Turn error
}
}
}
return(invisible(result))
}
uniqueCols <- function(x) {
x <- x |>
unique() |>
stringr::str_split(" &&& ") |>
unlist() |>
unique()
x[x != "overall"]
}
#' Required columns that the result tables must have.
#'
#' @param table Table to see required columns.
#'
#' @return Required columns
#'
#' @export
#'
#' @examples
#' library(omopgenerics)
#'
#' resultColumns()
#'
resultColumns <- function(table = "summarised_result") {
assertChoice(table, unique(fieldsResults$result))
x <- fieldsResults$result_field_name[fieldsResults$result == table]
return(x)
}
#' Choices that can be present in `estimate_type` column.
#'
#' @return A character vector with the options that can be present in
#' `estimate_type` column in the summarised_result objects.
#'
#' @export
#'
#' @examples
#' library(omopgenerics)
#'
#' estimateTypeChoices()
#'
estimateTypeChoices <- function() {
c(
"numeric", "integer", "date", "character", "proportion", "percentage",
"logical"
)
}
#' Empty `summarised_result` object.
#'
#' @param settings Tibble/data.frame with the settings of the empty
#' summarised_result. It has to contain at least `result_id` column.
#'
#' @return An empty `summarised_result` object.
#'
#' @export
#'
#' @examples
#' library(omopgenerics)
#'
#' emptySummarisedResult()
#'
emptySummarisedResult <- function(settings = NULL) {
if (is.null(settings)) {
settings <- dplyr::tibble(
"result_id" = integer(),
"result_type" = character(),
"package_name" = character(),
"package_version" = character()
)
}
resultColumns("summarised_result") |>
rlang::rep_named(list(character())) |>
dplyr::as_tibble() |>
dplyr::mutate("result_id" = as.integer()) |>
newSummarisedResult(settings = settings)
}
report <- function(message,
validation, # error/warning/inform
call = parent.frame(), # where error is reported
.envir = parent.frame()) { # where glue statements are evaluated
if (validation == "error") {
cli::cli_abort(addSignal(message, "x"), .envir = .envir, call = call)
} else if (validation == "warning") {
cli::cli_warn(addSignal(message, "!"), .envir = .envir)
} else if (validation == "inform") {
cli::cli_inform(addSignal(message, "!"), .envir = .envir)
}
return(invisible(TRUE))
}
addSignal <- function(x, nm) {
if (length(x) > 0) names(x)[1] <- nm
return(x)
}
#' Create a <summarised_result> object from a data.frame, given a set of
#' specifications.
#'
#' @param x A data.frame.
#' @param group Columns in x to be used in group_name-group_level formatting.
#' @param strata Columns in x to be used in strata_name-strata_level formatting.
#' @param additional Columns in x to be used in additional_name-additional_level
#' formatting.
#' @param estimates Columns in x to be formatted into:
#' estimate_name-estimate_type-estimate_value.
#' @param settings Columns in x thta form the settings of the
#' <summarised_result> object.
#'
#' @return A <summarised_result> object.
#' @export
#'
#' @examples
#' x <- dplyr::tibble(
#' cohort_name = c("cohort1", "cohort2"),
#' variable_name = "age",
#' mean = c(50, 45.3),
#' median = c(55L, 44L)
#' )
#'
#' transformToSummarisedResult(
#' x = x,
#' group = c("cohort_name"),
#' estimates = c("mean", "median")
#' )
#'
transformToSummarisedResult <- function(x,
group = character(),
strata = character(),
additional = character(),
estimates = character(),
settings = character()) {
# check input
assertTable(x = x, class = "data.frame")
assertCharacter(group, unique = TRUE)
assertCharacter(strata, unique = TRUE)
assertCharacter(additional, unique = TRUE)
assertCharacter(estimates, unique = TRUE)
assertCharacter(settings, unique = TRUE)
vals <- list(
"group" = group,
"strata" = strata,
"additional" = additional,
"estimates" = estimates,
"settings" = settings
)
# not present
notPresent <- vals |>
purrr::map(\(xx) xx[!xx %in% colnames(x)]) |>
purrr::keep(\(x) length(x) > 0)
if (length(notPresent) > 0) {
n <- length(unlist(notPresent))
mes <- purrr::imap_chr(notPresent, \(x, nm) {
paste0("{.pkg ", nm, "}: `", paste0(x, collapse = "`, `"), "`.")
}) |>
rlang::set_names("*")
cli::cli_abort(c(x = "{n} column{?s} {?is/are} not present in x:", mes))
}
# intersections
repeated <- unique(unlist(vals)) |>
rlang::set_names() |>
purrr::map(\(x) names(purrr::keep(vals, \(val) x %in% val))) |>
purrr::keep(\(x) length(x) > 1)
if (length(repeated) > 0) {
n <- length(unlist(repeated))
mes <- purrr::imap_chr(repeated, \(x, nm) {
paste0("{.pkg ", nm, "} present in: ", paste0(x, collapse = ", "), ".")
}) |>
rlang::set_names("*")
cli::cli_abort(c(x = "There can not be repeated elements:", mes))
}
# extra columns
extraCols <- colnames(x) |>
purrr::keep(\(x) !x %in% c(unique(unlist(vals)), "cdm_name", "variable_name", "variable_level"))
if (length(extraCols) > 0) {
cli::cli_warn(c("!" = "The following columns have been eliminated: {.var {extraCols}}."))
x <- x |>
dplyr::select(!dplyr::all_of(extraCols))
}
# cdm_name, variable_name, variable_level
if (!"cdm_name" %in% colnames(x)) {
cli::cli_inform(c(i = "Column {.var cdm_name} created as 'unknown' as not present in x."))
x <- x |>
dplyr::mutate(cdm_name = "unknown")
}
if (!"variable_name" %in% colnames(x)) {
cli::cli_inform(c(i = "Column {.var variable_name} created as 'overall' as not present in x."))
x <- x |>
dplyr::mutate(variable_name = "overall")
}
if (!"variable_level" %in% colnames(x)) {
cli::cli_inform(c(i = "Column {.var variable_level} created as 'overall' as not present in x."))
x <- x |>
dplyr::mutate(variable_level = "overall")
}
# cast to character
cols <- c("cdm_name", "variable_name", "variable_level", group, strata,
additional, settings) |>
purrr::keep(\(col) !identical(dplyr::type_sum(x[[col]]), "chr"))
if (length(cols) > 0) {
cli::cli_warn(c("!" = "{length(cols)} column{?s} casted to character: {.var {cols}}."))
x <- x|>
dplyr::mutate(dplyr::across(dplyr::all_of(cols), as.character))
}
# pivot estimates
# get types
types <- estimates |>
rlang::set_names() |>
purrr::map(\(col) assertClassification(dplyr::type_sum(x[[col]])))
# format columns
funs <- purrr::map(types, \(x) {
switch(x,
"character" = \(x) x,
"logical" = \(x) as.character(x),
"date" = \(x) as.character(x),
"numeric" = \(x) as.character(x),
"integer" = \(x) as.character(x))
})
x <- x |>
dplyr::mutate(dplyr::across(
.cols = names(funs),
.fns = ~ funs[[dplyr::cur_column()]](.)
))
x <- x |>
tidyr::pivot_longer(
cols = dplyr::all_of(estimates),
names_to = "estimate_name",
values_to = "estimate_value",
values_drop_na = TRUE
) |>
dplyr::inner_join(
types |>
purrr::map(\(x) dplyr::tibble(estimate_type = x)) |>
dplyr::bind_rows(.id = "estimate_name"),
by = "estimate_name"
)
# create summary object
x <- x |>
uniteGroup(cols = group) |>
uniteStrata(cols = strata) |>
uniteAdditional(cols = additional)
if (length(settings) > 0) {
set <- x |>
dplyr::select(dplyr::all_of(settings)) |>
dplyr::distinct() |>
dplyr::mutate(result_id = as.integer(dplyr::row_number()))
x <- x |>
dplyr::inner_join(set, by = settings, relationship = "many-to-one") |>
dplyr::select(!dplyr::all_of(settings))
} else {
set <- dplyr::tibble(result_id = 1L)
x <- x |>
dplyr::mutate(result_id = 1L)
}
set <- set |>
dplyr::mutate(
group = paste0(.env$group, collapse = " &&& "),
strata = paste0(.env$strata, collapse = " &&& "),
additional = paste0(.env$additional, collapse = " &&& ")
)
newSummarisedResult(x = x, settings = set)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.