Nothing
#' Generate an execution/calling plan for computing a report from the metadata
#'
#' @param meta_data [data.frame] the data frame that contains metadata
#' attributes of study data
#' @param label_col [variable attribute] the name of the column in the metadata
#' with labels of variables
#' @param dimensions [dimensions] Vector of dimensions to address in the report.
#' Allowed values in the vector are Completeness, Consistency,
#' and Accuracy. The generated report will only cover the
#' listed data quality dimensions. Accuracy is computational
#' expensive, so this dimension is not enabled by default.
#' Completeness should be included, if Consistency is
#' included, and Consistency should be included, if Accuracy
#' is included to avoid misleading detections of e.g. missing
#' codes as outliers, please refer to the data quality concept
#' for more details. Integrity is always included.
#' @param meta_data_segment [data.frame] -- optional: Segment level metadata
#' @param meta_data_dataframe [data.frame] -- optional: Data frame level
#' metadata
#' @param meta_data_cross_item [data.frame] -- optional: Cross-item level
#' metadata
#' @param specific_args [list] named list of arguments specifically for one of
#' the called functions, the of the list elements
#' correspond to the indicator functions whose calls
#' should be modified. The elements are lists of
#' arguments.
#' @param arg_overrides [list] arguments to be passed to all called indicator
#' functions if applicable.
#' @param resp_vars variables to be respected, `NULL` means to use all.
#' @param filter_indicator_functions [character] regular expressions, only
#' if an indicator function's name
#' matches one of these, it'll
#' be used for the report. If
#' of length zero, no filtering
#' is performed.
#'
#' @return a list of calls
util_generate_calls <- function(dimensions,
meta_data,
label_col,
meta_data_segment,
meta_data_dataframe,
meta_data_cross_item,
specific_args,
arg_overrides,
resp_vars,
filter_indicator_functions) {
.dimensions <- unique(c("Integrity", dimensions))
.dimensions <- substr(tolower(.dimensions), 1, 3)
.dimensions[.dimensions == "int"] <- "int_all" # use the wrappers, only
.dimensions <- paste(paste0(.dimensions, "_"), collapse = "|")
.dimensions <- sprintf("^(%s)", .dimensions)
exports <- getNamespaceExports(utils::packageName()) # TODO: Use util_all_ind_functions
ind_functions <- grep(.dimensions, exports, value = TRUE)
ind_functions <- ind_functions[
vapply(ind_functions,
exists,
envir = getNamespace(utils::packageName()),
mode = "function", FUN.VALUE = logical(1))]
ind_functions <- c(setdiff(ind_functions, c(
"con_contradictions", # we use the new cross-item-level now
"con_limit_deviations", # there is a specific function for each limit type, now
"acc_robust_univariate_outlier", # is just a synonym for acc_univariate_outlier
"acc_distributions", # there are specific functions for location and proportion checks
"com_unit_missingness" # function does not really create something reasonable, currently
)), "int_datatype_matrix") # the data type matrix function does not start with int_all, but integrity should always run.
ind_functions <- setNames(nm = ind_functions)
ind_functions <- util_filter_names_by_regexps(ind_functions,
filter_indicator_functions)
all_calls <- lapply(ind_functions,
util_generate_calls_for_function,
meta_data = meta_data,
label_col = label_col,
meta_data_segment = meta_data_segment,
meta_data_dataframe = meta_data_dataframe,
meta_data_cross_item = meta_data_cross_item,
specific_args = specific_args,
arg_overrides = arg_overrides,
resp_vars = resp_vars
)
# TODO: https://gitlab.com/libreumg/dataquier/-/issues/147, before
explode_splinter <- function(entity_name, splinter, fkt) {
to_explode <- vapply(lapply(splinter, attr, "explode"),
identical,
FUN.VALUE = logical(1),
y = TRUE)
if (all(!to_explode)) {
res <- do.call(what = call,
args = c(list(name = fkt), splinter),
quote = TRUE)
attr(res, "entity_name") <- entity_name
return(res)
}
my_splinter <- head(splinter[to_explode], 1)
remainder <- tail(splinter[to_explode], -1)
not_a_bomb <- splinter[!to_explode]
lapply(my_splinter[[1]], function(my_splinter_part) {
my_arg <- setNames(
list(my_splinter_part),
nm = names(my_splinter)
)
arg_list <- c(my_arg, remainder, not_a_bomb)
explode_splinter(entity_name = entity_name,
splinter = arg_list,
fkt = fkt)
})
}
explode <- function(fkt, bomb) {
mapply(SIMPLIFY = FALSE,
names(bomb),
bomb,
FUN = explode_splinter,
MoreArgs = list(fkt = fkt))
}
all_calls <- unlist(mapply(SIMPLIFY = FALSE,
names(all_calls), all_calls, FUN = explode))
has_resp_vars <- vapply(lapply(all_calls, names),
`%in%`, x = "resp_vars", FUN.VALUE = logical(1))
suppress <- has_resp_vars &
vapply(lapply(all_calls, `[[`, "resp_vars"), length,
FUN.VALUE = integer(1)) == 0
all_calls <- all_calls[!suppress]
make_name <- function(nm, call) {
entity_name <- attr(call, "entity_name")
r <- gsub(".", "_", fixed = TRUE, sub( # the alias names must not contain dots ".", the first dot separates the call alias from the entity name (mostly VAR_NAMES)
paste0("\\.\\Q",
entity_name,
"\\E"),
"",
nm,
perl = TRUE))
r <- sub("_TIME_VAR", "", r) # all with explode = TRUE
r <- sub("_GROUP_VAR", "", r)
r <- paste0(tolower(r), ".", entity_name)
r
}
names(all_calls) <- unlist(mapply(SIMPLIFY = FALSE,
names(all_calls), all_calls, FUN = make_name))
util_stop_if_not(all(grepl(fixed = TRUE, ".", names(all_calls)))) # there must be at least one dot to separated the col. name from the row name (wolog this is the first dots)
if (length(all_calls)) {
rn <- util_sub_string_right_from_.(names(all_calls))
cn <- util_sub_string_left_from_.(names(all_calls))
} else {
rn <- character(0)
cn <- character(0)
all_calls <- list()
}
attr(all_calls, "rn") <- rn
attr(all_calls, "cn") <- cn
multivariatecol <- names(all_calls)[vapply(lapply(all_calls,
`[[`, "resp_vars"), length,
FUN.VALUE = integer(1)) != 1] # all calls that passed more ore less than one variable in resp_vars
multivariatecol <- gsub("\\..*$", "", multivariatecol)
attr(all_calls, "multivariatcol") <- multivariatecol # SQ2 compatibility, everything, that does not address item_level, i.e., that is not called once for each variable; CAVE: Strange name in SQ2 standard w/o "e"
all_calls
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.