Nothing
# DQAstats - Perform data quality assessment (DQA) of electronic health
# records (EHR)
# Copyright (C) 2019-2024 Universitätsklinikum Erlangen
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#' @title completeness helper function
#'
#' @description Internal function to perform missing analysis.
#'
#' @inheritParams value_conformance
#'
#' @return A data.table with the absolute and relative counts of missing values
#' (results of the completeness checks) for each dataelement for the source
#' data system and the target data system.
#'
#' @examples
#' \donttest{# runtime ~ 5 sec.
#' utils_path <- system.file(
#' "demo_data/utilities/",
#' package = "DQAstats"
#' )
#' mdr_filename <- "mdr_example_data.csv"
#' rv <- list()
#' rv$mdr <- read_mdr(
#' utils_path = utils_path,
#' mdr_filename = mdr_filename
#' )
#'
#' source_system_name <- "exampleCSV_source"
#' target_system_name <- "exampleCSV_target"
#'
#' rv <- c(rv, create_helper_vars(
#' mdr = rv$mdr,
#' source_db = source_system_name,
#' target_db = target_system_name
#' ))
#' # save source/target vars
#' rv$source$system_name <- source_system_name
#' rv$target$system_name <- target_system_name
#' rv$source$system_type <- "csv"
#' rv$target$system_type <- "csv"
#'
#' rv$log$logfile_dir <- tempdir()
#'
#' # set headless (without GUI, progressbars, etc.)
#' rv$headless <- TRUE
#'
#' # set configs
#' demo_files <- system.file("demo_data", package = "DQAstats")
#' Sys.setenv("EXAMPLECSV_SOURCE_PATH" = demo_files)
#' Sys.setenv("EXAMPLECSV_TARGET_PATH" = demo_files)
#'
#' # get configs
#' rv$source$settings <- DIZutils::get_config_env(
#' system_name = rv$source$system_name,
#' logfile_dir = rv$log$logfile_dir,
#' headless = rv$headless
#' )
#' rv$target$settings <- DIZutils::get_config_env(
#' system_name = tolower(rv$target$system_name),
#' logfile_dir = rv$log$logfile_dir,
#' headless = rv$headless
#' )
#'
#' # set start_time (e.g. when clicking the 'Load Data'-button in shiny)
#' rv$start_time <- format(Sys.time(), usetz = TRUE, tz = "CET")
#'
#' # define restricting date
#' rv$restricting_date$use_it <- FALSE
#'
#' # load source data
#' tempdat <- data_loading(
#' rv = rv,
#' system = rv$source,
#' keys_to_test = rv$keys_source
#' )
#' rv$data_source <- tempdat$outdata
#'
#' # load target data
#' tempdat <- data_loading(
#' rv = rv,
#' system = rv$target,
#' keys_to_test = rv$keys_target
#' )
#' rv$data_target <- tempdat$outdata
#'
#' rv$data_plausibility$atemporal <- get_atemp_plausis(
#' rv = rv,
#' atemp_vars = rv$pl$atemp_vars,
#' mdr = rv$mdr,
#' headless = rv$headless
#' )
#'
#' # add the plausibility raw data to data_target and data_source
#' for (i in names(rv$data_plausibility$atemporal)) {
#' for (k in c("source_data", "target_data")) {
#' w <- gsub("_data", "", k)
#' raw_data <- paste0("data_", w)
#' rv[[raw_data]][[i]] <-
#' rv$data_plausibility$atemporal[[i]][[k]][[raw_data]]
#' rv$data_plausibility$atemporal[[i]][[k]][[raw_data]] <- NULL
#' }
#' gc()
#' }
#'
#' # calculate descriptive results
#' rv$results_descriptive <- descriptive_results(
#' rv = rv,
#' headless = rv$headless
#' )
#' completeness(
#' results = rv$results_descriptive,
#' headless = rv$headless,
#' logfile_dir = rv$log$logfile_dir
#' )
#' }
#'
#' @export
#'
completeness <- function(results, headless = FALSE, logfile_dir) {
  # Every named element of `results` is treated as one data element; its
  # `$counts$source_data$cnt` and `$counts$target_data$cnt` entries supply the
  # `missings` and `n` values read below.
  obj_names <- names(results)

  # Zero-row template. Putting it first in the bind pins the column set and
  # the column order of the output, even when `results` is empty or when an
  # element lacks some of its counts.
  template <- data.table::data.table(
    cbind(
      "Variable" = character(0),
      "Missings (source)" = integer(0),
      "Missings [%] (source)" = numeric(0),
      "Missings (target)" = integer(0),
      "Missings [%] (target)" = numeric(0)
    )
  )

  # Share of missing values in percent: the ratio is rounded to 4 digits
  # first and scaled afterwards. Yields a zero-length vector if `cnt` is NULL.
  pct_missing <- function(cnt) {
    round(cnt$missings / cnt$n, 4) * 100
  }

  # One small table per data element. They are collected in a list and bound
  # once at the end instead of growing the result with rbind() inside a loop,
  # which would copy the accumulated table on every iteration.
  rows <- lapply(
    X = obj_names,
    FUN = function(i) {
      msg <- paste("Performing missing analysis", i)
      DIZtools::feedback(
        msg,
        findme = "7a28e87b30",
        logjs = isFALSE(headless),
        logfile_dir = logfile_dir,
        headless = headless
      )

      src_cnt <- results[[i]]$counts$source_data$cnt
      tar_cnt <- results[[i]]$counts$target_data$cnt

      # cbind() mixes the character name with the numeric counts, so the
      # matrix (and therefore every output column) is of type character.
      # Zero-length / NULL entries are dropped by cbind(); the corresponding
      # columns are filled with NA when the tables are bound below.
      data.table::data.table(
        cbind(
          "Variable" = i,
          "Missings (source)" = src_cnt$missings,
          "Missings [%] (source)" = pct_missing(src_cnt),
          "Missings (target)" = tar_cnt$missings,
          "Missings [%] (target)" = pct_missing(tar_cnt)
        )
      )
    }
  )

  # Match columns by name and fill absent ones with NA
  # (equivalent to rbind(..., fill = TRUE) on data.tables).
  data.table::rbindlist(
    c(list(template), rows),
    use.names = TRUE,
    fill = TRUE
  )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.