R/datamap.R

Defines functions generate_datamap

Documented in generate_datamap

# DQAstats - Perform data quality assessment (DQA) of electronic health
# records (EHR)
# Copyright (C) 2019-2024 Universitätsklinikum Erlangen
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

#' @title generate_datamap helper function
#'
#' @description Internal function to generate the dashboard data maps
#'
#' @inheritParams etl_checks
#' @inheritParams create_helper_vars
#' @inheritParams test_csv
#' @inheritParams load_sqls
#' @inheritParams atemp_plausi_results
#'
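#' @return A list with the elements 'source_data' and 'target_data', each a
#'   data.table with the datamap counts (variable, n, valids, missings,
#'   distinct), or NULL if no suitable variables are found in the MDR.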
#'
#' @examples
#' \donttest{# runtime > 5 sec.
#' utils_path <- system.file(
#'   "demo_data/utilities/",
#'   package = "DQAstats"
#' )
#' mdr_filename <- "mdr_example_data.csv"
#' rv <- list()
#' rv$mdr <- read_mdr(
#'   utils_path = utils_path,
#'   mdr_filename = mdr_filename
#' )
#'
#' source_system_name <- "exampleCSV_source"
#' target_system_name <- "exampleCSV_target"
#'
#' rv <- c(rv, create_helper_vars(
#'   mdr = rv$mdr,
#'   source_db = source_system_name,
#'   target_db = target_system_name
#' ))
#' # save source/target vars
#' rv$source$system_name <- source_system_name
#' rv$target$system_name <- target_system_name
#' rv$source$system_type <- "csv"
#' rv$target$system_type <- "csv"
#'
#' rv$log$logfile_dir <- tempdir()
#'
#' # set headless (without GUI, progressbars, etc.)
#' rv$headless <- TRUE
#'
#' # set configs
#' demo_files <- system.file("demo_data", package = "DQAstats")
#' Sys.setenv("EXAMPLECSV_SOURCE_PATH" = demo_files)
#' Sys.setenv("EXAMPLECSV_TARGET_PATH" = demo_files)
#'
#' # get configs
#' rv$source$settings <- DIZutils::get_config_env(
#'   system_name = rv$source$system_name,
#'   logfile_dir = rv$log$logfile_dir,
#'   headless = rv$headless
#' )
#' rv$target$settings <- DIZutils::get_config_env(
#'   system_name = tolower(rv$target$system_name),
#'   logfile_dir = rv$log$logfile_dir,
#'   headless = rv$headless
#' )
#'
#' # set start_time (e.g. when clicking the 'Load Data'-button in shiny)
#' rv$start_time <- format(Sys.time(), usetz = TRUE, tz = "CET")
#'
#' # define restricting date
#' rv$restricting_date$use_it <- FALSE
#'
#' # load source data
#' tempdat <- data_loading(
#'   rv = rv,
#'   system = rv$source,
#'   keys_to_test = rv$keys_source
#' )
#' rv$data_source <- tempdat$outdata
#'
#' # load target data
#' tempdat <- data_loading(
#'   rv = rv,
#'   system = rv$target,
#'   keys_to_test = rv$keys_target
#' )
#' rv$data_target <- tempdat$outdata
#'
#' rv$data_plausibility$atemporal <- get_atemp_plausis(
#'   rv = rv,
#'   atemp_vars = rv$pl$atemp_vars,
#'   mdr = rv$mdr,
#'   headless = rv$headless
#' )
#'
#' # add the plausibility raw data to data_target and data_source
#' for (i in names(rv$data_plausibility$atemporal)) {
#'   for (k in c("source_data", "target_data")) {
#'     w <- gsub("_data", "", k)
#'     raw_data <- paste0("data_", w)
#'     rv[[raw_data]][[i]] <-
#'       rv$data_plausibility$atemporal[[i]][[k]][[raw_data]]
#'     rv$data_plausibility$atemporal[[i]][[k]][[raw_data]] <- NULL
#'   }
#'   gc()
#' }
#'
#' # calculate descriptive results
#' rv$results_descriptive <- descriptive_results(
#'   rv = rv,
#'   headless = rv$headless
#' )
#'
#' generate_datamap(
#'   results = rv$results_descriptive,
#'   db = rv$target$system_name,
#'   mdr = rv$mdr,
#'   rv = rv,
#'   headless = rv$headless
#' )
#' }
#' @export
#'
generate_datamap <- function(results,
                             mdr,
                             db,
                             rv,
                             headless = FALSE) {
  # Collect the data map counts for the source and the target data.
  #
  # Arguments (as used below):
  #   results:  list indexed by designation; for each designation the
  #             elements `counts$source_data$cnt` / `counts$target_data$cnt`
  #             are read for `n`, `valids`, `missings` and `distinct`.
  #   mdr:      data.table with (at least) the columns `data_map`,
  #             `source_system_name`, `variable_name` and `designation`.
  #   db:       system name matched against `source_system_name`.
  #   rv:       list; `rv$variable_list`, `rv$log$logfile_dir` and
  #             `rv$headless` are read.
  #   headless: if FALSE, the feedback message is also logged via `logjs`.
  #
  # Returns NULL (after emitting a feedback message) if no MDR row
  # qualifies, otherwise a list with one data.table per data set
  # (`source_data`, `target_data`).

  # MDR rows flagged for the data map that belong to the system `db`
  data_names <- mdr[
    get("data_map") == 1 &
      get("source_system_name") == db,
    c("variable_name",
      "designation"),
    with = FALSE
  ]

  # drop designations that are not part of the variable list
  known_designations <- names(rv$variable_list)
  data_names <- data_names[
    get("designation") %in% known_designations,
  ]

  # guard clause: nothing to put on the data map
  if (nrow(data_names) < 1) {
    msg <- "No variables suitable for the data map found in the MDR"
    DIZtools::feedback(msg, logjs = isFALSE(headless), findme = "02c0846290",
                       logfile_dir = rv$log$logfile_dir,
                       headless = rv$headless)
    return(NULL)
  }

  obj_names <- data_names[, get("designation")]

  # Empty template: kept as the first item of every bind so that the
  # column set and the type promotion are the same as when rows were
  # appended one by one to this table.
  template <- data.table::data.table(
    "variable" = character(0),
    "n" = character(0),
    "valids" = character(0),
    "missings" = character(0),
    "distinct" = character(0)
  )

  outlist <- list()

  for (i in c("source_data", "target_data")) {
    # preallocate one slot per variable (+ 1 for the template) and bind
    # once at the end instead of growing the table with rbind() per row
    rows <- vector("list", length(obj_names) + 1L)
    rows[[1L]] <- template
    pos <- 1L

    for (j in obj_names) {
      pos <- pos + 1L
      cnt <- results[[j]]$counts[[i]]$cnt
      rows[[pos]] <- data.table::data.table(
        "variable" = j,
        "n" = cnt$n,
        "valids" = cnt$valids,
        "missings" = cnt$missings,
        "distinct" = cnt$distinct
      )
    }

    # use.names = TRUE mirrors the default of rbind() for data.tables
    outlist[[i]] <- data.table::rbindlist(rows, use.names = TRUE)
  }

  return(outlist)
}

Try the DQAstats package in your browser

Any scripts or data that you put into this service are public.

DQAstats documentation built on April 12, 2025, 2:21 a.m.