#' Generate missing report
#'
#' This function use the same method as described in the \code{cctab}, reporting
#' missingness of the variables. It includes which form is the variable from,
#' set as `Derived` if not from DLU file. And missing percentage with which subjects
#' are have missing value for that particular variable. This is internal function,
#' not intend to use it directly by user.
#'
#' @inheritParams cttab
#' @param dlu A data.frame of DLU file, this will be derived from the package environment
#' and should be set using \code{\link{get_dlu}}.
#'
#' @seealso \code{\link{cttab}} \code{\link{dump_missing_report}}
#' \code{\link{get_missing_report}} \code{\link{reset_missing_report}}
#'
#' @return A data frame
#'
#' @keywords internal
report_missing <- function(data,
vars,
select,
row_split = NULL,
subjid_string = getOption("cctu_subjid_string", default = "subjid")){
blnk_miss <- setNames(data.frame(matrix(ncol = 8, nrow = 0)),
c("form", "visit_var", "visit_label", "visit",
"variable", "label", "missing_pct", "subject_id"))
dlu <- get_dlu()
# If the subjid can not be found in the dataset
if(!subjid_string %in% names(data))
return(blnk_miss)
# Generate selection vector function
gen_selec <- function(dat, var, select = NULL) {
if (is.null(select) | !var %in% names(select)) {
return(rep(TRUE, length(dat[[var]])))
} else{
r <- eval(str2expression(select[var]), envir = dat)
r & !is.na(r)
}
}
vars <- unlist(vars, use.names = FALSE)
# Check if missing
any_miss <- sapply(vars, function(v)sum(is.na(data[[v]]))) > 0
# If all missing, return blank
if(all(!any_miss))
return(blnk_miss)
# Only report variables with missing
vars <- vars[any_miss]
res <- lapply(vars, function(v){
variable <- ifelse(has.label(data[[v]]), var_lab(data[[v]]), v)
z <- data[gen_selec(data, v, select[v]), ]
subid <- z[[subjid_string]][is.na(z[[v]])]
pct <- round(100*length(subid)/length(z[[subjid_string]]), 1)
if(v %in% dlu$shortcode)
fm_name <- dlu$form[dlu$shortcode == v]
else
fm_name <- "Derived"
data.frame(form = fm_name,
visit_var = NA,
visit_label = NA,
visit = NA,
variable = v,
label = variable,
missing_pct = paste0(pct, "% (",length(subid), "/",
length(z[[subjid_string]]), ")"),
subject_id = paste(subid, collapse = ", "),
row.names = NULL)
})
res <- do.call(rbind, res)
res[res$subject_id != "", ]
}
#' @name dump_missing_report
#' @aliases get_missing_report
#' @aliases reset_missing_report
#' @title Save/Get/Reset missingness report
#'
#' @description
#' \code{dump_missing_report} can be used to save the missingness report to
#' a file.
#' \code{get_missing_report} Return the missingness report data.
#' \code{reset_missing_report} Reset the internal missingness report data to blank.
#'
#' @param x File path the report will be dumped to. Default is under `Output`
#' folder, named as `variable_missing_report.csv`.
#' @seealso \code{\link{cttab}} \code{\link{report_missing}}
#' @export
#'
dump_missing_report <- function(x = "Output/variable_missing_report.csv"){
utils::write.csv(get_missing_report(),
file = x,
na = "", row.names = FALSE)
}
#' @rdname dump_missing_report
#' @export
get_missing_report <- function(){
unique(cctu_env$missing_report_data)
}
#' @rdname dump_missing_report
#' @export
reset_missing_report <- function(){
cctu_env$missing_report_data <- setNames(data.frame(matrix(ncol = 8, nrow = 0)),
c("form", "visit_var", "visit_label",
"visit", "variable", "label", "missing_pct",
"subject_id"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.