# Generated by fusen: do not edit by hand
#' Check Source and Target Dataframe
#'
#' Description
#'
#' @param .source
#' The Source Dataframe.
#' Must contain a unique column id and the columns you want to match on
#' @param .target
#' The Target Dataframe.
#' Must contain a unique column id and the columns you want to match on
#' @param .check
#' Check only column that are also in source, or all columns
#' @param .html
#' Output Check as hatml table?
#' @return Messages
#'
#' @export
#' @examples
#' check_data(table_source, table_target)
check_data <- function(.source, .target, .check = c("source", "all"), .html = TRUE) {
name <- df <- check <- ind <- cum <- value <- s <- Matrix <- Source <-
Target <- NULL
.source <- tibble::as_tibble(.source)
.target <- tibble::as_tibble(.target)
check_ <- match.arg(.check, c("source", "all"))
lst_ids_ <- check_id(.source, .target, .error = FALSE)
tab_ids_ <- tibble::enframe(lst_ids_) %>%
tidyr::separate(name, c("check", "df"), sep = "_", extra = "merge") %>%
tidyr::pivot_wider(names_from = df) %>%
dplyr::mutate(
check = dplyr::if_else(check == "e", "Column: 'id' exists", "Column: 'id' is unique")
) %>%
dplyr::mutate(dplyr::across(dplyr::everything(), as.character))
lst_nas_ <- check_nas(.source, .target, .check = check_)
tab_nas_ <- tibble::enframe(lst_nas_) %>%
tidyr::separate(name, c("df", "check"), sep = "_", extra = "merge") %>%
tidyr::pivot_wider(names_from = df) %>%
dplyr::mutate(check = paste0("Check NAs for column: '", check, "'")) %>%
dplyr::mutate(
dplyr::across(c(s, t), ~ scales::comma(., 1))
) %>%
dplyr::mutate(dplyr::across(dplyr::everything(), as.character))
lst_dup_ <- check_dup(.source, .target, .check = check_)
tab_ind_ <- tibble::enframe(lst_dup_$ind, value = "ind")
tab_cum_ <- tibble::enframe(lst_dup_$cum, value = "cum")
tab_dup_ <- dplyr::left_join(tab_ind_, tab_cum_, by = "name") %>%
dplyr::mutate(
dplyr::across(c(ind, cum), ~ scales::comma(., 1)),
cum = paste0("(", cum, ")")
) %>%
tidyr::unite(value, ind, cum, sep = " ") %>%
tidyr::separate(name, c("df", "check"), sep = "_", extra = "merge") %>%
tidyr::pivot_wider(names_from = df) %>%
dplyr::mutate(check = paste0("Check (cumulative) duplicates for column(s): '", check, "'")) %>%
dplyr::mutate(dplyr::across(dplyr::everything(), as.character))
ns_ <- as.numeric(nrow(.source))
nt_ <- as.numeric(nrow(.target))
tab_col_ <- tibble::tibble(
check = c(
"Elements in Dataframe/Matrix",
"Estimated memory allocation"
),
s = c(ns_, NA_real_),
t = c(nt_, NA_real_),
Matrix = c(ns_ * nt_, ns_ * nt_ * 8 / 1e6 * 20)
) %>%
dplyr::mutate(dplyr::across(c(s, t, Matrix), ~ scales::comma(., 1))) %>%
dplyr::mutate(Matrix = dplyr::if_else(dplyr::row_number() == 2, paste(Matrix, "MB"), Matrix))
tab_ <- dplyr::bind_rows(
tab_ids_, tibble::tibble(check = "", s = "", t = ""),
tab_nas_, tibble::tibble(check = "", s = "", t = ""),
tab_dup_, tibble::tibble(check = "", s = "", t = ""),
tab_col_
) %>%
dplyr::rename(Check = check, Source = s, Target = t)
if (.html) {
tab_ %>%
dplyr::mutate(
dplyr::across(
.cols = c(Source, Target, Matrix),
.fns = ~ dplyr::case_when(
. == "" | is.na(.) ~ "",
startsWith(Check, "Check NAs") ~ kableExtra::cell_spec(., bold = T),
grepl("\\([1-9]+\\)|\\(.*,.*\\)|FALSE", .) ~ kableExtra::cell_spec(., bold = T, color = "red"),
grepl("\\(0\\)|TRUE", .) ~ kableExtra::cell_spec(., bold = T, color = "green"),
TRUE ~ .
)
)
) %>%
kableExtra::kbl(escape = FALSE, align = "lccc") %>%
kableExtra::kable_paper() %>%
kableExtra::kable_styling(font_size = 14, html_font = "Times New Roman", bootstrap_options = "condensed")
} else {
tab_
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.