# Generated by fusen: do not edit by hand
#' Check Duplicates
#'
#' Counts duplicated values in the columns of a source and a target
#' dataframe. The column named `id` is excluded from the check in both
#' dataframes. Two kinds of counts are returned:
#'
#' * individual: number of duplicated values in each column on its own;
#' * cumulative: number of duplicated rows when the first `k` columns are
#'   considered together, for `k = 1, 2, ...`.
#'
#' Target columns are ordered like the source columns; target columns that
#' do not exist in the source come last.
#'
#' @param .source
#' The Source Dataframe.
#' Must contain a unique column id and the columns you want to match on
#' @param .target
#' The Target Dataframe.
#' Must contain a unique column id and the columns you want to match on
#' @param .check
#' Check only column that are also in source (`"source"`, the default),
#' or all columns (`"all"`) of the target
#' @return A list with two named integer vectors, `ind` (individual
#' duplicate counts) and `cum` (cumulative duplicate counts). Elements are
#' named after the checked columns, prefixed with `s_` for source columns
#' and `t_` for target columns.
#'
#' @noRd
#' @examples
#' check_dup(table_source, table_target)
check_dup <- function(.source, .target, .check = c("source", "all")) {
  check_ <- match.arg(.check, c("source", "all"))

  s_ <- tibble::as_tibble(.source)
  t_ <- tibble::as_tibble(.target)

  # Named character vectors: values are the column names, names carry the
  # "s_" / "t_" prefix that ends up in the output.
  cols_s_ <- stats::setNames(colnames(s_), paste0("s_", colnames(s_)))
  cols_t_ <- stats::setNames(colnames(t_), paste0("t_", colnames(t_)))

  # The id column is an identifier, not a matching column.
  cols_s_ <- cols_s_[cols_s_ != "id"]
  cols_t_ <- cols_t_[cols_t_ != "id"]

  # Align the target column order with the source; order() puts the NA
  # produced by match() for target-only columns last.
  cols_t_ <- cols_t_[order(match(cols_t_, cols_s_))]
  if (check_ == "source") {
    cols_t_ <- cols_t_[cols_t_ %in% cols_s_]
  }

  # Number of duplicated values in each column taken on its own.
  count_ind_ <- function(.data, .cols) {
    purrr::map_int(.cols, function(.col) sum(duplicated(.data[[.col]])))
  }

  # Number of duplicated rows over the first k columns, for every k.
  # duplicated() compares rows by value. Pasting the columns into a single
  # string (as apply(..., paste) would) is avoided on purpose: it formats
  # numbers lossily and cannot tell "a-b" + "c" from "a" + "b-c".
  count_cum_ <- function(.data, .cols) {
    purrr::map_int(
      .x = stats::setNames(seq_along(.cols), names(.cols)),
      .f = function(.k) {
        sum(duplicated(.data[unname(.cols[seq_len(.k)])]))
      }
    )
  }

  list(
    ind = c(count_ind_(s_, cols_s_), count_ind_(t_, cols_t_)),
    cum = c(count_cum_(s_, cols_s_), count_cum_(t_, cols_t_))
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.