# Generated by fusen: do not edit by hand
#' Split Source Table into Character-Length Blocks
#'
#' Partitions the prepared source table into a named list of blocks, based on
#' the number of characters in the first matching column
#' (`.cols_match[1]`). Source lengths are capped at the longest length found
#' in the target table, rows are binned by `floor(length / .char_block[1])`,
#' and every bin is labelled with its smallest and largest length (each
#' left-padded with zeros to three characters, e.g. `"000-024"`).
#'
#' @param .source
#' The Source Dataframe.\cr
#' (Must contain a unique column id and the columns you want to match on)
#' @param .target
#' The Target Dataframe.\cr
#' (Must contain a unique column id and the columns you want to match on)
#' @param .cols_match
#' A character vector of columns to perform fuzzy matching.\cr
#' Only the first element is used here to measure character lengths.
#' @param .char_block
#' Character Block Size. Used to partition data.\cr
#' - First element chunks the source data in ngram-blocks.\cr
#' - Second element allows for characters in target below/above block size
#'   (not read inside this function; presumably used by the caller).
#'
#' @return
#' A list with two elements:
#' - `ls`: the prepared source table split into a named list of data frames,
#'   one per block (helper columns `n__` and `b__` removed).
#' - `tt`: the prepared target table with an added column `n__` holding the
#'   character count of the first matching column.
#'
#' @noRd
#' @examples
#' tab_source <- table_source[1:100, ]
#' tab_target <- table_target[1:999, ]
#' cols_match <- c("name", "iso3", "city", "address")
#' char_block <- c(25, 5)
#'
#' split_block(
#'   .source = tab_source,
#'   .target = tab_target,
#'   .cols_match = cols_match,
#'   .char_block = char_block
#' )
split_block <- function(.source, .target, .cols_match, .char_block) {
  # Local bindings for the data-masked helper columns (avoids R CMD check
  # notes about undefined global variables).
  n__ <- b__ <- NULL

  check_id(.source, .target)

  tbl_src__ <- prep_tables(.source, .cols_match)
  tbl_tgt__ <- prep_tables(.target, .cols_match)

  # Character lengths are always measured on the first matching column.
  len_col__ <- dplyr::sym(.cols_match[1])

  # Target: attach the character count and remember the longest entry.
  tbl_tgt__ <- dplyr::mutate(tbl_tgt__, n__ = nchar(!!len_col__))
  len_cap__ <- max(tbl_tgt__$n__)

  # Source: attach the character count, capped at the target's maximum so
  # that over-long source entries land in the last block.
  tbl_src__ <- dplyr::mutate(tbl_src__, n__ = nchar(!!len_col__))
  tbl_src__ <- dplyr::mutate(
    tbl_src__,
    n__ = dplyr::if_else(n__ > len_cap__, len_cap__, n__)
  )

  # Sort by length first so first()/last() within a block are its min/max.
  tbl_src__ <- dplyr::arrange(tbl_src__, n__)

  # Numeric block index, then replace it per block by a "min-max" label.
  tbl_src__ <- dplyr::mutate(tbl_src__, b__ = floor(n__ / .char_block[1]))
  tbl_src__ <- dplyr::group_by(tbl_src__, b__)
  tbl_src__ <- dplyr::mutate(
    tbl_src__,
    b__ = paste0(
      stringi::stri_pad_left(dplyr::first(n__), 3, 0),
      "-",
      stringi::stri_pad_left(dplyr::last(n__), 3, 0)
    )
  )
  tbl_src__ <- dplyr::ungroup(tbl_src__)

  # Drop the helper columns from the payload; the labels name the list.
  blocks__ <- split(dplyr::select(tbl_src__, -c(n__, b__)), tbl_src__$b__)

  list(
    ls = blocks__,
    tt = tbl_tgt__
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.