# Generated by fusen: do not edit by hand
#' Exact-Join Source and Target Tables and Score the Remaining Columns
#'
#' Inner-joins the prepared source and target tables on \code{.cols_join} and
#' returns one row per matched ID pair. The joined columns receive a
#' similarity of 1; every column of \code{.cols_match} that is not part of
#' \code{.cols_join} is scored with \code{stringdist::stringsim()}.
#'
#' @param .source
#' The Source Dataframe.\cr
#' (Must contain a unique column id and the columns you want to match on)
#' @param .target
#' The Target Dataframe.\cr
#' (Must contain a unique column id and the columns you want to match on)
#' @param .cols_match
#' A character vector of columns to perform fuzzy matching.
#' @param .cols_join
#' Columns to perform an exact match on, before fuzzy-matching.\cr
#' (Matched IDs will be excluded from fuzzy-match)
#' @param .method
#' One of "osa", "lv", "dl", "hamming", "lcs", "qgram", "cosine", "jaccard", "jw", "soundex".\cr
#' See: stringdist-metrics {stringdist}
#' @return A Dataframe with the columns \code{id_s} and \code{id_t} (the IDs
#' of the joined source and target rows), followed by one \code{sim_<column>}
#' column for every column in \code{.cols_join} (always 1) and one for every
#' remaining column in \code{.cols_match} (string similarity).
#'
#' @export
#' @examples
#' join_data(
#'   .source = table_source,
#'   .target = table_target,
#'   .cols_match = c("name", "iso3", "city", "address"),
#'   .cols_join = c("name", "iso3"),
#'   .method = "osa"
#' )
join_data <- function(.source, .target, .cols_match, .cols_join,
                      .method = "osa") {
  # id_s / id_t are only used as bare column names inside select(); binding
  # them to NULL presumably silences "no visible binding" check notes.
  id_s <- id_t <- NULL

  check_id(.source, .target)
  source_ <- prep_tables(.source, .cols_match)
  target_ <- prep_tables(.target, .cols_match)

  # Keep only the ID and the exact-join keys; drop = FALSE guards against a
  # base data.frame collapsing to a vector.
  keys_s <- source_[, c("id", .cols_join), drop = FALSE]
  keys_t <- target_[, c("id", .cols_join), drop = FALSE]

  # Columns that are matched fuzzily because they are not exact-join keys.
  non_ <- .cols_match[!.cols_match %in% .cols_join]

  # One row per exactly matching ID pair. The join keys are overwritten with
  # 1 (they matched exactly) and renamed to sim_<column>.
  tab_ <- dplyr::inner_join(
    keys_s, keys_t,
    by = .cols_join, suffix = c("_s", "_t")
  ) %>%
    dplyr::mutate(
      dplyr::across(!dplyr::matches("^id_s$|^id_t$"), ~1)
    ) %>%
    dplyr::select(id_s, id_t, dplyr::everything()) %>%
    `colnames<-`(c("id_s", "id_t", paste0("sim_", .cols_join)))

  # Nothing left to score: return early. This also avoids `.source[, "id"]`
  # dropping to a vector on a base data.frame and breaking left_join().
  if (length(non_) == 0L) {
    return(tab_)
  }

  # Attach the remaining columns to each ID pair; both lookups keep the row
  # order of tab_, so the two tables line up row by row.
  # NOTE(review): values come from the raw .source/.target, not from the
  # prepared source_/target_ used for the exact join -- confirm intended.
  vals_s <- dplyr::left_join(
    tab_, .source[, c("id", non_)],
    by = c("id_s" = "id")
  )
  vals_t <- dplyr::left_join(
    tab_, .target[, c("id", non_)],
    by = c("id_t" = "id")
  )

  for (col_ in non_) {
    tab_[[paste0("sim_", col_)]] <- stringdist::stringsim(
      vals_s[[col_]], vals_t[[col_]],
      method = .method
    )
  }

  tab_
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.