#' Create a template table for tidying a text column
#'
#' Groups the values of a text column into pools of similar strings and lays
#' the pools out as a table with one row per pool, ready to be reviewed and
#' edited before tidying the column.
#'
#' @inheritParams fuzzy_pool
#' @param .data A `data.frame` or `tbl`
#' @param stringvar The name of the column to tidy (quoted or not)
#' @param .listpool An optional list provided by [fuzzy_pool()]. When
#'   supplied, its `messy` element is used as is and [fuzzy_pool()] is not
#'   called.
#'
#' @return A table of class `c("tbl_df", "tbl", "data.frame")` with one row
#'   per pool. The columns `syn_1`, `syn_2`, ... hold the members of each pool
#'   (there are as many `syn_` columns as the largest pool has members; smaller
#'   pools are padded with `NA`). The first column, `selected`, is a copy of
#'   `syn_1`. When there are no pools the table is empty.
#'
#' @seealso [stringdist::stringdist()]
#'
#' @export
#'
#' @examples
#' test_df <- data.frame(fruit = c("banana", "blueberry", "limon", "pinapple",
#' "apple", "aple", "Apple", "bonana"))
#' fuzzy_template <- fuzzy_match(test_df, fruit)
#' fuzzy_template
#'
fuzzy_match <- function(.data, stringvar, threshold = options("fuzzy_threshold")[[1]], .listpool = NULL, ...) {
  ## retrieve the column as a vector:
  string_vec <- get_col(.data, {{ stringvar }})
  ## compute the fuzzy matches, unless the pools were supplied by the caller:
  if (is.null(.listpool)) {
    list_pools <- fuzzy_pool(string_vec, threshold, ...)
  } else {
    list_pools <- .listpool
  }
  pools <- list_pools$messy
  ## number of synonym columns needed = size of the largest pool
  ## (0 when there is no pool at all, which also avoids max() on empty input):
  pool_sizes <- lengths(pools)
  max_possible <- if (length(pool_sizes) > 0) max(pool_sizes) else 0L
  ## bring every pool to the same length; indexing past the end pads with NA:
  index_to_do <- seq_len(max_possible)
  padded_pools <- lapply(pools, function(x) x[index_to_do])
  ## stack the pools row-wise and turn the result into a clean data frame:
  df_matches <- as.data.frame(do.call("rbind", padded_pools), stringsAsFactors = FALSE)
  if (max_possible > 0) {
    colnames(df_matches) <- paste0("syn_", index_to_do)
  }
  rownames(df_matches) <- NULL
  ## the first synonym of each pool is proposed as the default selection:
  out <- cbind(selected = df_matches$syn_1, df_matches, stringsAsFactors = FALSE)
  ## turn into a tibble by setting the class directly:
  class(out) <- c("tbl_df", "tbl", "data.frame")
  ## return:
  out
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.