Nothing
#' Create a table of comparison patterns
#'
#' @param pairs a \code{pairs} object, such as generated by
#' \code{\link{pair_blocking}}
#' @param on variables from \code{pairs} defining the comparison patterns. When
#' missing \code{names(comparators)} is used.
#' @param comparators a list with comparison functions for each of the
#' columns. When missing or \code{NULL}, the function looks for columns in
#' \code{pairs} with a \code{comparator} attribute.
#' @param complete add patterns that do not occur in the dataset to the result
#' (with \code{n = 0}).
#' @param ... passed on to other methods.
#'
#' @details
#' Since comparison vectors can contain continuous numbers (usually between 0
#' and 1), this could result in a very large number of possible comparison
#' vectors. Therefore, the comparison vectors are passed on to the comparators
#' in order to threshold them. This usually results in values 0 or 1. Missing
#' values are usually codes as 0. However, this all depends on the comparison
#' functions used. For more information see the documentation on the
#' \link[=cmp_identical]{comparison functions}.
#'
#' @return
#' Returns a \code{data.frame} with all unique comparison patterns that exist
#' in \code{pairs}, with a column \code{n} added with the number of times each
#' pattern occurs.
#'
#' @examples
#' data("linkexample1", "linkexample2")
#' pairs <- pair_blocking(linkexample1, linkexample2, "postcode")
#' pairs <- compare_pairs(pairs, c("lastname", "firstname", "address", "sex"))
#' tabulate_patterns(pairs)
#'
#' @import data.table
#' @export
tabulate_patterns <- function(pairs, on, comparators, complete = TRUE, ...) {
UseMethod("tabulate_patterns")
}
#' @rdname tabulate_patterns
#' @export
tabulate_patterns.pairs <- function(pairs, on, comparators, complete = TRUE, ...) {
# Process arguments
if (missing(comparators) || is.null(comparators)) {
# when using compare_vars or compare_pairs, the comparator is stored
# as an attribute in the column; retreive those
comparators <- lapply(pairs, attr, which = "comparator")
# remove elements without comparator
comparators <- comparators[!sapply(comparators, is.null)]
}
if (missing(on) || is.null(on)) on <- names(comparators)
# Tabulate
for (var in on) {
cmp_fun <- comparators[[var]]
if (!is.null(cmp_fun))
pairs[[var]] <- cmp_fun(pairs[[var]])
# we don't use the := assignment from data.table because we don't
# want to modify the original dataset
}
tab <- pairs[, list(n = .N), by = on]
# Remove the pairs class; tab is no longer a set of pairs
class(tab) <- setdiff(class(tab), "pairs")
# Add patterns not present in dataset
if (complete) {
possible_patterns <- lapply(tab[, ..on], function(x) {
u <- unique(x)
if (is.logical(u)) u <- unique(c(u, c(TRUE, FALSE)))
if (is.factor(x)) union(x, levels(x)) else u
})
possible_patterns <- do.call(CJ, possible_patterns)
tab <- tab[possible_patterns, , on = on]
tab$n[is.na(tab$n)] <- 0
}
structure(tab, on = on)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.