Nothing
#' Compare all pairs of records
#'
#' @param pairs a \code{pairs} object, such as generated by
#' \code{\link{pair_blocking}}
#' @param by variables from \code{x} and \code{y} on which to compare the
#' records.
#' @param comparators a names list of \link[=identical]{comparator functions},
#' for the named variables the given functions will be used to compare the
#' records. For the remaining variables the \code{default_comparator} will
#' be used.
#' @param x the first \code{data.frame}, when missing \code{attr(pairs, "x")}
#' is used.
#' @param y the second \code{data.frame}, when missing \code{attr(pairs, "y")}
#' is used.
#' @param default_comparator the default \link[=identical]{comparison function}.
#' @param overwrite overwrite exiting variables in \code{pairs}
#'
#' @return
#' Returns the \code{pairs} object with a column added for each variable in
#' \code{by}. The value is the column is given by the return value of the
#' corresponding \link[=identical]{comparison function}.
#'
#' @examples
#' data("linkexample1", "linkexample2")
#' pairs <- pair_blocking(linkexample1, linkexample2, "postcode")
#' pairs <- compare_pairs(pairs, c("lastname", "firstname", "address", "sex"))
#'
#' \dontshow{gc()}
#'
#' @import ldat
#' @import lvec
#' @export
compare_pairs <- function(pairs, by, comparators = list(default_comparator),
x, y, default_comparator = identical(), overwrite = FALSE) {
# Process and preparare input
if (missing(x)) x <- attr(pairs, "x")
if (is.null(x)) stop("Missing x.")
if (missing(y)) y <- attr(pairs, "y")
if (is.null(y)) stop("Missing y.")
if (missing(by) && !missing(by)) by <- names(comparators)
if (missing(by) || is.null(by)) stop("by is missing.")
if (!overwrite && any(by %in% names(pairs)))
stop("Variable in by already present in pairs.")
if (!all(by %in% names(x)))
stop("Not all variables in by are present in x.")
if (!all(by %in% names(y)))
stop("Not all variables in by are present in y.")
comparators <- extend_to(by, comparators, default = default_comparator)
# Compare
chunks <- chunk(pairs$x)
for (col in by) {
res <- if (is_ldat(pairs)) lvec(0, "numeric") else numeric()
comparator <- comparators[[col]]
for (c in chunks) {
x_i <- slice_range(pairs$x, range = c, as_r = TRUE)
x_chunk <- x[[col]][x_i]
y_i <- slice_range(pairs$y, range = c, as_r = TRUE)
y_chunk <- y[[col]][y_i]
comparison <- comparator(x_chunk, y_chunk)
if (is.null(res) || length(res) == 0) {
res <- if (is_ldat(pairs)) as_lvec(comparison) else comparison
length(res) <- length(pairs$x)
} else {
lset(res, range = c, values = comparison)
}
}
pairs[[col]] <- res
}
attr(pairs, "by") <- by
attr(pairs, "comparators") <- comparators
class(pairs) <- unique(c("compare", class(pairs)))
pairs
}
extend_to <- function(by, what = list(default), default) {
if (!is.list(what)) stop("what should be a list.")
has_names <- !is.null(names(what))
if (has_names) {
res <- vector("list", length(by))
names(res) <- by
for (el in names(res)) {
res[[el]] <- if (is.null(what[[el]])) default else what[[el]]
}
} else {
res <- rep(what, length.out = length(by))
names(res) <- by
}
res
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.