Nothing
#' Score pairs based on a number of comparison vectors
#'
#' @param pairs a \code{pairs} object, such as generated by
#' \code{\link{pair_blocking}}
#' @param variable the name of the new variable to create in pairs. This will be a
#' logical variable with a value of \code{TRUE} for the selected pairs.
#' @param on character vector of variables on which the score should be based.
#' @param inplace logical indicating whether \code{pairs} should be modified in place. When
#' pairs is large this can be more efficient.
#' @param w1 a vector or list with weights for agreement for each of the
#' variables. It can either be a numeric vector of length 1 in which case the
#' same weight is used for all variables; A numeric vector of length equal to
#' the length of \code{on} in which case the weights correspond one-to-one to
#' the variables in \code{on}; A named numeric vector where the names
#' correspond to those in \code{on}, missing values are assigned a value of 1;
#' or a named list with numeric values. See details for more information.
#' @param w0 a vector or list with weights for non-agreement for each of the
#' variables. See details for more information. For the format see \code{w1}.
#' @param wna a vector or list with weights for agreement for each of the
#' variables. See details for more information. For the format see \code{w1}.
#' @param new_name name of new object to assign the pairs to on the cluster
#' nodes.
#' @param ... ignored
#'
#' @details
#' The individual contribution of a variable \code{x} to the total score is
#' given by \code{x * w1 + (1-x) * w0} in case of non-\code{NA} values and
#' \code{wna} in case of \code{NA}. This assumes that the values 1 corresponds
#' to complete agreement and the value 0 to complete non-agreement. In case of
#' complete agreement a variable contributes \code{w1} to the total score and in
#' case of complete non-agreement it contributes \code{w0} to the total score.
#'
#' @return
#' Returns the \code{data.table} \code{pairs} with the column \code{variable} added in
#' case of \code{score_simple.pairs}.
#'
#' In case of \code{score_simple.cluster_pairs}, \code{score_simple.pairs} is called on
#' each cluster node and the resulting pairs are assigned to \code{new_name} in
#' the environment \code{reclin_env}. When \code{new_name} is not given (or
#' equal to NULL) the original pairs on the nodes are overwritten.
#'
#' @examples
#' data("linkexample1", "linkexample2")
#' pairs <- pair_blocking(linkexample1, linkexample2, "postcode")
#' compare_pairs(pairs, on = c("firstname", "lastname", "sex"), inplace = TRUE)
#'
#' score_simple(pairs, "score", on = c("firstname", "lastname", "sex"))
#'
#' # Change the default weights
#' score_simple(pairs, "score", on = c("firstname", "lastname", "sex"),
#' w1 = 2, w0 = -1, wna = NA)
#'
#' # Use a named vector; omited elements from w1 get a weight of 1; those from
#' # w0 and wna a weight of 0.
#' score_simple(pairs, "score", on = c("firstname", "lastname", "sex"),
#' w1 = c("firstname" = 2, "lastname" = 3),
#' w0 = c("firstname" = -1, "lastname" = -0.5))
#'
#' # Use a named list; omited elements from w1 get a weight of 1; those from
#' # w0 and wna a weight of 0.
#' score_simple(pairs, "score", on = c("firstname", "lastname", "sex"),
#' w1 = list("firstname" = 2, "lastname" = 3),
#' w0 = list("firstname" = -1, "lastname" = -0.5))
#'
#' @rdname score_simple
#' @export
score_simple <- function(pairs, variable, on, w1 = 1, w0 = 0, wna = 0, ...) {
UseMethod("score_simple")
}
#' @rdname score_simple
#' @export
score_simple.pairs <- function(pairs, variable, on, w1 = 1.0, w0 = 0.0, wna = 0.0,
inplace = FALSE, ...) {
if (!all(on %in% names(pairs)))
stop("Not all variables in on are present in pairs")
# Process w1
w1_default <- if (is.numeric(w1) && length(w1) == 1) w1 else 1.0
if (!is.list(w1)) w1 <- as.list(w1)
w1 <- extend_to(on, w1, w1_default)
# Process w0
w0_default <- if (is.numeric(w0) && length(w0) == 1) w0 else 0.0
if (!is.list(w0)) w0 <- as.list(w0)
w0 <- extend_to(on, w0, w0_default)
# Process wna
wna_default <- if (is.numeric(wna) && length(wna) == 1) wna else 0.0
if (!is.list(wna)) wna <- as.list(wna)
wna <- extend_to(on, wna, wna_default)
# Calculate score
score <- numeric(nrow(pairs))
for (col in on) {
x <- as.numeric(pairs[[col]])
s <- x*w1[[col]] + (1-x) * w0[[col]]
s[is.na(s)] <- wna[[col]]
score <- score + s
}
if (inplace) {
pairs[, `:=`((variable), score)]
invisible(pairs)
} else {
pairs[[variable]] <- score
pairs
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.