Nothing
#' Select matching pairs with a score above or equal to a threshold
#'
#' @param pairs a \code{pairs} object, such as generated by
#' \code{\link{pair_blocking}}
#' @param variable the name of the new variable to create in pairs. This will be a
#' logical variable with a value of \code{TRUE} for the selected pairs.
#' @param score name of the score/weight variable of the pairs. When not given
#' and \code{attr(pairs, "score")} is defined, that is used.
#' @param threshold the threshold to apply. Pairs with a score above or equal to the
#' threshold are selected.
#' @param inplace logical indicating whether \code{pairs} should be modified in place. When
#' pairs is large this can be more efficient.
#' @param new_name name of new object to assign the pairs to on the cluster
#' nodes.
#' @param ... ignored
#'
#' @return
#' Returns the \code{pairs} with the variable given by \code{variable} added. This
#' is a logical variable indicating which pairs are selected a matches.
#'
#' @examples
#' data("linkexample1", "linkexample2")
#' pairs <- pair_blocking(linkexample1, linkexample2, "postcode")
#' pairs <- compare_pairs(pairs, c("lastname", "firstname", "address", "sex"))
#' model <- problink_em(~ lastname + firstname + address + sex, data = pairs)
#' pairs <- predict(model, pairs, type = "mpost", add = TRUE, binary = TRUE)
#' # Select pairs with a mpost > 0.5
#' select_threshold(pairs, "selected", "mpost", 0.5, inplace = TRUE)
#'
#' # Example using cluster;
#' # In general the syntax is exactly the same except for the first call to
#' # to cluster_pair. Note the in general `inplace = TRUE` is implied when
#' # working with a cluster; therefore the assignment back to pairs can be
#' # omitted (also not a problem if it is not).
#' library(parallel)
#' data("linkexample1", "linkexample2")
#' cl <- makeCluster(2)
#' \dontshow{clusterEvalQ(cl, data.table::setDTthreads(1))}
#' pairs <- cluster_pair(cl, linkexample1, linkexample2)
#' compare_pairs(pairs, c("lastname", "firstname", "address", "sex"))
#' model <- problink_em(~ lastname + firstname + address + sex, data = pairs)
#' predict(model, pairs, type = "mpost", add = TRUE, binary = TRUE)
#' # Select pairs with a mpost > 0.5
#' # Unlike the regular pairs: inplace = TRUE is implied here
#' select_threshold(pairs, "selected", "mpost", 0.5)
#' stopCluster(cl)
#'
#' @rdname select_threshold
#' @export
select_threshold <- function(pairs, variable, score, threshold, ...) {
UseMethod("select_threshold")
}
#' @rdname select_threshold
#' @export
select_threshold.pairs <- function(pairs, variable, score, threshold,
inplace = FALSE, ...) {
if (is.character(score)) {
stopifnot(score %in% names(pairs))
score <- pairs[[score]]
}
if (inplace) {
pairs[, (variable) := ..score >= ..threshold]
} else {
pairs[[variable]] <- score >= threshold
}
if (inplace) invisible(pairs) else pairs
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.