R/select_threshold.R

Defines functions select_threshold.data.frame select_threshold.ldat select_threshold

Documented in select_threshold

#' Select pairs for linkage using a threshold
#'
#' @param pairs a \code{pairs} object, such as generated by 
#'   \code{\link{pair_blocking}}
#' @param threshold the threshold to apply. Pairs with a score above the 
#'   threshold are selected. 
#' @param weight name of the score/weight variable of the pairs. When not given
#'   and \code{attr(pairs, "score")} is defined, that is used. 
#' @param var the name of the new variable to create in pairs. This will be a
#'   logical variable with a value of \code{TRUE} for the selected pairs.
#'
#' @return
#' Returns the \code{pairs} with the variable given by \code{var} added. This
#' is a logical variable indicating which pairs are selected as matches.
#'
#' @examples 
#' data("linkexample1", "linkexample2")
#' pairs <- pair_blocking(linkexample1, linkexample2, "postcode")
#' pairs <- compare_pairs(pairs, c("lastname", "firstname", "address", "sex"))
#' pairs <- score_simsum(pairs)
#' # Select pairs with a simsum > 5 as matches
#' pairs <- select_threshold(pairs, 5)
#' 
#' \dontshow{gc()}
#'
#' @export
select_threshold <- function(pairs, threshold, weight, var = "select") {
  # Reject anything that is not a 'pairs' object before S3 dispatch happens,
  # so callers get one uniform error regardless of the underlying storage.
  if (!methods::is(pairs, "pairs")) {
    stop("pairs should be an object of type 'pairs'.")
  }
  # Hand over to the method matching the class of `pairs`.
  UseMethod("select_threshold")
}

#' @export
select_threshold.ldat <- function(pairs, threshold, weight, var = "select") {
  # Method for pairs of class 'ldat'. Adds a logical variable named `var` to
  # `pairs` that is TRUE where the score is strictly greater than `threshold`,
  # records `var` in the "selection" attribute and returns the modified pairs.
  #
  # `weight` may be the name of a variable in `pairs` or the scores
  # themselves; when it is missing or NULL, attr(pairs, "score") is used.

  # Fall back on the score registered on the pairs object.
  if (missing(weight) || is.null(weight)) {
    weight <- attr(pairs, "score")
  }
  if (is.null(weight)) {
    stop("Missing weight")
  }
  # A character weight is taken to be a variable name and is looked up in
  # pairs; any other value is compared against the threshold directly.
  if (is.character(weight)) {
    weight_name <- weight
    weight <- pairs[[weight_name]]
    # A lookup that yields NULL would make the comparison below produce a
    # zero-length selection instead of reporting the wrong name.
    if (is.null(weight)) {
      stop("Variable '", weight_name, "' not found in pairs.")
    }
  }
  pairs[[var]] <- weight > threshold
  attr(pairs, "selection") <- var
  pairs
}

#' @export
select_threshold.data.frame <- function(pairs, threshold, weight,
    var = "select") {
  # Method for pairs stored as a data.frame. Adds a logical column named
  # `var` that is TRUE where the score is strictly greater than `threshold`,
  # records `var` in the "selection" attribute and returns the modified pairs.
  #
  # `weight` may be the name of a column in `pairs` or the scores themselves;
  # when it is missing or NULL, attr(pairs, "score") is used.

  # Fall back on the score registered on the pairs object.
  if (missing(weight) || is.null(weight)) {
    weight <- attr(pairs, "score")
  }
  if (is.null(weight)) {
    stop("Missing weight")
  }
  # A character weight is taken to be a column name and is looked up in
  # pairs; any other value is compared against the threshold directly.
  if (is.character(weight)) {
    weight_name <- weight
    weight <- pairs[[weight_name]]
    # An unknown column yields NULL; without this check the assignment below
    # fails with an unrelated "replacement has 0 rows" error, or silently adds
    # an empty column when pairs has no rows.
    if (is.null(weight)) {
      stop("Variable '", weight_name, "' not found in pairs.")
    }
  }
  pairs[[var]] <- weight > threshold
  attr(pairs, "selection") <- var
  pairs
}

Try the reclin package in your browser

Any scripts or data that you put into this service are public.

reclin documentation built on Nov. 23, 2021, 9:09 a.m.