# R/filter_pairs_for_deduplication.R

# Defines functions: filter_pairs_for_deduplication_impl,
#   filter_pairs_for_deduplication.data.frame,
#   filter_pairs_for_deduplication.ldat, filter_pairs_for_deduplication
#
# Documented in: filter_pairs_for_deduplication

#' Remove pairs which do not have to be compared for deduplication
#'
#' When deduplicating, a data set is linked to itself. Comparing a record
#' with itself is pointless, and comparing records \code{a} and \code{b} once
#' is enough. Therefore only the pairs for which the index in column \code{y}
#' is strictly larger than the index in column \code{x} are kept.
#'
#' @param pairs a \code{pairs} object, such as generated by
#'   \code{\link{pair_blocking}}
#'
#' @return The result of the method selected for \code{pairs}: the pairs
#'   for which \code{y > x}. An error is raised when \code{pairs} is not a
#'   \code{pairs} object.
#'
#' @export
filter_pairs_for_deduplication <- function(pairs) {
  # Guard clause: reject anything that is not a 'pairs' object before
  # handing over to S3 dispatch.
  is_pairs_object <- methods::is(pairs, "pairs")
  if (!is_pairs_object) {
    stop("pairs should be an object of type 'pairs'.")
  }
  UseMethod("filter_pairs_for_deduplication")
}

#' @export
filter_pairs_for_deduplication.ldat <- function(pairs) {
  # Method for 'ldat' based pairs: the filtering itself is done by the
  # implementation shared with the data.frame method.
  filtered <- filter_pairs_for_deduplication_impl(pairs)
  filtered
}

#' @export
filter_pairs_for_deduplication.data.frame <- function(pairs) {
  # Method for data.frame based pairs: the filtering itself is done by the
  # implementation shared with the ldat method.
  filtered <- filter_pairs_for_deduplication_impl(pairs)
  filtered
}

# Shared implementation of filter_pairs_for_deduplication.
#
# Keeps the rows of `pairs` for which `y > x` and carries the attributes of
# `pairs` over to the result.
#
# Arguments:
#   pairs  object with columns `x` and `y` that supports row subsetting with
#          a logical selection (a data.frame, or another type such as ldat).
#
# Returns:
#   The selected rows of `pairs`, with the attributes of `pairs` restored,
#   except for those that describe the size of the object.
filter_pairs_for_deduplication_impl <- function(pairs) {
  sel <- pairs$y > pairs$x
  res <- if (is.data.frame(pairs)) pairs[sel, , drop = FALSE] else pairs[sel, ]
  # Restore attributes that subsetting may have dropped. Attributes that
  # depend on the number of rows must NOT be copied back: putting the
  # original `row.names` on the filtered result yields a corrupt data.frame
  # whose reported number of rows differs from the length of its columns.
  size_dependent <- c("row.names", "dim", "dimnames")
  for (nm in setdiff(names(attributes(pairs)), size_dependent)) {
    attr(res, nm) <- attr(pairs, nm, exact = TRUE)
  }
  res
}
# djvanderlaan/reclin documentation built on Oct. 4, 2022, 7:03 p.m.