R/score_problink.R

Defines functions score_problink_impl score_problink.ldat score_problink.data.frame score_problink

Documented in score_problink

#' Score comparison patterns of pairs using the probabilistic linkage framework
#' 
#' @param pairs a \code{pairs} object, such as generated by 
#'   \code{\link{pair_blocking}}
#' @param model an object of type \code{\link{problink_em}} containing the 
#'   estimated m- and u-probabilities. When \code{NULL} or missing a model is
#'   estimated. 
#' @param var the name of the new variable that will be created (also see
#'   details). 
#' @param add add the estimated score to the \code{pairs} object and return the
#'   pairs object. Otherwise, just the scores are returned.
#' @param ... passed on to \code{\link{predict.problink_em}}.
#' 
#' @return 
#' When \code{add = TRUE}, the pairs object is returned with the scores added 
#' to it. The new column will have the name \code{var} unless additional 
#' arguments are passed on to \code{\link{predict.problink_em}} using the 
#' \code{...} argument that causes the calculation of multiple scores (such
#' are \code{type = "all"}). In that case the text given by \code{var} is 
#' prepended to the names of the variables returned by 
#' \code{\link{predict.problink_em}} (with a separator '\code{_}'). 
#' 
#' When \code{add = FALSE} the scores are returned as is. 
#' 
#' 
#' @examples 
#' data("linkexample1", "linkexample2")
#' pairs <- pair_blocking(linkexample1, linkexample2, "postcode")
#' pairs <- compare_pairs(pairs, c("lastname", "firstname", "address", "sex"))
#' pairs <- score_problink(pairs)
#' 
#' # is the same as
#' model <- problink_em(pairs)
#' pairs <- score_problink(pairs, model = model)
#' 
#' \dontshow{gc()}
#'
#' @export
score_problink <- function(pairs, model = NULL, var = "weight", 
    add = TRUE, ...) {
  if (!methods::is(pairs, "pairs")) stop("pairs should be an object of type 'pairs'.")
  UseMethod("score_problink")
}

#' @export
score_problink.data.frame <- function(pairs, model = NULL, var = "weight", 
    add = TRUE, ...) {
  if (missing(var)) {
    score_problink_impl(pairs, model, NULL, add, ...)
  } else { 
    score_problink_impl(pairs, model, var, add, ...)
  }
}

#' @export
score_problink.ldat <- function(pairs, model = NULL, var = "weight", 
    add = TRUE, ...) {
  if (missing(var)) {
    score_problink_impl(pairs, model, NULL, add, ...)
  } else { 
    score_problink_impl(pairs, model, var, add, ...)
  }
}

score_problink_impl <- function(pairs, model = NULL, var = "weight", add, ...) {
  if (missing(model) || is.null(model)) model <- problink_em(pairs)
  p <- stats::predict(model, newdata = pairs, ...)
  if (!add) return(p)
  if (!is.data.frame(p) && !is_ldat(p)) {
    if (is.null(var)) var <- "weight"
    pairs[[var]] <- p
    attr(pairs, "score") <- var
  } else {
    prepend <- if (!missing(var) && !is.null(var)) paste0(var, "_") else ""
    names(p) <- paste0(prepend, names(p))
    for (col in names(p)) pairs[[col]] <- p[[col]]
    if (paste0(prepend, "weight") %in% names(p)) {
      attr(pairs, "score") <- paste0(prepend, "weight")
    } else if (paste0(prepend, "mpost") %in% names(p)) {
      attr(pairs, "score") <- paste0(prepend, "mpost")
    }
  }
  pairs
}
djvanderlaan/reclin documentation built on Oct. 4, 2022, 7:03 p.m.