R/link.R

Defines functions link

Documented in link

#' Use the selected pairs to generate a linked data set
#'
#' @param pairs a \code{pairs} object, such as generated by
#'   \code{\link{pair_blocking}}
#' @param selection a logical variable with the same length as \code{pairs} has
#'   rows, or the name of such a variable in \code{pairs}. Pairs are only
#'   selected when \code{selection} is \code{TRUE}. When missing or
#'   \code{NULL}, \code{attr(pairs, "selection")} is used when available;
#'   when that attribute is also absent all pairs are selected.
#' @param all return all records from \code{x} and \code{y}; even those that
#'   don't match.
#' @param all_x return all records from \code{x}.
#' @param all_y return all records from \code{y}.
#' @param x the first data set; when missing \code{attr(pairs, "x")} is used.
#' @param y the second data set; when missing \code{attr(pairs, "y")} is used.
#' @param suffixes a character vector of length 2 specifying the suffixes to be
#'   used for making unique the names of columns in the result.
#' @param keep_from_pairs character vector with names of variables in
#'   \code{pairs} that should be included in the output. The row indices
#'   \code{.x} and \code{.y} are removed from the result unless they are
#'   listed here.
#'
#' @details
#' Uses the selected pairs to link the two data sets to each other. Renames
#' variables that are in both data sets. Variables kept from \code{pairs}
#' whose name also occurs in \code{x} gain the suffix \code{"_pairs"}.
#'
#' @return
#' Returns a \code{data.table} containing records from \code{x} and \code{y} and
#' \code{pairs}. Columns that occur both in \code{x} and \code{y} gain a suffix
#' indicating from which data set they are.
#'
#' @export
#'
link <- function(pairs, selection = NULL, all = FALSE, all_x = all, all_y = all,
    x = attr(pairs, "x"), y = attr(pairs, "y"), suffixes = c(".x", ".y"),
    keep_from_pairs = c(".x", ".y")) {
  # Fail early with a readable message; otherwise a missing data set only
  # surfaces as "argument of length 0" from seq_len(nrow(NULL)).
  if (is.null(x)) {
    stop("No data set 'x': supply 'x' or set attr(pairs, \"x\").",
      call. = FALSE)
  }
  if (is.null(y)) {
    stop("No data set 'y': supply 'y' or set attr(pairs, \"y\").",
      call. = FALSE)
  }
  # The row numbers of x and y are the keys the pairs are merged on.
  x$.x <- seq_len(nrow(x))
  y$.y <- seq_len(nrow(y))
  # Resolve the selection: explicit argument, then the attribute stored on
  # pairs (as documented), and finally "select every pair".
  if (is.null(selection)) {
    selection <- attr(pairs, "selection", exact = TRUE)
  }
  if (is.null(selection)) {
    selection <- TRUE
  } else if (is.character(selection)) {
    stopifnot(length(selection) == 1L, selection %in% names(pairs))
    selection <- pairs[[selection]]
  }
  # .x and .y are always needed for merging, even when they are not kept.
  vars <- unique(c(".x", ".y", keep_from_pairs))
  # Columns kept from pairs that clash with columns of x get "_pairs".
  res <- merge(pairs[selection == TRUE, ..vars], x, all.x = TRUE,
    all.y = all_x, by = ".x", suffixes = c("_pairs", ""))
  res <- merge(res, y, all.x = TRUE, all.y = all_y, by = ".y",
    suffixes = suffixes)
  # Drop the merge keys again unless the caller asked to keep them.
  if (!(".x" %in% keep_from_pairs)) res[, .x := NULL]
  if (!(".y" %in% keep_from_pairs)) res[, .y := NULL]
  res
}

Try the reclin2 package in your browser

Any scripts or data that you put into this service are public.

reclin2 documentation built on May 29, 2024, 4:21 a.m.