R/link.R

Defines functions link_impl link.ldat link.data.frame link

Documented in link

#' Use the selected pairs to generate a linked data set
#' 
#' @param pairs a \code{pairs} object, such as generated by 
#'   \code{\link{pair_blocking}}
#' @param selection a logical variable with the same length as \code{pairs} has
#'   rows, or the name of such a variable in \code{pairs}. Pairs are only 
#'   selected when \code{select} is \code{TRUE}. When missing 
#'   \code{attr(pairs, "selection")} is used when available. 
#' @param x the first data set; when missing \code{attr(pairs, "x")} is used.
#' @param y the second data set; when missing \code{attr(pairs, "y")} is used.
#' @param all_x return all records from \code{x}.
#' @param all_y return all records from \code{y}. 
#' @param ... ignored.
#' 
#' @details 
#' Uses the selected pairs to link the two data sets to each other. Renames 
#' variables that are in both data sets.  
#'   
#' @export
link <- function(pairs, selection = NULL, x = NULL, y = NULL, all_x = TRUE, 
    all_y = TRUE, ...) {
  if (!methods::is(pairs, "pairs")) stop("pairs should be an object of type 'pairs'.")
  UseMethod("link")
}

#' @export
link.data.frame <- function(pairs, selection = NULL, x = NULL, y = NULL,
    all_x = TRUE, all_y = TRUE, ...) {
  link_impl(pairs, selection, x, y, all_x, all_y)
}

#' @export
link.ldat <- function(pairs, selection = NULL, x = NULL, y = NULL, all_x = TRUE, 
    all_y = TRUE, ...) {
  link_impl(pairs, selection, x, y, all_x, all_y)
}


link_impl <- function(pairs, selection = NULL, x = NULL, y = NULL, 
    all_x = TRUE, all_y = TRUE) {
  # Process x and y
  if (missing(x) || is.null(x)) x <- attr(pairs, "x")
  if (is.null(x)) stop("Missing x")
  if (missing(y) || is.null(y)) y <- attr(pairs, "y")
  if (is.null(y)) stop("Missing y")
  x$.x <- seq_len(nrow(x))
  y$.y <- seq_len(nrow(y))
  # Process selection
  if (missing(selection) || is.null(selection)) 
    selection <- attr(pairs, "selection")
  # (repeat previous if.. in case attribute was not set)
  if (missing(selection) || is.null(selection)) {
    selection <- TRUE
  } else if (is.character(selection)) {
    if (length(selection) != 1) 
      stop("When selection is a character vector; it needs to be length 1.")
    selection <- pairs[[selection]]
  } 
  # Link
  res <- data.frame('.x' = as_rvec(pairs$x[selection]),
    '.y' = as_rvec(pairs$y[selection]))
  res <- if (all_x) dplyr::full_join(res, x, by = ".x") else
    dplyr::left_join(res, x, by = ".x")
  res <- if (all_y) dplyr::full_join(res, y, by = ".y") else
    dplyr::left_join(res, y, by = ".y")
  res$.x <- NULL
  res$.y <- NULL
  res
  # The following gnerates note in R CMD check
  # dplyr::select(res, -`.x`, -`.y`)
}
djvanderlaan/reclin documentation built on Oct. 4, 2022, 7:03 p.m.