R/join_list.R

# adapted from dplyr

check_suffix <- function(x) {
  if (!is.character(x) || length(x) != 2) {
    stop(sprintf("suffix must be a character vector of length 2, not %s of length %d",
                 type_of(x), length(x)))
  }
  list(x = x[1], y = x[2])
}


join_list <- function(x, y, by, type = "full", suffix = c(".x", ".y")) {

  if (!is_simple_list(x) || !is_simple_list(y))
    stop("x and y need both to be lists")

  if (missing(by))
    stop("by argument needs to be provided")

  xh <- hash_by(x, by)
  yh <- hash_by(y, by)

  x_df <- tibble(id_x = seq_along(xh), hash = xh)
  y_df <- tibble(id_y = seq_along(yh), hash = yh)

  join <- switch(type,
                 left  = dplyr::left_join,
                 right = dplyr::right_join,
                 inner = dplyr::inner_join,
                 semi  = dplyr::semi_join,
                 anti  = dplyr::anti_join,
                 full  = dplyr::full_join,
                 stop(paste(type, "join is not supported.")))

  join_idx <- join(x_df, y_df, by = "hash")

  if (!is.null(suffix)) {

    suffix <- check_suffix(suffix)

    x <- lapply(x, function(.x) {
      if (!is.null(names(.x)))
        names(.x) <- paste0(names(.x), suffix$x)
      .x
    })

    y <- lapply(y, function(.y) {
      if (!is.null(names(.y)))
        names(.y) <- paste0(names(.y), suffix$y)
      .y
    })

  }

  if (type %in% c("semi", "anti")) {
    x[ unique(join_idx$id_x) ]
  } else {
    map2(x[join_idx$id_x], y[join_idx$id_y], function(.x, .y) {
      c(.x, .y)
    })
  }
}


#' Join two lists together
#'
#' @param x,y    lists to join.
#' @param by     a character vector of variables to join by.
#' @param copy   (not used)
#' @param suffix these suffixes will be added to the output to diambiguate
#'               values from \code{x} and \code{y}. Should be a character
#'               vector of length 2. Use \code{suffix = NULL} not to use
#'               suffixes.
#' @param \dots  other parameters passed onto methods (not used).
#'
#' @export

inner_join.list <- function(x, y, by, copy = FALSE, suffix = c(".x", ".y"), ...) {
  join_list(x, y, by = by, type = "inner", suffix = suffix)
}

#' @rdname inner_join.list
#' @export

left_join.list <- function(x, y, by, copy = FALSE, suffix = c(".x", ".y"), ...) {
  join_list(x, y, by = by, type = "left", suffix = suffix)
}

#' @rdname inner_join.list
#' @export

right_join.list <- function(x, y, by, copy = FALSE, suffix = c(".x", ".y"), ...) {
  join_list(x, y, by = by, type = "right", suffix = suffix)
}

#' @rdname inner_join.list
#' @export

full_join.list <- function(x, y, by, copy = FALSE, suffix = c(".x", ".y"), ...) {
  join_list(x, y, by = by, type = "full", suffix = suffix)
}

#' @rdname inner_join.list
#' @export

semi_join.list <- function(x, y, by, copy = FALSE, ...) {
  join_list(x, y, by = by, type = "semi", suffix = c(".x", ".y"))
}

#' @rdname inner_join.list
#' @export

anti_join.list <- function(x, y, by, copy = FALSE, ...) {
  join_list(x, y, by = by, type = "anti", suffix = c(".x", ".y"))
}
twolodzko/lolplyr documentation built on May 14, 2019, 8:22 a.m.