join_cols <- function(x_names,
y_names,
by,
...,
suffix = c(".x", ".y"),
keep = NULL,
error_call = caller_env()) {
check_dots_empty0(...)
if (is_false(keep) && any(by$condition != "==")) {
abort(
"Can't set `keep = FALSE` when using an inequality, rolling, or overlap join.",
call = error_call
)
}
check_duplicate_vars(x_names, "x", error_call = error_call)
check_duplicate_vars(y_names, "y", error_call = error_call)
check_join_vars(by$x, x_names, by$condition, "x", error_call = error_call)
check_join_vars(by$y, y_names, by$condition, "y", error_call = error_call)
suffix <- standardise_join_suffix(suffix, error_call = error_call)
x_by <- set_names(match(by$x, x_names), by$x)
y_by <- set_names(match(by$y, y_names), by$y)
x_loc <- seq_along(x_names)
names(x_loc) <- x_names
if (is_null(keep)) {
# In x_out, equi key variables need to keep the same name, and non-equi
# key variables and aux variables need suffixes for duplicates that appear
# in y_out. This is equivalent to `keep = TRUE` for the non-equi keys and
# `keep = FALSE` for the equi keys.
equi <- by$condition == "=="
y_aux <- setdiff(y_names, c(by$x[equi], by$y[equi]))
x_ignore <- by$x[equi]
x_check <- !x_names %in% x_ignore
names(x_loc)[x_check] <- add_suffixes(x_names[x_check], c(x_ignore, y_aux), suffix$x)
} else if (is_false(keep)) {
# In x_out, key variables need to keep the same name, and aux
# variables need suffixes for duplicates that appear in y_out
y_aux <- setdiff(y_names, c(by$x, by$y))
x_ignore <- by$x
x_check <- !x_names %in% x_ignore
names(x_loc)[x_check] <- add_suffixes(x_names[x_check], c(x_ignore, y_aux), suffix$x)
} else {
# In x_out, key variables and aux variables need suffixes
# for duplicates that appear in y_out
names(x_loc) <- add_suffixes(x_names, y_names, suffix$x)
}
y_loc <- seq_along(y_names)
names(y_loc) <- add_suffixes(y_names, x_names, suffix$y)
if (is_null(keep)) {
equi <- by$condition == "=="
y_ignore <- by$y[equi]
y_loc <- y_loc[!y_names %in% y_ignore]
} else if (is_false(keep)) {
y_ignore <- by$y
y_loc <- y_loc[!y_names %in% y_ignore]
}
# key = named locations to use for matching
# out = named locations to use in output
list(
x = list(key = x_by, out = x_loc),
y = list(key = y_by, out = y_loc)
)
}
check_join_vars <- function(vars,
names,
condition,
input,
...,
error_call = caller_env()) {
check_dots_empty0(...)
if (!is.character(vars)) {
message <- "Join columns in `{input}` must be character vectors."
cli_abort(message, call = error_call)
}
na <- is.na(vars)
if (any(na)) {
bullets <- c(
"Join columns in `{input}` can't be `NA`.",
x = "Problem at position {err_vars(na)}."
)
abort(bullets, call = error_call)
}
# Columns are allowed to appear in more than one non-equi condition
# (but not in a mix of non-equi and equi conditions).
# When non-equi conditions are present, `keep` can't be `FALSE` so we don't
# have to worry about merging into the same key column multiple times (#6499).
non_equi <- condition != "=="
vars <- c(vars[!non_equi], unique(vars[non_equi]))
dup <- duplicated(vars)
if (any(dup)) {
vars <- unique(vars[dup])
bullets <- c(
"Join columns in `{input}` must be unique.",
x = "Problem with {err_vars(vars)}."
)
cli_abort(bullets, call = error_call)
}
missing <- setdiff(vars, names)
if (length(missing) > 0) {
bullets <- c(
"Join columns in `{input}` must be present in the data.",
x = "Problem with {err_vars(missing)}."
)
cli_abort(bullets, call = error_call)
}
}
check_duplicate_vars <- function(vars,
input,
...,
error_call = caller_env()) {
check_dots_empty0(...)
dup <- duplicated(vars)
if (any(dup)) {
bullets <- c(
"Input columns in `{input}` must be unique.",
x = "Problem with {err_vars(vars[dup])}."
)
cli_abort(bullets, call = error_call)
}
}
standardise_join_suffix <- function(x,
...,
error_call = caller_env()) {
check_dots_empty0(...)
if (!is.character(x) || length(x) != 2) {
bullets <- c(
"`suffix` must be a character vector of length 2, not {.obj_type_friendly {x}} of length {length(x)}."
)
cli_abort(bullets, call = error_call)
}
if (any(is.na(x))) {
msg <- "`suffix` can't be `NA`."
cli_abort(msg, call = error_call)
}
list(x = x[[1]], y = x[[2]])
}
# `join_cols()` checks that `x` and `y` are individually unique,
# which plays into assumptions made here
add_suffixes <- function(x, y, suffix) {
if (identical(suffix, "")) {
return(x)
}
x <- c(y, x)
# Never marks the "first" duplicate (i.e. never anything in `y`)
dup <- duplicated(x)
while (any(dup)) {
x[dup] <- paste0(x[dup], suffix)
dup <- duplicated(x)
}
loc <- seq2(length(y) + 1L, length(x))
x <- x[loc]
x
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.