# R/join-vars.R In dplyr: A Grammar of Data Manipulation

```join_vars <- function(x_names, y_names, by, suffix = list(x = ".x", y = ".y")) {
# Record position of join keys
idx <- get_join_var_idx(x_names, y_names, by)

x_names_by <- x_names[idx\$x\$by]
x_names_aux <- x_names[idx\$x\$aux]
y_names_aux <- y_names[idx\$y\$aux]

x_new <- x_names
x_new[idx\$x\$aux] <- add_suffixes(x_names_aux, c(x_names_by, y_names_aux), suffix\$x)

x_x <- seq_along(x_names)
x_y <- idx\$y\$by[match(x_names, by\$x)]
y_x <- rep_along(idx\$y\$aux, NA)
y_y <- seq_along(idx\$y\$aux)

# Return a list with 3 parallel vectors
# At each position, values in the 3 vectors represent
#  alias - name of column in join result
#  x - position of column from left table or NA if only from right table
#  y - position of column from right table or NA if only from left table
ret <- list(alias = c(x_new, y_new), x = c(x_x, y_x), y = c(x_y, y_y))
# In addition, the idx component contains indices of "by" and "aux" variables
# for x and y, respectively (see get_join_var_idx())
ret\$idx <- idx
ret
}

get_join_var_idx <- function(x_names, y_names, by) {
x_idx <- get_by_aux(x_names, by\$x)
y_idx <- get_by_aux(y_names, by\$y)

list(x = x_idx, y = y_idx)
}

get_by_aux <- function(names, by) {
if (length(by) > 0) {
by <- match(by, names)
aux <- seq_along(names)[-by]
} else {
by <- integer()
aux <- seq_along(names)
}

list(by = by, aux = aux)
}

add_suffixes <- function(x, y, suffix) {
if (identical(suffix, "")) {
return(x)
}

out <- rep_along(x, na_chr)
for (i in seq_along(x)) {
nm <- x[[i]]
while (nm %in% y || nm %in% out) {
nm <- paste0(nm, suffix)
}

out[[i]] <- nm
}
out
}
```

## Try the dplyr package in your browser

Any scripts or data that you put into this service are public.

dplyr documentation built on July 4, 2019, 5:08 p.m.