Nothing
#' Coalescing join
#'
#' EXPERIMENTAL! This function joins two dataframes together by key, and then coalesces any
#' columns that have shared names (i.e. fills in NAs). A utility function primarily
#' used internally within nflverse to help build player IDs
#'
#' @param x,y dataframes. Will be coerced to data.table
#' @param by keys to join on, as a plain or named character vector
#' @param type one of "left" (all rows of x and matching rows of y),
#' "inner" (matching rows of x and y), "full" (all rows of x and y)
#' @param by.x,by.y alternate form of keys to join on - if provided, will override `by`.
#' @param incomparables keys to NOT match on, i.e. NA should not match on NA.
#' @param sort whether to sort output by the join keys
#' @param ... other args passed to `merge.data.frame()`
#'
#' @return a data.frame joining x and y dataframes together, with every column from
#' both x and y and patching NA values in x with those in y.
#'
#'
#' @examples
#' \dontshow{.for_cran()}
#' x <- data.frame(id1 = c(NA_character_,letters[1:4]), a = c(1,NA,3,NA,5), b = 1:5 * 10)
#' y <- data.frame(id2 = c(letters[3:11],NA_character_), a = -(1:10), c = 1:10 * 100)
#'
#' join_coalesce(x,y, by = c("id1"="id2"))
#' join_coalesce(x,y, by.x = "id1", by.y = "id2")
#' join_coalesce(x,y, by = c("id1"="id2"), type = "inner")
#' join_coalesce(x,y, by = c("id1"="id2"), type = "full")
#'
#' @export
join_coalesce <- function(x, y, by = NULL,
type = c("left", "inner", "full"),
...,
by.x = NULL,by.y = NULL,
sort = TRUE,
incomparables = c(NA,NaN)) {
type <- rlang::arg_match0(type, c("left", "inner", "full"))
stopifnot(
is.data.frame(x),
is.data.frame(y),
length(by.x) == length(by.y),
(is.character(by) && length(by) >= 1) ||
(is.character(by.x) && is.character(by.y) && length(by.x) >= 1)
)
x <- as.data.frame(x)
y <- as.data.frame(y)
keys_x <- if (!is.null(by.x)) by.x else if(is.null(names(by))) by else ifelse(names(by) == "", by, names(by))
keys_y <- if (!is.null(by.y)) by.y else by
check_keys <- c(
"Join `by` keys in x are not unique" = nrow(x) != nrow(unique(x[keys_x])),
"Join `by` keys in y are not unique" = nrow(y) != nrow(unique(y[keys_y])),
"Join `by` keys in x have NAs" = any(is.na(x[keys_x])),
"Join `by` keys in y have NAs"= any(is.na(y[keys_y]))
)
if(any(check_keys)) {
cli::cli_warn(
names(check_keys)[which(check_keys)]
)
}
joined_cols <- c(setdiff(names(x), keys_x), setdiff(names(y), keys_y))
dupl_cols <- joined_cols[duplicated(joined_cols)]
# data.table's merge doesn't have an "incomparables" argument in the current prod version
# this causes NA to match to NA
merged_df <- merge.data.frame(
x = x,
y = y,
by.x = keys_x,
by.y = keys_y,
all.x = type %in% c("left", "full"),
all.y = type == "full",
sort = sort,
incomparables = incomparables,
...,
suffixes = c("..x", "..y")
)
data.table::setDT(merged_df)
for (col in dupl_cols) {
data.table::set(
merged_df,
j = col,
value = merged_df[[paste0(col, "..x")]] %c% merged_df[[paste0(col, "..y")]])
data.table::set(merged_df, j = paste0(col, "..x"), value = NULL)
data.table::set(merged_df, j = paste0(col, "..y"), value = NULL)
}
out <- merged_df[,c(keys_x,unique(joined_cols)), with = FALSE]
data.table::setDF(out)
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.