R/nest_join.R

Defines functions duckplyr_nest_join nest_join.duckplyr_df

# Generated by 02-duckplyr_df-methods.R
#' @export
nest_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, keep = NULL, name = NULL, ..., na_matches = c("na", "never"), unmatched = "drop") {
  # from dplyr implementation
  check_keep(keep)
  na_matches <- check_na_matches(na_matches)

  if (is.null(name)) {
    name <- as_label(enexpr(y))
  } else {
    check_string(name)
  }

  # Our implementation
  rel_try(NULL,
    "No relational implementation for nest_join()" = TRUE,
    {
      return(out)
    }
  )

  # dplyr forward
  x_df <- x
  class(x_df) <- setdiff(class(x_df), "duckplyr_df")
  y_df <- y
  class(y_df) <- setdiff(class(y_df), "duckplyr_df")
  nest_join <- dplyr$nest_join.data.frame
  out <- nest_join(x_df, y_df, by, copy, keep, name, ..., na_matches = na_matches, unmatched = unmatched)
  out <- dplyr_reconstruct(out, x)
  return(out)

  # dplyr implementation
  check_dots_empty0(...)
  check_keep(keep)
  na_matches <- check_na_matches(na_matches)

  if (is.null(name)) {
    name <- as_label(enexpr(y))
  } else {
    check_string(name)
  }

  x_names <- tbl_vars(x)
  y_names <- tbl_vars(y)

  if (is_cross_by(by)) {
    warn_join_cross_by()
    by <- new_join_by()
    cross <- TRUE
  } else {
    cross <- FALSE
  }

  if (is_null(by)) {
    by <- join_by_common(x_names, y_names)
  } else {
    by <- as_join_by(by)
  }

  vars <- join_cols(x_names, y_names, by = by, suffix = c("", ""), keep = keep)
  y <- auto_copy(x, y, copy = copy)

  x_in <- as_tibble(x, .name_repair = "minimal")
  y_in <- as_tibble(y, .name_repair = "minimal")

  x_key <- set_names(x_in[vars$x$key], names(vars$x$key))
  y_key <- set_names(y_in[vars$y$key], names(vars$x$key))

  args <- join_cast_common(x_key, y_key, vars)
  x_key <- args$x
  y_key <- args$y

  condition <- by$condition
  filter <- by$filter

  # We always want to retain all of the matches. We never experience a Cartesian
  # explosion because `nrow(x) == nrow(out)` is an invariant of `nest_join()`,
  # and the whole point of `nest_join()` is to nest all of the matches for that
  # row of `x` (#6392).
  multiple <- "all"

  # Will be set to `"none"` in `join_rows()`. Because we can't have a Cartesian
  # explosion, we don't care about many-to-many relationships.
  relationship <- NULL

  rows <- join_rows(
    x_key = x_key,
    y_key = y_key,
    type = "nest",
    na_matches = na_matches,
    condition = condition,
    filter = filter,
    cross = cross,
    multiple = multiple,
    unmatched = unmatched,
    relationship = relationship,
    user_env = caller_env()
  )

  y_loc <- vec_split(rows$y, rows$x)$val

  out <- set_names(x_in[vars$x$out], names(vars$x$out))

  # Modify all columns in one step so that we only need to re-group once
  new_cols <- vec_cast(out[names(x_key)], x_key)

  y_out <- set_names(y_in[vars$y$out], names(vars$y$out))
  y_out <- map(y_loc, vec_slice, x = y_out)
  y_out <- map(y_out, dplyr_reconstruct, template = y)
  new_cols[[name]] <- y_out

  out <- dplyr_col_modify(out, new_cols)
  dplyr_reconstruct(out, x)
}

duckplyr_nest_join <- function(x, y, by = NULL, copy = FALSE, keep = NULL, name = NULL, ...) {
  if (is.null(name)) {
    name <- as_label(enexpr(y))
  } else {
    check_string(name)
  }

  try_fetch(
    {
      x <- as_duckplyr_df(x)
      y <- as_duckplyr_df(y)
    },
    error = function(e) {
      testthat::skip(conditionMessage(e))
    }
  )
  out <- nest_join(x, y, by, copy, keep, name, ...)
  class(out) <- setdiff(class(out), "duckplyr_df")
  out
}
duckdblabs/duckplyr documentation built on Nov. 6, 2024, 10 p.m.