R/union_all.R

Defines functions duckplyr_union_all union_all.duckplyr_df

# Generated by 02-duckplyr_df-methods.R
#' @export
union_all.duckplyr_df <- function(x, y, ...) {
  # S3 method of union_all() for duckplyr_df objects: appends the rows of `y`
  # to the rows of `x`, keeping duplicates.
  #
  # x, y: the two tables to combine; the result is rebuilt with `x` as the
  #       template (see dplyr_reconstruct() below).
  # ...:  checked via check_dots_empty() below, so no extra arguments are
  #       expected (presumably rlang's checker, which errors on non-empty
  #       dots -- confirm).
  #
  # Layout of the body:
  #   1. "Our implementation": a DuckDB relational attempt wrapped in rel_try().
  #   2. "dplyr forward": the data frame method from dplyr, used whenever
  #      control gets past rel_try().
  #   3. "dplyr implementation": sits after an unconditional return() and is
  #      therefore never executed; it appears to be kept as a reference copy.

  # Our implementation
  check_dots_empty()
  # NOTE(review): check_compatible() is defined elsewhere; presumably it raises
  # an error when x and y cannot be combined -- confirm.
  check_compatible(x, y)

  x_names <- names(x)
  y_names <- names(y)
  if (identical(x_names, y_names)) {
    # Ensure identical() is very cheap
    # (identical(x_names, y_names) is evaluated several more times below)
    y_names <- x_names
  }

  # NOTE(review): rel_try() is defined elsewhere. From the call shape it takes
  # a description of the call, named conditions, and a code block; presumably a
  # TRUE condition (its name serving as the reason) or a failure of the block
  # makes it return normally so that the dplyr forward below runs -- confirm.
  # The block ends in return(out); assuming rel_try() evaluates that argument
  # lazily, the return() exits union_all.duckplyr_df() itself.
  rel_try(list(name = "union_all", x = x, y = y),
    # Names differ AND both name vectors contain duplicates
    # (anyDuplicated() yields 0 when there are none, which counts as FALSE).
    "No duplicate names" = !identical(x_names, y_names) && anyDuplicated(x_names) && anyDuplicated(y_names),
    # x and y have a different number of columns.
    "Tables of different width" = length(x_names) != length(y_names),
    # Names differ AND some column name of y does not occur in x.
    "Name mismatch" = !identical(x_names, y_names) && !all(y_names %in% x_names),
    {
      x_rel <- duckdb_rel_from_df(x)
      y_rel <- duckdb_rel_from_df(y)
      if (!identical(x_names, y_names)) {
        # Same set of names but not the same vector: project y using
        # expressions built from x's names (presumably this re-orders y's
        # columns into x's order -- confirm against nexprs_from_loc()).
        # FIXME: Select by position
        exprs <- nexprs_from_loc(x_names, set_names(seq_along(x_names), x_names))
        y_rel <- rel_project(y_rel, exprs)
      }

      # NOTE(review): oo_prep()/oo_restore() are defined elsewhere. Presumably
      # oo_prep() adds a row-number column to each side (plus a placeholder for
      # the other side's column, so both relations share the same layout) and
      # oo_restore() sorts by those columns and drops them again, keeping x's
      # rows ahead of y's -- confirm.
      x_rel <- oo_prep(x_rel, "___row_number_x", extra_cols_post = "___row_number_y")
      y_rel <- oo_prep(y_rel, "___row_number_y", extra_cols_pre = "___row_number_x")

      rel <- rel_union_all(x_rel, y_rel)

      # NULLs sort first in duckdb!
      rel <- oo_restore(rel, c("___row_number_x", "___row_number_y"))

      out <- rel_to_df(rel)
      # Rebuild the result using x as the template.
      out <- dplyr_reconstruct(out, x)
      return(out)
    }
  )

  # dplyr forward
  # Reached only when rel_try() returns without the block's return(out) having
  # fired. Calls the data frame method looked up on the `dplyr` object.
  union_all <- dplyr$union_all.data.frame
  out <- union_all(x, y, ...)
  return(out)

  # dplyr implementation
  # Unreachable: the return(out) above always exits first.
  check_dots_empty()
  check_compatible(x, y)

  out <- vec_rbind(x, y)
  dplyr_reconstruct(out, x)
}

duckplyr_union_all <- function(x, y, ...) {
  # Wrapper around union_all() that first converts both inputs with
  # as_duckplyr_df() and strips the "duckplyr_df" class from the result.
  #
  # x, y: inputs to convert and combine.
  # ...:  forwarded unchanged to union_all().
  #
  # If either conversion raises an error, the running testthat test is skipped
  # with that error's message instead of failing.

  # Handler: turn a conversion error into a test skip.
  skip_with_reason <- function(e) {
    testthat::skip(conditionMessage(e))
  }

  # Both conversions stay inside one try_fetch() so that x is converted before
  # y and an error from either one takes the same skip path.
  try_fetch(
    {
      x <- as_duckplyr_df(x)
      y <- as_duckplyr_df(y)
    },
    error = skip_with_reason
  )

  combined <- union_all(x, y, ...)

  # Remove the duckplyr marker class, leaving the remaining classes as they are.
  class(combined) <- setdiff(class(combined), "duckplyr_df")
  combined
}
duckdblabs/duckplyr documentation built on Nov. 6, 2024, 10 p.m.