R/symdiff.R

Defines functions duckplyr_symdiff symdiff.duckplyr_df

# Generated by 02-duckplyr_df-methods.R
#' @export
symdiff.duckplyr_df <- function(x, y, ...) {
  # Symmetric difference of `x` and `y` for duckplyr data frames.
  #
  # The block handed to rel_try() ends in return(), which exits this method
  # directly. If control comes back from rel_try(), the call is forwarded to
  # dplyr's data frame method instead.
  # NOTE(review): presumably rel_try() skips the block whenever one of the
  # named conditions is TRUE -- confirm against rel_try().

  # Our implementation
  check_dots_empty()
  check_compatible(x, y)

  x_cols <- names(x)
  y_cols <- names(y)
  # Compare the two name vectors once; the result is reused in the
  # conditions and inside the block below.
  names_differ <- !identical(x_cols, y_cols)

  rel_try(list(name = "symdiff", x = x, y = y),
    "No duplicate names" =
      names_differ && anyDuplicated(x_cols) > 0L && anyDuplicated(y_cols) > 0L,
    "Tables of different width" = length(x_cols) != length(y_cols),
    "Name mismatch" = names_differ && any(!(y_cols %in% x_cols)),
    {
      if (oo_force()) {
        # Rows found on one side only, combined from both directions.
        only_in_x <- anti_join(x, y, by = x_cols)
        only_in_y <- anti_join(y, x, by = x_cols)
        return(union(only_in_x, only_in_y))
      }

      x_rel <- duckdb_rel_from_df(x)
      y_rel <- duckdb_rel_from_df(y)

      if (names_differ) {
        # Project `y` using the names of `x` before the set operation.
        # FIXME: Select by position
        col_loc <- set_names(seq_along(x_cols), x_cols)
        y_rel <- rel_project(y_rel, nexprs_from_loc(x_cols, col_loc))
      }

      symdiff_rel <- rel_set_symdiff(x_rel, y_rel)
      return(dplyr_reconstruct(rel_to_df(symdiff_rel), x))
    }
  )

  # dplyr forward
  # The method is bound to the local name `symdiff` so the forwarded call
  # still reads `symdiff(x, y, ...)`.
  symdiff <- dplyr$symdiff.data.frame
  return(symdiff(x, y, ...))

  # dplyr implementation
  # Never executed: everything below follows an unconditional return().
  check_dots_empty()
  check_compatible(x, y)

  out <- vec_set_symmetric_difference(x, y, error_call = current_env())

  dplyr_reconstruct(out, x)
}

duckplyr_symdiff <- function(x, y, ...) {
  # Converts both inputs with as_duckplyr_df(), calls symdiff() on them and
  # returns the result with the "duckplyr_df" class removed. An error during
  # conversion is turned into a testthat skip carrying the error message.
  skip_with_message <- function(cnd) {
    testthat::skip(conditionMessage(cnd))
  }

  # The assignments inside the braces rebind `x` and `y` in this function.
  try_fetch(
    {
      x <- as_duckplyr_df(x)
      y <- as_duckplyr_df(y)
    },
    error = skip_with_message
  )

  res <- symdiff(x, y, ...)
  class(res) <- setdiff(class(res), "duckplyr_df")
  res
}
duckdblabs/duckplyr documentation built on Nov. 6, 2024, 10 p.m.