tools/02-duckplyr_df-methods.R

gert::git_pull(repo = ".sync/dplyr-main")

pkgload::load_all(".sync/dplyr-main")

source("tools/00-funs.R", echo = TRUE)

func_decl <- function(name, formals) {
  nse_args <- rlang::list2(
    add_count = c("wt"),
    count = c("wt"),
    filter = c(".by"),
    mutate = c(".by", ".before", ".after"),
    nest_join = c("y"),
    pull = c("var", "name"),
    reframe = c(".by"),
    relocate = c(".before", ".after"),
    rename_with = c(".cols"),
    sample_frac = c("size", "weight"),
    sample_n = c("size", "weight"),
    slice = c(".by"),
    slice_head = c("by"),
    slice_tail = c("by"),
    summarise = c(".by"),
  )

  no_reconstruct <- c(
    "count",
    "reframe",
    "relocate",
    "rename",
    "slice",
    "summarise",
    "transmute",
    NULL
  )

  data_arg <- sym(names(formals)[[1]])

  if (name %in% names(nse_args)) {
    nse <- nse_args[[name]]
  } else {
    nse <- character()
  }

  ellipsis <- which(names(formals) == "...")

  sym_formals <- rlang::set_names(rlang::syms(names(formals)), names(formals))

  curly_formals <- map(sym_formals[nse], rlang::call2, .fn = "{")
  curly_curly_formals <- map(curly_formals, rlang::call2, .fn = "{")

  forward_formals <- sym_formals
  forward_formals[nse] <- curly_curly_formals

  if (length(ellipsis) > 0) {
    names(forward_formals)[seq.int(ellipsis)] <- ""
  } else {
    names(forward_formals) <- NULL
  }

  reassign_call <- rlang::call2(
    "<-",
    rlang::sym(name),
    rlang::call2("$", rlang::sym("dplyr"), rlang::sym(paste0(name, ".data.frame")))
  )

  forward_call <- rlang::call2(name, !!!forward_formals)

  rlang::new_function(formals, expr({
    !!reassign_call
    out <- !!forward_call
    return(out)
  }))
}

first_line <- "# Generated by 02-duckplyr_df-methods.R"

func_decl_chr <- function(
    generic,
    code,
    name,
    new_code_chr,
    is_tbl_return,
    always_fallback) {
  formals <- formals(code)

  two_tables <- (length(formals) > 1) && (names(formals)[[2]] == "y")

  new_code_chr <- paste(utils::capture.output(print(new_code_chr)), collapse = "\n")

  rel_try_chr <- paste0(
    "  # Our implementation\n",
    "  rel_try(NULL,\n",
    if (always_fallback) "    # Always fall back to dplyr\n",
    '    "No relational implementation for ', generic, '()" = TRUE,\n',
    "    {\n",
    "      return(out)\n",
    "    }\n",
    "  )\n",
    "\n"
  )

  new_code_chr <- sub("[{]", paste0("{\n", rel_try_chr, "  # dplyr forward"), new_code_chr)

  dplyr_code <- brio::read_file(fs::path("dplyr-methods", paste0(generic, ".txt")))
  dplyr_impl <- c(
    "",
    "  # dplyr implementation",
    gsub("^[^{]*[{]\n", "", dplyr_code, perl = TRUE)
  )

  new_code_chr <- gsub("\n[}]", paste0("\n", dplyr_impl, collapse = ""), new_code_chr)

  method_code <- paste0(
    "#' @export\n",
    name,
    " <- ",
    new_code_chr,
    "\n"
  )

  if (two_tables) {
    arg_1 <- names(formals)[[1]]
    arg_2 <- names(formals)[[2]]
    args <- paste0(arg_1, ", ", arg_2)
    assign_impl <- c(
      '    {',
      '      {{{arg_1}}} <- as_duckplyr_df({{{arg_1}}})',
      '      {{{arg_2}}} <- as_duckplyr_df({{{arg_2}}})',
      '    },'
    )
  } else {
    arg_1 <- names(formals)[[1]]
    args <- arg_1
    assign_impl <- c(
      '    {{{arg_1}}} <- as_duckplyr_df({{{arg_1}}}),'
    )
  }

  test_impl <- c(
    '  try_fetch(',
    assign_impl,
    '    error = function(e) {',
    '      testthat::skip(conditionMessage(e))',
    '    }',
    '  )',
    '  out <- {{{generic}}}({{{args}}}, ...)',
    if (is_tbl_return) '  class(out) <- setdiff(class(out), "duckplyr_df")',
    '  out'
  )

  test_code <- c(
    'duckplyr_{{{generic}}} <- function({{args}}, ...) {',
    test_impl,
    '}',
    ''
  )

  test_code <- whisker::whisker.render(test_code)

  code <- paste0(
    first_line,
    "\n",
    method_code,
    test_code
  )
  code
}

duckplyr_df_methods <-
  df_methods %>%
  filter(!skip_impl) %>%
  mutate(formals = map(code, formals)) %>%
  mutate(new_code = pmap(list(name, formals), func_decl)) %>%
  mutate(new_code_chr = map(
    new_code,
    constructive::construct,
    check = FALSE,
    constructive::opts_function(environment = FALSE)
  )) %>%
  mutate(new_fun = paste0(name, ".duckplyr_df")) %>%
  rowwise() %>%
  mutate(decl_chr = func_decl_chr(
    name,
    code,
    new_fun,
    new_code_chr,
    is_tbl_return,
    always_fallback
  )) %>%
  ungroup()

# If this fails, we need to install dplyr from source:
# system("R CMD INSTALL --with-keep.source .sync/dplyr-main")
stopifnot(!is.null(attr(duckplyr_df_methods$code[[1]], "srcref")))

old <-
  tibble(path = fs::dir_ls("R")) %>%
  mutate(first_line = map_chr(path, brio::read_lines, 1)) %>%
  filter(first_line == !!first_line)

fs::file_delete(old$path)

duckplyr_df_methods %>%
  mutate(path = fs::path("R", paste0(name, ".R"))) %>%
  select(text = decl_chr, path) %>%
  pwalk(brio::write_file)

# Patch files -------------------------------------------------------------------------

patches <- fs::dir_ls("patch")

walk(patches, ~ system(paste0("patch -p1 < ", .x)))

# Stop here to overwrite files if the code generation is updated


system(paste0("git clean -f -- R"))


# Collect new patches -----------------------------------------------------------------

r_status <- gert::git_status(pathspec = "R/*.R")$file

# Use this to refresh all patches
# r_status <- fs::dir_ls("R", glob = "*.R")

walk(r_status, function(file) {
  patch_path <- gsub("R/(.*)[.]R", "patch/\\1.patch", file)
  if (fs::file_exists(patch_path)) {
    system(paste0("patch -p1 -R < ", patch_path))
  }
  system(paste0("git diff -R -- ", file, " > ", patch_path))
  system(paste0("git checkout -- ", file))
})
duckdblabs/duckplyr documentation built on Nov. 6, 2024, 10 p.m.