R/summarise.R

Defines functions duckplyr_summarise summarise.duckplyr_df

# Generated by 02-duckplyr_df-methods.R
# Registers `___row_number` with utils::globalVariables(). The name is used
# unquoted inside quo(min(`___row_number`)) in summarise.duckplyr_df();
# presumably this keeps R CMD check from reporting it as an undefined global.
utils::globalVariables("___row_number")

# summarise() method for objects of class "duckplyr_df".
#
# Attempts the summary through the relational helpers wrapped in rel_try();
# that block ends in return(out), so when it runs to completion nothing after
# it is executed. Otherwise the call is forwarded to
# dplyr$summarise.data.frame().
#
# Arguments:
#   .data    Input data; forced immediately.
#   ...      Summary expressions, captured with enquos()/dplyr_quosures().
#   .by      Optional grouping selection, resolved by eval_select_by().
#   .groups  Only the value "rowwise" is inspected in this method; it is
#            otherwise passed along unchanged.
#
# Returns the summarised data. Both paths reset the class vector to those of
# "duckplyr_df", "tbl_df", "tbl", "data.frame" that `.data` carries; the dplyr
# fallback skips this reset when `.groups` is "rowwise".
#' @export
summarise.duckplyr_df <- function(.data, ..., .by = NULL, .groups = NULL) {
  force(.data)

  # Resolved grouping columns; wrapped with syms() and relexpr_reference() below.
  by <- eval_select_by(enquo(.by), .data)

  # NOTE(review): the named argument on the second line reads as a
  # reason = condition pair. Presumably rel_try() skips the braced block and
  # falls through to the dplyr forward below when the condition is TRUE or
  # when the block fails; confirm in rel_try().
  rel_try(list(name = "summarise", x = .data, args = try_list(dots = enquos(...), by = syms(by), .groups = .groups)),
    'summarize(.groups = "rowwise") not supported' = identical(.groups, "rowwise"),
    {
      rel <- duckdb_rel_from_df(.data)

      dots <- dplyr_quosures(...)
      dots <- fix_auto_name(dots)
      # When several expressions share a name, keep only the last one.
      dots <- dots[!duplicated(names(dots), fromLast = TRUE)]

      # TRUE only when there is at least one grouping column and oo_force()
      # is TRUE. Presumably "oo" stands for output order; verify.
      oo <- (length(by) > 0) && oo_force()
      if (oo) {
        # Prepare `rel` with a helper column named "___row_number".
        rel <- oo_prep(rel, colname = "___row_number", force = TRUE)
      }

      groups <- lapply(by, relexpr_reference)
      aggregates <- rel_translate_dots_summarise(dots, .data)

      if (oo) {
        # Prepend min(`___row_number`), aliased "___row_number", to the
        # aggregates; it is translated against an empty one-column data frame
        # because the helper column is not part of `.data`.
        aggregates <- c(
          list(rel_translate(
            quo(min(`___row_number`)),
            new_data_frame(list(`___row_number` = integer())),
            alias = "___row_number"
          )),
          aggregates
        )
      }

      # Aggregates are passed without their names.
      out_rel <- rel_aggregate(rel, groups, unname(aggregates))
      # https://github.com/duckdb/duckdb/issues/7095
      # Only applied when there are no grouping columns.
      if (length(groups) == 0) {
        out_rel <- rel_distinct(out_rel)
      }

      if (oo) {
        # Counterpart of oo_prep() above, keyed on the same helper column.
        out_rel <- oo_restore(out_rel, "___row_number")
      }

      out <- rel_to_df(out_rel)
      # https://github.com/tidyverse/dplyr/pull/6988
      # Keep, in this fixed order, only the classes that `.data` itself has.
      class(out) <- intersect(c("duckplyr_df", "tbl_df", "tbl", "data.frame"), class(.data))

      # The block is an argument of rel_try(), yet this return() is written to
      # leave summarise.duckplyr_df() itself.
      return(out)
    }
  )

  # dplyr forward
  summarise <- dplyr$summarise.data.frame
  out <- summarise(.data, ..., .by = {{ .by }}, .groups = .groups)
  # dplyr_reconstruct() is not called here, restoring manually
  if (!identical(.groups, "rowwise")) {
    # https://github.com/tidyverse/dplyr/pull/6988
    class(out) <- intersect(c("duckplyr_df", "tbl_df", "tbl", "data.frame"), class(.data))
  }
  return(out)

  # Everything below is unreachable: it follows the unconditional return(out)
  # above. The heading suggests it mirrors dplyr's own implementation, kept
  # for reference; confirm with the generator script before editing.
  # dplyr implementation
  by <- compute_by({{ .by }}, .data, by_arg = ".by", data_arg = ".data")

  cols <- summarise_cols(.data, dplyr_quosures(...), by, "summarise")
  out <- summarise_build(by, cols)

  if (!cols$all_one) {
    summarise_deprecate_variable_size()
  }

  if (!is_tibble(.data)) {
    # The `by` group data we build from is always a tibble,
    # so we have to manually downcast as needed
    out <- as.data.frame(out)
  }

  if (identical(.groups, "rowwise")) {
    out <- rowwise_df(out, character())
  }

  out
}

# Test helper: runs summarise() on `.data` after converting it with
# as_duckplyr_df(), then removes "duckplyr_df" from the result's class vector.
# If the conversion signals an error, the current test is skipped with that
# error's message.
duckplyr_summarise <- function(.data, ...) {
  .data <- try_fetch(
    as_duckplyr_df(.data),
    error = function(cnd) testthat::skip(conditionMessage(cnd))
  )

  result <- summarise(.data, ...)
  kept_classes <- setdiff(class(result), "duckplyr_df")
  class(result) <- kept_classes
  result
}
duckdblabs/duckplyr documentation built on Nov. 6, 2024, 10 p.m.