R/utils-general.R

Defines functions vec_sample change_types deprecate_old_across deprecate_dot_fun is_rowwise is_ungrouped .df_as_tidytable check_across vec_ptype_compatible list_flatten tidytable_restore call_reduce is_simple_vector imap unpack f_sort tidytable_class get_dt_env remove_key df_name_repair df_col_order df_set_names call2_i_by call2_i call2_j dt_j

# dt starting with the j position
dt_j <- function(.df, j, ...) {
  j <- enquo(j)
  i <- new_quosure(expr(), get_env(j))
  dt(.df, !!i, !!j, ...)
}

# Create a call to `[.data.table` (j position)
call2_j <- function(.df, j = NULL, .by = NULL, .keyby = FALSE, ...) {
  .df <- enquo(.df)
  if (length(.by) == 0) {
    dt_expr <- call2("[", .df, expr(), j, ...)
  } else if (.keyby) {
    dt_expr <- call2("[", .df, expr(), j, keyby = .by, ...)
  } else {
    dt_expr <- call2("[", .df, expr(), j, by = .by, ...)
  }

  if (is_call(j, c(":=", "let"))) {
    dt_expr <- call2("[", dt_expr)
  }

  dt_expr
}

# Create a call to `[.data.table` (i position)
call2_i <- function(.df, i = NULL, .by = NULL) {
  if (length(.by) == 0) {
    # Use enquo(.df) to clean up error messages, #305
    call2("[", enquo(.df), i)
  } else {
    call2_i_by(.df, i, .by)
  }
}

# Uses fast `by` trick for i position using .I
#   See: https://stackoverflow.com/a/16574176/13254470
call2_i_by <- function(.df, i, .by) {
  j <- expr(.I[!!i])
  dt_expr <- call2_j(.df, j, .by)
  dt_expr <- call2("$", dt_expr, expr(V1))
  dt_expr <- call2_i(.df, dt_expr)
  dt_expr
}

globalVariables("V1")

# setnames without modify-by-reference
df_set_names <- function(.df, new_names = NULL, old_names = NULL) {
  if (is.null(old_names)) {
    out <- set_names(.df, new_names)
  } else {
    out <- fast_copy(.df)
    setnames(out, old_names, new_names)
  }
  out
}

# setcolorder without modify-by-reference
df_col_order <- function(.df, new_order) {
  out <- fast_copy(.df)
  setcolorder(out, new_order)
  out
}

# Repair names of a data.table
df_name_repair <- function(.df, .name_repair = "unique") {
  new_names <- vec_as_names(names(.df), repair = .name_repair)
  df_set_names(.df, new_names)
}

# Drop the key from a keyed data.table
remove_key <- function(.df) {
  if (haskey(.df)) {
    .df <- fast_copy(.df)
    setkey(.df, NULL)
  }
  .df
}

# Extract environment from quosures to build the evaluation environment
get_dt_env <- function(x, ...) {
  default <- caller_env(2)

  if (length(x) == 0) {
    dt_env <- default
  } else if (is_quosures(x)) {
    envs <- map(x, get_env)
    non_empty <- map_lgl(envs, ~ !identical(.x, empty_env()))
    if (any(non_empty)) {
      dt_env <- envs[non_empty][[1]]
    } else {
      dt_env <- default
    }
  } else {
    dt_env <- get_env(x)
    if (identical(dt_env, empty_env())) {
      dt_env <- default
    }
  }

  env(dt_env, ...)
}

tidytable_class <- function() {
  c("tidytable", "data.table", "data.frame")
}

# radix sort
#   Proxy for data.table::fsort since negative values aren't supported, #282
#   Can switch to data.table::fsort once negative doubles are handled
#   See: https://github.com/Rdatatable/data.table/issues/5051
f_sort <- function(x) {
  if (is.character(x)) {
    suppressWarnings(
      fsort(x, na.last = TRUE)
    )
  } else {
    vec_sort(x)
  }
}

# Unpack all data frame columns
unpack <- function(.df, .name_repair = "check_unique") {
  # Note: df_list requires data frame inputs to be unnamed to unpack
  out <- as.list(.df)
  is_data_frame <- map_lgl(out, is.data.frame)
  names(out)[is_data_frame] <- ""
  out <- df_list(!!!out, .name_repair = .name_repair)
  new_tidytable(out)
}

# imap implementation - for internal use only
imap <- function(.x, .f, ...) {
  map2(.x, names(.x) %||% seq_along(.x), .f, ...)
}

# Is object a vector and not a matrix
is_simple_vector <- function(x) {
  is_atomic(x) && !is.matrix(x)
}

# Reduce a list of calls to a single combined call
call_reduce <- function(x, fun) {
  Reduce(function(x, y) call2(fun, x, y), x)
}

# Restore user defined attributes
#   Ensures auto-index is removed
#   See: https://github.com/Rdatatable/data.table/issues/5042
tidytable_restore <- function(x, to) {
  to <- set_attr(to, "index", NULL)
  vec_restore(x, to)
}

# Flatten lists
list_flatten <- function(x, recursive = FALSE) {
  is_list <- map_lgl(x, obj_is_list)
  any_list <- any(is_list)
  if (any_list) {
    is_not_list <- !is_list
    x[is_not_list] <- lapply(x[is_not_list], list)
    out <- list_unchop(x, ptype = list())
  } else {
    out <- x
  }

  if (recursive && any_list) {
    out <- list_flatten(out, recursive)
  }

  out
}

# Check if two vectors have compatible ptypes
vec_ptype_compatible <- function(x, y) {
  tryCatch({vec_ptype_common(x, y); TRUE}, error = function(e) FALSE)
}

check_across <- function(dots, .fn) {
  uses_across <- any(map_lgl(dots, quo_is_call, c("across", "across.", "pick")))

  if (uses_across) {
    msg <- glue("`across()`/`pick()` are unnecessary in `{.fn}()`.
                Please directly use tidyselect.
                Ex: df %>% {.fn}(where(is.numeric))")
    abort(msg)
  }
}

# Needed until we can build S3 methods again once `verb.()` is removed
#   Regular `as_tidytable()` strips "grouped_tt" class
.df_as_tidytable <- function(.df) {
  if (!is.data.frame(.df)) {
    abort("`.df` must be a data frame.")
  }

  if (!is_tidytable(.df)) {
    as_tidytable(.df)
  } else {
    .df
  }
}

is_ungrouped <- function(.df) {
  !is_grouped_df(.df) && !is_rowwise(.df)
}

is_rowwise <- function(.df) {
  inherits(.df, "rowwise_tt")
}

deprecate_dot_fun <- function(fn = NULL, env = caller_env(), user_env = caller_env(2)) {
  fn <- fn %||% call_name(caller_call())
  what <- glue("{fn}()")
  with <- str_replace(what, ".", "", TRUE)
  details <- "Please note that all `verb.()` syntax has now been deprecated. \n"
  deprecate_soft(
    "v0.10.0", what, with, details, id = ".tidytable_dot_funs",
    env = env, user_env = user_env
  )
}

deprecate_old_across <- function(fn) {
  msg <- glue("`{fn}_across.()` is defunct as of v0.8.1 (Aug 2022).
              It has been deprecated with warnings since v0.6.4 (Jul 2021).
              Please use `{fn}(across())`")
  abort(msg)
}

# Does type changes with ptype & transform logic
#   For use in pivot_longer/unnest_longer/unnest_wider
change_types <- function(.df, .cols, .ptypes = NULL, .transform = NULL) {
  if (!is.null(.ptypes)) {
    if (!obj_is_list(.ptypes)) {
      # Allow providing a single ptype for all cols
      .ptypes <- vec_rep(list(.ptypes), length(.cols))
      .ptypes <- set_names(.ptypes, .cols)
    }
    .cols <- intersect(.cols, names(.ptypes))
    ptype_exprs <- map2(syms(.cols), .ptypes, ~ call2("vec_cast", .x, .y, .ns = "vctrs"))
    names(ptype_exprs) <- .cols
    .df <- mutate(.df, !!!ptype_exprs)
  }

  if (!is.null(.transform)) {
    if (!obj_is_list(.transform)) {
      # Allow providing a single transform for all cols
      .transform <- vec_rep(list(.transform), length(.cols))
      .transform <- set_names(.transform, .cols)
    }
    .cols <- intersect(.cols, names(.transform))
    .transform <- map(.transform, as_function)
    transform_exprs <- map2(.transform, syms(.cols), call2)
    names(transform_exprs) <- .cols
    .df <- mutate(.df, !!!transform_exprs)
  }

  .df
}

# For internal testing
vec_sample <- function(x, times = vec_size(x), replace = NULL) {
  if (is.null(replace)) {
    if (vec_size(x) == times) {
      replace <- FALSE
    } else {
      replace <- TRUE
    }
  }
  locs <- vec_seq_along(x)
  locs <- sample(locs, times, replace)
  vec_slice(x, locs)
}

Try the tidytable package in your browser

Any scripts or data that you put into this service are public.

tidytable documentation built on Oct. 5, 2023, 5:07 p.m.