R/counts.R

Defines functions counts cheapr_table

Documented in cheapr_table counts

#' Fast frequency tables - Still experimental
#'
#' @description
#' This is not a one-to-one copy of `base::table()` as some behaviours differ.
#' It is more flexible as it accepts inputs such as data frames and
#' `vctrs_rcrd` objects.
#'
#'
#' @param ... `>=1` objects that can be converted to a factor through
#' `cheapr::factor_()`.
#' @param names Should level names be kept? Default is `TRUE`.
#' @param order Should result be ordered by level names? Default is
#' `FALSE`.
#' @param na_exclude Should `NA` values be excluded? Default is `FALSE`.
#' @param classed Should a `table` object be returned? Default is `FALSE`
#' @param x A vector.
#' @param sort Should groups be sorted? Default is `FALSE`.
#'
#' @details
#' `cheapr_table()` tries to match the behaviour of `table()` where possible.
#' `counts()` is an alternative that returns a `data.frame` of unique keys and
#' counts.
#'
#' @returns
#' A named integer vector if one object is supplied, otherwise an
#' array.
#'
#' @rdname counts
#' @export
cheapr_table <- function(..., names = TRUE, order = FALSE, na_exclude = FALSE,
                         classed = FALSE){

  vecs <- list(...)

  to_factor <- function(x, na.excl){
    if (is.factor(x)){
      if (na_exclude){
        levels_drop_na(x)
      } else if (any_na(x)){
        levels_add_na(x)
      } else {
        x
      }
    } else {
      factor_(x, order = order, na_exclude = na_exclude)
    }
  }

  factors <- lapply(vecs, to_factor, na_exclude)

  if (length(factors) == 1){
    f <- factors[[1L]]
    lvls <- attr(f, "levels")
    out <- cpp_tabulate(f, length(lvls))
    if (names){
      dim_names <- list(lvls)
      names(dim_names) <- names(vecs)
      names(out) <- lvls
    } else {
      dim_names <- NULL
    }
    if (classed){
      out <- array(out, length(out), dim_names)
      class(out) <- "table"
    }
  } else {
    out <- do.call(collapse::qtab, c(factors, list(dnn = NULL)), envir = parent.frame())
    if (!classed){
      attrs_add(out, class = NULL, sorted = NULL, weighted = NULL, .set = TRUE)
    }
  }
  out
}
#' @rdname counts
#' @export
counts <- function(x, sort = is.factor(x)){
  if (sort && is.factor(x)){
    keys <- levels_factor(x)
    counts <- cpp_tabulate(x, length(keys))
  } else {
    groups <- collapse::GRP(
      x, sort = sort, return.order = FALSE, return.groups = FALSE
    )
    group_ids <- groups[["group.id"]]
    n_groups <- groups[["N.groups"]]

    if (!sort && vector_length(x) == n_groups){
      keys <- x
    } else {
      start_locs <- cpp_group_starts(group_ids, n_groups)
      if (sort && attr(start_locs, "sorted", TRUE) && vector_length(x) == n_groups){
        keys <- x
      } else {
        keys <- sset(x, start_locs)
      }
    }
    counts <- groups[["group.sizes"]]
  }
  fast_df(
    key = keys,
    count = counts
  )
}
#' @rdname counts
#' @export
table_ <- cheapr_table

Try the cheapr package in your browser

Any scripts or data that you put into this service are public.

cheapr documentation built on Nov. 28, 2025, 5:06 p.m.