R/get_skimmers.R

Defines functions get_sfl get_one_default_skimmer_names get_default_skimmer_names get_one_default_skimmer get_default_skimmers modify_default_skimmers get_skimmers.haven_labelled get_skimmers.AsIs get_skimmers.list get_skimmers.ts get_skimmers.Timespan get_skimmers.difftime get_skimmers.POSIXct get_skimmers.Date get_skimmers.complex get_skimmers.logical get_skimmers.character get_skimmers.factor get_skimmers.numeric get_skimmers.default get_skimmers

Documented in get_default_skimmer_names get_default_skimmers get_one_default_skimmer get_one_default_skimmer_names get_sfl get_skimmers get_skimmers.AsIs get_skimmers.character get_skimmers.complex get_skimmers.Date get_skimmers.default get_skimmers.difftime get_skimmers.factor get_skimmers.haven_labelled get_skimmers.list get_skimmers.logical get_skimmers.numeric get_skimmers.POSIXct get_skimmers.Timespan get_skimmers.ts modify_default_skimmers

#' @include skim_with.R stats.R
NULL

#' Retrieve the summary functions for a specific data type
#'
#' These functions are used to set the default skimming functions for a data
#' type. They are combined with the base skim function list (`sfl`) in
#' [skim_with()], to create the summary tibble for each type.
#'
#' When creating your own set of skimming functions, call [sfl()] within a
#' [get_skimmers()] method for your particular type. Your call to [sfl()] should
#' also provide a matching class in the `skim_type` argument.  Otherwise, it
#' will not be possible to dynamically reassign your default functions when
#' working interactively.
#'
#' Call [get_default_skimmers()] to see the summary functions currently
#' defined for each supported type. Call [get_default_skimmer_names()] to see
#' just the names of these functions. Use [modify_default_skimmers()] to change
#' the `skim_type` or the functions of a default `sfl`. This is useful for
#' creating new default `sfl`'s.
#'
#' @param column An atomic vector or list. A column from a data frame.
#' @param skim_type A character scalar. The class of the object with default
#'   skimmers.
#' @return A `skim_function_list` object.
#' @seealso [sfl()]
#' @examples
#' # Defining default skimming functions for a new class, `my_class`.
#' # Note that the `skim_type` argument is required for dynamic reassignment.
#' get_skimmers.my_class <- function(column) {
#'   sfl(
#'     skim_type = "my_class",
#'     mean,
#'     sd
#'   )
#' }
#'
#' # Integer and double columns are both "numeric" and are treated the same
#' # by default. To switch this behavior in another package, add a method.
#' get_skimmers.integer <- function(column) {
#'   sfl(
#'     skim_type = "integer",
#'     p50 = ~ stats::quantile(
#'       .,
#'       probs = .50, na.rm = TRUE, names = FALSE, type = 1
#'     )
#'   )
#' }
#' x <- mtcars[c("gear", "carb")]
#' class(x$carb) <- "integer"
#' skim(x)
#' \dontrun{
#' # In a package, to revert to the V1 behavior of skimming separately with the
#' # same functions, assign the numeric `get_skimmers`.
#' get_skimmers.integer <- skimr::get_skimmers.numeric
#'
#' # Or, in a local session, use `skim_with` to create a different `skim`.
#' new_skim <- skim_with(integer = skimr::get_skimmers.numeric())
#'
#' # To apply a set of skimmers from an old type to a new type
#' get_skimmers.new_type <- function(column) {
#'   modify_default_skimmers("old_type", new_skim_type = "new_type")
#' }
#' }
#' @export
get_skimmers <- function(column) {
  # S3 generic: pick the method that matches the class of `column`.
  UseMethod("get_skimmers", column)
}

#' @describeIn get_skimmers The fallback method for skimming data. Only used
#'   when a column's data type doesn't match currently installed types. It
#'   reuses the `character` skimmers under the `"default"` skim type. Call
#'   [get_default_skimmer_names()] to see these defaults.
#' @export
get_skimmers.default <- function(column) {
  modify_default_skimmers(skim_type = "character", new_skim_type = "default")
}

#' @describeIn get_skimmers Summary functions for numeric columns, covering both
#'   [double()] and [integer()] classes: [mean()], [sd()], [quantile()] (at
#'   probabilities 0, .25, .50, .75 and 1, reported as `p0` to `p100`) and
#'   [inline_hist()]. The mean, standard deviation and quantiles are computed
#'   with `na.rm = TRUE`.
#' @export
get_skimmers.numeric <- function(column) {
  sfl(
    skim_type = "numeric",
    mean = ~ mean(., na.rm = TRUE),
    sd = ~ stats::sd(., na.rm = TRUE),
    p0 = ~ stats::quantile(., probs = 0, na.rm = TRUE, names = FALSE),
    p25 = ~ stats::quantile(., probs = .25, na.rm = TRUE, names = FALSE),
    p50 = ~ stats::quantile(., probs = .50, na.rm = TRUE, names = FALSE),
    p75 = ~ stats::quantile(., probs = .75, na.rm = TRUE, names = FALSE),
    p100 = ~ stats::quantile(., probs = 1, na.rm = TRUE, names = FALSE),
    hist = ~ inline_hist(., 5)
  )
}

#' @describeIn get_skimmers Summary functions for factor columns:
#'   [is.ordered()], [n_unique()] and [top_counts()], reported as `ordered`,
#'   `n_unique` and `top_counts`.
#' @export
get_skimmers.factor <- function(column) {
  sfl(
    skim_type = "factor",
    ordered = is.ordered,
    n_unique = n_unique,
    top_counts = top_counts
  )
}

#' @describeIn get_skimmers Summary functions for character columns. Also, the
#'   default for unknown columns: [min_char()], [max_char()], [n_empty()],
#'   [n_unique()] and [n_whitespace()], reported as `min`, `max`, `empty`,
#'   `n_unique` and `whitespace`.
#' @export
get_skimmers.character <- function(column) {
  sfl(
    skim_type = "character",
    min = min_char,
    max = max_char,
    empty = n_empty,
    n_unique = n_unique,
    whitespace = n_whitespace
  )
}

#' @describeIn get_skimmers Summary functions for logical (boolean) columns:
#'   [mean()], computed with `na.rm = TRUE`, which gives the rate of `TRUE`
#'   values, and [top_counts()], reported as `count`.
#' @export
get_skimmers.logical <- function(column) {
  sfl(
    skim_type = "logical",
    mean = ~ mean(., na.rm = TRUE),
    count = top_counts
  )
}

#' @describeIn get_skimmers Summary functions for complex columns: [mean()],
#'   computed with `na.rm = TRUE`.
#' @export
get_skimmers.complex <- function(column) {
  sfl(
    skim_type = "complex",
    mean = ~ mean(., na.rm = TRUE)
  )
}

#' @describeIn get_skimmers Summary functions for `Date` columns: [min()],
#'   [max()], [median()] and [n_unique()]. The minimum, maximum and median are
#'   computed with `na.rm = TRUE`.
#' @export
get_skimmers.Date <- function(column) {
  sfl(
    skim_type = "Date",
    min = ~ min(., na.rm = TRUE),
    max = ~ max(., na.rm = TRUE),
    median = ~ stats::median(., na.rm = TRUE),
    n_unique = n_unique
  )
}

#' @describeIn get_skimmers Summary functions for `POSIXct` columns: [min()],
#'   [max()], [median()] and [n_unique()]. These are the `Date` skimmers,
#'   relabelled with the `"POSIXct"` skim type.
#' @export
get_skimmers.POSIXct <- function(column) {
  modify_default_skimmers(skim_type = "Date", new_skim_type = "POSIXct")
}

#' @describeIn get_skimmers Summary functions for `difftime` columns: [min()],
#'   [max()], [median()] and [n_unique()]. These are the `Date` skimmers,
#'   relabelled with the `"difftime"` skim type.
#' @export
get_skimmers.difftime <- function(column) {
  modify_default_skimmers(skim_type = "Date", new_skim_type = "difftime")
}

#' @describeIn get_skimmers Summary functions for `Timespan` columns: [min()],
#'   [max()], [median()] and [n_unique()]. These are the `difftime` skimmers,
#'   relabelled with the `"Timespan"` skim type.
#' @export
get_skimmers.Timespan <- function(column) {
  modify_default_skimmers(skim_type = "difftime", new_skim_type = "Timespan")
}

#' @describeIn get_skimmers Summary functions for `ts` columns: `ts_start()`,
#'   `ts_end()`, [stats::frequency()], [stats::deltat()], [mean()], [sd()],
#'   [min()], [max()], [median()] and `inline_linegraph()`. The mean, standard
#'   deviation, minimum, maximum and median are computed with `na.rm = TRUE`.
#' @export
get_skimmers.ts <- function(column) {
  sfl(
    skim_type = "ts",
    start = ts_start,
    end = ts_end,
    frequency = stats::frequency,
    deltat = stats::deltat,
    mean = ~ mean(., na.rm = TRUE),
    sd = ~ stats::sd(., na.rm = TRUE),
    min = ~ min(., na.rm = TRUE),
    max = ~ max(., na.rm = TRUE),
    median = ~ stats::median(., na.rm = TRUE),
    line_graph = ~ inline_linegraph(., 16)
  )
}

#' @describeIn get_skimmers Summary functions for `list` columns: [n_unique()],
#'  [list_min_length()] and [list_max_length()], reported as `n_unique`,
#'  `min_length` and `max_length`.
#' @export
get_skimmers.list <- function(column) {
  sfl(
    skim_type = "list",
    n_unique = n_unique,
    min_length = list_min_length,
    max_length = list_max_length
  )
}

#' @describeIn get_skimmers Summary functions for `AsIs` columns: [n_unique()],
#'  [list_min_length()] and [list_max_length()]. These are the `list`
#'  skimmers, relabelled with the `"AsIs"` skim type.
#' @export
get_skimmers.AsIs <- function(column) {
  modify_default_skimmers(skim_type = "list", new_skim_type = "AsIs")
}

#' @describeIn get_skimmers Summary functions for `haven_labelled` columns.
#'  Extracts the underlying data with [vctrs::vec_data()] and dispatches
#'  [get_skimmers()] again on that data.
#' @export
get_skimmers.haven_labelled <- function(column) {
  underlying <- vctrs::vec_data(column)
  get_skimmers(underlying)
}

#' @rdname get_skimmers
#' @param new_skim_type The type to assign to the looked up set of skimmers.
#'   When `NULL`, the original `skim_type` is kept.
#' @param new_funs Replacement functions for those in the looked up set of
#'   skimmers. They are merged into the defaults with [purrr::list_modify()].
#' @export
modify_default_skimmers <- function(skim_type,
                                    new_skim_type = NULL,
                                    new_funs = list()) {
  # Start from the defaults registered for `skim_type`, then overlay the
  # caller's replacements on top of them.
  defaults <- get_one_default_skimmer(skim_type)
  merged <- purrr::list_modify(defaults, !!!new_funs)
  sfl(skim_type = new_skim_type %||% skim_type, !!!merged)
}

#' View default skimmer names and functions
#'
#' These utility functions look up the currently-available defaults for one or
#' more `skim_type`'s. They work with all defaults in the `skimr` package, as
#' well as the defaults in any package that extends `skimr`. See
#' [get_skimmers()] for writing lookup methods for different types.
#'
#' The functions differ in return type and whether or not the result is in
#' a list. [get_default_skimmers()] and [get_one_default_skimmer()] return
#' functions. The former returns functions within a typed list, i.e.
#' `list(numeric = list(...functions...))`.
#'
#' [get_default_skimmer_names()] and [get_one_default_skimmer_names()]
#' return a list of character vectors or a single character vector,
#' respectively.
#'
#' [get_sfl()] returns the skimmer function list for a particular `skim_type`.
#' It differs from [get_default_skimmers()] in that the returned `sfl` contains
#' a list of functions and a `skim_type`.
#'
#' @param skim_type The class of the column being skimmed. For
#'   [get_default_skimmers()] and [get_default_skimmer_names()], `NULL` (the
#'   default) looks up every type that has a [get_skimmers()] method, except
#'   for the `default` method.
#' @export
get_default_skimmers <- function(skim_type = NULL) {
  if (is.null(skim_type)) {
    defaults <- as.character(utils::methods("get_skimmers"))
    # Strip only the literal "get_skimmers." prefix. The pattern is anchored
    # and the dot is escaped so it cannot match any other character or
    # position in a method name.
    types <- stringr::str_replace(defaults, "^get_skimmers\\.", "")
    no_default <- purrr::discard(types, ~ .x == "default")
    iter <- rlang::set_names(no_default)
  } else {
    iter <- rlang::set_names(skim_type)
  }

  results <- purrr::map(iter, get_one_default_skimmer)
  # Types without defaults come back as an `NA` placeholder; drop them.
  no_defaults_for_class <- purrr::map_lgl(results, anyNA)
  results[!no_defaults_for_class]
}

#' @describeIn get_default_skimmers Get the functions associated with one
#'   `skim_type`, i.e. the `funs` element of its `sfl`.
#' @export
get_one_default_skimmer <- function(skim_type) {
  get_sfl(skim_type)$funs
}

#' @describeIn get_default_skimmers Get the names of the functions used in one
#'   or more `skim_type`'s, as a list with one character vector per type.
#' @export
get_default_skimmer_names <- function(skim_type = NULL) {
  purrr::map(get_default_skimmers(skim_type), names)
}

#' @describeIn get_default_skimmers Get the names of the functions used in one
#'   `skim_type`.
#' @export
get_one_default_skimmer_names <- function(skim_type) {
  names(get_one_default_skimmer(skim_type))
}

#' @describeIn get_default_skimmers Get the `sfl` for a `skim_type`. When only
#'   the default [get_skimmers()] method matches, a warning is raised and a
#'   placeholder `sfl` with an `NA` `skim_type` is returned instead.
#' @export
get_sfl <- function(skim_type) {
  # Dispatch on an empty prototype that carries only the requested class.
  skimmers <- get_skimmers(structure(integer(), class = skim_type))
  if (skimmers$skim_type == "default") {
    # `warning()` concatenates its arguments without a separator, so collapse
    # a multi-element class vector explicitly to keep the message readable.
    warning(
      "There are no default skimmers for type: ",
      paste(skim_type, collapse = ", ")
    )
    sfl(NA, skim_type = NA_character_)
  } else {
    skimmers
  }
}

Try the skimr package in your browser

Any scripts or data that you put into this service are public.

skimr documentation built on Dec. 28, 2022, 2:45 a.m.