# R/scale_at.R

#' @rdname scale_at
#'
#' @name scale_at
#' @title Rescale columns of tabular data
#'
#' @description
#'   `multiply_at()` and `divide_at()` avoid one of the pitfalls of
#'   `mutate_at()`, which is that `mutate_at()` applies transformations
#'   sequentially, rather than in parallel. See Details, below.
#'
#' @details Suppose we want to "normalize" all columns by the values in one
#'   column. The following code doesn't yield that result:
#'
#'   `mtcars %>% mutate_at(vars(everything()), ~ . / wt)`
#'
#'   All columns after `wt` are left "untouched". Why?
#'
#'   What's happening is that the columns are being transformed sequentially,
#'   rather than in parallel. First `mpg` is divided by `wt`. Then, `cyl` is
#'   divided by `wt`, followed by `disp`, `hp`, and `drat`.
#'
#'   Next, `wt` is divided by `wt`. From this point on, `wt` is equal to 1.
#'
#'   When the next column, `qsec`, is divided by `wt`, it's just being divided
#'   by 1. And so on for `vs`, `am`, `gear`, and `carb`. The values in those
#'   columns remain the same.
#'
#' @param .tbl tabular data
#' @param .vars A list of columns generated by `vars()`, a character vector of
#'   column names, a numeric vector of column positions, or `NULL`.
#' @param by a value or expression, to be evaluated within `.tbl`
#'
NULL

#' @rdname scale_at
#'
#' @param operator A binary function, such as `*` or `/`. It is called as
#'   `operator(column, by)` for each selected column.
#' @param ... Further arguments, passed on to `mutate_at()`.
#' @param .cols Passed on to `mutate_at()` as its `.cols` argument.
#'
#' @export
#' @importFrom rlang enquo
scale_at <- function (
  .tbl,
  .vars,
  by,
  operator,
  ...,
  .cols = NULL
) {

  # Capture `by` unevaluated, so that it can be evaluated within `.tbl`
  .by <- rlang::enquo(by)

  # Name of a temporary column that holds a frozen copy of `by`.
  # It must not clash with an existing column: otherwise that column would
  # be overwritten here and then dropped at the end. Extend the name until
  # it is unused.
  tmpvar <- "foo"
  while (tmpvar %in% colnames(.tbl)) {
    tmpvar <- paste0(tmpvar, "_")
  }

  # Create the temporary column. Because `by` is evaluated once, up front,
  # later changes to the columns it refers to cannot affect the scaling.
  mutated_data <-
    mutate(
      .tbl,
      !!tmpvar := !!.by)

  # Scale `.vars` by the temporary column
  scaled_data <-
    mutate_at(
      mutated_data,
      .vars,
      .funs = ~ operator(., get(tmpvar)),
      ...,
      .cols = .cols)

  # Finally, drop the temporary column. Select it by its exact name:
  # a regex helper such as `matches()` would also drop any other column
  # whose name merely contains `tmpvar`.
  tidied_data <-
    select(
      scaled_data,
      -!!rlang::sym(tmpvar))

  tidied_data

}

#' multiply_at
#'
#' @describeIn scale_at multiply the selected columns by `by`
#' @export
multiply_at <- function (...) {
  # Forward all arguments to `scale_at()`, with the operator fixed to `*`
  scale_at(..., operator = `*`)
}

#' divide_at
#'
#' @describeIn scale_at divide the selected columns by `by`
#' @export
divide_at <- function (...) {
  # Forward all arguments to `scale_at()`, with the operator fixed to `/`
  scale_at(..., operator = `/`)
}
# BAAQMD/tbltools documentation built on Feb. 29, 2024, 5:45 a.m.