R/units.R

Defines functions iso_format convert_df_units_attr_to_implicit_units vec_arith.iso_double_with_units.MISSING vec_arith.numeric.iso_double_with_units vec_arith.iso_double_with_units.numeric vec_arith.iso_double_with_units.iso_double_with_units downcast_for_unknown_op vec_arith.iso_double_with_units.default vec_arith.iso_double_with_units vec_cast.factor.iso_double_with_units vec_cast.logical.iso_double_with_units vec_cast.character.iso_double_with_units vec_cast.integer.iso_double_with_units vec_cast.double.iso_double_with_units vec_cast.iso_double_with_units.integer vec_cast.iso_double_with_units.double vec_ptype2.integer.iso_double_with_units vec_ptype2.iso_double_with_units.integer vec_ptype2.double.iso_double_with_units vec_ptype2.iso_double_with_units.double vec_cast.iso_double_with_units.iso_double_with_units vec_ptype2.iso_double_with_units.iso_double_with_units vec_cast.iso_double_with_units.default vec_cast.iso_double_with_units vec_ptype2.iso_double_with_units.default vec_ptype2.iso_double_with_units vec_ptype_abbr.iso_double_with_units format.iso_double_with_units vec_ptype_full.iso_double_with_units check_units_identical iso_make_units_implicit iso_make_units_explicit iso_strip_units iso_get_units iso_is_double_with_units new_iso_double_with_units iso_double_with_units iso_with_units

Documented in iso_double_with_units iso_format iso_get_units iso_is_double_with_units iso_make_units_explicit iso_make_units_implicit iso_strip_units iso_with_units vec_arith.iso_double_with_units vec_cast.iso_double_with_units vec_ptype2.iso_double_with_units

# Class Definitions ======

#' Generate values with units
#'
#' These functions generate values with units that work well within data frames and tibbles and implement safety checks on operations that combine values with different units. To retrieve the value without units, use \code{\link{iso_strip_units}} (works for single variables and data frames/tibbles). To retrieve the unit use \code{\link{iso_get_units}}. Note that to correctly combine data frames / tibbles that have values with units in them, use \link[vctrs:vec_bind]{vec_rbind} instead of \link{rbind} or \link[dplyr:bind]{bind_rows}. \link[vctrs:vec_bind]{vec_rbind} will combine columns that have values with units if they have the same unit and otherwise convert back to plain values without units with a warning. The other functions will either fail or reduce the unit values to plain values with a cryptic warning message about not preserving attributes.
#'
#' @details \code{iso_with_units} is the primary function to generate values with units. At present, only numeric values are supported so this function is just a shorter alias for the number-specific \code{iso_double_with_units}. It is not clear yet whether any non-numeric values with units make sense to be supported at a later point or whether integer and decimal numbers should be treated differently when they have units.
#'
#' @param x the values (single value or vector)
#' @param units the units for the value, by default "undefined units" but this parameter should always be supplied when working with real data that has units
#' @family functions for values with units
#' @export
iso_with_units <- function(x, units = "undefined units") {
  if (is.numeric(x))
    iso_double_with_units(x, units)
  else
    stop("cannot add units to a value of type '", class(x)[1], "', try parse_number() to turn your value into a number first", call. = FALSE)
}

#' @rdname iso_with_units
#' @family functions for values with units
#' @export
iso_double_with_units <- function(x = double(), units = "undefined units") {
  x <- vctrs::vec_cast(x, double())
  units <- vctrs::vec_recycle(vctrs::vec_cast(units, character()), 1L)
  new_iso_double_with_units(x, units = units)
}

# double with units constructor
new_iso_double_with_units <- function(x = double(), units = "undefined units") {
  vctrs::vec_assert(x, ptype = double())
  vctrs::vec_assert(units, ptype = character(), size = 1)
  if (is.na(units[1])) stop("units must be set (NA is not permissible)", call. = FALSE)
  vctrs::new_vctr(x, units = units, class = "iso_double_with_units")
}

#' @importFrom methods setOldClass
methods::setOldClass(c("iso_double_with_units", "vctrs_vctr"))

#' Check if a value has units
#'
#' Check if a variable is a double with units. That is if it has been generated by \code{\link{iso_double_with_units}}.
#' @param x vector to check for whether it is a double with units
#' @family functions for values with units
#' @export
iso_is_double_with_units <- function(x) {
  inherits(x, "iso_double_with_units")
}

#' Retrieve number units
#'
#' This function returns the units of a numerical value generated by \code{\link{iso_double_with_units}}. It returns \code{NA}) for unitless variables. Returns a column-named vector of units if \code{x} is a data frame / tibble. Returns the direct units of \code{x} in all other cases.
#' @param x variable to get the units for (vector or data frame)
#' @family functions for values with units
#' @export
iso_get_units <- function(x) {
  if (is.data.frame(x)) {
    units <- purrr::map_chr(x, iso_get_units)
  } else {
    units <- attr(x, "units")
    if (is.null(units)) units <- NA_character_
  }
  return(units)
}


#' Strip units from variables
#'
#' This function converts numbers with units back into unitless numbers both for single variables and data frames / tibbles. For single variables, this is equivalent to the \code{as.numeric} function.
#' @param x variable to strip units from (vector or data frame)
#' @family functions for values with units
#' @export
iso_strip_units <- function(x) {
  if (is.data.frame(x)) {
    x <- dplyr::mutate_if(x, iso_is_double_with_units, as.numeric)
  } else if (iso_is_double_with_units(x)) {
    x <- as.numeric(x)
  }
  return(x)
}

#' Make units explicit
#'
#' This function is intended for data frames / tibbles only and makes the units of columns that have numbers with units explicit in the column name. It also strips the units attribute from those columns using \code{\link{iso_strip_units}}. The reverse function is \code{\link{iso_make_units_implicit}}.
#'
#' @param df the data frame in which to make the units explicit
#' @param prefix the prefix for the units
#' @param suffix the suffix for the units
#' @family functions for values with units
#' @examples
#' # a data frame with implicit units
#' df <- tibble(peak = 1:5, height = iso_double_with_units(1:5, "V"))
#' df
#'
#' # show with explicit units
#' iso_make_units_explicit(df)
#'
#' # show with explicit units (custom prefix & suffix)
#' iso_make_units_explicit(df, prefix = ".", suffix = "")
#' @export
iso_make_units_explicit <- function(df, prefix = " [", suffix = "]") {
  if(!is.data.frame(df)) stop("can only make units explicit in data frames", call. = FALSE)
  if (ncol(df) == 0) return(df)
  col_names <- names(df)
  col_units <- iso_get_units(df)
  new_col_names <- ifelse(is.na(col_units), col_names, paste0(col_names, prefix, col_units, suffix))
  df <- iso_strip_units(df)
  names(df) <- new_col_names
  return(df)
}

#' Make units implicit
#'
#' This function is intended for data frames /tibbles only and tries to figure out which numeric columns have units in the column names and makes those units implicit using \code{\link{iso_double_with_units}}. The reverse function is \code{\link{iso_make_units_explicit}}.
#' @param df the data frame in which to make the units implicit/explicit
#' @inheritParams iso_make_units_explicit
#' @examples
#' # generate implicit units
#' df <- tibble(peak = 1:5, `height [V]` = 1:5)
#' iso_make_units_implicit(df)
#'
#' # convert back and forth
#' iso_make_units_implicit(df) |> iso_make_units_explicit()
#'
#' # implicit units from custom prefix & suffix
#' df <- tibble(peak = 1:5, height.V = 1:5)
#' iso_make_units_implicit(df, prefix = ".", suffix = "")
#' @family functions for values with units
#' @export
iso_make_units_implicit <- function(df, prefix = " [", suffix = "]") {
  if(!is.data.frame(df)) stop("can only make units implicit in data frames", call. = FALSE)
  if(nchar(prefix) == 0) stop("prefix must be at least 1 character", call. = FALSE)
  col_names <- names(df)
  # find pattern keeping in mind that prefix and suffix could be a random set of
  # characters so we don't just want to stick them into a regexp and use fixed instead
  if (nchar(suffix) > 0)
    ends_with_suffix <- stringr::str_ends(col_names, fixed(suffix))
  else
    ends_with_suffix <- rep(TRUE, length(col_names))
  col_names <- stringr::str_sub(col_names, 1L, -1L - ends_with_suffix * nchar(suffix))
  prefix <- stringr::str_locate_all(col_names, fixed(prefix))
  prefix_start <- prefix |> purrr::map(~.x[,1]) |> purrr::map_int(~if(length(.x) == 0) { NA_integer_ } else { max(.x) })
  prefix_end <- prefix |> purrr::map(~.x[,2]) |> purrr::map_int(~if(length(.x) == 0) { NA_integer_ } else { max(.x) })
  has_units <- ends_with_suffix & !is.na(prefix_end)

  # update units
  units <- stringr::str_sub(col_names[has_units], prefix_end[has_units] + 1L)
  df[has_units] <- map2(df[has_units], units, ~iso_double_with_units(.x, units = .y))

  # update column names
  col_names <- stringr::str_sub(col_names, 1L, prefix_start - 1L)
  new_col_names <- names(df)
  new_col_names[has_units] <- col_names[has_units]
  names(df) <- new_col_names

  return(df)
}

# check for identical units (convenience function)
check_units_identical <- function(x, y, warn_if_not = FALSE) {
  check <- identical(attr(x, "units"), attr(y, "units"))
  if (!check && warn_if_not) {
    glue::glue(
      "don't know how to reconcile different units '{iso_get_units(x)}' and ",
      "'{iso_get_units(y)}', converting to double without units to continue") |>
      warning(call. = FALSE, immediate. = TRUE)
  }
  return(check)
}

# formatting during printout
#' @importFrom vctrs vec_ptype_full
#' @method vec_ptype_full iso_double_with_units
#' @export
vec_ptype_full.iso_double_with_units <- function(x, ...) {
  sprintf("%s in '%s'", vctrs::vec_ptype_full(vctrs::vec_data(x), ...), iso_get_units(x))
}

#' @method format iso_double_with_units
#' @export
format.iso_double_with_units <- function(x, ...) {
  format(vctrs::vec_data(x), ...)
}
#' @importFrom vctrs vec_ptype_abbr
#' @method vec_ptype_abbr iso_double_with_units
#' @export
vec_ptype_abbr.iso_double_with_units <- function(x, ...) {
  if (check_units_identical(x, new_iso_double_with_units())) x_units <- "undef"
  else x_units <- iso_get_units(x)
  sprintf("%s[%s]", vctrs::vec_ptype_abbr(vctrs::vec_data(x), ...), x_units)
}

# Combinations and Casting ======
#' vec_ptype2 for iso_double_with_units
#' @importFrom vctrs vec_ptype2
#' @inheritParams vctrs::vec_ptype2
#' @method vec_ptype2 iso_double_with_units
#' @export
#' @export vec_ptype2.iso_double_with_units
vec_ptype2.iso_double_with_units <- function(x, y, ...) UseMethod("vec_ptype2.iso_double_with_units", y)
#' @method vec_ptype2.iso_double_with_units default
#' @export
vec_ptype2.iso_double_with_units.default <- function(x, y, ..., x_arg = "x", y_arg = "y") {
  vctrs::vec_default_ptype2(x, y, x_arg = x_arg, y_arg = y_arg)
}

#' vec_cast for iso_double_with_units
#' @importFrom vctrs vec_cast
#' @inheritParams vctrs::vec_cast
#' @method vec_cast iso_double_with_units
#' @export
#' @export vec_cast.iso_double_with_units
vec_cast.iso_double_with_units <- function(x, to, ...) UseMethod("vec_cast.iso_double_with_units")
#' @method vec_cast.iso_double_with_units default
#' @export
vec_cast.iso_double_with_units.default <- function(x, to, ...) vctrs::vec_default_cast(x, to)

# combining doubles with units: only allow it if they have the same units
#' @method vec_ptype2.iso_double_with_units iso_double_with_units
#' @export
vec_ptype2.iso_double_with_units.iso_double_with_units <- function(x, y, ...) {
  if (check_units_identical(x, y, warn_if_not = TRUE)) {
    # units are the same, keep it a double with units
    new_iso_double_with_units(units = iso_get_units(x))
  } else {
    # convert to a double without units
    double()
  }
}

#' @method vec_cast.iso_double_with_units iso_double_with_units
#' @export
vec_cast.iso_double_with_units.iso_double_with_units <- function(x, to, ...)  {
  if (check_units_identical(x, to, warn_if_not = TRUE)) {
    # has the right units so there isn't anything to do for casting
    return(x)
  } else {
    # convert to a double without units
    return(vctrs::vec_data(x))
  }
}

# combining a double with units with a double without units yields a double without units
#' @method vec_ptype2.iso_double_with_units double
#' @export
vec_ptype2.iso_double_with_units.double <- function(x, y, ...) double()
#' @importFrom vctrs vec_ptype2.double
#' @method vec_ptype2.double iso_double_with_units
#' @export
vec_ptype2.double.iso_double_with_units <- function(x, y, ...) double()

# combining a double with units with an integer yields a double without units
#' @method vec_ptype2.iso_double_with_units integer
#' @export
vec_ptype2.iso_double_with_units.integer <- function(x, y, ...) double()
#' @importFrom vctrs vec_ptype2.integer
#' @method vec_ptype2.integer iso_double_with_units
#' @export
vec_ptype2.integer.iso_double_with_units <- function(x, y, ...) double()

# no other vec_c combinations are formally allowed for now
# since double with units + character/logical/factor should really not happen


# cast from integer or double to double with units takes the units of the object being cast to
#' @method vec_cast.iso_double_with_units double
#' @export
vec_cast.iso_double_with_units.double <- function(x, to, ...) {
  iso_double_with_units(x, units = iso_get_units(to))
}
#' @method vec_cast.iso_double_with_units integer
#' @export
vec_cast.iso_double_with_units.integer <- function(x, to, ...) {
  iso_double_with_units(x, units = iso_get_units(to))
}


# cast from double with units to any other format makes it behave like a double
# this allows c() to work as if it was a double (but other vec_c combinations are not allowed)
#' @importFrom vctrs vec_cast.double
#' @method vec_cast.double iso_double_with_units
#' @export
vec_cast.double.iso_double_with_units <- function(x, to, ...) vctrs::vec_data(x)
#' @importFrom vctrs vec_cast.integer
#' @method vec_cast.integer iso_double_with_units
#' @export
vec_cast.integer.iso_double_with_units <- function(x, to, ...) as.integer(vctrs::vec_data(x))
#' @importFrom vctrs vec_cast.character
#' @method vec_cast.character iso_double_with_units
#' @export
vec_cast.character.iso_double_with_units <- function(x, to, ...) as.character(vctrs::vec_data(x))
#' @importFrom vctrs vec_cast.logical
#' @method vec_cast.logical iso_double_with_units
#' @export
vec_cast.logical.iso_double_with_units <- function(x, to, ...) as.logical(vctrs::vec_data(x))
#' @importFrom vctrs vec_cast.factor
#' @method vec_cast.factor iso_double_with_units
#' @export
vec_cast.factor.iso_double_with_units <- function(x, to, ...) as.factor(vctrs::vec_data(x))

# Arithmetic =======

#' vec_arith for iso_double_with_units
#' @importFrom vctrs vec_arith
#' @inheritParams vctrs::vec_arith
#' @method vec_arith iso_double_with_units
#' @export
#' @export vec_arith.iso_double_with_units
vec_arith.iso_double_with_units <- function(op, x, y, ...) {
  UseMethod("vec_arith.iso_double_with_units", y)
}
#' @method vec_arith.iso_double_with_units default
#' @export
vec_arith.iso_double_with_units.default <- function(op, x, y, ...) {
  vctrs::stop_incompatible_op(op, x, y)
}

downcast_for_unknown_op <- function(op, x, y, warn = TRUE) {
  if (warn) {
    glue::glue(
      "don't know how to calculate <{vctrs::vec_ptype_full(x)}> {op} <{vctrs::vec_ptype_full(y)}>, ",
      "converting to double without units to continue"
    ) |> warning(call. = FALSE, immediate. = TRUE)
  }
  vctrs::vec_arith_base(op, x, y)
}

# combining two units objects (if their units are identical)
# allow + and -, and / looses the units
# all other cases convert to double with a warning
#' @method vec_arith.iso_double_with_units iso_double_with_units
#' @export
vec_arith.iso_double_with_units.iso_double_with_units <- function(op, x, y, ...) {
  if (check_units_identical(x, y, warn_if_not = TRUE)) {
    switch(
      op,
      "+" = ,
      "-" = new_iso_double_with_units(vctrs::vec_arith_base(op, x, y), units = iso_get_units(x)),
      "/" = vctrs::vec_arith_base(op, x, y),
      # downcast to double
      downcast_for_unknown_op(op, x, y)
    )
  } else {
    # downcast to double
    downcast_for_unknown_op(op, x, y, warn = FALSE)
  }
}

# combining a units object with a number
# allow division and multiplication
# all other cases convert to double with a warning
#' @method vec_arith.iso_double_with_units numeric
#' @export
vec_arith.iso_double_with_units.numeric <- function(op, x, y, ...) {
  switch(
    op,
    "/" = ,
    "*" = new_iso_double_with_units(vctrs::vec_arith_base(op, x, y), units = iso_get_units(x)),
    downcast_for_unknown_op(op, x, y)
  )
}
# combining a number with a units object
# allow multiplication, everything else convert to double with warning
#' @importFrom vctrs vec_arith.numeric
#' @method vec_arith.numeric iso_double_with_units
#' @export
vec_arith.numeric.iso_double_with_units <- function(op, x, y, ...) {
  switch(
    op,
    "*" = new_iso_double_with_units(vctrs::vec_arith_base(op, x, y), units = iso_get_units(y)),
    downcast_for_unknown_op(op, x, y)
  )
}

# missing scenarios for unary +x and -x
#' @method vec_arith.iso_double_with_units MISSING
#' @export
vec_arith.iso_double_with_units.MISSING <- function(op, x, y, ...) {
  switch(op,
         `-` = x * -1,
         `+` = x,
         vctrs::stop_incompatible_op(op, x, y)
  )
}

# Convert vendor data table ======

# convert data frame globas units attr to implicit units using iso_double_with_unit
# if there is no data frame units attribute, returns df
convert_df_units_attr_to_implicit_units <- function(df) {

  if (!is.data.frame(df) || length(df) == 0) return(df)

  units <- attr(df, "units")
  if (is.null(units) || !is.data.frame(units) ||
      !all(c("column", "units") %in% names(units))) {
    # safety checks for units attrs
    attr(df, "units") <- NULL
    return(df)
  }

  # process units
  units <- units |>
    # find out which columns are numeric
    dplyr::left_join(
      purrr::map_lgl(df, is.numeric) |> tibble::enframe("column", "numeric"),
      by = "column"
    ) |>
    filter(nchar(units) > 0)

  # info check
  if (nrow(problematic <- filter(units, !numeric)) > 0) {
    glue::glue("encountered non-numeric data table columns with units: ",
               "{paste(problematic$units, collapse = ', ')}. Only numeric column ",
               "units can be preserved.") |>
      warning(immediate. = TRUE, call. = FALSE)
  }

  # convert columns into double_with_units
  units <- dplyr::filter(units, numeric) |>
    dplyr::mutate(units = stringr::str_remove(units, "^\\[") |> stringr::str_remove("\\]$"))

  # construct the conversion quos
  unit_quos <-
    with(units,
         purrr::map2(column, units,
                     ~quo(iso_double_with_units(!!sym(.x), units = !!.y))) |>
           rlang::set_names(column))

  # convert the units
  df <- dplyr::mutate(df, !!!unit_quos)
  attr(df, "units") <- NULL

  return(df)
}

# Formatting =====

#' Format values
#'
#' Convenience function to easily format and concatenate text and numeric values. Can be used with any test and number data. Automatically detects \code{\link{iso_with_units}} values and incorporates the units into the formatting.
#'
#' @param ... variable names with data. Must have the same dimensions if multiple are supplied. Can be named to rename variable name output. Will include units in output for all \link{iso_with_units}.
#' @param signif number of significant digits for numbered data
#' @param format_names how to format the variable names, set to \code{NULL} to remove names
#' @param format_units how to format the units from \code{\link{iso_double_with_units}} variables, set to \code{NULL} to omit units
#' @param replace_permil whether to replace the term 'permil' with the permil symbol (\\u2030)
#' @param sep separator between variables if multiple are provided in \code{...}
#' @examples
#' x <- iso_with_units(1:5, "V")
#' y <- iso_with_units(1:5, "permil")
#' iso_format(x, y)
#' iso_format(amplitude = x, d13C = y)
#' @export
iso_format <- function(..., signif = 3, format_names = "%s: ", format_units="%s", replace_permil = TRUE, sep = "\n") {
  # find variable names
  vars <- rlang::enquos(...)
  has_name <- nchar(names(vars)) > 0
  names(vars)[!has_name] <- map_chr(vars[!has_name], rlang::as_label)

  # evaluate variables
  vars <- purrr::map(vars, rlang::eval_tidy)

  # check length
  vars_size <- purrr::map_int(vars, length)
  if (!all(vars_size == vars_size[1]))
    stop("iso_format encountered variables with unequal lengths", call. = FALSE)

  # format data
  values <- purrr::map2(vars, names(vars), ~{
    value <-
      if (iso_is_double_with_units(.x) && !is.null(format_units))
        paste0(signif(.x, digits = signif), sprintf(format_units, iso_get_units(.x)))
    else if (iso_is_double_with_units(.x) || is.numeric(.x))
      as.character(signif(as.numeric(.x), digits = signif))
    else as.character(.x)
    if (!is.null(format_names)) value <- paste0(sprintf(format_names, .y), value)
    value
  })

  # full text
  return(
    do.call(paste, args = c(values, list(sep = sep))) |>
      stringr::str_replace_all(fixed("permil"), "\u2030")
  )
}
KopfLab/isoreader documentation built on Aug. 6, 2023, 9:22 p.m.