R/subscript-loc.R

Defines functions cnd_subscript_oob_non_consecutive cnd_body_vctrs_error_subscript_oob_non_consecutive cnd_header_vctrs_error_subscript_oob_non_consecutive stop_location_oob_non_consecutive vctrs_cli_vec cnd_body_vctrs_error_subscript_oob_name cnd_body_vctrs_error_subscript_oob_location cnd_body.vctrs_error_subscript_oob cnd_header.vctrs_error_subscript_oob stop_subscript_oob cnd_body_vctrs_error_indicator_size stop_indicator_size cnd_bullets_subscript_empty stop_subscript_empty cnd_bullets_subscript_missing stop_subscript_missing cnd_bullets_location_need_non_zero stop_location_zero cnd_bullets_location_need_non_negative stop_location_negative cnd_bullets_location2_need_positive cnd_bullets_location2_need_present cnd_bullets_location2_need_scalar new_error_location2_type cnd_body_vctrs_error_location_negative_positive stop_location_negative_positive cnd_body_vctrs_error_location_negative_missing stop_location_negative_missing vec_as_location2_result num_as_location2 vec_as_location2 num_as_location vec_as_location

Documented in num_as_location num_as_location2 vec_as_location vec_as_location2

#' Create a vector of locations
#'
#' @description
#'
#' These helpers provide a means of standardizing common indexing
#' methods such as integer, character or logical indexing.
#'
#' * `vec_as_location()` accepts integer, character, or logical vectors
#'   of any size. The output is always an integer vector that is
#'   suitable for subsetting with `[` or [vec_slice()]. It might be a
#'   different size than the input because negative selections are
#'   transformed to positive ones and logical vectors are transformed
#'   to a vector of indices for the `TRUE` locations.
#'
#' * `vec_as_location2()` accepts a single number or string. It returns
#'   a single location as a integer vector of size 1. This is suitable
#'   for extracting with `[[`.
#'
#' * `num_as_location()` and `num_as_location2()` are specialized variants
#'   that have extra options for numeric indices.
#'
#' @inheritParams vec_slice
#' @inheritParams rlang::args_error_context
#'
#' @param n A single integer representing the total size of the
#'   object that `i` is meant to index into.
#'
#' @param names If `i` is a character vector, `names` should be a character
#'   vector that `i` will be matched against to construct the index. Otherwise,
#'   not used. The default value of `NULL` will result in an error
#'   if `i` is a character vector.
#'
#' @param missing How should missing `i` values be handled?
#'   - `"error"` throws an error.
#'   - `"propagate"` returns them as is.
#'   - `"remove"` removes them.
#'
#'   By default, vector subscripts propagate missing values but scalar
#'   subscripts error on them.
#'
#'   Propagated missing values can't be combined with negative indices when
#'   `negative = "invert"`, because they can't be meaningfully inverted.
#'
#' @param arg The argument name to be displayed in error messages.
#'
#' @return
#' - `vec_as_location()` and `num_as_location()` return an integer vector that
#'   can be used as an index in a subsetting operation.
#'
#' - `vec_as_location2()` and `num_as_location2()` return an integer of size 1
#'   that can be used a scalar index for extracting an element.
#'
#' @examples
#' x <- array(1:6, c(2, 3))
#' dimnames(x) <- list(c("r1", "r2"), c("c1", "c2", "c3"))
#'
#' # The most common use case validates row indices
#' vec_as_location(1, vec_size(x))
#'
#' # Negative indices can be used to index from the back
#' vec_as_location(-1, vec_size(x))
#'
#' # Character vectors can be used if `names` are provided
#' vec_as_location("r2", vec_size(x), rownames(x))
#'
#' # You can also construct an index for dimensions other than the first
#' vec_as_location(c("c2", "c1"), ncol(x), colnames(x))
#'
#' @keywords internal
#' @export
vec_as_location <- function(i,
                            n,
                            names = NULL,
                            ...,
                            missing = c("propagate", "remove", "error"),
                            arg = caller_arg(i),
                            call = caller_env()) {
  check_dots_empty0(...)

  .Call(
    ffi_as_location,
    i = i,
    n = n,
    names = names,
    loc_negative = "invert",
    loc_oob = "error",
    loc_zero = "remove",
    missing = missing,
    frame = environment()
  )
}

#' @rdname vec_as_location
#'
#' @param negative How should negative `i` values be handled?
#'   - `"error"` throws an error.
#'   - `"ignore"` returns them as is.
#'   - `"invert"` returns the positive location generated by inverting the
#'     negative location. When inverting, positive and negative locations
#'     can't be mixed. This option is only applicable for `num_as_location()`.
#'
#' @param oob How should out-of-bounds `i` values be handled?
#'   - `"error"` throws an error.
#'   - `"remove"` removes both positive and negative out-of-bounds locations.
#'   - `"extend"` allows positive out-of-bounds locations if they directly
#'     follow the end of a vector. This can be used to implement extendable
#'     vectors, like `letters[1:30]`.
#'
#' @param zero How should zero `i` values be handled?
#'   - `"error"` throws an error.
#'   - `"remove"` removes them.
#'   - `"ignore"` returns them as is.
#'
#' @export
num_as_location <- function(i,
                            n,
                            ...,
                            missing = c("propagate", "remove", "error"),
                            negative = c("invert", "error", "ignore"),
                            oob = c("error", "remove", "extend"),
                            zero = c("remove", "error", "ignore"),
                            arg = caller_arg(i),
                            call = caller_env()) {
  check_dots_empty0(...)

  if (is.object(i) || !(is_integer(i) || is_double(i))) {
    abort("`i` must be a numeric vector.")
  }
  .Call(
    ffi_as_location,
    i = i,
    n = n,
    names = NULL,
    loc_negative = negative,
    loc_oob = oob,
    loc_zero = zero,
    missing = missing,
    env = environment()
  )
}

#' @rdname vec_as_location
#' @export
vec_as_location2 <- function(i,
                             n,
                             names = NULL,
                             ...,
                             missing = c("error", "propagate"),
                             arg = caller_arg(i),
                             call = caller_env()) {
  check_dots_empty0(...)
  result_get(vec_as_location2_result(
    i,
    n = n,
    names = names,
    negative = "error",
    missing = missing,
    arg = arg,
    call = call
  ))
}
#' @rdname vec_as_location
#' @export
num_as_location2 <- function(i,
                             n,
                             ...,
                             negative = c("error", "ignore"),
                             missing = c("error", "propagate"),
                             arg = caller_arg(i),
                             call = caller_env()) {
  check_dots_empty0(...)

  if (!is_integer(i) && !is_double(i)) {
    abort("`i` must be a numeric vector.", call = call)
  }
  result_get(vec_as_location2_result(
    i,
    n = n,
    names = NULL,
    negative = negative,
    missing = missing,
    arg = arg,
    call = call
  ))
}

vec_as_location2_result <- function(i,
                                    n,
                                    names,
                                    missing,
                                    negative,
                                    arg,
                                    call) {
  allow_missing <- arg_match0(missing, c("error", "propagate")) == "propagate"
  allow_negative <- arg_match0(negative, c("error", "ignore")) == "ignore"

  result <- vec_as_subscript2_result(
    i = i,
    arg = arg,
    call = call
  )

  if (!is_null(result$err)) {
    parent <- result$err
    return(result(err = new_error_location2_type(
      i = i,
      subscript_arg = arg,
      body = parent$body,
      call = call
    )))
  }

  # Locations must be size 1, can't be NA, and must be positive
  i <- result$ok

  if (length(i) != 1L) {
    return(result(err = new_error_location2_type(
      i = i,
      subscript_arg = arg,
      body = cnd_bullets_location2_need_scalar,
      call = call
    )))
  }

  neg <- typeof(i) == "integer" && !is.na(i) && i < 0L
  if (allow_negative && neg) {
    i <- -i
  }

  if (is.na(i)) {
    if (!allow_missing && is.na(i)) {
      result <- result(err = new_error_location2_type(
        i = i,
        subscript_arg = arg,
        body = cnd_bullets_location2_need_present,
        call = call
      ))
    } else {
      result <- result(i)
    }
    return(result)
  }

  if (identical(i, 0L)) {
    return(result(err = new_error_location2_type(
      i = i,
      subscript_arg = arg,
      body = cnd_bullets_location2_need_positive,
      call = call
    )))
  }

  if (!allow_negative && neg) {
    return(result(err = new_error_location2_type(
      i = i,
      subscript_arg = arg,
      body = cnd_bullets_location2_need_positive,
      call = call
    )))
  }

  err <- NULL
  i <- tryCatch(
    vec_as_location(i, n, names = names, arg = arg, call = call),
    vctrs_error_subscript = function(err) {
      err[["subscript_scalar"]] <- TRUE
      err <<- err
      i
    }
  )
  if (!is_null(err)) {
    return(result(err = err))
  }

  if (neg) {
    i <- -i
  }

  result(i)
}


stop_location_negative_missing <- function(i, ..., call = caller_env()) {
  cnd_signal(new_error_subscript_type(
    i,
    ...,
    body = cnd_body_vctrs_error_location_negative_missing,
    call = call
  ))
}
cnd_body_vctrs_error_location_negative_missing <- function(cnd, ...) {
  missing_loc <- which(is.na(cnd$i))
  arg <- append_arg("Subscript", cnd$subscript_arg)

  if (length(missing_loc) == 1) {
    loc <- glue::glue("{arg} has a missing value at location {missing_loc}.")
  } else {
    n_loc <- length(missing_loc)
    missing_loc <- ensure_full_stop(enumerate(missing_loc))
    loc <- glue::glue(
      "{arg} has {n_loc} missing values at locations {missing_loc}"
    )
  }
  format_error_bullets(c(
    x = "Negative locations can't have missing values.",
    i = loc
  ))
}

stop_location_negative_positive <- function(i, ..., call = caller_env()) {
  cnd_signal(new_error_subscript_type(
    i,
    ...,
    body = cnd_body_vctrs_error_location_negative_positive,
    call = call
  ))
}
cnd_body_vctrs_error_location_negative_positive <- function(cnd, ...) {
  positive_loc <- which(cnd$i > 0)
  arg <- append_arg("Subscript", cnd$subscript_arg)

  if (length(positive_loc) == 1) {
    loc <- glue::glue("{arg} has a positive value at location {positive_loc}.")
  } else {
    n_loc <- length(positive_loc)
    positive_loc <- ensure_full_stop(enumerate(positive_loc))
    loc <- glue::glue(
      "{arg} has {n_loc} positive values at locations {positive_loc}"
    )
  }
  format_error_bullets(c(
    x = "Negative and positive locations can't be mixed.",
    i = loc
  ))
}


new_error_location2_type <- function(i,
                                     ...,
                                     class = NULL) {
  new_error_subscript2_type(
    class = class,
    i = i,
    numeric = "cast",
    character = "cast",
    ...
  )
}
cnd_bullets_location2_need_scalar <- function(cnd, ...) {
  cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg)
  format_error_bullets(c(
    x = glue::glue_data(cnd, "{subscript_arg} must be size 1, not {length(i)}.")
  ))
}
cnd_bullets_location2_need_present <- function(cnd, ...) {
  cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg)
  format_error_bullets(c(
    x = glue::glue_data(cnd, "{subscript_arg} must be a location, not {obj_type_friendly(i)}.")
  ))
}
cnd_bullets_location2_need_positive <- function(cnd, ...) {
  cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg)
  format_error_bullets(c(
    x = glue::glue_data(cnd, "{subscript_arg} must be a positive location, not {i}.")
  ))
}

stop_location_negative <- function(i, ..., call = caller_env()) {
  cnd_signal(new_error_subscript_type(
    i,
    body = cnd_bullets_location_need_non_negative,
    ...,
    call = call
  ))
}
cnd_bullets_location_need_non_negative <- function(cnd, ...) {
  cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg)
  format_error_bullets(c(
    x = glue::glue_data(cnd, "{subscript_arg} can't contain negative locations.")
  ))
}

stop_location_zero <- function(i, ..., call = caller_env()) {
  cnd_signal(new_error_subscript_type(
    i,
    body = cnd_bullets_location_need_non_zero,
    ...,
    call = call
  ))
}
cnd_bullets_location_need_non_zero <- function(cnd, ...) {
  zero_loc <- which(cnd$i == 0)
  zero_loc_size <- length(zero_loc)
  arg <- append_arg("Subscript", cnd$subscript_arg)

  if (zero_loc_size == 1) {
    loc <- glue::glue("It has a `0` value at location {zero_loc}.")
  } else {
    zero_loc <- ensure_full_stop(enumerate(zero_loc))
    loc <- glue::glue(
      "It has {zero_loc_size} `0` values at locations {zero_loc}"
    )
  }
  format_error_bullets(c(
    x = glue::glue("{arg} can't contain `0` values."),
    i = loc
  ))
}

stop_subscript_missing <- function(i, ..., call = caller_env()) {
  cnd_signal(new_error_subscript_type(
    i = i,
    body = cnd_bullets_subscript_missing,
    ...,
    call = call
  ))
}
cnd_bullets_subscript_missing <- function(cnd, ...) {
  cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg)

  missing_loc <- which(is.na(cnd$i))
  if (length(missing_loc) == 1) {
    missing_line <- glue::glue("It has a missing value at location {missing_loc}.")
  } else {
    missing_enum <- ensure_full_stop(enumerate(missing_loc))
    missing_line <- glue::glue("It has missing values at locations {missing_enum}")
  }

  format_error_bullets(c(
    x = glue::glue_data(cnd, "{subscript_arg} can't contain missing values."),
    x = missing_line
  ))
}

stop_subscript_empty <- function(i, ..., call = caller_env()) {
  cnd_signal(new_error_subscript_type(
    i = i,
    body = cnd_bullets_subscript_empty,
    ...,
    call = call
  ))
}
cnd_bullets_subscript_empty <- function(cnd, ...) {
  cnd$subscript_arg <- append_arg("Subscript", cnd$subscript_arg)

  loc <- which(cnd$i == "")
  if (length(loc) == 1) {
    line <- glue::glue("It has an empty string at location {loc}.")
  } else {
    enum <- ensure_full_stop(enumerate(loc))
    line <- glue::glue("It has an empty string at locations {enum}")
  }

  format_error_bullets(c(
    x = glue::glue_data(cnd, "{subscript_arg} can't contain the empty string."),
    x = line
  ))
}

stop_indicator_size <- function(i, n, ..., call = caller_env()) {
  cnd_signal(new_error_subscript_size(
    i,
    n = n,
    ...,
    body = cnd_body_vctrs_error_indicator_size,
    call = call
  ))
}
cnd_body_vctrs_error_indicator_size <- function(cnd, ...) {
  cnd$subscript_arg <- append_arg("Logical subscript", cnd$subscript_arg)
  glue_data_bullets(
    cnd,
    x = "{subscript_arg} must be size 1 or {n}, not {vec_size(i)}."
  )
}

stop_subscript_oob <- function(i,
                               subscript_type,
                               ...,
                               call = caller_env()) {
  stop_subscript(
    class = "vctrs_error_subscript_oob",
    i = i,
    subscript_type = subscript_type,
    ...,
    call = call
  )
}

#' @export
cnd_header.vctrs_error_subscript_oob <- function(cnd, ...) {
  if (cnd_subscript_oob_non_consecutive(cnd)) {
    return(cnd_header_vctrs_error_subscript_oob_non_consecutive(cnd, ...))
  }

  elt <- cnd_subscript_element(cnd)
  action <- cnd_subscript_action(cnd)
  type <- cnd_subscript_type(cnd)

  if (action %in% c("rename", "relocate") || type == "character") {
    glue::glue("Can't {action} {elt[[2]]} that don't exist.")
  } else {
    glue::glue("Can't {action} {elt[[2]]} past the end.")
  }
}

#' @export
cnd_body.vctrs_error_subscript_oob <- function(cnd, ...) {
  switch(cnd_subscript_type(cnd),
    numeric =
      if (cnd_subscript_oob_non_consecutive(cnd)) {
        cnd_body_vctrs_error_subscript_oob_non_consecutive(cnd, ...)
      } else {
        cnd_body_vctrs_error_subscript_oob_location(cnd, ...)
      },
    character =
      cnd_body_vctrs_error_subscript_oob_name(cnd, ...),
    abort("Internal error: subscript type can't be `logical` for OOB errors.")
  )
}
cnd_body_vctrs_error_subscript_oob_location <- function(cnd, ...) {
  i <- cnd$i

  # In case of missing locations
  i <- i[!is.na(i)]

  if (cnd_subscript_action(cnd) == "negate") {
    # Only report negative indices
    i <- i[i < 0L]
  }

  # In case of negative indexing
  i <- abs(i)

  oob <- i[i > cnd$size]
  oob_enum <- vctrs_cli_vec(oob)

  n_loc <- length(oob)
  n <- cnd$size
  elt <- cnd_subscript_element_cli(n, cnd)

  # TODO: Switch to `format_inline()` and format bullets lazily through rlang
  cli::format_error(c(
    "i" = "{cli::qty(n_loc)} Location{?s} {oob_enum} do{?esn't/n't} exist.",
    "i" = "There {cli::qty(n)} {?is/are} only {elt}."
  ))
}
cnd_body_vctrs_error_subscript_oob_name <- function(cnd, ...) {
  elt <- cnd_subscript_element(cnd, capital = TRUE)
  oob <- cnd$i[!cnd$i %in% cnd$names]
  oob_enum <- enumerate(glue::backtick(oob))

  format_error_bullets(c(
    x = glue::glue(ngettext(
      length(oob),
      "{elt[[1]]} {oob_enum} doesn't exist.",
      "{elt[[2]]} {oob_enum} don't exist."
    ))
  ))
}

vctrs_cli_vec <- function(x, ..., vec_trunc = 5) {
  cli::cli_vec(as.character(x), list(..., vec_trunc = vec_trunc))
}

stop_location_oob_non_consecutive <- function(i,
                                              size,
                                              ...,
                                              call = caller_env()) {
  stop_subscript_oob(
    i = i,
    size = size,
    subscript_type = "numeric",
    subscript_oob_non_consecutive = TRUE,
    ...,
    call = call
  )
}

cnd_header_vctrs_error_subscript_oob_non_consecutive <- function(cnd, ...) {
  action <- cnd_subscript_action(cnd)
  elt <- cnd_subscript_element(cnd)
  glue::glue("Can't {action} {elt[[2]]} beyond the end with non-consecutive locations.")
}
cnd_body_vctrs_error_subscript_oob_non_consecutive <- function(cnd, ...) {
  i <- sort(cnd$i)
  i <- i[i > cnd$size]

  non_consecutive <- i[c(TRUE, diff(i) != 1L)]

  arg <- append_arg("Subscript", cnd$subscript_arg)
  if (length(non_consecutive) == 1) {
    x_line <- glue::glue("{arg} contains non-consecutive location {non_consecutive}.")
  } else {
    non_consecutive <- ensure_full_stop(enumerate(non_consecutive))
    x_line <- glue::glue("{arg} contains non-consecutive locations {non_consecutive}")
  }

  glue_data_bullets(
    cnd,
    i = "Input has size {size}.",
    x = x_line
  )
}

cnd_subscript_oob_non_consecutive <- function(cnd) {
  out <- cnd$subscript_oob_non_consecutive %||% FALSE
  check_bool(out)
  out
}

Try the vctrs package in your browser

Any scripts or data that you put into this service are public.

vctrs documentation built on Oct. 13, 2023, 1:05 a.m.