R/labelled.R

Defines functions vec_math.haven_labelled vec_arith.numeric.haven_labelled vec_arith.haven_labelled.numeric vec_arith.haven_labelled.haven_labelled vec_arith.haven_labelled.default vec_arith.haven_labelled vec_cast.haven_labelled.character vec_cast.haven_labelled.integer vec_cast.haven_labelled.double vec_cast.haven_labelled.haven_labelled vec_cast.character.haven_labelled vec_cast.integer.haven_labelled vec_cast.double.haven_labelled vec_ptype2.haven_labelled.haven_labelled vec_ptype2.haven_labelled.double vec_ptype2.double.haven_labelled is.labelled print_labels obj_print_footer.haven_labelled format.haven_labelled get_labeltext obj_print_header.haven_labelled vec_ptype_abbr.haven_labelled vec_ptype_full.haven_labelled summary.haven_labelled quantile.haven_labelled median.haven_labelled `names<-.haven_labelled` levels.haven_labelled as.character.haven_labelled validate_labelled new_labelled labelled

Documented in is.labelled labelled print_labels vec_arith.haven_labelled

#' Create a labelled vector.
#'
#' A labelled vector is a common data structure in other statistical
#' environments, allowing you to assign text labels to specific values.
#' This class makes it possible to import such labelled vectors in to R
#' without loss of fidelity. This class provides few methods, as I
#' expect you'll coerce to a standard R class (e.g. a [factor()])
#' soon after importing.
#'
#' @param x A vector to label. Must be either numeric (integer or double) or
#'   character.
#' @param labels A named vector or `NULL`. The vector should be the same type
#'   as `x`. Unlike factors, labels don't need to be exhaustive: only a fraction
#'   of the values might be labelled.
#' @param label A short, human-readable description of the vector.
#' @export
#' @examples
#' s1 <- labelled(c("M", "M", "F"), c(Male = "M", Female = "F"))
#' s2 <- labelled(c(1, 1, 2), c(Male = 1, Female = 2))
#' s3 <- labelled(
#'   c(1, 1, 2),
#'   c(Male = 1, Female = 2),
#'   label = "Assigned sex at birth"
#' )
#'
#' # Unfortunately it's not possible to make as.factor work for labelled objects
#' # so instead use as_factor. This works for all types of labelled vectors.
#' as_factor(s1)
#' as_factor(s1, levels = "values")
#' as_factor(s2)
#'
#' # Other statistical software supports multiple types of missing values
#' s3 <- labelled(
#'   c("M", "M", "F", "X", "N/A"),
#'   c(Male = "M", Female = "F", Refused = "X", "Not applicable" = "N/A")
#' )
#' s3
#' as_factor(s3)
#'
#' # Often when you have a partially labelled numeric vector, labelled values
#' # are special types of missing. Use zap_labels to replace labels with missing
#' # values
#' x <- labelled(c(1, 2, 1, 2, 10, 9), c(Unknown = 9, Refused = 10))
#' zap_labels(x)
labelled <- function(x = double(), labels = NULL, label = NULL) {
  x <- vec_data(x)
  labels <- vec_cast_named(labels, x, x_arg = "labels", to_arg = "x")
  validate_labelled(new_labelled(x, labels = labels, label = label))
}

new_labelled <- function(x = double(), labels = NULL, label = NULL,
                         ..., class = character()) {
  if (!is.numeric(x) && !is.character(x)) {
    cli_abort("{.arg x} must be a numeric or a character vector.")
  }
  if (!is.null(labels) && !vec_is(labels, x)) {
    cli_abort("{.arg labels} must be the same type as {.arg x}.")
  }
  if (!is.null(label) && (!is.character(label) || length(label) != 1)) {
    cli_abort("{.arg label} must be a character vector of length one.")
  }

  new_vctr(x,
    labels = labels,
    label = label,
    ...,
    class = c(class, "haven_labelled"),
    inherit_base_type = TRUE
  )
}

validate_labelled <- function(x) {
  labels <- attr(x, "labels")
  if (is.null(labels)) {
    return(x)
  }

  if (is.null(names(labels))) {
    cli_abort("{.arg labels} must have names.")
  }
  if (any(duplicated(stats::na.omit(labels)))) {
    cli_abort("{.arg labels} must be unique.")
  }

  x
}

#' @export
as.character.haven_labelled <- function(x, ...) {
  as.character(vec_data(x))
}

#' @export
levels.haven_labelled <- function(x) {
  NULL
}

# TODO: https://github.com/r-lib/vctrs/issues/1108
#' @export
`names<-.haven_labelled` <- function(x, value) {
  attr(x, "names") <- value
  x
}

#' @importFrom stats median
#' @export
median.haven_labelled <- function(x, na.rm = TRUE, ...) {
  if (is.character(x)) {
    cli_abort("Can't compute median of {.cls labelled<character>}.")
  }
  median(vec_data(x), na.rm = TRUE, ...)
}

#' @importFrom stats quantile
#' @export
quantile.haven_labelled <- function(x, ...) {
  if (is.character(x)) {
    cli_abort("Can't compute quantile of {.cls labelled<character>}.")
  }
  quantile(vec_data(x), ...)
}

#' @export
summary.haven_labelled <- function(object, ...) {
  summary(vec_data(object), ...)
}

# Formatting --------------------------------------------------------------

#' @export
vec_ptype_full.haven_labelled <- function(x, ...) {
  paste0("labelled<", vec_ptype_full(vec_data(x)), ">")
}

#' @export
vec_ptype_abbr.haven_labelled <- function(x, ...) {
  paste0(vec_ptype_abbr(vec_data(x)), "+lbl")
}

#' @export
obj_print_header.haven_labelled <- function(x, ...) {
  cat_line("<", vec_ptype_full(x), "[", vec_size(x), "]>", get_labeltext(x))
  invisible(x)
}

# Convenience function for getting the label with
# with a prefix (if label is not empty), used for
# printing 'label' and 'labelled_spss' vectors
get_labeltext <- function(x, prefix = ": ") {
  label <- attr(x, "label", exact = TRUE)
  if (!is.null(label)) {
    paste0(prefix, label)
  }
}

#' @export
format.haven_labelled <- function(x, ..., digits = getOption("digits")) {
  if (is.double(x)) {
    format_tagged_na(x, digits = digits)
  } else {
    format(vec_data(x), ...)
  }
}

#' @export
obj_print_footer.haven_labelled <- function(x, ...) {
  print_labels(x)
}


#' Print the labels of a labelled vector
#'
#' This is a convenience function, useful to explore the variables of
#' a newly imported dataset.
#' @param x A labelled vector
#' @param name The name of the vector (optional)
#' @export
#' @examples
#' s1 <- labelled(c("M", "M", "F"), c(Male = "M", Female = "F"))
#' s2 <- labelled(c(1, 1, 2), c(Male = 1, Female = 2))
#' labelled_df <- tibble::tibble(s1, s2)
#'
#' for (var in names(labelled_df)) {
#'   print_labels(labelled_df[[var]], var)
#' }
print_labels <- function(x, name = NULL) {
  if (!is.labelled(x)) {
    cli_abort("{.arg x} must be a labelled vector.")
  }
  labels <- attr(x, "labels")
  if (length(labels) == 0) {
    return(invisible(x))
  }

  cat("\nLabels:", name, "\n", sep = "")

  value <- if (is.double(labels)) format_tagged_na(labels) else unname(labels)

  lab_df <- data.frame(value = value, label = names(labels), row.names = NULL)
  print(lab_df, row.names = FALSE)

  invisible(x)
}


# Type system -------------------------------------------------------------

# Import to avoid R CMD check NOTE
#' @importFrom methods setOldClass
setOldClass(c("haven_labelled", "vctrs_vctr"))

#' @export
#' @rdname labelled
is.labelled <- function(x) inherits(x, "haven_labelled")

#' @export
vec_ptype2.double.haven_labelled <- function(x, y, ...) {
  data_type <- vec_ptype2(x, vec_data(y), ...)
  new_labelled(data_type,
    labels = vec_cast_named(attr(y, "labels"), data_type),
    label = attr(y, "label", exact = TRUE)
  )
}
#' @export
vec_ptype2.integer.haven_labelled <- vec_ptype2.double.haven_labelled
#' @export
vec_ptype2.character.haven_labelled <- vec_ptype2.double.haven_labelled
#' @export
vec_ptype2.haven_labelled.double <- function(x, y, ...) vec_ptype2(y, x, ...)
#' @export
vec_ptype2.haven_labelled.integer <- vec_ptype2.haven_labelled.double
#' @export
vec_ptype2.haven_labelled.character <- vec_ptype2.haven_labelled.double

#' @export
vec_ptype2.haven_labelled.haven_labelled <- function(x, y, ..., x_arg = "", y_arg = "") {
  # Use x as the prototype if the input vectors have matching metadata
  if (identical(attributes(x), attributes(y))) {
    return(x)
  }

  data_type <- vec_ptype2(vec_data(x), vec_data(y), ..., x_arg = x_arg, y_arg = y_arg)

  # Prefer labels from LHS
  x_labels <- vec_cast_named(attr(x, "labels"), data_type, x_arg = x_arg)
  y_labels <- vec_cast_named(attr(y, "labels"), data_type, x_arg = y_arg)
  labels <- combine_labels(x_labels, y_labels, x_arg, y_arg)

  # Prefer labels from LHS
  label <- attr(x, "label", exact = TRUE) %||% attr(y, "label", exact = TRUE)

  new_labelled(data_type, labels = labels, label = label)
}


#' @export
vec_cast.double.haven_labelled <- function(x, to, ...) vec_cast(vec_data(x), to)
#' @export
vec_cast.integer.haven_labelled <- function(x, to, ...) vec_cast(vec_data(x), to)
#' @export
vec_cast.character.haven_labelled <- function(x, to, ...) {
  if (is.character(x)) {
    vec_cast(vec_data(x), to, ...)
  } else {
    stop_incompatible_cast(x, to, ...)
  }
}

#' @export
vec_cast.haven_labelled.haven_labelled <- function(x, to, ..., x_arg = "", to_arg = "") {
  # Don't perform any processing if the input vectors have matching metadata
  if (identical(attributes(x), attributes(to))) {
    return(x)
  }

  out_data <- vec_cast(vec_data(x), vec_data(to), ..., x_arg = x_arg, to_arg = to_arg)

  x_labels <- attr(x, "labels")
  to_labels <- attr(to, "labels")
  out_labels <- to_labels %||% x_labels

  out <- labelled(out_data,
    labels = out_labels,
    label = attr(x, "label", exact = TRUE)
  )

  # do we lose tagged na values?
  if (is.double(x) && !is.double(out)) {
    lossy <- is_tagged_na(x)
    maybe_lossy_cast(out, x, to, lossy,
      x_arg = x_arg,
      to_arg = to_arg,
      details = "Only doubles can hold tagged na values."
    )
  }

  # do any values become unlabelled?
  if (!is.null(to_labels)) {
    lossy <- x %in% x_labels[!x_labels %in% out_labels]
    maybe_lossy_cast(out, x, to, lossy,
      x_arg = x_arg,
      to_arg = to_arg,
      details = paste0("Values are labelled in `", x_arg, "` but not in `", to_arg, "`.")
    )
  }

  out
}

#' @export
vec_cast.haven_labelled.double <- function(x, to, ...) {
  vec_cast.haven_labelled.haven_labelled(x, to, ...)
}
#' @export
vec_cast.haven_labelled.integer <- function(x, to, ...) {
  vec_cast.haven_labelled.haven_labelled(x, to, ...)
}
#' @export
vec_cast.haven_labelled.character <- function(x, to, ...) {
  vec_cast.haven_labelled.haven_labelled(x, to, ...)
}

# Arithmetic --------------------------------------------------------------

#' Internal vctrs methods
#'
#' @keywords internal
#' @export vec_arith.haven_labelled
#' @method vec_arith haven_labelled
#' @export
vec_arith.haven_labelled <- function(op, x, y, ...) {
  UseMethod("vec_arith.haven_labelled", y)
}
#' @export
#' @method vec_arith.haven_labelled default
vec_arith.haven_labelled.default <- function(op, x, y, ...) {
  stop_incompatible_op(op, x, y)
}
#' @export
#' @method vec_arith.haven_labelled haven_labelled
vec_arith.haven_labelled.haven_labelled <- function(op, x, y, ...) {
  vec_arith_base(op, x, y)
}
#' @export
#' @method vec_arith.haven_labelled numeric
vec_arith.haven_labelled.numeric <- function(op, x, y, ...) {
  vec_arith_base(op, x, y)
}
#' @export
#' @method vec_arith.numeric haven_labelled
vec_arith.numeric.haven_labelled <- function(op, x, y, ...) {
  vec_arith_base(op, x, y)
}

#' @export
vec_math.haven_labelled <- function(.fn, .x, ...) {
  vec_math_base(.fn, .x, ...)
}
hadley/haven documentation built on Jan. 5, 2024, 10:30 a.m.