R/val_labels.R

Defines functions names_prefixed_by_values.list names_prefixed_by_values.default names_prefixed_by_values sort_val_labels.data.frame sort_val_labels.haven_labelled sort_val_labels.default sort_val_labels remove_value_labels add_value_labels set_value_labels `val_label<-.data.frame` `val_label<-.haven_labelled` `val_label<-.default` `val_label<-` val_label.data.frame val_label.haven_labelled val_label.default val_label `val_labels<-.data.frame` `val_labels<-.haven_labelled_spss` `val_labels<-.haven_labelled` `val_labels<-.character` `val_labels<-.numeric` `val_labels<-.factor` `val_labels<-.default` `val_labels<-` val_labels.data.frame val_labels.haven_labelled val_labels.default val_labels

Documented in add_value_labels names_prefixed_by_values remove_value_labels set_value_labels sort_val_labels val_label val_labels

#' Get / Set value labels
#'
#' @param x A vector or a data.frame
#' @param prefixed Should labels be prefixed with values?
#' @param v A single value.
#' @param value A named vector for `val_labels()` (see [haven::labelled()]) or
#'   a character string for `val_label()`. `NULL` to remove the labels (except
#'   if `null_action = "labelled"`).
#'   For data frames, it could also be a named list with a vector of value
#'   labels per variable.
#' @param null_action,.null_action for advanced users, if `value = NULL`,
#' unclass the vector (default) or force/keep `haven_labelled` class
#' (if `null_action = "labelled"`)
#' @return
#'   `val_labels()` will return a named vector.
#'   `val_label()` will return a single character string.
#' @examples
#' v <- labelled(
#'   c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA),
#'   c(yes = 1, no = 3, "don't know" = 9)
#' )
#' val_labels(v)
#' val_labels(v, prefixed = TRUE)
#' val_label(v, 2)
#' val_label(v, 2) <- "maybe"
#' v
#' val_label(v, 9) <- NULL
#' v
#' val_labels(v, null_action = "labelled") <- NULL
#' v
#' val_labels(v) <- NULL
#' v
#' @export
val_labels <- function(x, prefixed = FALSE) {
  UseMethod("val_labels")
}

#' @export
val_labels.default <- function(x, prefixed = FALSE) {
  # return nothing
  NULL
}

#' @export
val_labels.haven_labelled <- function(x, prefixed = FALSE) {
  labels <- attr(x, "labels", exact = TRUE)
  if (prefixed) {
    names(labels) <- names_prefixed_by_values(labels)
  }
  labels
}

#' @export
val_labels.data.frame <- function(x, prefixed = FALSE) {
  lapply(x, val_labels, prefixed = prefixed)
}

#' @rdname val_labels
#' @export
`val_labels<-` <- function(
    x,
    null_action = c("unclass", "labelled"),
    value) {
  UseMethod("val_labels<-")
}

#' @export
`val_labels<-.default` <- function(
    x,
    null_action = c("unclass", "labelled"),
    value) {
  null_action <- match.arg(null_action)
  if (!is.null(value) || null_action == "labelled") {
    x <- labelled(x, value, label = var_label(x))
  }
  # otherwise do nothing
  x
}

#' @export
`val_labels<-.factor` <- function(
    x,
    null_action = c("unclass", "labelled"),
    value) {
  null_action <- match.arg(null_action)
  if (!is.null(value) || null_action == "labelled") {
    stop("Value labels cannot be applied to factors.")
  }
  x %>% remove_attributes("labels")
}

#' @export
`val_labels<-.numeric` <- function(
    x,
    null_action = c("unclass", "labelled"),
    value) {
  null_action <- match.arg(null_action)
  if ((!is.null(value) && length(value) > 0) || null_action == "labelled") {
    x <- labelled(x, value, label = var_label(x))
  }
  x
}

#' @export
`val_labels<-.character` <- function(
    x,
    null_action = c("unclass", "labelled"),
    value) {
  null_action <- match.arg(null_action)
  if ((!is.null(value) && length(value) > 0) || null_action == "labelled") {
    x <- labelled(x, value, label = var_label(x))
  }
  x
}

#' @export
`val_labels<-.haven_labelled` <- function(
    x,
    null_action = c("unclass", "labelled"),
    value) {
  null_action <- match.arg(null_action)
  if (length(value) == 0) {
    value <- NULL
  }
  if (is.null(value) && null_action == "unclass") {
    x <- unclass(x)
    attr(x, "labels") <- NULL
  } else {
    x <- labelled(x, value, label = var_label(x))
  }
  x
}

#' @export
`val_labels<-.haven_labelled_spss` <- function(
    x,
    null_action = c("unclass", "labelled"),
    value) {
  null_action <- match.arg(null_action)
  if (length(value) == 0) {
    value <- NULL
  }
  if (
    is.null(value) &&
      is.null(attr(x, "na_values")) &&
      is.null(attr(x, "na_range")) &&
      null_action == "unclass"
  ) {
    x <- unclass(x)
    attr(x, "labels") <- NULL
  } else {
    x <-
      labelled_spss(
        x,
        value,
        na_values = attr(x, "na_values"),
        na_range = attr(x, "na_range"),
        label = var_label(x)
      )
  }
  x
}

#' @export
`val_labels<-.data.frame` <- function(
    x,
    null_action = c("unclass", "labelled"),
    value) {
  null_action <- match.arg(null_action)
  if (!is.list(value)) {
    temp <- as.list(rep(1, ncol(x)))
    names(temp) <- names(x)
    value <- lapply(temp, function(x) {
      x <- value
    })
  }

  if (!all(names(value) %in% names(x))) {
    missing_names <- stringr::str_c(
      setdiff(names(value), names(x)),
      collapse = ", "
    )
    stop("some variables not found in x:", missing_names)
  }

  for (var in names(value)) {
    if (!is.null(value[[var]])) {
      if (mode(x[[var]]) != mode(value[[var]])) {
        stop("`x` and `value` must be same type",
          call. = FALSE,
          domain = "R-labelled"
        )
      }
      if (typeof(x[[var]]) != typeof(value[[var]])) {
        mode(value[[var]]) <- typeof(x[[var]])
      }
    }
  }

  for (var in names(value)) {
    val_labels(x[[var]], null_action = null_action) <- value[[var]]
  }

  x
}


#' @rdname val_labels
#' @export
val_label <- function(x, v, prefixed = FALSE) {
  UseMethod("val_label")
}

#' @export
val_label.default <- function(x, v, prefixed = FALSE) {
  if (length(v) != 1) {
    stop("`v` should be a single value", call. = FALSE, domain = "R-labelled")
  }
  # return nothing
  NULL
}

#' @export
val_label.haven_labelled <- function(x, v, prefixed = FALSE) {
  if (length(v) != 1) {
    stop("`v` should be a single value", call. = FALSE, domain = "R-labelled")
  }
  labels <- val_labels(x, prefixed = prefixed)
  if (v %in% labels) {
    names(labels)[labels == v]
  } else {
    NULL
  }
}

#' @export
val_label.data.frame <- function(x, v, prefixed = FALSE) {
  lapply(x, val_label, v = v, prefixed = prefixed)
}

#' @rdname val_labels
#' @export
`val_label<-` <- function(x, v, null_action = c("unclass", "labelled"), value) {
  UseMethod("val_label<-")
}

#' @export
`val_label<-.default` <- function(
    x,
    v,
    null_action = c("unclass", "labelled"),
    value) {
  if (length(v) != 1) {
    stop("`v` should be a single value", call. = FALSE, domain = "R-labelled")
  }
  if (length(value) > 1) {
    stop("`value` should be a single character string or NULL",
      call. = FALSE, domain = "R-labelled"
    )
  }
  names(value) <- v
  val_labels(x, null_action = null_action) <- value
  x
}

#' @export
`val_label<-.haven_labelled` <- function(
    x,
    v,
    null_action = c("unclass", "labelled"),
    value) {
  if (length(v) != 1) {
    stop("`v` should be a single value", call. = FALSE, domain = "R-labelled")
  }
  if (length(value) > 1) {
    stop("`value` should be a single character string or NULL",
      call. = FALSE, domain = "R-labelled"
    )
  }

  labels <- val_labels(x)

  if (is.null(value)) {
    if (v %in% labels) {
      labels <- labels[labels != v]
    }
  } else {
    if (v %in% labels) {
      names(labels)[labels == v] <- value
    } else {
      names(v) <- value
      labels <- c(labels, v)
    }
  }

  if (length(labels) == 0) {
    labels <- NULL
  }

  val_labels(x, null_action = null_action) <- labels
  x
}

#' @export
`val_label<-.numeric` <- `val_label<-.haven_labelled`

#' @export
`val_label<-.character` <- `val_label<-.haven_labelled`

#' @export
`val_label<-.data.frame` <- function(
    x,
    v,
    null_action = c("unclass", "labelled"),
    value) {
  if (!is.list(value)) {
    temp <- as.list(rep(1, ncol(x)))
    names(temp) <- names(x)
    value <- lapply(temp, function(x) {
      x <- value
    })
  }

  value <- value[names(value) %in% names(x)]

  for (var in names(value)[]) {
    if (!is.character(value[[var]]) && !is.null(value[[var]])) {
      stop("`value` should contain only characters or NULL",
        call. = FALSE, domain = "R-labelled"
      )
    }
    if (length(value[[var]]) > 1) {
      stop("`value` should contain only one string (or NULL) per variable",
        call. = FALSE, domain = "R-labelled"
      )
    }
  }

  for (var in names(value)) {
    val_label(x[[var]], v, null_action = null_action) <- value[[var]]
  }

  x
}

#' @rdname val_labels
#' @export
get_value_labels <- val_labels

#' @rdname val_labels
#' @param .data a data frame or a vector
#' @param ... name-value pairs of value labels (see examples)
#' @param .labels value labels to be applied to the data.frame,
#'   using the same syntax as `value` in `val_labels(df) <- value`.
#' @param .strict should an error be returned if some labels
#'   doesn't correspond to a column of `x`?
#' @note
#' `get_value_labels()` is identical to `val_labels()`.
#'
#' `set_value_labels()`, `add_value_labels()` and `remove_value_labels()`
#' could be used with \pkg{dplyr} syntax.
#' While `set_value_labels()` will replace the list of value labels,
#' `add_value_labels()` and `remove_value_labels()` will update that list
#' (see examples).
#'
#' `set_value_labels()` could also be applied to a vector / a data.frame column.
#' In such case, you can provide a vector of value labels using `.labels` or
#' several name-value pairs of value labels (see example).
#' Similarly, `add_value_labels()` and `remove_value_labels()` could also be
#' applied on vectors.
#' @return
#'  `set_value_labels()`, `add_value_labels()` and `remove_value_labels()` will
#'  return an updated copy of `.data`.
#' @examples
#' if (require(dplyr)) {
#'   # setting value labels
#'   df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) %>%
#'     set_value_labels(
#'       s1 = c(Male = "M", Female = "F"),
#'       s2 = c(Yes = 1, No = 2)
#'     )
#'   val_labels(df)
#'
#'   # updating value labels
#'   df <- df %>% add_value_labels(s2 = c(Unknown = 9))
#'   df$s2
#'
#'   # removing a value labels
#'   df <- df %>% remove_value_labels(s2 = 9)
#'   df$s2
#'
#'   # removing all value labels
#'   df <- df %>% set_value_labels(s2 = NULL)
#'   df$s2
#'
#'   # example on a vector
#'   v <- 1:4
#'   v <- set_value_labels(v, min = 1, max = 4)
#'   v
#'   v %>% set_value_labels(middle = 3)
#'   v %>% set_value_labels(NULL)
#'   v %>% set_value_labels(.labels = c(a = 1, b = 2, c = 3, d = 4))
#'   v %>% add_value_labels(between = 2)
#'   v %>% remove_value_labels(4)
#' }
#' @export
set_value_labels <- function(
    .data,
    ...,
    .labels = NA,
    .strict = TRUE,
    .null_action = c("unclass", "labelled")) {
  .null_action <- match.arg(.null_action)
  if (!is.data.frame(.data) && !is.atomic(.data)) {
    stop(".data should be a data.frame or a vector")
  }

  # vector case
  if (is.atomic(.data)) {
    if (!identical(.labels, NA)) {
      val_labels(.data, null_action = .null_action) <- .labels
    } else {
      values <- unlist(rlang::dots_list(...))
      val_labels(.data, null_action = .null_action) <- values
    }
    return(.data)
  }

  # data.frame case
  if (!identical(.labels, NA)) {
    if (!.strict) {
      .labels <- .labels[intersect(names(.labels), names(.data))]
    }
    val_labels(.data, null_action = .null_action) <- .labels
  }
  values <- rlang::dots_list(...)
  if (.strict && !all(names(values) %in% names(.data))) {
    missing_names <- stringr::str_c(
      setdiff(names(values), names(.data)),
      collapse = ", "
    )
    stop("some variables not found in .data: ", missing_names)
  }

  for (v in intersect(names(values), names(.data))) {
    val_labels(.data[[v]], null_action = .null_action) <- values[[v]]
  }

  .data
}

#' @rdname val_labels
#' @export
add_value_labels <- function(
    .data,
    ...,
    .strict = TRUE,
    .null_action = c("unclass", "labelled")) {
  .null_action <- match.arg(.null_action)
  if (!is.data.frame(.data) && !is.atomic(.data)) {
    stop(".data should be a data.frame or a vector")
  }

  # vector case
  if (is.atomic(.data)) {
    values <- unlist(rlang::dots_list(...))
    if (is.null(names(values)) || any(names(values) == "")) {
      stop("all arguments should be named")
    }
    for (v in names(values)) {
      val_label(.data, values[[v]], null_action = .null_action) <- v
    }
    return(.data)
  }

  # data.frame case
  values <- rlang::dots_list(...)
  if (.strict && !all(names(values) %in% names(.data))) {
    missing_names <- stringr::str_c(
      setdiff(names(values), names(.data)),
      collapse = ", "
    )
    stop("some variables not found in .data: ", missing_names)
  }

  for (v in values) {
    if (is.null(names(v)) || any(names(v) == "")) {
      stop("all arguments should be named vectors")
    }
  }

  for (v in intersect(names(values), names(.data))) {
    for (l in names(values[[v]])) {
      val_label(.data[[v]], values[[v]][[l]], null_action = .null_action) <- l
    }
  }

  .data
}

#' @rdname val_labels
#' @export
remove_value_labels <- function(
    .data,
    ...,
    .strict = TRUE,
    .null_action = c("unclass", "labelled")) {
  .null_action <- match.arg(.null_action)
  if (!is.data.frame(.data) && !is.atomic(.data)) {
    stop(".data should be a data.frame or a vector")
  }

  # vector case
  if (is.atomic(.data)) {
    values <- unlist(rlang::dots_list(...))
    for (v in values) {
      val_label(.data, v, null_action = .null_action) <- NULL
    }
    return(.data)
  }

  # data.frame case
  values <- rlang::dots_list(...)
  if (.strict && !all(names(values) %in% names(.data))) {
    missing_names <- stringr::str_c(
      setdiff(names(values), names(.data)),
      collapse = ", "
    )
    stop("some variables not found in .data: ", missing_names)
  }

  for (v in intersect(names(values), names(.data))) {
    for (l in values[[v]]) {
      val_label(.data[[v]], l, null_action = .null_action) <- NULL
    }
  }

  .data
}

#' Sort value labels
#'
#' Sort value labels according to values or to labels
#'
#' @param x A labelled vector or a data.frame
#' @param according_to According to values or to labels?
#' @param decreasing In decreasing order?
#' @examples
#' v <- labelled(c(1, 2, 3), c(maybe = 2, yes = 1, no = 3))
#' v
#' sort_val_labels(v)
#' sort_val_labels(v, decreasing = TRUE)
#' sort_val_labels(v, "l")
#' sort_val_labels(v, "l", TRUE)
#' @export
sort_val_labels <- function(
    x, according_to = c("values", "labels"),
    decreasing = FALSE) {
  UseMethod("sort_val_labels")
}

#' @export
sort_val_labels.default <- function(
    x,
    according_to = c("values", "labels"),
    decreasing = FALSE) {
  # do nothing
  x
}

#' @export
sort_val_labels.haven_labelled <- function(
    x,
    according_to = c("values", "labels"),
    decreasing = FALSE) {
  according_to <- match.arg(according_to)
  labels <- val_labels(x)
  if (!is.null(labels)) {
    if (according_to == "values") {
      labels <- sort_tagged_na(labels, decreasing = decreasing)
    }
    if (according_to == "labels") {
      labels <- labels[order(names(labels), decreasing = decreasing)]
    }
    val_labels(x) <- labels
  }
  x
}

#' @export
sort_val_labels.data.frame <- function(
    x,
    according_to = c("values", "labels"),
    decreasing = FALSE) {
  x[] <- lapply(x, sort_val_labels,
    according_to = according_to,
    decreasing = decreasing
  )
  x
}

#' Turn a named vector into a vector of names prefixed by values
#' @param x vector to be prefixed
#' @examples
#' df <- dplyr::tibble(
#'   c1 = labelled(c("M", "M", "F"), c(Male = "M", Female = "F")),
#'   c2 = labelled(c(1, 1, 2), c(Yes = 1, No = 2))
#' )
#' val_labels(df$c1)
#' val_labels(df$c1) %>% names_prefixed_by_values()
#' val_labels(df)
#' val_labels(df) %>% names_prefixed_by_values()
#' @export
names_prefixed_by_values <- function(x) {
  UseMethod("names_prefixed_by_values")
}

#' @export
names_prefixed_by_values.default <- function(x) {
  if (is.null(x)) {
    return(NULL)
  }
  res <- as.character(x)
  if (is.double(x)) {
    res[is_tagged_na(x)] <- format_tagged_na(x[is_tagged_na(x)])
  }
  res <- paste0("[", res, "] ", names(x))
  names(res) <- names(x)
  res
}

#' @export
names_prefixed_by_values.list <- function(x) {
  lapply(x, names_prefixed_by_values)
}
larmarange/labelled documentation built on Oct. 11, 2024, 6:25 p.m.