# R/to_factor.R

# Defines functions to_fac_helper to_factor

# Documented in to_factor

#' @title Convert variable into factor and keep value labels
#' @name to_factor
#'
#' @description This function converts a variable into a factor, but preserves
#'                variable and value label attributes. See 'Examples'.
#'
#' @seealso \code{\link[sjlabelled]{as_numeric}} to convert a factor into a
#'   numeric vector and \code{\link[sjlabelled]{as_label}} to convert a vector
#'   into a factor with labelled factor levels.
#'
#' @param x A vector or data frame.
#' @param ... Optional, unquoted names of variables that should be selected for
#'   further processing. Required if \code{x} is a data frame (rather than a
#'   vector) and only selected variables from \code{x} should be processed.
#'   You may also use functions like \code{:} or tidyselect's
#'   \code{\link[tidyselect]{select_helpers}}.
#'   See 'Examples' or \href{../doc/design_philosophy.html}{package-vignette}.
#' @param add.non.labelled Logical, if \code{TRUE}, non-labelled values also
#'   get value labels.
#' @param ref.lvl Numeric, specifies the reference level for the new factor. Use
#'   this parameter if a different factor level than the lowest value should be
#'   used as reference level. If \code{NULL}, lowest value will become the
#'   reference level. See \code{\link{ref_lvl}} for details.
#'
#' @return A factor, including variable and value labels. If \code{x} is a
#'   data frame, the complete data frame \code{x} will be returned, where
#'   variables specified in \code{...} are coerced to factors (including
#'   variable and value labels); if \code{...} is not specified, applies to
#'   all variables in the data frame.
#'
#' @note This function is intended for use with vectors that have value and variable
#'   label attributes. Unlike \code{\link{as.factor}}, \code{to_factor} converts
#'   a variable into a factor and preserves the value and variable label attributes.
#'   \cr \cr
#'   Adding label attributes is automatically done by importing data sets
#'   with one of the \code{read_*}-functions, like \code{\link[sjlabelled]{read_spss}}.
#'   Else, value and variable labels can be manually added to vectors
#'   with \code{\link[sjlabelled]{set_labels}} and \code{\link[sjlabelled]{set_label}}.
#'   \cr \cr
#'   This function is kept for backwards-compatibility. It is preferred to
#'   use \code{\link[sjlabelled]{as_factor}}.
#'
#' @details \code{to_factor} converts numeric values into a factor with numeric
#'   levels. \code{\link[sjlabelled]{as_label}}, however, converts a vector into
#'   a factor and uses value labels as factor levels.
#'
#' @examples
#' library(sjlabelled)
#' data(efc)
#' # normal factor conversion, loses value attributes
#' x <- as.factor(efc$e42dep)
#' frq(x)
#'
#' # factor conversion, which keeps value attributes
#' x <- to_factor(efc$e42dep)
#' frq(x)
#'
#' # create partially labelled vector
#' x <- set_labels(efc$e42dep,
#'                 labels = c(`1` = "independent", `4` = "severe dependency",
#'                            `9` = "missing value"))
#'
#' # only copy existing value labels
#' to_factor(x)
#' get_labels(to_factor(x), values = "p")
#'
#' # also add labels to non-labelled values
#' to_factor(x, add.non.labelled = TRUE)
#' get_labels(to_factor(x, add.non.labelled = TRUE), values = "p")
#'
#'
#' # Convert to factor, using different reference level
#' x <- to_factor(efc$e42dep)
#' str(x)
#' table(x)
#'
#' x <- to_factor(efc$e42dep, ref.lvl = 3)
#' str(x)
#' table(x)
#'
#'
#' # easily coerce specific variables in a data frame to factor
#' # and keep other variables, with their class preserved
#' to_factor(efc, e42dep, e16sex, c172code)
#'
#' # use select-helpers from dplyr-package
#' library(dplyr)
#' to_factor(efc, contains("cop"), c161sex:c175empl)
#'
#'
#' @export
to_factor <- function(x, ..., add.non.labelled = FALSE, ref.lvl = NULL) {
  # resolve the variables named in `...` against `x`
  selected <- get_dot_data(x, dplyr::quos(...))

  # plain vector: convert it directly and hand back the result
  if (!is.data.frame(x)) {
    return(to_fac_helper(selected, add.non.labelled, ref.lvl))
  }

  # data frame: convert each selected column and write it back into `x`,
  # leaving all other columns untouched
  for (column in colnames(selected)) {
    x[[column]] <- to_fac_helper(selected[[column]], add.non.labelled, ref.lvl)
  }

  x
}


# Internal workhorse of to_factor(): converts a single vector into a factor
# and re-attaches its value labels and variable label afterwards.
#
# Arguments:
#   x                 Vector to convert. Factors are returned unchanged.
#   add.non.labelled  Logical, forwarded to sjlabelled::get_labels() as
#                     `non.labelled`.
#   ref.lvl           If not NULL, forwarded to ref_lvl() as `lvl` to change
#                     the reference level of the resulting factor.
#
# Returns a factor carrying the value labels (set via
# sjlabelled::set_labels()) and the "label" attribute taken from `x`.
to_fac_helper <- function(x, add.non.labelled, ref.lvl) {
  # already a factor: nothing to convert. Note that `ref.lvl` is not applied
  # in this case, the input is handed back as is.
  if (is.factor(x)) return(x)

  # retrieve value labels
  lab <- sjlabelled::get_labels(
    x,
    attr.only = TRUE,
    values = "n",
    non.labelled = add.non.labelled
  )

  # retrieve variable label
  varlab <- attr(x, "label", exact = TRUE)

  # switch value and names attribute, since get_labels
  # returns the values as names, and the value labels
  # as "vector content"
  if (is.null(lab)) {
    lab.switch <- NULL
  } else {
    # `x` cannot be a factor at this point (see early return above), so only
    # character vectors keep their values as strings; for everything else the
    # values are converted to numeric
    if (is.character(x)) {
      lab.switch <- names(lab)
    } else {
      lab.switch <- as.numeric(names(lab))
    }

    names(lab.switch) <- as.character(lab)
  }

  # convert variable to factor
  x <- factor(x, exclude = c(NA_character_, "NaN"))

  # set back value labels; any messages from set_labels() are silenced
  x <- suppressMessages(
    sjlabelled::set_labels(
      x,
      labels = lab.switch,
      force.labels = TRUE,
      force.values = FALSE
    )
  )

  # set back variable label
  attr(x, "label") <- varlab

  # change reference level?
  if (!is.null(ref.lvl)) x <- ref_lvl(x, lvl = ref.lvl)

  x
}
# strengejacke/sjmisc documentation built on Nov. 10, 2018, 1:24 p.m.