R/rec.R

Defines functions rec

Documented in rec

#' Recode Variable
#'
#' This function recodes numeric vectors, character vectors, or factors according
#' to recode specifications.
#'
#' Recode specifications appear in a character string, separated by semicolons
#' (see the examples below), of the form input = output. If an input value satisfies
#' more than one specification, then the first (from left to right) applies. If
#' no specification is satisfied, then the input value is carried over to the
#' result. \code{NA} is allowed in input and output. Several recode specifications
#' are supported:
#'
#' - single value
#' For example, 0 = NA
#'
#' - vector of values
#' For example, c(7, 8, 9) = 'high'
#'
#' - range of values
#' For example, 7:9 = 'C'. The special values lo (lowest value) and hi (highest
#' value) may appear in a range. For example, \code{lo:10 = 1}. Note that \code{:}
#' is not the R sequence operator. In addition you may not use \code{:} with the
#' collect operator, e.g., \code{c(1, 3, 5:7)} will cause an error.
#'
#' - else
#' For example, \code{else = NA}. Everything that does not fit a previous specification.
#' Note that \code{else} matches all otherwise unspecified values on input, including
#' \code{NA}.
#'
#' @param x         a numeric vector, character vector, factor, matrix or data
#'                  frame.
#' @param spec      a character string of recode specifications (see 'Details').
#' @param as.factor logical: if \code{TRUE}, character vector will be coerced to
#'                  a factor.
#' @param levels    a character vector for specifying the levels in the returned
#'                  factor.
#' @param names     a character string or character vector indicating the names
#'                  of the recoded variables when specifying more than one variable.
#'                  By default, variables are named with the ending \code{".r"}
#'                  resulting in e.g. \code{"x1.r"} and \code{"x2.r"}. Variable
#'                  names can also be specified using a character vector matching
#'                  the number of variables specified in \code{x} (e.g.,
#'                  \code{names = c("recode.x1", "recode.x2")}).
#' @param as.na     a numeric vector indicating user-defined missing values,
#'                  i.e. these values are converted to \code{NA} before conducting
#'                  the analysis.
#' @param table     logical: if \code{TRUE}, a cross table variable x recoded
#'                  variable is printed on the console if only one variable is
#'                  specified in \code{x}.
#' @param check     logical: if \code{TRUE}, argument specification is checked.
#'
#' @author
#' Takuya Yanagida \email{takuya.yanagida@@univie.ac.at}
#'
#' @seealso
#' \code{\link{item.reverse}}
#'
#' @references
#' Fox, J., & Weisberg S. (2019). \emph{An {R} Companion to Applied Regression} (3rd ed.).
#' Thousand Oaks CA: Sage. URL: https://socialsciences.mcmaster.ca/jfox/Books/Companion/
#'
#' @return
#' Returns a numeric vector or data frame with the same length or same number of
#' rows as \code{x} containing the recoded coded variable(s).
#'
#' @note
#' This function was adapted from the \code{recode()} function in the \pkg{car}
#' package by John Fox and Sanford Weisberg (2019).
#'
#' @export
#'
#' @examples
#' #--------------------------------------
#' # Numeric vector
#' x.num <- c(1, 2, 4, 5, 6, 8, 12, 15, 19, 20)
#'
#' # Recode 5 = 50 and 19 = 190
#' rec(x.num, "5 = 50; 19 = 190")
#'
#' # Recode 1, 2, and 5 = 100 and 4, 6, and 7 = 200 and else = 300
#' rec(x.num, "c(1, 2, 5) = 100; c(4, 6, 7) = 200; else = 300")
#'
#' # Recode lowest value to 10 = 100 and 11 to highest value = 200
#' rec(x.num, "lo:10 = 100; 11:hi = 200")
#'
#' # Recode 5 = 50 and 19 = 190 and check recoding
#' rec(x.num, "5 = 50; 19 = 190", table = TRUE)
#'
#' #--------------------------------------
#' # Character vector
#' x.chr <- c("a", "c", "f", "j", "k")
#'
#' # Recode a to x
#' rec(x.chr, "'a' = 'X'")
#'
#' # Recode a and f to x, c and j to y, and else to z
#' rec(x.chr, "c('a', 'f') = 'x'; c('c', 'j') = 'y'; else = 'z'")
#'
#' # Recode a to x and coerce to a factor
#' rec(x.chr, "'a' = 'X'", as.factor = TRUE)
#'
#' #--------------------------------------
#' # Factor
#' x.fac <- factor(c("a", "b", "a", "c", "d", "d", "b", "b", "a"))
#'
#' # Recode a to x, factor levels ordered alphabetically
#' rec(x.fac, "'a' = 'x'")
#'
#' # Recode a to x, user-defined factor levels
#' rec(x.fac, "'a' = 'x'", levels = c("x", "b", "c", "d"))
#'
#' #--------------------------------------
#' # Multiple variables
#' dat <- data.frame(x1.num = c(1, 2, 4, 5, 6),
#'                   x2.num = c(5, 19, 2, 6, 3),
#'                   x1.chr = c("a", "c", "f", "j", "k"),
#'                   x2.chr = c("b", "c", "a", "d", "k"),
#'                   x1.fac = factor(c("a", "b", "a", "c", "d")),
#'                   x2.fac = factor(c("b", "a", "d", "c", "e")))
#'
#' # Recode numeric vector and attach to 'dat'
#' dat <- cbind(dat,
#'              rec(dat[, c("x1.num", "x2.num")], "5 = 50; 19 = 190"))
#'
#' # Recode character vector and attach to 'dat'
#' dat <- cbind(dat,
#'              rec(dat[, c("x1.chr", "x2.chr")], "'a' = 'X'"))
#'
#' # Recode factor vector and attach to 'dat'
#' dat <- cbind(dat,
#'              rec(dat[, c("x1.fac", "x2.fac")], "'a' = 'X'"))
rec <- function(x, spec, as.factor = FALSE, levels = NULL, names = ".r",
                as.na = NULL, table = FALSE, check = TRUE) {

  #_____________________________________________________________________________
  #
  # Initial Check --------------------------------------------------------------

  # Check if input 'x' is missing
  if (isTRUE(missing(x))) { stop("Please specify a matrix or data frame for the argument 'x'.", call. = FALSE) }

  # Check if input 'x' is NULL
  if (isTRUE(is.null(x))) { stop("Input specified for the argument 'x' is NULL.", call. = FALSE) }

  # Convert 'x' into a vector when only one variable specified in 'x'
  if (isTRUE(ncol(data.frame(x)) == 1L)) { x <- unlist(x, use.names = FALSE) }

  # Check if input 'spec' is missing
  if (isTRUE(missing(spec))) { stop("Please specify a matrix or data frame for the argument 'spec'.", call. = FALSE) }

  # Check if input 'spec' is NULL
  if (isTRUE(is.null(spec))) { stop("Input specified for the argument 'spec' is NULL.", call. = FALSE) }

  #_____________________________________________________________________________
  #
  # Input Check ----------------------------------------------------------------

  # Check input 'check'
  if (isTRUE(!is.logical(check))) { stop("Please specify TRUE or FALSE for the argument 'check'.", call. = FALSE) }

  if (isTRUE(check)) {

    # Check input 'as.factor'
    if (isTRUE(!is.logical(as.factor))) { stop("Please specify TRUE or FALSE for the argument 'as.factor'.", call. = FALSE) }

    # Check input 'names'
    if (isTRUE(!is.null(dim(x)))) {

      if (isTRUE(!is.character(names))) { stop("Please specify a character string or vector for the argument 'names'.", call. = FALSE) }

      if (isTRUE(length(names) > 1L && length(names) != ncol(x))) { stop("The length of the vector specified in 'names' does not match with the number of variable in 'x'.", call. = FALSE) }

    }

    # Check input 'table'
    if (isTRUE(!is.logical(table))) { stop("Please specify TRUE or FALSE for the argument 'table'.", call. = FALSE) }

  }

  #_____________________________________________________________________________
  #
  # Data and Arguments ---------------------------------------------------------

  # Convert user-missing values into NA
  if (isTRUE(!is.null(as.na))) {

    x <- misty::as.na(x, na = as.na)

    # Variable with missing values only
    if (isTRUE(all(is.na(x)))) { stop("After converting user-missing values into NA, variable 'x' is completely missing.", call. = FALSE) }

  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Define special values ####

  lo <- -Inf
  hi <- Inf

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Recode specification terms ####

  spec.list <- rev(unlist(strsplit(spec, ";")))

  #_____________________________________________________________________________
  #
  # Main Function --------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Single variable ####
  if (isTRUE(is.null(dim(x)))) {

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ## Recoded result vector ####

    object <- x

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ## Convert factor into character ####

    if (isTRUE(is.factor(object))) { object <- as.character(object) }

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ## Loop across specification terms ####

    for (i in spec.list) {

      #...................
      ### Specification with range of values ####

      if (isTRUE(length(grep(":", i)) == 1L)) {

        range <- unlist(strsplit(unlist(strsplit(i, "="))[1L], ":"))

        low <- try(eval(parse(text = range[1L])), silent = TRUE)

        if (isTRUE(class(low) == "try-error")) {

          stop("In recode specification term: ", i, "\n       Message: ", attributes(low)$condition$message, call. = FALSE)

        }

        high <- try(eval(parse(text = range[2L])), silent = TRUE)

        if (isTRUE(class(high) == "try-error")) {

          stop("In recode specification term: ", i, "\n       Message: ", attributes(high)$condition$message, call. = FALSE)

        }

        target <- try(eval(parse(text = unlist(strsplit(i, "="))[2L])), silent = TRUE)

        if (isTRUE(class(target) == "try-error")) {

          stop("In recode specification term: ", i, "\n       Message: ", attributes(target)$condition$message, call. = FALSE)

        }

        object[(x >= low) & (x <= high)] <- target

      }

      #...................
      ### Specification with range of values else ####

      if (isTRUE(length(grep("else", i)) == 1L)) {

        target <- try(eval(parse(text = unlist(strsplit(i, "="))[2L])), silent = TRUE)

        if (isTRUE(class(target) == "try-error")) {

          stop("In recode specification term: ", i, "\n       Message: ", attributes(target)$condition$message, call. = FALSE)

        }

        object[seq_len(length(x))] <- target

      }

      #...................
      ### Specification with single or vector of values ####

      if (isTRUE(length(grep(":", i))  == 0L && length(grep("else", i)) == 0L)) {

        set <- try(eval(parse(text = unlist(strsplit(i, "="))[1L])), silent = TRUE)

        if (isTRUE(class(set) == "try-error")) {

          stop("In recode specification term: ", i, "\n       Message: ", attributes(set)$condition$message, call. = FALSE)

        }

        target <- try(eval(parse(text = unlist(strsplit(i, "="))[2L])), silent = TRUE)

        if (isTRUE(class(target) == "try-error")) {

          stop("In recode specification term: ", i, "\n       Message: ", attributes(target)$condition$message, call. = FALSE)

        }

        for (j in set) {

          if (isTRUE(is.na(j)))  {

            object[is.na(x)] <- target

          } else {

            object[x == j] <- target

          }

        }

      }

    }

    #...................
    ### Character and factor ####

    if (isTRUE(is.character(object))) {

      #......
      # Original vector was a factor
      if (isTRUE(is.factor(x))) {

        if (isTRUE(is.null(levels))) {

          object <- factor(object, levels = c(intersect(levels(x), object), setdiff(object, levels(x))))

        } else {

          object <- factor(object, levels = levels)

        }

      #......
      # Original vector was not a factor
      } else {

        if (isTRUE(as.factor)) {

          if (isTRUE(is.null(levels))) {

            object <- factor(object)

          } else {

            object <- factor(object, levels = levels)

          }

        #......
        # Convert character in numeric if possible
        } else {

          object.test <- suppressWarnings(as.numeric(object))

          if (isTRUE(sum(is.na(object.test)) == sum(is.na(object)))) {

            object <- as.numeric(object)

          }

        }

      }

    }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Multiple variables ####
  } else {

    object <- data.frame(lapply(x, misty::rec, spec = spec, as.factor = as.factor,
                                  levels = levels, as.na = as.na, check = FALSE))

    #...................
    ### Variable names ####

    if (isTRUE(length(names) == 1L)) {

      colnames(object) <- paste0(colnames(object), names)

    } else {

      colnames(object) <- names

    }

  }

  #_____________________________________________________________________________
  #
  # Output ---------------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Print cross table ####

  if (isTRUE(is.null(dim(x)) && table)) {

    print(table(x, object, dnn = c("variable", "recoded variable")))

    return(invisible(object))

  } else {

    return(object)

  }

}

Try the misty package in your browser

Any scripts or data that you put into this service are public.

misty documentation built on Nov. 15, 2023, 1:06 a.m.