R/labels.R

Defines functions fast_match v_union labelled_and_unlabelled combine_labels is.labelled as.labelled.labelled as.labelled.factor as.labelled.default as.labelled unlab.list unlab.data.frame unlab.default unlab drop_unused_labels.data.frame drop_unused_labels.list drop_unused_labels.category drop_unused_labels.default drop_unused_labels unvl has.labels set_val_lab.data.frame set_val_lab.default add_val_lab set_val_lab val_lab.default val_lab.data.frame val_lab remove_class add_class add_labelled_class.list add_labelled_class.default add_labelled_class drop_lab.list drop_lab.data.frame drop_lab.default drop_lab has.label `var_lab<-.data.frame` `var_lab<-.default` `var_lab<-` var_lab.data.frame var_lab

Documented in add_labelled_class add_val_lab as.labelled drop_lab drop_unused_labels has.label has.labels is.labelled set_val_lab unlab unvl val_lab var_lab var_lab.data.frame

# These functions derived from expss package

#' Set or get variable label
#'
#' @description These functions set/get/drop variable labels. For
#' value labels see \link{val_lab}. 
#' \itemize{
#' \item{\code{var_lab}}{ returns variable label or NULL if label doesn't
#' exist.}
#' \item{\code{var_lab<-}}{ set variable label.}
#' \item{\code{drop_lab}}{ drops variable label.}
#' \item{\code{has.label}}{ check if variable label exists.}
#' \item{\code{add_labelled_class}}{ Add missing 'labelled' class. This function
#' is needed when you load SPSS data with packages which in some cases don't set
#' 'labelled' class for variables with labels. For example, \code{haven} package
#' doesn't set 'labelled' class for variables which have variable label but
#' don't have value labels. Note that to use 'expss' with 'haven' you need to
#' load 'expss' strictly after 'haven' to avoid conflicts.} }
#' @param x Variable. In the most cases it is numeric vector.
#' @param value A character scalar - label for the variable x.
#' @param default A character scalar. What we want to get from 'var_lab' if
#'   there is no variable label. NULL by default.
#' @param remove_classes A character vector of classes which should be removed
#'   from the class attribute of the \code{x}.
#' @return \code{var_lab} return variable label. If label doesn't exist it return
#'   NULL . \code{var_lab<-} return variable (vector x)
#'   of class "labelled" with attribute "label" which equals submitted value.
#' @details Variable label is stored in attribute "label" (\code{attr(x,"label")}). For
#'   preserving from dropping this attribute during some operations (such as \code{c})
#'   variable class is set to "labelled". There are special methods of
#'   subsetting and concatenation for this class. To drop variable label use
#'   \code{var_lab(var) <- NULL} or \code{drop_lab(var)}.
#' @export
#' @examples
#' data(mtcars)
#' mtcars = within(mtcars,{
#'                 var_lab(mpg) = "Miles/(US) gallon"
#'                 var_lab(cyl) = "Number of cylinders"
#'                 var_lab(disp) = "Displacement (cu.in.)"
#'                 var_lab(hp) = "Gross horsepower"
#'                 var_lab(drat) = "Rear axle ratio"
#'                 var_lab(wt) = "Weight (lb/1000)"
#'                 var_lab(qsec) = "1/4 mile time"
#'                 var_lab(vs) = "V/S"
#'                 var_lab(am) = "Transmission"
#'                 val_lab(am) = c(automatic = 0, manual=1)
#'                 var_lab(gear) = "Number of forward gears"
#'                 var_lab(carb) = "Number of carburetors"
#' })
#'
#' table(mtcars$am)
#'
#'
var_lab <- function(x, default = NULL) {
    UseMethod("var_lab")
}

#' @export
var_lab.default <- function (x, default = NULL) {
    y = attr(x, "label", exact = TRUE)
    if (is.null(y))
        return(default)
    y
}

#' @rdname var_lab
#' @export
var_lab.data.frame <- function(x, default = NULL) {
    lapply(x, var_lab)
}

#' @rdname var_lab
#' @export
`var_lab<-` <- function(x, value) {
    UseMethod("var_lab<-")
}


#' @export
`var_lab<-.default` <- function(x, value) {
    
    if ((!is.character(value) & !is.null(value)) | length(value) > 1)
        stop("`value` should be a single character string or NULL")

    x = remove_incompatible_classes(x)
    if (length(value) == 0) {
        attr(x, "label") = NULL
        if (length(val_lab(x)) == 0) {
            class(x) = setdiff(class(x), "labelled")
        }
        return(x)
    }
    # this conversion is needed to avoid strange bug (incorrect residuals)
    # with 'lm' with labelled integers
    # if(is.integer(x)) x[] = as.double(x)
    value = as.character(value)
    length(value) == 1 ||
        stop("'var_lab' - label should be vector of length 1.")
    attr(x, "label") <- value
    class(x) = union("labelled", class(x))
    x
}

#' @export
`var_lab<-.data.frame` <- function(x, value) {
    if ((!is.character(value) & !is.null(value)) & !is.list(value) |
        (is.character(value) & length(value) > 1 & length(value) != ncol(x)))
        stop("`value` should be a named list, NULL, a single character string or
             a character vector of same length than the number of columns in `x`")
    if (is.character(value) & length(value) == 1) {
        value <- as.list(rep(value, ncol(x)))
        names(value) <- names(x)
    }
    if (is.character(value) & length(value) == ncol(x)) {
        value <- as.list(value)
        names(value) <- names(x)
    }
    if (is.null(value)) {
        value <- as.list(rep(1, ncol(x)))
        names(value) <- names(x)
        value <- lapply(value, function(x) {
            x <- NULL
        })
    }
    
    if (!all(names(value) %in% names(x)))
        message("some variables not found in x")
    
    value <- value[names(value) %in% names(x)]
    for (var in names(value)) var_lab(x[[var]]) <- value[[var]]
    x
}

#'@rdname var_lab
#' @export
has.label <- function(x) {
    !is.null(attr(x, "label"))
}

#'@rdname var_lab
#' @export
drop_lab <- function(x) {
    UseMethod("drop_lab")
}

#' @export
drop_lab.default <- function(x) {
    attr(x, "label") <- NULL
    x
}

#' @export
drop_lab.data.frame <- function(x) {
    for (each in seq_along(x))
        x[[each]] = drop_lab(x[[each]])
    x
}

#' @export
drop_lab.list <- function(x) {
    for (each in seq_along(x))
        x[[each]] = drop_lab(x[[each]])
    x
}

#'@rdname var_lab
#' @export
drop_var_labs = drop_lab

#### add_labelled_class
#' @rdname var_lab
#' @export
add_labelled_class <- function(x,
                               remove_classes = c("haven_labelled",
                                                  "spss_labelled",
                                                  "haven_labelled_spss",
                                                  "vctrs_vctr")) {
    UseMethod("add_labelled_class")
}

#' @export
add_labelled_class.default <- function(x,
                                       remove_classes = c("haven_labelled",
                                                          "spss_labelled",
                                                          "haven_labelled_spss",
                                                          "vctrs_vctr")) {
    x = remove_class(x, remove_classes)
    if ((!is.null(var_lab(x)) ||
         !is.null(val_lab(x))) && !inherits(x, "labelled")) {
        x = add_class(x, "labelled")
    }
    x
}

#' @export
add_labelled_class.list <-
    function(x,
             remove_classes = c("haven_labelled",
                                "spss_labelled",
                                "haven_labelled_spss",
                                "vctrs_vctr")) {
        for (i in seq_along(x)) {
            x[[i]] =  add_labelled_class(x[[i]], remove_classes = remove_classes)
        }
        x
    }

add_class = function(x, ...){
    new_class = unlist(list(...))
    class(x) = union(new_class, class(x))
    x
}

remove_class = function(x, ...){
    new_class = unlist(list(...))
    class(x) = setdiff(class(x), new_class)
    x
}

#' @export
add_labelled_class.data.frame = add_labelled_class.list
############# value labels #######################

#' Set or get value labels
#'
#' @description These functions set/get/drop value labels. Duplicated values are not allowed.
#' If argument \code{x} is data.frame or list then labels applied to all
#' elements of data.frame/list. To drop value labels, use \code{val_lab(var) <-
#' NULL} or \code{unvl(var)}. For variable labels see \link{var_lab}. 
#' \itemize{
#' \item{\code{val_lab}}{ returns value labels or NULL if labels doesn't
#' exist.}
#' \item{\code{val_lab<-}}{ set value labels.}
#' \item{\code{set_val_lab}}{ returns variable with value labels.}
#' \item{\code{add_val_lab<-}}{ add value labels to already existing value labels.}
#' \item{\code{unvl}}{ drops value labels.}
#' \item{\code{has.labels}}{ check if value labels exists.}}
#' @param x Variable(s). Vector/data.frame/list.
#' @param value Named vector. Names of vector are labels for the
#'   appropriate values of variable x.
#' @param add Logical. Should we add value labels to old labels or replace it?
#'   Deafult is FALSE - we completely replace old values. If TRUE new value
#'   labels will be combined with old value labels.
#' @return \code{val_lab} return value labels (named vector). If labels doesn't
#'   exist it return NULL . \code{val_lab<-} and \code{set_val_lab} return
#'   variable (vector x) of class "labelled" with attribute "labels" which
#'   contains value labels.
#' @details Value labels are stored in attribute "labels"
#'   (\code{attr(x,"labels")}). We set variable class to "labelled" for preserving
#'   labels from dropping during some operations (such as \code{c} and \code{`[`}).
#' @export
#' @examples
#' # toy example
#' set.seed(123)
#' # score - evaluation of tested product
#'
#' score = sample(-1:1,20,replace = TRUE)
#' var_lab(score) = "Evaluation of tested brand"
#' val_lab(score) = c("Dislike it" = -1,
#'                    "So-so" = 0,
#'                    "Like it" = 1
#'                    )
#'
#'

val_lab <- function(x) {
    UseMethod("val_lab")
}

#' @export
val_lab.data.frame <- function(x) {
    # we consider data.frame as multiple response question
    all_labs = lapply(x, val_lab)
    all_labs = all_labs[lengths(all_labs) > 0]
    if (length(all_labs) > 0)
        res = do.call(combine_labels, all_labs)
    else
        res = NULL
    res
}

#' @export
val_lab.default <- function(x) {
    attr(x, "labels", exact = TRUE)
}

#####################

#' @export
#' @rdname val_lab
"val_lab<-" <- function(x, value) {
    set_val_lab(x, value, add = FALSE)
}

#####################

#' @export
#' @rdname val_lab
set_val_lab <- function(x, value, add = FALSE) {
    UseMethod("set_val_lab")
}

#' @export
#' @rdname val_lab
add_val_lab <- function(x, value)
    set_val_lab(x, value, add = TRUE)

#' @export
set_val_lab.default <- function(x, value, add = FALSE) {

    x = remove_incompatible_classes(x)
    if (length(value) == 0) {
        if (!add) {
            attr(x, "labels") = NULL
        }
        if (length(val_lab(x)) == 0 && is.null(var_lab(x))) {
            class(x) = setdiff(class(x), "labelled")
        }
        return(x)
    }
    if (is.factor(x)) {
        label = var_lab(x)
        x = as.character(x)
        if (!is.null(label))
            var_lab(x) = label

    }
    ! is.null(names(value)) ||
        stop("'set_val_lab' - labels should be named vector.")
    ! anyDuplicated(value)  ||
        stop("'set_val_lab' - duplicated values in labels: ",
             paste(value[duplicated(value)], collapse = " "))
    
    # this conversion is needed to avoid strange bug (incorrect residuals)
    # with 'lm' with labelled integers
    # if(is.integer(x)) x[] = as.double(x)
    if (add)
        value = combine_labels(value, val_lab(x))
    
    # Warning about dupliction was removed because it was generated too often for third party *.sav files.
    #    with_warning = "duplicated labels: "
    names(value) = make_items_unique(names(value))
    # value = sort(value)
    attr(x, "labels") = value
    class(x) = union("labelled", class(x))
    x
}

#' @export
set_val_lab.data.frame <- function(x, value, add = FALSE) {
    for (each in seq_along(x))
        x[[each]] = set_val_lab(x[[each]], value, add = add)
    x
}


#' @export
#' @rdname val_lab
has.labels <- function(x) {
    !is.null(attr(x, "labels"))
}


#######

#' @export
#' @rdname val_lab
"add_val_lab<-" <- function(x, value) {
    set_val_lab(x, value, add = TRUE)
}

######

#' @export
#' @rdname val_lab
unvl <- function(x) {
    set_val_lab(x, NULL)
}

#' @export
#' @rdname val_lab
drop_val_labs = unvl


##################

#' @export
#' @rdname val_lab
drop_unused_labels <- function(x) {
    UseMethod("drop_unused_labels")
}

#' @export
drop_unused_labels.default <- function(x) {
    curr_labs = val_lab(x)
    if (is.null(curr_labs))
        return(x)
    curr_values = unique(x)
    set_val_lab(x, curr_labs[curr_labs %in% curr_values])
}

#' @export
drop_unused_labels.category <- function(x) {
    curr_labs = val_lab(x)
    if (is.null(curr_labs))
        return(x)
    curr_values = unique(x)
    set_val_lab(x, curr_labs[curr_labs %in% curr_values])
}

#' @export
drop_unused_labels.list <- function(x) {
    lapply(x, drop_unused_labels)
}

#' @export
drop_unused_labels.data.frame <- function(x) {
    x[] = lapply(x, drop_unused_labels)
    x
}


##########################

#' Drop variable label and value labels
#'
#' \code{unlab} returns variable x without variable labels and value labels
#'
#' @param x Variable(s). Vector/data.frame/list.
#' @return \code{unlab} returns original variable x without variable label, value labels and class.
#' @seealso \code{\link{drop_lab}} \code{\link{unvl}}
#' @export
#' @examples
#' raw_var <- rep(1:2,5)
#' var_with_lab <- raw_var
#' var_lab(var_with_lab) <- "Income"
#' val_lab(var_with_lab) <- c("Low"=1,"High"=2)
#' identical(raw_var,unlab(var_with_lab)) # should be TRUE
#' 
unlab <- function(x) {
    UseMethod("unlab")
}

#' @export
unlab.default <- function(x) {
    if (is.null(x))
        return(x)
    if (is.list(x)) {
        return(unlab.list(x))
    }
    attr(x, "label") = NULL
    attr(x, "labels") = NULL
    class(x) = setdiff(class(x), c("labelled"))
    x
}

#' @export
unlab.data.frame <- function(x) {
    for (each in seq_along(x))
        x[[each]] = unlab(x[[each]])
    x
}

#' @export
unlab.list <- function(x) {
    for (each in seq_along(x))
        x[[each]] = unlab(x[[each]])
    x
}

#' @export
#' @rdname unlab
drop_all_labels = unlab

#' Recode vector into numeric vector with value labels 
#'
#' @param x numeric vector/character vector/factor 
#' @param label optional variable label
#'
#' @return numeric vector with value labels
#' @export
#' @examples
#' character_vector = c("one", "two",  "two", "three")
#' as.labelled(character_vector, label = "Numbers")
#' 
#'
as.labelled = function(x, label = NULL){
    UseMethod("as.labelled")
}

#' @export
as.labelled.default = function(x, label = NULL){
    labels = sort(unique(x), na.last = NA)
    values = seq_along(labels)
    res = fast_match(x, labels)
    names(values) = as.character(labels)
    val_lab(res) = values
    var_lab(res) = label
    res
}

#' @export
as.labelled.factor = function(x, label = NULL){
    values = seq_along(levels(x))
    names(values) = levels(x)
    x = as.numeric(x)
    val_lab(x) = values
    var_lab(x) = label
    x
    
}


#' @export
as.labelled.labelled = function(x, label = NULL){
    vallab = val_lab(x)
    label = c(label, var_lab(x))[1]
    if(length(vallab)>0){
        # we need to add labels if some values don't have labels
        values = unlab(unique(x))
        values = structure(values, names = values)
        vallab =  v_union(vallab, values)
        # vallab = sort(vallab, na.last = NA)
        if(!is.numeric(x)){
            # and we need to convert 'x' to numeric if it is not numeric
            x = fast_match(x, vallab) 
            vallab = structure(seq_along(vallab), names = names(vallab))
        }
        val_lab(x) = vallab
        var_lab(x) = label    
    } else {
        x = as.labelled(unlab(x), label = label)
    }
    x
}

#' @export
#' @rdname as.labelled
is.labelled = function(x){
    inherits(x, "labelled")
}

################

combine_labels <- function(...) {
    args = list(...)
    Reduce(v_union, args)
}

labelled_and_unlabelled <- function(uniqs, vallab) {
    uniqs = unlab(uniqs)
    if (length(uniqs) > 0) {
        uniqs = uniqs[!is.na(uniqs)]
        names(uniqs) = uniqs
    }
    v_union(vallab, uniqs)
}

v_union <- function(e1, e2) {
    if (is.null(e1))
        return(e2)
    c(e1, e2[!(e2 %in% e1)])
}


fast_match <- function(x, table, nomatch = NA_integer_, NA_incomparable = FALSE){
    if(is.character(x) && is.character(table)){
        ind = match(x, table, nomatch = nomatch) 
        if(NA_incomparable) {
            ind[is.na(x)] = nomatch
        }
    } else {
        if(NA_incomparable) {
            ind = match(x, table, 
                        nomatch = nomatch, 
                        incomparables = NA)
        } else {
            ind = match(x, table,
                        nomatch = nomatch, 
                        incomparables = NULL)
        }    
    }
    ind
}
adayim/cctab documentation built on Dec. 18, 2021, 10:26 p.m.