# These functions derived from expss package
#' Set or get variable label
#'
#' @description These functions set/get/drop variable labels. For
#' value labels see \link{val_lab}.
#' \itemize{
#' \item{\code{var_lab}}{ returns variable label or NULL if label doesn't
#' exist.}
#' \item{\code{var_lab<-}}{ set variable label.}
#' \item{\code{drop_lab}}{ drops variable label.}
#' \item{\code{has.label}}{ check if variable label exists.}
#' \item{\code{add_labelled_class}}{ Add missing 'labelled' class. This function
#' is needed when you load SPSS data with packages which in some cases don't set
#' 'labelled' class for variables with labels. For example, \code{haven} package
#' doesn't set 'labelled' class for variables which have variable label but
#' don't have value labels. Note that to use 'expss' with 'haven' you need to
#' load 'expss' strictly after 'haven' to avoid conflicts.} }
#' @param x Variable. In the most cases it is numeric vector.
#' @param value A character scalar - label for the variable x.
#' @param default A character scalar. What we want to get from 'var_lab' if
#' there is no variable label. NULL by default.
#' @param remove_classes A character vector of classes which should be removed
#' from the class attribute of the \code{x}.
#' @return \code{var_lab} return variable label. If label doesn't exist it return
#' NULL . \code{var_lab<-} return variable (vector x)
#' of class "labelled" with attribute "label" which equals submitted value.
#' @details Variable label is stored in attribute "label" (\code{attr(x,"label")}). For
#' preserving from dropping this attribute during some operations (such as \code{c})
#' variable class is set to "labelled". There are special methods of
#' subsetting and concatenation for this class. To drop variable label use
#' \code{var_lab(var) <- NULL} or \code{drop_lab(var)}.
#' @export
#' @examples
#' data(mtcars)
#' mtcars = within(mtcars,{
#' var_lab(mpg) = "Miles/(US) gallon"
#' var_lab(cyl) = "Number of cylinders"
#' var_lab(disp) = "Displacement (cu.in.)"
#' var_lab(hp) = "Gross horsepower"
#' var_lab(drat) = "Rear axle ratio"
#' var_lab(wt) = "Weight (lb/1000)"
#' var_lab(qsec) = "1/4 mile time"
#' var_lab(vs) = "V/S"
#' var_lab(am) = "Transmission"
#' val_lab(am) = c(automatic = 0, manual=1)
#' var_lab(gear) = "Number of forward gears"
#' var_lab(carb) = "Number of carburetors"
#' })
#'
#' table(mtcars$am)
#'
#'
var_lab <- function(x, default = NULL) {
UseMethod("var_lab")
}
#' @export
var_lab.default <- function (x, default = NULL) {
y = attr(x, "label", exact = TRUE)
if (is.null(y))
return(default)
y
}
#' @rdname var_lab
#' @export
var_lab.data.frame <- function(x, default = NULL) {
lapply(x, var_lab)
}
#' @rdname var_lab
#' @export
`var_lab<-` <- function(x, value) {
UseMethod("var_lab<-")
}
#' @export
`var_lab<-.default` <- function(x, value) {
if ((!is.character(value) & !is.null(value)) | length(value) > 1)
stop("`value` should be a single character string or NULL")
x = remove_incompatible_classes(x)
if (length(value) == 0) {
attr(x, "label") = NULL
if (length(val_lab(x)) == 0) {
class(x) = setdiff(class(x), "labelled")
}
return(x)
}
# this conversion is needed to avoid strange bug (incorrect residuals)
# with 'lm' with labelled integers
# if(is.integer(x)) x[] = as.double(x)
value = as.character(value)
length(value) == 1 ||
stop("'var_lab' - label should be vector of length 1.")
attr(x, "label") <- value
class(x) = union("labelled", class(x))
x
}
#' @export
`var_lab<-.data.frame` <- function(x, value) {
if ((!is.character(value) & !is.null(value)) & !is.list(value) |
(is.character(value) & length(value) > 1 & length(value) != ncol(x)))
stop("`value` should be a named list, NULL, a single character string or
a character vector of same length than the number of columns in `x`")
if (is.character(value) & length(value) == 1) {
value <- as.list(rep(value, ncol(x)))
names(value) <- names(x)
}
if (is.character(value) & length(value) == ncol(x)) {
value <- as.list(value)
names(value) <- names(x)
}
if (is.null(value)) {
value <- as.list(rep(1, ncol(x)))
names(value) <- names(x)
value <- lapply(value, function(x) {
x <- NULL
})
}
if (!all(names(value) %in% names(x)))
message("some variables not found in x")
value <- value[names(value) %in% names(x)]
for (var in names(value)) var_lab(x[[var]]) <- value[[var]]
x
}
#'@rdname var_lab
#' @export
has.label <- function(x) {
!is.null(attr(x, "label"))
}
#'@rdname var_lab
#' @export
drop_lab <- function(x) {
UseMethod("drop_lab")
}
#' @export
drop_lab.default <- function(x) {
attr(x, "label") <- NULL
x
}
#' @export
drop_lab.data.frame <- function(x) {
for (each in seq_along(x))
x[[each]] = drop_lab(x[[each]])
x
}
#' @export
drop_lab.list <- function(x) {
for (each in seq_along(x))
x[[each]] = drop_lab(x[[each]])
x
}
#'@rdname var_lab
#' @export
drop_var_labs = drop_lab
#### add_labelled_class
#' @rdname var_lab
#' @export
add_labelled_class <- function(x,
remove_classes = c("haven_labelled",
"spss_labelled",
"haven_labelled_spss",
"vctrs_vctr")) {
UseMethod("add_labelled_class")
}
#' @export
add_labelled_class.default <- function(x,
remove_classes = c("haven_labelled",
"spss_labelled",
"haven_labelled_spss",
"vctrs_vctr")) {
x = remove_class(x, remove_classes)
if ((!is.null(var_lab(x)) ||
!is.null(val_lab(x))) && !inherits(x, "labelled")) {
x = add_class(x, "labelled")
}
x
}
#' @export
add_labelled_class.list <-
function(x,
remove_classes = c("haven_labelled",
"spss_labelled",
"haven_labelled_spss",
"vctrs_vctr")) {
for (i in seq_along(x)) {
x[[i]] = add_labelled_class(x[[i]], remove_classes = remove_classes)
}
x
}
add_class = function(x, ...){
new_class = unlist(list(...))
class(x) = union(new_class, class(x))
x
}
remove_class = function(x, ...){
new_class = unlist(list(...))
class(x) = setdiff(class(x), new_class)
x
}
#' @export
add_labelled_class.data.frame = add_labelled_class.list
############# value labels #######################
#' Set or get value labels
#'
#' @description These functions set/get/drop value labels. Duplicated values are not allowed.
#' If argument \code{x} is data.frame or list then labels applied to all
#' elements of data.frame/list. To drop value labels, use \code{val_lab(var) <-
#' NULL} or \code{unvl(var)}. For variable labels see \link{var_lab}.
#' \itemize{
#' \item{\code{val_lab}}{ returns value labels or NULL if labels doesn't
#' exist.}
#' \item{\code{val_lab<-}}{ set value labels.}
#' \item{\code{set_val_lab}}{ returns variable with value labels.}
#' \item{\code{add_val_lab<-}}{ add value labels to already existing value labels.}
#' \item{\code{unvl}}{ drops value labels.}
#' \item{\code{has.labels}}{ check if value labels exists.}}
#' @param x Variable(s). Vector/data.frame/list.
#' @param value Named vector. Names of vector are labels for the
#' appropriate values of variable x.
#' @param add Logical. Should we add value labels to old labels or replace it?
#' Deafult is FALSE - we completely replace old values. If TRUE new value
#' labels will be combined with old value labels.
#' @return \code{val_lab} return value labels (named vector). If labels doesn't
#' exist it return NULL . \code{val_lab<-} and \code{set_val_lab} return
#' variable (vector x) of class "labelled" with attribute "labels" which
#' contains value labels.
#' @details Value labels are stored in attribute "labels"
#' (\code{attr(x,"labels")}). We set variable class to "labelled" for preserving
#' labels from dropping during some operations (such as \code{c} and \code{`[`}).
#' @export
#' @examples
#' # toy example
#' set.seed(123)
#' # score - evaluation of tested product
#'
#' score = sample(-1:1,20,replace = TRUE)
#' var_lab(score) = "Evaluation of tested brand"
#' val_lab(score) = c("Dislike it" = -1,
#' "So-so" = 0,
#' "Like it" = 1
#' )
#'
#'
val_lab <- function(x) {
UseMethod("val_lab")
}
#' @export
val_lab.data.frame <- function(x) {
# we consider data.frame as multiple response question
all_labs = lapply(x, val_lab)
all_labs = all_labs[lengths(all_labs) > 0]
if (length(all_labs) > 0)
res = do.call(combine_labels, all_labs)
else
res = NULL
res
}
#' @export
val_lab.default <- function(x) {
attr(x, "labels", exact = TRUE)
}
#####################
#' @export
#' @rdname val_lab
"val_lab<-" <- function(x, value) {
set_val_lab(x, value, add = FALSE)
}
#####################
#' @export
#' @rdname val_lab
set_val_lab <- function(x, value, add = FALSE) {
UseMethod("set_val_lab")
}
#' @export
#' @rdname val_lab
add_val_lab <- function(x, value)
set_val_lab(x, value, add = TRUE)
#' @export
set_val_lab.default <- function(x, value, add = FALSE) {
x = remove_incompatible_classes(x)
if (length(value) == 0) {
if (!add) {
attr(x, "labels") = NULL
}
if (length(val_lab(x)) == 0 && is.null(var_lab(x))) {
class(x) = setdiff(class(x), "labelled")
}
return(x)
}
if (is.factor(x)) {
label = var_lab(x)
x = as.character(x)
if (!is.null(label))
var_lab(x) = label
}
! is.null(names(value)) ||
stop("'set_val_lab' - labels should be named vector.")
! anyDuplicated(value) ||
stop("'set_val_lab' - duplicated values in labels: ",
paste(value[duplicated(value)], collapse = " "))
# this conversion is needed to avoid strange bug (incorrect residuals)
# with 'lm' with labelled integers
# if(is.integer(x)) x[] = as.double(x)
if (add)
value = combine_labels(value, val_lab(x))
# Warning about dupliction was removed because it was generated too often for third party *.sav files.
# with_warning = "duplicated labels: "
names(value) = make_items_unique(names(value))
# value = sort(value)
attr(x, "labels") = value
class(x) = union("labelled", class(x))
x
}
#' @export
set_val_lab.data.frame <- function(x, value, add = FALSE) {
for (each in seq_along(x))
x[[each]] = set_val_lab(x[[each]], value, add = add)
x
}
#' @export
#' @rdname val_lab
has.labels <- function(x) {
!is.null(attr(x, "labels"))
}
#######
#' @export
#' @rdname val_lab
"add_val_lab<-" <- function(x, value) {
set_val_lab(x, value, add = TRUE)
}
######
#' @export
#' @rdname val_lab
unvl <- function(x) {
set_val_lab(x, NULL)
}
#' @export
#' @rdname val_lab
drop_val_labs = unvl
##################
#' @export
#' @rdname val_lab
drop_unused_labels <- function(x) {
UseMethod("drop_unused_labels")
}
#' @export
drop_unused_labels.default <- function(x) {
curr_labs = val_lab(x)
if (is.null(curr_labs))
return(x)
curr_values = unique(x)
set_val_lab(x, curr_labs[curr_labs %in% curr_values])
}
#' @export
drop_unused_labels.category <- function(x) {
curr_labs = val_lab(x)
if (is.null(curr_labs))
return(x)
curr_values = unique(x)
set_val_lab(x, curr_labs[curr_labs %in% curr_values])
}
#' @export
drop_unused_labels.list <- function(x) {
lapply(x, drop_unused_labels)
}
#' @export
drop_unused_labels.data.frame <- function(x) {
x[] = lapply(x, drop_unused_labels)
x
}
##########################
#' Drop variable label and value labels
#'
#' \code{unlab} returns variable x without variable labels and value labels
#'
#' @param x Variable(s). Vector/data.frame/list.
#' @return \code{unlab} returns original variable x without variable label, value labels and class.
#' @seealso \code{\link{drop_lab}} \code{\link{unvl}}
#' @export
#' @examples
#' raw_var <- rep(1:2,5)
#' var_with_lab <- raw_var
#' var_lab(var_with_lab) <- "Income"
#' val_lab(var_with_lab) <- c("Low"=1,"High"=2)
#' identical(raw_var,unlab(var_with_lab)) # should be TRUE
#'
unlab <- function(x) {
UseMethod("unlab")
}
#' @export
unlab.default <- function(x) {
if (is.null(x))
return(x)
if (is.list(x)) {
return(unlab.list(x))
}
attr(x, "label") = NULL
attr(x, "labels") = NULL
class(x) = setdiff(class(x), c("labelled"))
x
}
#' @export
unlab.data.frame <- function(x) {
for (each in seq_along(x))
x[[each]] = unlab(x[[each]])
x
}
#' @export
unlab.list <- function(x) {
for (each in seq_along(x))
x[[each]] = unlab(x[[each]])
x
}
#' @export
#' @rdname unlab
drop_all_labels = unlab
#' Recode vector into numeric vector with value labels
#'
#' @param x numeric vector/character vector/factor
#' @param label optional variable label
#'
#' @return numeric vector with value labels
#' @export
#' @examples
#' character_vector = c("one", "two", "two", "three")
#' as.labelled(character_vector, label = "Numbers")
#'
#'
as.labelled = function(x, label = NULL){
UseMethod("as.labelled")
}
#' @export
as.labelled.default = function(x, label = NULL){
labels = sort(unique(x), na.last = NA)
values = seq_along(labels)
res = fast_match(x, labels)
names(values) = as.character(labels)
val_lab(res) = values
var_lab(res) = label
res
}
#' @export
as.labelled.factor = function(x, label = NULL){
values = seq_along(levels(x))
names(values) = levels(x)
x = as.numeric(x)
val_lab(x) = values
var_lab(x) = label
x
}
#' @export
as.labelled.labelled = function(x, label = NULL){
vallab = val_lab(x)
label = c(label, var_lab(x))[1]
if(length(vallab)>0){
# we need to add labels if some values don't have labels
values = unlab(unique(x))
values = structure(values, names = values)
vallab = v_union(vallab, values)
# vallab = sort(vallab, na.last = NA)
if(!is.numeric(x)){
# and we need to convert 'x' to numeric if it is not numeric
x = fast_match(x, vallab)
vallab = structure(seq_along(vallab), names = names(vallab))
}
val_lab(x) = vallab
var_lab(x) = label
} else {
x = as.labelled(unlab(x), label = label)
}
x
}
#' @export
#' @rdname as.labelled
is.labelled = function(x){
inherits(x, "labelled")
}
################
combine_labels <- function(...) {
args = list(...)
Reduce(v_union, args)
}
labelled_and_unlabelled <- function(uniqs, vallab) {
uniqs = unlab(uniqs)
if (length(uniqs) > 0) {
uniqs = uniqs[!is.na(uniqs)]
names(uniqs) = uniqs
}
v_union(vallab, uniqs)
}
v_union <- function(e1, e2) {
if (is.null(e1))
return(e2)
c(e1, e2[!(e2 %in% e1)])
}
fast_match <- function(x, table, nomatch = NA_integer_, NA_incomparable = FALSE){
if(is.character(x) && is.character(table)){
ind = match(x, table, nomatch = nomatch)
if(NA_incomparable) {
ind[is.na(x)] = nomatch
}
} else {
if(NA_incomparable) {
ind = match(x, table,
nomatch = nomatch,
incomparables = NA)
} else {
ind = match(x, table,
nomatch = nomatch,
incomparables = NULL)
}
}
ind
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.