#' Read rlms data (deprecated function)
#'
#' Read rlms data and all meta information (deprecated function)
#'
#' Read rlms data and all the meta information. Destroy useless attributes.
#' This function is deprecated. Please use rlms_read instead of.
#'
#' @param file the filename
#' @param suppress logical, if true the default message is suppressed
#' @param nine2na automatically convert 99999999 to NA for numeric variables
#' @export
#' @return dataframe
#' @examples
#' # read.rlms("r21i_os24a.sav")
read.rlms <- function(file, suppress = FALSE, nine2na = TRUE) {
message("This function is deprecated. Please use rlms_read instead of.")
df <- rlms_read(file = file,
suppress = suppress,
nine2na = nine2na,
yesno = FALSE,
apostrophe = FALSE)
return(df)
}
#' Remove apostrophes
#'
#' Remove apostrophes
#'
#' Remove apostrophes
#'
#' @param x character vector
#' @export
#' @return character vector with apostrophes removed
rlms_remove_apostrophe <- function(x) {
x_new <- stringr::str_replace_all(x, "\\u2018", "")
x_new <- stringr::str_replace_all(x_new, "\\u2019.\\u2019", "")
x_new <- stringr::str_replace_all(x_new, "\\u2019", "")
return(x_new)
}
#' Extract variable labels
#'
#' Extract variable labels
#'
#' Extract variable labels
#'
#' @param df data.frame read from rlms file
#' @export
#' @return data frame with variable labels
rlms_extract_variable_labels <- function(df) {
var_meta <- data.frame(var = names(df), varlabel = "", spss_format = "", stringsAsFactors = FALSE)
for (i in 1:ncol(df)) {
spss_format <- attr(df[[i]], "format.spss")
if (!is.null(spss_format)) {
var_meta$spss_format[i] <- spss_format
}
varlabel <- attr(df[[i]], "label")
if (!is.null(varlabel)) {
var_meta$varlabel[i] <- varlabel
}
}
return(var_meta)
}
#' Extract value labels
#'
#' Extract value labels
#'
#' Extract value labels
#'
#' @param df data.frame read from rlms file
#' @export
#' @return data frame with value labels
rlms_extract_value_labels <- function(df) {
value_meta <- NULL
for (var in names(df)) {
value <- get_labels(df[[var]])
if ("" %in% value) {
message("Variable ", var, " contains empty value label ''. Empty value label was removed.")
value <- setdiff(value, "")
}
# sometimes class is "labelled" but there are no labels :)
if (length(value) > 0) {
# message(var)
temp_value_meta <- data.frame(value = value, vallabel = names(value),
var = var, stringsAsFactors = FALSE, row.names = NULL)
value_meta <- dplyr::bind_rows(value_meta, temp_value_meta)
}
}
return(value_meta)
}
#' Show variable labels
#'
#' Show variable labels
#'
#' Show variable labels
#'
#' @param df data.frame read from rlms file
#' @export
#' @return data frame with variable labels
rlms_show_variable_labels <- function(df) {
return(attr(df, "var_meta"))
}
#' Show value labels
#'
#' Show value labels
#'
#' Show value labels
#'
#' @param df data.frame read from rlms file
#' @export
#' @return data frame with value labels
rlms_show_value_labels <- function(df) {
return(attr(df, "value_meta"))
}
#' Standartize yes/no
#'
#' Standartize yes/no
#'
#' Standartize yes/no
#'
#' @param x character vector
#' @export
#' @return character vector with standartized yes/no
rlms_yesno_standartize <- function(x) {
x_simple <- tolower(rlms_remove_apostrophe(x))
x_new <- x
yes <- "\u0434\u0430" # this is yes in russian
no <- "\u043d\u0435\u0442" # this is no in russian
# explicit codes are used to avoid warning in check()
x_new[x_simple == yes] <- yes
x_new[x_simple == no] <- no
return(x_new)
}
#' Clean rlms data
#'
#' Clean rlms data and all meta information
#'
#' Clean rlms data and all the meta information. Destroy useless attributes.
#'
#' @param df data.frame read by haven::read.spss
#' @param yesno convert yes/no answers to lowercase yes/no without apostrophes
#' @param apostrophe trim apostrophes, TRUE by default
#' @param remove_empty remove empty labels, TRUE by default
#' @param suppress logical, if true the default message is suppressed
#' @param nine2na convert 99999990+ to NA for numeric variables
#' @param empty2na convert empty character values to NA
#' @param nan2na convert NaN to NA
#' @param colnames_tolower a logical value, indicating whether variable names should be converted to lowercase.
#' @param verbose add some debugging output
#' TRUE by default.
#' @export
#' @return dataframe
rlms_cleanup <- function(df, suppress = TRUE,
empty2na = TRUE,
nan2na = TRUE,
nine2na = TRUE,
yesno = TRUE,
apostrophe = TRUE,
remove_empty = TRUE,
colnames_tolower = TRUE,
verbose = FALSE) {
if (verbose) {
message("Cleanup options:")
message("Convert '' to NA, empty2na = ", empty2na)
message("Convert NaN to NA, nan2na = ", nan2na)
message("Convert 99999990+ to NA, nine2na = ", nine2na)
message("Convert column names to lowercase, colnames_tolower = ", colnames_tolower)
message("Standartise Yes/NO to yes/no, yesno = ", yesno)
message("Remove redundant apostrophes, apostrophe = ", apostrophe)
message("Remove empty value label, remove_empty = ", remove_empty)
}
if (colnames_tolower) {
colnames(df) <- stringr::str_to_lower(colnames(df))
}
for (var in colnames(df)) {
var_class <- class(df[[var]])
if (verbose) {
# message("Processing variable: ", var, " of class ", var_class)
}
if (nan2na) {
df[[var]][is.nan(df[[var]])] <- NA
}
if ((nine2na) & (is.numeric(df[[var]]))) {
# is.numeric will work for pure numeric and labelled numeric
# replace 99999990+ for numeric variables
df[[var]][df[[var]] > 99999990] <- NA
# one cannot use ifelse as it destroys attributes!!!
}
if ((empty2na) & (is.character(df[[var]]))) {
df[[var]][df[[var]] == ""] <- NA
}
if (yesno) {
if (var_class == "character") {
df[[var]] <- rlms_yesno_standartize(df[[var]])
}
if ((var_class == "labelled") & length(attr(df[[var]], "labels") > 0)) {
attr(attr(df[[var]], "labels"), "names") <- rlms_yesno_standartize(attr(attr(df[[var]], "labels"), "names"))
}
}
if (apostrophe) {
if (var_class == "character") {
df[[var]] <- rlms_remove_apostrophe(df[[var]])
}
if ((var_class == "labelled") & length(attr(df[[var]], "labels") > 0)) {
attr(attr(df[[var]], "labels"), "names") <- rlms_remove_apostrophe(attr(attr(df[[var]], "labels"), "names"))
}
}
if (remove_empty) {
# remove "" in value labels
value_labels <- get_labels(df[[var]])
labels <- names(value_labels)
values_with_empty_labels <- value_labels[labels == ""]
if (length(values_with_empty_labels) > 0) {
# we play on the safe side and check that variable has no empty values
values_to_remove <- setdiff(values_with_empty_labels, unique(df[[var]]))
# setdiff kills names
attr(df[[var]], "labels") <- value_labels[!value_labels %in% values_to_remove]
}
}
}
return(df)
}
#' Transform all labelled variables into plain vector variables
#'
#' Transform all labelled variables into plain vector variables
#'
#' Transform all labelled variables into plain vector variables
#'
#' @param df data.frame with labelled variables
#' @export
#' @return df data.frame with numeric variables instead of labelled
#' @examples
#' df_labelled <- data.frame(x = haven::labelled(c(1, 1, 2, NA), c(Male = 1, Female = 2)), y = 1:4)
#' df_new <- rlms_labelled2plain(df_labelled)
rlms_labelled2plain <- function(df) {
for (var in names(df)) {
if (is_labelled(df[[var]])) {
# preserve variable label: it will show automatically in Rstudio
variable_label <- attr(df[[var]], "label")
# as.vector works well with both numeric and character variables
df[[var]] <- as.vector(df[[var]])
attr(df[[var]], "label") <- variable_label
}
}
return(df)
}
#' Transform one labelled variable or data.frame into factor or numeric
#'
#' Transform one labelled variable or data.frame into factor or numeric
#'
#' Transform one labelled variable or data.frame into factor or numeric
#'
#' @param x labelled vector or data.frame
#' @param ... other parameters
#' @export
#' @return factor or numeric variable instead of labelled
#' @examples
#' x = haven::labelled(c(1, 1, 2, NA), c(Male = 1, Female = 2))
#' rlms_labelled2factor(x)
rlms_labelled2factor <- function(x, ...) {
UseMethod("rlms_labelled2factor", x)
}
#' Transform one labelled variable into factor or numeric
#'
#' Transform one labelled variable into factor or numeric
#'
#' Transform one labelled variable into factor or numeric
#'
#' @export
#' @rdname rlms_labelled2factor
#' @return factor or numeric variable instead of labelled
#' @examples
#' x = haven::labelled(c(1, 1, 2, NA), c(Male = 1, Female = 2))
#' rlms_labelled2factor(x)
#' df_labelled <- data.frame(x = haven::labelled(c(1, 1, 2, NA), c(Male = 1, Female = 2)), y = 1:4)
#' df_new <- rlms_labelled2factor(df_labelled)
rlms_labelled2factor.labelled <- function(x, ...) {
variable_label <- attr(x, "label")
if (all_labelled(x)) {
# Rule 1: If all values are labelled then type is factor
x <- as_factor_safe(x)
} else if (all_but_one_labelled(x)) {
# Rule 2: If all values but one in the middle are labelled then type is factor
x <- as_factor_safe(x)
message("Variable was considered as factor: it has only one unlabelled value.")
message("This unlabelled value is neither minimal neither maximal.")
} else if (all_but_rlmsna_labelled(x)) {
# Rule 3: If all unlabelled values of a numeric variable are NA codes then type is factor
x <- as_factor_safe(x)
message("Variable was considered as factor: all unlabelled values are bigger than 99999990.")
}
attr(x, "label") <- variable_label
return(x)
}
#' Transform all labelled variables into factor or numeric
#'
#' Transform all labelled variables into factor or numeric
#'
#' Transform all labelled variables into factor or numeric
#'
#' @param verbose add some debugging information
#' @export
#' @rdname rlms_labelled2factor
#' @return df data.frame with factor or numeric variables instead of labelled
#' @examples
#' df_labelled <- data.frame(x = haven::labelled(c(1, 1, 2, NA), c(Male = 1, Female = 2)), y = 1:4)
#' df_new <- rlms_labelled2factor(df_labelled)
rlms_labelled2factor.data.frame <- function(x, verbose = FALSE, ...) {
if (verbose) {
message("The option haven = 'factor' is experimental and subject to change.")
}
for (var in names(x)) {
# preserve variable label: it will show automatically in Rstudio
var_class <- class(x[[var]])
if (verbose) {
# message("Converting variable ", var, " of class ", var_class)
}
variable_label <- attr(x[[var]], "label")
if (is_labelled(x[[var]])) {
if (all_labelled(x[[var]])) {
# Rule 1: If all values are labelled then type is factor
x[[var]] <- as_factor_safe(x[[var]])
# standard conversion will throw warning in the case of duplicate levels:
# x[[var]] <- haven::as_factor(x[[var]])
} else if (all_but_one_labelled(x[[var]])) {
# Rule 2: If all values but one in the middle are labelled then type is factor
x[[var]] <- as_factor_safe(x[[var]])
message("Labelled variable ", var, " was considered as factor: it has only one unlabelled value.")
message("This unlabelled value is neither minimal neither maximal.")
} else if (all_but_rlmsna_labelled(x[[var]])) {
# Rule 3: If all unlabelled values of a numeric variable are NA codes then type is factor
x[[var]] <- as_factor_safe(x[[var]])
message("Labelled variable ", var, " was considered as factor: all unlabelled values are bigger than 99999990.")
} else {
# numeric will be kept as numeric and character as character
x[[var]] <- as.vector(x[[var]])
}
}
attr(x[[var]], "label") <- variable_label
}
return(x)
}
#' Read rlms data, old legacy code
#'
#' Read rlms data and all meta information, old legacy code
#'
#' Read rlms data and all the meta information. Destroy useless attributes, old legacy code
#'
#' @param file the filename
#' @param yesno convert yes/no answers to lowercase yes/no without apostrophes
#' @param apostrophe trim apostrophes, TRUE by default
#' @param remove_empty remove empty labels, TRUE by default
#' @param suppress logical, if true the default message is suppressed
#' @param nine2na automatically convert 99999999 to NA for numeric variables
#' @param colnames_tolower a logical value, indicating whether variable names should be converted to lowercase.
#' TRUE by default.
#' @export
#' @return dataframe
#' @examples
#' # rlms_legacy_read("r21i_os24a.sav")
rlms_legacy_read <- function(file,
suppress = TRUE,
nine2na = TRUE,
yesno = TRUE,
apostrophe = TRUE,
remove_empty = TRUE,
colnames_tolower = TRUE) {
df <- foreign::read.spss(file, to.data.frame = TRUE, reencode = TRUE)
attr(df, "codepage") <- NULL
# get variable labels
varlabel <- attr(df, "variable.labels")
names(varlabel) <- NULL
var_meta <- data.frame(var = names(df), varlabel = varlabel,
stringsAsFactors = FALSE)
attr(df, "variable.labels") <- NULL
# get value labels
value_meta <- NULL
for (i in 1:ncol(df)) {
value <- attr(df[, i], "value.labels")
if (length(value) > 0) {
# NULL and numeric(0) are ignored
vallabel <- names(value)
attr(value, "names") <- NULL
temp <- data.frame(value = value,
vallabel = vallabel,
var = names(df)[i],
stringsAsFactors = FALSE)
value_meta <- rbind(value_meta, temp)
attr(df[, i], "value.labels") <- NULL
}
}
for (var in names(df)) {
var_class <- class(df[[var]])
if ((nine2na) & (var_class == "numeric")) {
# replace 99999990+ for numeric variables
df[[var]] <- ifelse(df[[var]] > 99999990, NA, df[[var]])
}
if ((apostrophe) & (var_class == "factor")) {
# trim apostrophes \\u2018 and \\u2019
levels(df[[var]]) <- rlms_remove_apostrophe(levels(df[[var]]))
}
if ((yesno) & (var_class == "factor")) {
# convert yes/no to lowercase without apostrophes
levels(df[[var]]) <- rlms_yesno_standartize(levels(df[[var]]))
}
# remove "" in levels
if (var_class == "factor") {
if (sum(df[[var]] == "", na.rm = TRUE) == 0) {
levels <- levels(df[[var]])
levels_new <- setdiff(levels, "")
df[[var]] <- factor(df[[var]], levels = levels_new)
}
}
}
if (yesno) {
value_meta <-
dplyr::mutate(value_meta, vallabel = rlms_yesno_standartize(vallabel))
}
if (apostrophe) {
value_meta <-
dplyr::mutate(value_meta, vallabel = rlms_remove_apostrophe(vallabel))
}
# add wave-level-sample:
fileinfo <- rlms_fileinfo(file)
df$wave <- fileinfo$wave
df$level <- fileinfo$level
df$sample <- fileinfo$sample
attr(df, "var_meta") <- var_meta
attr(df, "value_meta") <- value_meta
if (!suppress) {
message("Variable labels: rlms_show_variable_labels(df). ")
message("Value labels: rlms_show_value_labels(df). ")
message("You may extract meta information now.")
message("Later some functions may destroy meta information. ")
message("This message may be turned off with option: suppress = TRUE. ")
}
# to avoid long waiting time for occasional "df + enter":
df <- dplyr::as.tbl(df)
return(df)
}
#' Read rlms data
#'
#' Read rlms data and all meta information
#'
#' Read rlms data and all the meta information. Destroy useless attributes.
#'
#' @param file the filename
#' @param haven use haven package:
#' "labelled" - return labelled variables,
#' "factor" - return factor or numeric variables,
#' "numeric" - return numeric variables.
#' @param suppress deprecated
#' @param verbose logical verbose output
#' @param ... further parameters passed to rlms_cleanup() and rlms_labelled2factor() functions
#' @export
#' @return dataframe
#' @examples
#' # rlms_read("r21i_os24a.sav")
rlms_read <- function(file, haven = c("factor", "labelled", "numeric"),
suppress, verbose = FALSE, ...) {
haven <- match.arg(haven) # check numeric/labelled/factor
df <- haven::read_spss(file)
df <- rlms_cleanup(df, verbose = verbose, ...)
attr(df, "var_meta") <- rlms_extract_variable_labels(df)
attr(df, "value_meta") <- rlms_extract_value_labels(df)
if (haven == "factor") {
df <- rlms_labelled2factor(df, verbose = verbose, ...)
}
if (haven == "numeric") {
df <- rlms_labelled2plain(df)
}
# add wave-level-sample:
fileinfo <- rlms_fileinfo(file)
df$wave <- fileinfo$wave
df$level <- fileinfo$level
df$sample <- fileinfo$sample
if (!missing(suppress)) {
warning("Option 'supress' is deprecated. Use 'verbose' instead :)")
}
# to avoid long waiting time for occasional "df + enter":
df <- dplyr::as.tbl(df)
return(df)
}
#' Convert string with a number in Russian tradition in numeric
#'
#' Convert string with a number in Russian tradition in numeric
#'
#' Russian standards prescribes to use comma as a decimal separator.
#' This function removes spaces and converts string to number.
#'
#' @param x the string with the number
#' @return numeric the number converted from the string
#' @export
#' @examples
#' rus2num("34 345,34")
rus2num <- function(x) {
x <- gsub(",", ".", x)
x <- gsub(" ", "", x)
return(as.numeric(x))
}
#' Convert excel numeric date encoding to date
#'
#' Convert excel numeric date encoding to date
#'
#' While reading excel files dates are sometimes replaced by their numeric codes.
#' This function recovers original dates from these codes.
#'
#' @param x the vector of numeric date codes
#' @return the date
#' @export
#' @examples
#' excel2date(12345)
excel2date <- function(x) {
ans <- as.Date(
as.POSIXct( (x - 25569) * 86400,
tz = "GMT",
origin = "1970-01-01"))
return(ans)
}
#' Detect wave, sample and level from filename of rlms file
#'
#' Detect wave, sample and level from filename of rlms file
#'
#' RLMS filenames contain info about the number of wave, sample (representative or all) and
#' level (household, individual and a special case of reproductive)
#'
#' @param flist the string vector of filenames with or without path
#' @return data.frame containing short filename, wave, level and sample columns
#' @export
#' @examples
#' rlms_fileinfo("r06hall23.sav")
#' # specify rlms folder first
#' # flist_long <- list.files("~/Documents/rlms_data/", recursive = TRUE, pattern = "*.sav")
#' # rlms_fileinfo(flist_long)
rlms_fileinfo <- function(flist) {
flist_short <- basename(flist) # transform filenames with path to short filenames
flist_sep <- stringr::str_match(flist_short,
"r([0-9]{2})([ih])(all|_os)[a-zA-Z0-9]*.sav")
df <- data.frame(cbind(flist_short, matrix(flist_sep[, -1], ncol = 3)),
stringsAsFactors = FALSE)
# here we need to specify ncol=3 to correctly work with scalar flist
names(df) <- c("file_short", "wave", "level", "sample")
df$wave <- as.numeric(df$wave)
recode <- c(individual = "i", household = "h", reproductive = "r")
df$level <- factor(df$level, levels = recode, labels = names(recode))
recode <- c(all = "all", representative = "_os")
df$sample <- factor(df$sample, levels = recode, labels = names(recode))
women_rep <- df$file_short == "r19PHv2.sav"
df$wave[women_rep] <- 19
df$level[women_rep] <- "reproductive"
df$sample[women_rep] <- NA
return(df)
}
#' Display some RLMS related hints
#'
#' Display some RLMS related hints
#'
#' Display some RLMS related hints
#'
#' @return nothing, just prints some messages
#' @export
#' @examples
#' rlms_hints()
rlms_hints <- function() {
message("macos specific:")
message("To extract zip archive with correct cyrillic folder names one may use 'The unarchiver' (free), see http://unarchiver.c3.cx/")
}
#' Convert all RLMS files from .sav to .Rds
#'
#' Convert all RLMS files from .sav to .Rds
#'
#' Convert all RLMS files from .sav to .Rds
#'
#' @param rlms_folder path to rlms data
#' @param flatten a logical value indicating whether to flatten folder structure, default is TRUE
#' @param ... arguments passed to rlms_read
#' @return nothing
#' @export
#' @examples
#' # rlms_sav2rds("~/Documents/rlms_data/")
rlms_sav2rds <- function(rlms_folder = getwd(), flatten = TRUE, ...) {
# remove trailing "/" if present
if (stringr::str_sub(rlms_folder, start = -1) == "/") {
rlms_folder <- stringr::str_sub(rlms_folder, end = -2)
}
flist_in <- list.files(path = rlms_folder,
recursive = TRUE, pattern = "*.sav", full.names = TRUE)
flist_info <- rlms_fileinfo(flist_in)
flist_out <- stringr::str_replace(flist_in, ".sav", ".Rds")
if (flatten) {
flist_out <- paste0(rlms_folder, "/", basename(flist_out)) # remove path if we flatten folder structure
}
for (j in 1:length(flist_in)) {
message("Processing ", flist_info$file_short[j],
", wave: ", flist_info$wave[j],
", level: ", flist_info$level[j],
", sample: ", flist_info$sample[j],
", ", (100 * j) %/% length(flist_in), "% done")
temp <- rlms_read(flist_in[j], ...)
saveRDS(temp, file = flist_out[j])
}
}
#' Load RLMS data of specified wave/level/sample
#'
#' Load RLMS data of specified wave/level/sample
#'
#' Load RLMS data of specified wave/level/sample. This function automatically
#' determines the file name. The function tries to load .Rds file.
#' If .Rds file is missing then .sav file is loaded.
#'
#' @param wave the number of wave
#' @param rlms_folder path to rlms data
#' @param level the level (individual/household/reproductive)
#' @param sample the sample (all/representative)
#' @param ... arguments passed to rlms_read
#' @return data.frame with RLMS data
#' @export
#' @examples
#' # rlms_load("~/Documents/rlms_data/", wave = 20, level = "individual", sample = "rep" )
rlms_load <- function(rlms_folder = getwd(), wave,
level = c("individual", "household", "reproductive"),
sample = c("all", "representative"), ...) {
level <- match.arg(level)
sample <- match.arg(sample)
if ( (wave == 19) & (level == "reproductive") ) {
filename <- "r19PHv2"
} else {
filename <- "r"
# add wave
if (nchar(wave) == 1) {
filename <- paste0(filename,"0")
}
filename <- paste0(filename, wave)
# add level
if (level == "individual") {
filename <- paste0(filename, "i")
}
if (level == "household") {
filename <- paste0(filename, "h")
}
# add sample
if (sample == "all") {
filename <- paste0(filename, "all")
}
if (sample == "representative") {
filename <- paste0(filename, "_os")
}
}
# remove trailing "/" if present
if (stringr::str_sub(rlms_folder, start = -1) == "/") {
rlms_folder <- stringr::str_sub(rlms_folder, end = -2)
}
flist_sav <- list.files(path = rlms_folder, recursive = TRUE, pattern = "*.sav", full.names = TRUE)
flist_rds <- list.files(path = rlms_folder, recursive = TRUE, pattern = "*.Rds", full.names = TRUE)
rds_index <- stringr::str_detect(flist_rds, filename)
sav_index <- stringr::str_detect(flist_sav, filename)
if (sum(rds_index) > 0) {
# if Rds file is present...
df <- readRDS(flist_rds[rds_index]) # load it
} else {
# load original .sav file
df <- rlms_read(flist_sav[sav_index], ...)
}
return(df)
}
#' Check whether the variable is labelled
#'
#' Check whether the variable is labelled
#'
#' Check whether the variable is labelled
#'
#' @param x a vector
#' @export
#' @return TRUE/FALSE
#' @examples
#' x <- haven::labelled(c(1, 1, 2, 3, 4), c(Male = 1, Male = 2, Female = 3))
#' is_labelled(x)
is_labelled <- function(x) {
return(class(x) == "labelled")
}
#' Get variable label
#'
#' Get variable label
#'
#' Get variable label
#'
#' @param x a vector
#' @export
#' @return character variable label
get_label <- function(x) {
return(attr(x, "label"))
}
#' Get value labels of a vector
#'
#' Get value labels of a vector
#'
#' Get value labels of a vector
#'
#' @param x a vector
#' @export
#' @return character vector value labels
#' @examples
#' x <- haven::labelled(c(1, 1, 2, 3, 4), c(Male = 1, Male = 2, Female = 3))
#' get_labels(x)
get_labels <- function(x) {
return(attr(x, "labels"))
}
#' Get unlabelled values of a labelled vector
#'
#' Get unlabelled values of a labelled vector
#'
#' Get unlabelled values of a labelled vector
#'
#' @param x a vector
#' @param na.rm a logical value indicating whether NA values should be stripped
#' @export
#' @return vector of values without labels
#' @examples
#' x <- haven::labelled(c(1, 1, 2, 3, 4), c(Male = 1, Male = 2, Female = 3))
#' unlabelled_values(x)
unlabelled_values <- function(x, na.rm = FALSE) {
if (is_labelled(x)) {
actual_values <- unique(x)
if (na.rm) {
actual_values <- stats::na.omit(actual_values)
}
labelled_values <- get_labels(x)
unlabelled_values_answer <- setdiff(actual_values, labelled_values)
} else {
warning("The argument of `unlabelled_values` is not a labelled vector: NULL returned.")
unlabelled_values_answer <- NULL
}
return(unlabelled_values_answer)
}
#' Get labelled values of a labelled vector
#'
#' Get labelled values of a labelled vector
#'
#' Get labelled values of a labelled vector
#'
#' @param x a vector
#' @param na.rm a logical value indicating whether NA values should be stripped.
#' Normally NA is not labelled and is not returned even with na.rm = FALSE.
#' @export
#' @return vector of values with labels
#' @examples
#' x <- haven::labelled(c(1, 1, 2, 3, 4), c(Male = 1, Male = 2, Female = 3))
#' labelled_values(x)
labelled_values <- function(x, na.rm = FALSE) {
if (is_labelled(x)) {
actual_values <- unique(x)
if (na.rm) {
actual_values <- stats::na.omit(actual_values)
}
labelled_values <- get_labels(x)
labelled_values_answer <- actual_values[actual_values %in% labelled_values]
} else {
warning("The argument of `unlabelled_values` is not a labelled vector: NULL returned.")
labelled_values_answer <- NULL
}
return(labelled_values_answer)
}
#' Check whether all values have labels
#'
#' Check whether all values have labels
#'
#' Check whether all values of a labelled variable have labels.
#'
#' @param x labelled vector
#' @param na.rm a logical value indicating whether NA values should be stripped. TRUE by default
#' @export
#' @return TRUE/FALSE
#' @examples
#' x <- haven::labelled(c(1, 1, 2, 3, 4), c(Male = 1, Male = 2, Female = 3))
#' all_labelled(x)
all_labelled <- function(x, na.rm = TRUE) {
if (is_labelled(x)) {
all_labelled_answer <- length(unlabelled_values(x, na.rm = na.rm)) == 0
} else {
warning("The argument of `all_labelled` is not a labelled vector: TRUE returned.")
all_labelled_answer <- TRUE
}
return(all_labelled_answer)
}
#' Check whether all values but rlms na (99999990+) have labels
#'
#' Check whether all values but rlms na (99999990+) have labels
#'
#' Check whether all values but rlms na (99999990+) have labels
#'
#' @param x labelled vector
#' @export
#' @return TRUE/FALSE
#' @examples
#' x <- haven::labelled(c(1, 1, 2, 3, 99999995), c(Male = 1, Male = 2, Female = 3))
#' all_but_rlmsna_labelled(x)
all_but_rlmsna_labelled <- function(x) {
all_but_rlmsna_labelled_answer <- FALSE
if (is_labelled(x)) {
if (is.numeric(x)) {
if (min(unlabelled_values(x, na.rm = TRUE)) > 99999990) {
all_but_rlmsna_labelled_answer <- TRUE
}
}
} else {
warning("The argument of `all_but_rlmsna_labelled` is not a labelled vector: FALSE returned.")
}
return(all_but_rlmsna_labelled_answer)
}
#' Check whether all values but one in the middle have labels
#'
#' Check whether all values but one in the middle have labels
#'
#' Check whether all non-NA but one in the middle values of a labelled variable have labels.
#'
#' @param x labelled vector
#' @export
#' @return TRUE/FALSE
#' @examples
#' x <- haven::labelled(c(1, 1, 2, 3, 4), c(Male = 1, Male = 2, Female = 4))
#' all_but_one_labelled(x)
all_but_one_labelled <- function(x) {
all_but_one_labelled_answer <- FALSE
if (is_labelled(x)) {
if (is.numeric(x)) {
unlabelled_x <- unlabelled_values(x, na.rm = TRUE)
labelled_x <- labelled_values(x, na.rm = TRUE)
if ((length(unlabelled_x) == 1) & (length(labelled_x) > 0)) {
if (unlabelled_x > min(labelled_x) & unlabelled_x < max(labelled_x)) {
all_but_one_labelled_answer <- TRUE
}
}
}
} else {
warning("The argument of `all_but_one_labelled` is not a labelled vector: FALSE returned.")
}
return(all_but_one_labelled_answer)
}
#' Safe version of as_factor.
#'
#' Safe version of as_factor.
#'
#' Safe version of as_factor. This function keeps unlabelled values and
#' does not replace them with NA as `as_factor` in `haven` package.
#' It also avoids "duplicated levels" warning.
#'
#' @param x labelled vector
#' @export
#' @return TRUE/FALSE
#' @examples
#' x <- haven::labelled(c(1, 1, 2, 3, 4), c(Male = 1, Male = 2, Female = 3))
#' as_factor_safe(x)
as_factor_safe <- function(x) {
old_labels <- get_labels(x)
unlabelled_x <- unlabelled_values(x, na.rm = TRUE)
new_labels <- c(names(old_labels), unlabelled_x)
labels_df <- data.frame(values = c(old_labels, unlabelled_x),
new_labels = new_labels,
stringsAsFactors = FALSE)
# this will throw warning for duplicate labels:
# x_factor <- haven::as_factor(x)
# so we use magic of left_join: do we have smth faster?
x_df <- dplyr::left_join(data.frame(values = as.numeric(x)), labels_df, by = "values")
x_factor <- factor(x_df$new_labels, levels = unique(new_labels))
return(x_factor)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.