#' @title Read SPSS file and parse factors from labels.
#' @name load_spss
#'
#' @description Reads an SPSS file with \code{sjmisc} and then converts
#' columns to factors.
#'
#' @param path (character), path of .sav file that should be loaded.
#' @param cleanLabel (function), pass through a function to clean the character
#' string of the column label.
#' @param rm_notAnswered (logical), if \code{TRUE} the 'Not Anwered' label will
#' not be included in factor levels if there are no instances.
#' @param parse_frame_labels (logical), if \code{TRUE}, will attempt to parse
#' "label" attribute into separate "label", "topic", and "category"
#' attributes based on string patterns in SPSS Labels exported by
#' Verint survey data.
#'
#' @return \code{data.frame} with factors and column attributes \code{label},
#' \code{topic}, and \code{category}.
#'
#' @note Labels and topics may be truncated if they are too long. This happens
#' when the .sav file is generated by Verint. SPSS has a maximum length
#' for variable labels, so those labels will be truncated in the SPSS file
#' itself.
#'
#' @examples
#' file_path <- "dataset.sav"
#' df <- load_spss(file_path)
#'
#'
#' @export
load_spss <- function(path, cleanLabel = NULL, rm_notAnswered = TRUE,
parse_frame_labels = TRUE) {
require(sjmisc)
# Use sjmisc to load the data frame, then parse the factors
# and parse the labels
df <- sjmisc::read_spss(path, attach.var.labels = TRUE)
df <- parse_factors(df, cleanLabel, rm_notAnswered)
if(parse_frame_labels == TRUE) {
df <- parse_labels(df)
}
df
}
parse_factors <- function(df, cleanLabel, rm_notAnswered) {
# Loop through every column and convert it to a factor, removing the
# 'Not Answered' level if specified
df[] <- lapply(df, atomic_to_fac, cleanLabel, rm_notAnswered)
df
}
atomic_to_fac <- function(vec, cleanLabel = NULL, rm_notAnswered = TRUE) {
# Converts from a named atomic vector to a factor while preserving
# variable attributes
# Only do the conversion if it has a "labels" attribute, which indicates a
# categorical variable in SPSS
retVal <- vec
if(length(attr(vec, "labels")) > 0) {
# Pull value labels and question label
labs <- attr(vec, "labels")
label <- attr(vec, "label")
# Clean label if function is passed through
if(!is.null(cleanLabel)) {
labs <- as.character(sapply(names(labs), cleanLabel))
label <- cleanLabel(label)
}
# Skip "Not Answered" Label under these conditions
start_index <- start_index(vec, labs, rm_notAnswered)
# Convert to factor
labelNames <- names(labs)
levs <- labs[start_index:length(labelNames)]
labs <- labelNames[start_index:length(labelNames)]
retVal <- factor(vec, levels = levs, labels = str_trim(labs))
# Add question text as attribute to vector
attr(retVal, "label") <- label
}
retVal
}
start_index <- function(vec, labels, rm_notAnswered) {
# This function will detect if the first or second element should be the
# start index of the labels vector in converting to a factor
ifelse(rm_notAnswered == TRUE &&
labels[1] == "Not Answered" &&
sum(vec == 0, na.rm=TRUE) == 0, 2, 1)
}
parse_label <- function(vec) {
require(stringr)
# Verint combines topics and question text in the SPSS variable labels.
# This function attempts to separate the question text from the topics and
# categories and give each its own attribute
lab <- attr(vec, "label")
questionAttr <- lab
topicAttr <- NULL
catAttr <- NULL
split1 <- str_split(lab, " \\[Question: ")[[1]]
split2 <- str_split(lab, " Question: ")[[1]]
if(length(split1) > 1) {
questionAttr <- split1[2]
topicAttr <- split1[1]
}
else if(length(split2) > 1) {
questionAttr <- split2[2]
topicAttr <- split2[1]
}
if(!is.null(topicAttr)) {
catAttr <- str_trim(str_split(topicAttr, "\\[Topic: ")[[1]][1])
topicAttr <- str_split(topicAttr, "\\[Topic: ")[[1]][2]
}
attr(vec, "label") <- questionAttr
attr(vec, "topic") <- topicAttr
attr(vec, "category") <- catAttr
vec
}
parse_labels <- function(df) {
# Loop through each column and parse the label attributes
df[] <- lapply(df, parse_label)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.