Nothing
#' Write SPSS File
#'
#' This function writes a data frame or matrix into a SPSS file by either using
#' the \code{write_sav()} function in the \pkg{haven} package by Hadley Wickham
#' and Evan Miller (2019) or the free software \emph{PSPP} (see:
#' \url{https://www.gnu.org/software/pspp/pspp.html}).
#'
#' If arguments \code{pspp.path} is not specified (i.e., \code{pspp.path = NULL}),
#' \code{write_sav()} function in the \pkg{haven} is used. Otherwise the object
#' \code{x} is written as CSV file, which is subsequently imported into SPSS using
#' the free software \emph{PSPP} by executing a SPSS syntax written in R. Note that
#' \emph{PSPP} needs to be installed on your computer when using the \code{pspp.path}
#' argument.
#'
#' A SPSS file with 'variable labels', 'value labels', and 'user-missing values' is
#' written by specifying the \code{var.attr} argument. Note that the number of rows
#' in the matrix or data frame specified in \code{var.attr} needs to match with the
#' number of columns in the data frame or matrix specified in \code{x}, i.e., each
#' row in \code{var.attr} represents the variable attributes of the corresponding
#' variable in \code{x}. In addition, column names of the matrix or data frame
#' specified in \code{var.attr} needs to be labeled as \code{label} for 'variable
#' labels, \code{values} for 'value labels', and \code{missing} for 'user-missing
#' values'.
#'
#' Labels for the values are defined in the column \code{values} of the matrix or
#' data frame in \code{var.attr} using the equal-sign (e.g., \code{0 = female}) and
#' are separated by a semicolon (e.g., \code{0 = female; 1 = male}).
#'
#' User-missing values are defined in the column \code{missing} of the matrix or
#' data frame in \code{var.attr}, either specifying one user-missing value (e.g.,
#' \code{-99}) or more than one but up to three user-missing values separated
#' by a semicolon (e.g., \code{-77; -99}.
#'
#' @param x a matrix or data frame to be written in SPSS, vectors are
#' coerced to a data frame.
#' @param file a character string naming a file with or without file extension
#' '.sav', e.g., \code{"My_SPSS_Data.sav"} or \code{"My_SPSS_Data"}.
#' @param var.attr a matrix or data frame with variable attributes used in the
#' SPSS file, only 'variable labels' (column name \code{label}),
#' 'value labels' column name \code{values}, and 'user-missing values'
#' column name \code{missing} are supported (see 'Details').
#' @param pspp.path a character string indicating the path where the PSPP folder
#' is located on the computer, e.g.\code{C:/Program Files/PSPP/}.
#' @param digits an integer value indicating the number of decimal places shown
#' in the SPSS file for non-integer variables.
#' @param write.csv logical: if \code{TRUE}, CSV file is written along with the
#' SPSS file.
#' @param sep a character string for specifying the CSV file, either \code{";"}
#' for the separator and \code{"."}
#' for the decimal point (default, i.e. equivalent to
#' \code{write.csv2}) or \code{"."} for the decimal point and
#' \code{","} for the separator (i.e. equivalent to
#' \code{write.csv}), must be one of both \code{";"} (default)
#' or \code{","}.
#' @param na a character string for specifying missing values in the CSV file.
#' @param write.sps logical: if \code{TRUE}, SPSS syntax is written along with
#' the SPSS file when using PSPP.
#' @param check logical: if \code{TRUE} (default), variable attributes
#' specified in the argument \code{var.attr} is checked.
#'
#' @author
#' Takuya Yanagida \email{takuya.yanagida@@univie.ac.at}
#'
#' @seealso
#' \code{\link{read.sav}}, \code{\link{write.xlsx}}, \code{\link{write.dta}},
#' \code{\link{write.mplus}}
#'
#' @references
#' GNU Project (2018). \emph{GNU PSPP for GNU/Linux} (Version 1.2.0).
#' Boston, MA: Free Software Foundation. \url{https://www.gnu.org/software/pspp/}
#'
#' Wickham H., & Miller, E. (2019). \emph{haven: Import and Export 'SPSS', 'Stata'
#' and 'SAS' Files}. R package version 2.2.0. \url{https://CRAN.R-project.org/package=haven}
#'
#' Robitzsch, A., Grund, S., & Henke, T. (2019). \emph{miceadds: Some additional
#' multiple imputation functions, especially for mice}. R package version 3.4-17.
#' \url{https://CRAN.R-project.org/package=miceadds}
#'
#' @note
#' Part of the function using \emph{PSPP} was adapted from the \code{write.pspp()}
#' function in the \pkg{miceadds} package by Alexander Robitzsch, Simon Grund and
#' Thorsten Henke (2019).
#'
#' @export
#'
#' @examples
#' \dontrun{
#' dat <- data.frame(id = 1:5,
#' gender = c(NA, 0, 1, 1, 0),
#' age = c(16, 19, 17, NA, 16),
#' status = c(1, 2, 3, 1, 4),
#' score = c(511, 506, 497, 502, 491))
#'
#' # Example 1: Write SPSS file using the haven package
#' write.sav(dat, file = "Dataframe_haven.sav")
#'
#' # Example 2: Write SPSS file using PSPP,
#' # write CSV file and SPSS syntax along with the SPSS file
#' write.sav(dat, file = "Dataframe_PSPP.sav", pspp.path = "C:/Program Files/PSPP",
#' write.csv = TRUE, write.sps = TRUE)
#'
#' # Example 3: Specify variable attributes
#' # Note that it is recommended to manually specify the variables attributes in a CSV or
#' # Excel file which is subsequently read into R
#' attr <- data.frame(# Variable names
#' var = c("id", "gender", "age", "status", "score"),
#' # Variable labels
#' label = c("Identification number", "Gender", "Age in years",
#' "Migration background", "Achievement test score"),
#' # Value labels
#' values = c("", "0 = female; 1 = male", "",
#' "1 = Austria; 2 = former Yugoslavia; 3 = Turkey; 4 = other",
#' ""),
#' # User-missing values
#' missing = c("", "-99", "-99", "-99", "-99"))
#'
#' # Example 4: Write SPSS file with variable attributes using the haven package
#' write.sav(dat, file = "Dataframe_haven_Attr.sav", var.attr = attr)
#'
#' # Example 5: Write SPSS with variable attributes using PSPP
#' write.sav(dat, file = "Dataframe_PSPP_Attr.sav", var.attr = attr,
#' pspp.path = "C:/Program Files/PSPP")
#' }
write.sav <- function(x, file = "SPSS_Data.sav", var.attr = NULL, pspp.path = NULL, digits = 2,
write.csv = FALSE, sep = c(";", ","), na = "", write.sps = FALSE, check = TRUE) {
#_____________________________________________________________________________
#
# Initial Check --------------------------------------------------------------
# Check if input 'x' is missing
if (isTRUE(missing(x))) { stop("Please specify a matrix or data frame for the argument 'x'.", call. = FALSE) }
# Matrix or data frame for the argument 'x'?
if (isTRUE(!is.matrix(x) && !is.data.frame(x))) { stop("Please specifiy a matrix or data frame for the argument 'x'.", call. = FALSE) }
#_____________________________________________________________________________
#
# Data and Variables ---------------------------------------------------------
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Data frame ####
x <- as.data.frame(x, stringsAsFactors = FALSE)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Variable names ####
varnames <- colnames(x)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Number of variables ####
var.length <- length(varnames)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## File extension .sav ####
file <- ifelse(length(grep(".sav", file)) == 1L, file <- gsub(".sav", "", file), file)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Separator ####
sep <- ifelse(all(c(";", ".") %in% sep), ";", sep)
#_____________________________________________________________________________
#
# Input Check ----------------------------------------------------------------
# Check inputs
.check.input(logical = c("write.csv", "write.sps"), character = list(file = 1L, pspp.path = 1L, na = 1L), s.character = list(sep = c(";", ",")), args = "digits", envir = environment(), input.check = check)
# Additional checks
if (isTRUE(check)) {
# Check input 'pspp.path'
if (isTRUE(!is.null(pspp.path))) { if (isTRUE(length(grep("pspp.exe", list.files(paste0(pspp.path, "/bin/")))) != 1L)) { stop("PSPP file \'pspp.exe\' was not found in the folder specified in the pspp.path argument.", call. = FALSE) } }
# Check input 'var.attr'
if (isTRUE(!is.null(var.attr))) {
# Number of rows in var.attr match with number of columns in x?
if (isTRUE(nrow(var.attr) != ncol(x))) { stop("Number of rows in the data frame or matrix specified in the argument var.attr does not match with the number of columns in x.", call. = FALSE) }
# Any column name "label", "values", "missing"?
if (isTRUE(all(is.na(match(names(var.attr), c("label", "values", "missing")))))) { stop("None of the column names of the data frame or matrix specified in the argument var.attr match with \"label\", \"values\" or \"missing\".", call. = FALSE) }
# Value labels match with data?
if (isTRUE(any(!is.na(match(names(var.attr), "values"))))) {
for (i in seq_len(var.length)) {
value.labels <- as.character(var.attr[i, "values"])
if (isTRUE(value.labels != "")) {
value.labels.split <- unlist(strsplit(value.labels, ";"))
value.labels.split.matrix <- matrix(misty::chr.trim(unlist(sapply(value.labels.split, function(y) strsplit(y, "=")))), ncol = length(value.labels.split))
# Duplicated values
if (isTRUE(any(duplicated(value.labels.split.matrix[1L, ])))) {
stop(paste0("Values in the column \"values\" specified in 'var.attr' are duplicated for the variable '", varnames[i], "'."), call. = FALSE)
}
# Values specified in 'values' in the data
if(isTRUE(!all(as.numeric(value.labels.split.matrix[1, ]) %in% x[, varnames[i]]))) {
warning(paste0("Values in the column \"values\" specified in 'var.attr' do not all match with the variable '", varnames[i], "'."), call. = FALSE)
}
}
}
}
}
}
#_____________________________________________________________________________
#
# Main Function --------------------------------------------------------------
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Variable attributes ####
if (isTRUE("label" %in% names(var.attr))) { var.attr[, "label"] <- na.as(var.attr[, "label"], na = "", check = FALSE) }
if (isTRUE("values" %in% names(var.attr))) { var.attr[, "values"] <- na.as(var.attr[, "values"], na = "", check = FALSE) }
if (isTRUE("missing" %in% names(var.attr))) { var.attr[, "missing"] <- na.as(var.attr[, "missing"], na = "", check = FALSE) }
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Use haven package ####
if (isTRUE(is.null(pspp.path))) {
# Package haven installed?
if (isTRUE(!requireNamespace("haven", quietly = TRUE))) { stop("Package \"haven\" is needed for this function to work, please install it.", call. = FALSE ) }
#...................
### Without variable attributes ####
if (isTRUE(is.null(var.attr))) {
# Write .sav
haven::write_sav(x, paste0(file, ".sav"), compress = FALSE)
#...................
### Witht variable attributes ####
} else {
# Variable labels, value labels, and user-missing values
if (isTRUE("label" %in% names(var.attr))) { label <- as.character(var.attr[, match("label", colnames(var.attr))]) } else { label <- rep("", times = nrow(var.attr)) }
if (isTRUE("values" %in% names(var.attr))) { labels <- as.character(var.attr[, match("values", colnames(var.attr))]) } else { labels <- rep("", times = nrow(var.attr)) }
if (isTRUE("missing" %in% names(var.attr))) { na <- as.character(var.attr[, match("missing", colnames(var.attr))]) } else { na <- rep("", times = nrow(var.attr)) }
# For numeric variables only, i.e., exclude factors, strings, and dates
for (i in which(vapply(x, is.numeric, FUN.VALUE = logical(1L)))) {
##### Value labels ####
if (misty::chr.trim(labels[i]) != "") {
x.labels <- unlist(strsplit(labels[i], ";"))
x.labels <- matrix(misty::chr.trim(unlist(sapply(x.labels, function(y) strsplit(y, "=")))), ncol = length(x.labels))
labels.i <- paste0("c(", paste(apply(x.labels, 2L, function(y) paste(paste0("\"", y[2L], "\""), y[1L], sep = " = ")), collapse = ", "), ")")
} else {
labels.i <- NULL
}
##### User-missing values ####
if (isTRUE(misty::chr.trim(na[i]) != "")) {
na.i <- paste0("c(", paste(misty::chr.trim(unlist(strsplit(na[i], ";"))), collapse = ", "), ")")
} else {
na.i <- NULL
}
# Labelled vectors for SPSS
eval(parse(text = paste0("x$", colnames(x)[i], " <- haven::labelled_spss(as.double(x$", colnames(x)[i], "), labels = ", ifelse(is.null(labels.i), "NULL", labels.i), ", na_values = ", ifelse(is.null(na.i), "NULL", na.i), ", label = \"", label[i], "\")")))
# Zero digits for integer values
if (isTRUE(all(na.omit(x[, i]) %% 1L == 0L))) {
eval(parse(text = paste0("attr(x$", colnames(x)[i], ", \"format.spss\") <- \"F8.0\"")))
} else {
eval(parse(text = paste0("attr(x$", colnames(x)[i], ", \"format.spss\") <- \"F8.", digits, "\"")))
}
}
# Write .sav
haven::write_sav(x, paste0(file, ".sav"), compress = FALSE)
}
# Save CSV file
if (isTRUE(write.csv)) {
if (isTRUE(sep == ";")) {
write.csv2(x, paste0(file, ".csv"), row.names = FALSE, quote = FALSE, na = na)
} else {
write.csv(x, paste0(file, ".csv"), row.names = FALSE, quote = FALSE, na = na)
}
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Use PSPP ####
} else {
# Function to add quotes
add.quote <- function(x) { paste0("\"", x, "\"") }
# Convert factors and logical to numeric
any.factors <- any(vapply(x, is.factor, FUN.VALUE = logical(1L)))
if (isTRUE(any.factors)) {
xf <- data.frame(lapply(x, function(x) if (is.factor(x) | is.logical(x)) as.numeric(x) else x), stringsAsFactors = FALSE)
} else {
xf <- x
}
# Write CSV
utils::write.csv2(xf, paste0(file, ".csv"), row.names = FALSE, quote = FALSE, na = na)
#...................
### Variable formats ####
type <- rep("F", times = var.length)
width <- rep(8L, times = var.length)
decimals <- rep(NA, times = var.length)
for (i in seq_len(var.length)) {
# Numeric
if (isTRUE(is.numeric(xf[, i]))) {
i.nchar <- nchar(round(xf[, i], digits = digits))
# Size of the elements > 8
if (isTRUE(any(na.omit(i.nchar) > 8L))) { width[i] <- max(i.nchar) }
# Digits for numeric = 2, digits for integer = 0
decimals[i] <- ifelse(is.integer(xf[, i]), 0L, digits)
decimals[i] <- ifelse(all(xf[, i] %% 1L == 0L), 0, digits)
} else {
# Character
type[i] <- "A"
width[i] <- max(nchar(xf[, i]))
}
}
# PSPP variable format
variables <- paste(varnames, ifelse(!is.na(decimals), paste0(type, paste(width, decimals, sep = ".")), paste0(type, width)), collapse = "\n ")
#...................
### Write PSPP syntax ####
code <- paste0(file, ".sps")
cat(paste0("GET DATA\n",
" /TYPE=TXT \n",
" /FILE='", getwd(), "/", file, ".csv' \n",
" /ARRANGEMENT=DELIMITED\n",
" /DELCASE=LINE \n",
" /FIRSTCASE=2 \n",
" /DELIMITERS=';'\n" ,
" /QUALIFIER='' \n" ,
" /VARIABLES=\n"), file = code)
cat(paste0(" ", variables, " ."), file = code, append = TRUE)
#...................
### Variable attributes ####
# Attributes from object var.attr
if (isTRUE(!is.null(var.attr))) {
###
# Define variable labels
label <- as.character(var.attr[, match("label", colnames(var.attr))])
indices <- which(label != "")
# PSPP variable labels
variable.label <- paste(varnames[indices], add.quote(label[indices]), collapse = " \n ")
cat("\nVARIABLE LABELS\n ", file = code, append = TRUE)
cat(paste0(" ", variable.label, " ."), file = code, append = TRUE)
###
# Define value labels
# Values from variable attributes
if (isTRUE(any(var.attr[, match("values", colnames(var.attr))] != ""))) {
for (i in seq_len(var.length)) {
value.labels <- as.character(var.attr[i, match("values", colnames(var.attr))])
if (isTRUE(value.labels != "")) {
x <- unlist(strsplit(value.labels, ";"))
x <- matrix(misty::chr.trim(unlist(sapply(x, function(x) strsplit(x, "=")))), ncol = length(x))
cat("\nVALUE LABELS\n",
paste0(" ", varnames[i], paste0(paste0(" ", x[1L, ], " '", x[2L, ], sep = "'"), collapse = "")), ".", file = code, append = TRUE)
}
}
}
###
# Values from factor levels
if (isTRUE(any.factors)) {
x.factor <- which(vapply(x, is.factor, FUN.VALUE = logical(1)))
for (i in x.factor) {
values <- unique(as.numeric(x[, i]))
labels <- levels(x[, i])
cat("\nVALUE LABELS\n",
paste0(" ", names(xf)[i], paste0(paste0(" ", values, " '", labels, sep = "'"), collapse = "")), ".", file = code, append = TRUE)
}
}
###
# Define missing values
miss.unique <- unique(misty::chr.trim(as.character(unique(var.attr[, match("missing", colnames(var.attr))]))))
miss.unique <- miss.unique[!miss.unique %in% c("", NA)]
# One pattern of missing data values
if (isTRUE(length(miss.unique) == 1L)) {
cat(paste0("\nMISSING VALUES\n ", paste(varnames[which(var.attr$missing == miss.unique)], collapse = " "),
" (", gsub(";", " ", miss.unique), ")", "."), file = code, append = TRUE)
}
###
# More than one pattern of missing data values
if (isTRUE(length(miss.unique) > 1L)) {
for (i in seq_len(var.length)) {
missing.values <- var.attr[i, match("missing", colnames(var.attr))]
if (isTRUE(missing.values != "")) {
cat("\nMISSING VALUES\n " , paste0(varnames[i], " (", paste(gsub(";", " ", missing.values), collapse = " "), ")", "."), file = code, append = TRUE)
}
}
}
#......
# Object var.attr not available
} else {
# Values from factor levels
if (isTRUE(any.factors)) {
x.factor <- which(vapply(x, is.factor, FUN.VALUE = logical(1)))
for (i in x.factor) {
values <- unique(as.numeric(x[, i]))
labels <- levels(x[, i])
cat("\nVALUE LABELS\n",
paste0(" ", names(xf)[i], paste0(paste0(" ", values, " '", labels, sep = "'"), collapse = "")), ".", file = code, append = TRUE)
}
}
}
#...................
### Save PSPP ####
cat("\nEXECUTE.\n", file = code, append = TRUE)
cat(paste0( "\nSAVE OUTFILE='", getwd() , "/" , file , ".sav'.\nEXECUTE."),
file = code, append = TRUE)
#...................
### Run PSPP ####
system(paste0("\"", pspp.path, "/bin/pspp.exe\" ", code))
#...................
### Remove sps and csv file ####
if (!isTRUE(write.sps)) { unlink(paste0(file, ".sps")) }
if (!isTRUE(write.csv) | sep == ",") { unlink(paste0(file, ".csv")) }
if (isTRUE(write.csv) & sep == ",") { utils::write.csv(xf, paste0(file, ".csv"), row.names = FALSE, quote = FALSE, na = na) }
}
}
#_______________________________________________________________________________
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.