#' write.sav
#'
#' Function to write an SPSS sav or zsav file from a data.frame().
#' @param dat _data.frame_ a data.frame to store as SPSS file.
#' @param filepath _string_ full path where and how this file should be
#' stored
#' @param label _character_ if any provided this must be a vector of
#' labels. It must be of size `ncol(dat)`
#' @param add.rownames _logical_ If `TRUE`, a new variable rownames
#' will be added to the sav-file.
#' @param compress _logical_ should compression be used. If TRUE some
#' integers will be stored more efficiently. Everything will be stored in
#' chunks of 8 chars. Reduces memory size of sav-file.
#' @param convert.dates _logical_ should dates be converted to SPSS format.
#' @param tz _character_ The name of the timezone convert.dates will use.
#' @param debug _logical_ print debug information.
#' @param is_zsav _logical_ explicitly create a zsav file. If the file
#' ending zsav is used, this is selected as default.
#' @param disppar optional display parameter matrix. Needs documentation.
#' @details Writing of strings longer than 255 chars is not provided.
#'
#' @return `write.sav` returns nothing
#'
#' @export
write.sav <- function(dat, filepath, label, add.rownames = FALSE,
compress = FALSE, convert.dates = TRUE, tz = "GMT",
debug = FALSE, is_zsav = FALSE, disppar) {
filepath <- path.expand(filepath)
if (missing(filepath))
stop("need a path")
attrlab <- attr(dat, "var.label")
if (identical(attrlab, character(0)))
attrlab <- NULL
if (missing(label) && is.null(attrlab))
label <- ""
if (missing(label) && !is.null(attrlab))
label <- attrlab
if (!identical(label, "") && (length(label) != ncol(dat)))
stop("label and ncols differ. each col needs a label")
if (any(nchar(label)) > 255)
stop("longlabels not yet implemented")
if (add.rownames) {
dat <- data.frame(rownames = rownames(dat),
dat, stringsAsFactors = FALSE)
}
nams <- names(dat)
# get labtab prior to any modification due to string sizes
ff <- which(sapply(dat, is.factor))
labtab <- lapply(ff, function(x) {
ll <- levels(dat[[x]])
x <- as.integer(labels(ll))
names(x) <- ll
x
})
LONGVAR <- FALSE
if (all(nchar(nams) <= 8) && (identical(toupper(nams), nams))) {
nams <- toupper(nams)
nvarnames <- substr(nams, 0, 8)
} else {
nvarnames <- paste0("VAR", seq_along(nams))
LONGVAR <- TRUE
}
vtyp <- as.integer(sapply(dat, is.character))
vtyp[vtyp != 0] <- as.integer(sapply(dat[vtyp != 0],
function(x) max(nchar(x), na.rm = TRUE)))
if (any(vtyp > 255)) {
message("if you really need this, split the string into segments of 255")
stop("Strings longer than 255 characters not yet implemented")
}
vtyp <- ceiling(vtyp / 8) * 8
vtyp[vtyp > 255] <- 255
fun <- function(vec) {
vartypes <- NULL
for (i in seq_along(vec)) {
val <- vtyp[i]
if (val <= 8) {
vartypes <- c(vartypes, val)
} else {
vartypes <- c(vartypes, c(val, rep(-1, (ceiling(val / 8) - 1))))
}
}
vartypes
}
vartypes <- fun(vtyp)
vartypes[vartypes > 255] <- 255
nams <- vector("character", length(vartypes))
nams[vartypes > -1] <- nvarnames
nvarnames <- nams
# update factor position with new varnames
pos <- which(nvarnames != "")
if (length(ff) > 0) {
ff <- sapply(ff, function(x) {
# newnam <- nvm[x]
x <- pos[x]
# names(x) <- newnam
x
})
}
longvarnames <- ""
if ((length(nvarnames) > length(names(dat))) || LONGVAR)
longvarnames <- paste(
paste0(nvarnames[nvarnames != ""], "=", names(dat)),
collapse = "\t")
systime <- Sys.time()
timestamp <- substr(systime, 12, 19)
lct <- Sys.getlocale("LC_TIME")
Sys.setlocale("LC_TIME", "C")
datestamp <- format(Sys.Date(), "%d %b %y")
Sys.setlocale("LC_TIME", lct)
ii <- sapply(dat, is.integer)
nn <- sapply(dat, function(x) {
is.numeric(x) | is.factor(x)
})
itc <- rep(0, NCOL(dat))
# get vartyp used for display parameters. has to be selected prior to
# compression. otherwise factor will be wrongfully identified as integer.
vartypen <- sapply(dat, function(x) class(x)[[1]])
# if compression is selected, try to store numeric, logical and factor as
# integer and try to compress integer as uint8 (with bias). Since R does
# only know numeric and integer, this needs additional testing if a
# conversion is safe.
if (compress) {
message("Compression is still experimental. Testing is welcome!")
# check if numeric can be stored as integer
numToCompress <- sapply(dat[nn], saveToExport)
# convert numeric to integer without loss of information
if (any(numToCompress)) {
saveToConvert <- names(numToCompress[numToCompress])
# replace numeric as integer
dat[saveToConvert] <- sapply(dat[saveToConvert], as.integer)
}
# ii integer and not all missing
ii <- sapply(dat, function(x) {
(is.logical(x) | is.integer(x))
})
gg <- FALSE
dat_ii <- dat[names(ii)[ii]] # might have length 0
# gg check for ii if is.integer and min >= 100 and max < 151 (in range of)
# uint8 +100 bias. Values > 250 are missing.
if (length(dat_ii) > 0)
gg <- sapply(dat_ii, function(x) {
z <- NULL
# if all values are missing, return TRUE: will write 255 in output
if (all(is.na(x))) {
z <- TRUE
} else {
# check if value can be stored as uint8 with bias
z <- (min(x, na.rm = TRUE) >= -100 & max(x, na.rm = TRUE) < 151)
}
z
})
# adjust gg to the length of dat
gg <- gg[names(dat)]
# logical matrix: is integer and good for compression?
checkll <- rbind(ii, gg)
# logical for integer compression
itc <- apply(checkll, 2, all)
}
cc <- sapply(dat, is.character)
vartyp <- NA
vartyp[vartypen == "factor" | vartypen == "logical"] <- -1
vartyp[vartypen == "numeric" | vartypen == "integer"] <- 0
vartyp[vartypen == "character"] <- 1
vartyp[vartypen == "Date"] <- 20
vartyp[vartypen == "POSIXct"] <- 22
if (convert.dates) {
dates <- which(sapply(dat,
function(x) inherits(x, "Date"))
)
for (v in dates)
dat[[v]] <- as.vector(
julian(dat[[v]], as.Date("1582-10-14", tz = "GMT")) * 24 * 60 * 60
)
dates <- which(
sapply(dat, function(x) inherits(x, "POSIXt"))
)
for (v in dates)
dat[[v]] <- as.vector(
round(julian(dat[[v]], ISOdate(1582, 10, 14, tz = tz))) * 24 * 60 * 60
)
}
# optional disppar parameter. if none is passed to the function, create a
# default one with a few selected parameters.
# TODO: add a similar logic for varmatrix
if (missing(disppar)) {
measure <- rep(NA, ncol(dat))
# nominal if factor, logical or character; else metric
# (nominal 1, ordinal 2, metric 3)
sel <- vartyp == -1 | vartyp == 1
measure[sel] <- 1
measure[!sel] <- 3
colwidth <- rep(NA, ncol(dat))
# colwidth 10 if date; else 8
sel <- vartyp == 20 | vartyp == 22
colwidth[sel] <- 10
colwidth[!sel] <- 8
alignment <- rep(NA, ncol(dat))
# characters left aligned; else right
# (1 right, 2 center, 3 left)
sel <- vartyp == 1
alignment[sel] <- 3
alignment[!sel] <- 1
# create disppar matrix
disppar <- matrix(c(measure, colwidth, alignment),
ncol = 3)
}
# make it flat
disppar <- c(t(disppar))
attr(dat, "vtyp") <- vtyp
attr(dat, "vartyp") <- vartyp
attr(dat, "vartypes") <- vartypes
attr(dat, "nvarnames") <- nvarnames
attr(dat, "longvarnames") <- longvarnames
attr(dat, "timestamp") <- timestamp
attr(dat, "datestamp") <- datestamp
attr(dat, "label") <- label
attr(dat, "haslabel") <- ff
attr(dat, "labtab") <- labtab
attr(dat, "itc") <- itc
attr(dat, "cc") <- cc
attr(dat, "disppar") <- disppar
if (file_ext(filepath) == "zsav")
is_zsav <- TRUE
if (is_zsav)
message("Zsav compression is still experimental. Testing is welcome!")
writesav(filepath, dat, compress, debug, is_zsav)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.