## tests/spss.R

library(foreign)

## sample100: read the system file (.sav) as a plain list without value
## labels, print its summaries, then check that the portable file (.por)
## is read into an identical object.
s100 <- read.spss("sample100.sav", use.value.labels = FALSE)
summary(s100)
str(s100)
d.sample100 <- data.frame(s100, stringsAsFactors = TRUE)
summary(d.sample100)
sample100 <- read.spss("sample100.por", use.value.labels = FALSE)
## identical to the .sav import, so summary()/str() are not repeated
stopifnot(identical(s100, sample100))

## pbc: the same checks for three files -- pbc.sav, pbcold.sav and pbc.por.
pbc <- read.spss("pbc.sav", use.value.labels = FALSE)
summary(pbc)
str(pbc)
d.pbc <- data.frame(pbc)
summary(d.pbc)

## pbcold.sav must be read into exactly the same list as pbc.sav, so the
## summary()/str()/data.frame() output for it is not repeated here.
pbco <- read.spss("pbcold.sav", use.value.labels = FALSE)
stopifnot(identical(pbc, pbco))

## The portable file is compared as a data frame via all.equal() with a
## tight tolerance rather than identical().
pbc. <- read.spss("pbc.por", use.value.labels = FALSE)
summary(pbc.)
str(pbc.) # has variable.labels
stopifnot(all.equal(d.pbc, data.frame(pbc.), tolerance = 1e-15))

## electric: read the installed example file and the local portable file as
## data frames with value labels; additionally read the example file with
## max.value.labels = 4.
electric_sav <- system.file("files", "electric.sav", package = "foreign")
electric.s <- read.spss(electric_sav,
                        use.value.labels = TRUE, to.data.frame = TRUE)
electric.p <- read.spss("electric.por",
                        use.value.labels = TRUE, to.data.frame = TRUE)
electric.s4 <- read.spss(electric_sav,
                         use.value.labels = TRUE, to.data.frame = TRUE,
                         max.value.labels = 4)
summary(electric.s)

## Columns 2 and 10 are the ones expected to differ between electric.s and
## electric.s4; `vl` holds the value labels expected on them in electric.s4.
ii <- c(2, 10)
vl <- list(
  FIRSTCHD = c("OTHER   CHD" = 6, "FATAL   MI" = 5, "NONFATALMI" = 3,
               "SUDDEN  DEATH" = 2, "NO CHD" = 1),
  DAYOFWK = c(SATURDAY = 7, FRIDAY = 6, THURSDAY = 5,
              WEDNSDAY = 4, TUESDAY = 3, MONDAY = 2, SUNDAY = 1)
)

## .sav and .por imports agree completely
stopifnot(identical(electric.s, electric.p))
## apart from columns `ii`, max.value.labels = 4 changes nothing
stopifnot(identical(electric.s[-ii], electric.s4[-ii]))
## columns `ii` of electric.s4 carry the expected "value.labels" attribute
stopifnot(identical(vl, lapply(electric.s4[ii], attr, "value.labels")))
## the factor levels in electric.s are those labels in reverse order
stopifnot(identical(lapply(vl, names),
                    lapply(electric.s[ii], function(f) rev(levels(f)))))


## after "long label patch":

## Switch to the C locale first (works around a locale problem), then read
## the file as a data frame and print it.
invisible(Sys.setlocale(locale = "C"))
ldat <- read.spss("spss_long.sav", to.data.frame = TRUE)
ldat

## The column names must be exactly variable1/variable2, and the
## "variable.labels" attribute must be those same names, named by themselves.
nms <- names(ldat)
nnms <- setNames(nms, nms)
stopifnot(identical(nms, c("variable1", "variable2")))
stopifnot(identical(nnms, attr(ldat, "variable.labels")))


## Awkward test cases for problems found in foreign <= 0.8-68, and for
## duplicated value labels in general.
##
## Many warnings are expected: the value labels (which become R factor
## labels) are incomplete, and the data contain an unsupported long string
## variable.

## Work from the package's "files" directory so the test file can be
## referred to by its bare name.
setwd(system.file("files", package = "foreign"))
sav <- "testdata.sav"

## Plain list import.
x.nodat <- read.spss(file = sav, to.data.frame = FALSE, reencode = "UTF-8")
str(x.nodat)

## Data frame imports, one per add.undeclared.levels setting
## (x.sort relies on the default).
x.sort <- read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8",
                    stringsAsFactors = TRUE)
str(x.sort)
x.append <- read.spss(file = sav, to.data.frame = TRUE,
                      add.undeclared.levels = "append", reencode = "UTF-8")
x.no <- read.spss(file = sav, to.data.frame = TRUE,
                  add.undeclared.levels = "no", reencode = "UTF-8")

## Show how the variable with undeclared levels comes out in each import.
levels(x.sort[["factor_n_undeclared"]])
levels(x.append[["factor_n_undeclared"]])
str(x.no[["factor_n_undeclared"]])


### Examples for duplicated.value.labels

## "append" is the default, so the argument is left out here
x.append <- read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8")
## "condense" has to be requested explicitly
x.condense <- read.spss(file = sav, to.data.frame = TRUE,
                        duplicated.value.labels = "condense",
                        reencode = "UTF-8")

## Compare the factor levels produced by the two settings ...
levels(x.append[["factor_n_duplicated"]])
levels(x.condense[["factor_n_duplicated"]])

## ... and the underlying integer codes.
as.numeric(x.append[["factor_n_duplicated"]])
as.numeric(x.condense[["factor_n_duplicated"]])

### ToDo:
## Long strings (> 255 characters) are currently imported as several
## consecutive, separate variables (see the warning about subtype 14).
## A future version of the import function should handle this properly.
x <- read.spss(file = sav, to.data.frame = TRUE, stringsAsFactors = FALSE,
               reencode = "UTF-8")

## Print character data wrapped to lines of target width `w`.
##   x: character vector to print
##   w: wrapping width handed to strwrap() (default 70)
## Each wrapped line gets " \n" appended by paste(), and cat() then joins
## the pieces with its default separator.  Returns NULL invisibly.
cat.long.string <- function(x, w = 70) {
  wrapped <- strwrap(x, width = w)
  cat(paste(wrapped, "\n"))
}

## Print the two columns that hold the pieces of the long string, then the
## reassembled value.

## first part: x$string_500:
cat.long.string(x$string_500)
## second part: x$STRIN0:
cat.long.string(x$STRIN0)
## complete long string:
## paste0() is vectorised, so the two columns are joined row by row directly;
## no need to coerce the data frame to a matrix with apply().
long.string <- paste0(x$string_500, x$STRIN0)
cat.long.string(long.string)

## Try the foreign package in your browser

## Any scripts or data that you put into this service are public.

## foreign documentation built on Sept. 10, 2023, 1:08 a.m.