Nothing
spss_varlist <-
function(file){
# open the spss sav file for read in binary mode
sav <- file(file, "rb")
############################################################################
# set up a vector to translate format type code to mnemonic
############################################################################
ftype <- NULL
ftype <- c( ftype, 'A') # 1 alphanumeric
ftype <- c( ftype, 'AHEX') # 2 alphanumeric hexadecimal
ftype <- c( ftype, 'COMMA') # 3 F format with commas
ftype <- c( ftype, 'DOLLAR') # 4 commas and floating dollar sign
ftype <- c( ftype, 'F') # 5 F default numeric format
ftype <- c( ftype, 'IB') # 6 integer binary
ftype <- c( ftype, 'PIBHEX') # 7 packed integer binary (hexadecimal)
ftype <- c( ftype, 'P') # 8 packed decimal
ftype <- c( ftype, 'PIB') # 9 positive integer binary (unsigned)
ftype <- c( ftype, 'PK') # 10 positive packed decimal (unsigned)
ftype <- c( ftype, 'RB') # 11 floating point binary
ftype <- c( ftype, 'RBHEX') # 12 floating point binary hex
ftype <- c( ftype, 'UNKNOWN') # 13 --NOT USED--
ftype <- c( ftype, 'UNKNOWN') # 14 --NOT USED--
ftype <- c( ftype, 'Z') # 15 zoned decimal
ftype <- c( ftype, 'N') # 16 unsigned with leading spaces
ftype <- c( ftype, 'E') # 17 explicit power of 10
ftype <- c( ftype, 'UNKNOWN') # 18 --NOT USED--
ftype <- c( ftype, 'UNKNOWN') # 19 --NOT USED--
ftype <- c( ftype, 'DATE') # 20 date - dd-mmm-yyyy
ftype <- c( ftype, 'TIME') # 21 time - hh:mm:ss.s
ftype <- c( ftype, 'DATETIME') # 22 date and time
ftype <- c( ftype, 'ADATE') # 23 date - mm/dd/yyyy
ftype <- c( ftype, 'JDATE') # 24 julian date - yyyyddd
ftype <- c( ftype, 'DTIME') # 25 date-time - dd hh:mm:ss.s
ftype <- c( ftype, 'WKDAY') # 26 day of the week
ftype <- c( ftype, 'MONTH') # 27 month
ftype <- c( ftype, 'MOYR') # 28 mmm yyyy
ftype <- c( ftype, 'QYR') # 29 q Q yyyy
ftype <- c( ftype, 'WKYR') # 30 ww WK yyyy
ftype <- c( ftype, 'PCT') # 31 percent - F followed by '%'
ftype <- c( ftype, 'DOT') # 32 like COMMA, switching dot for comma
ftype <- c( ftype, 'CCA') # 33 ) User
ftype <- c( ftype, 'CCB') # 34 ) programmable
ftype <- c( ftype, 'CCC') # 35 ) currency
ftype <- c( ftype, 'CCD') # 36 ) formats
ftype <- c( ftype, 'CCE') # 37 )
ftype <- c( ftype, 'EDATE') # 38 date - dd.mm.yyyy
ftype <- c( ftype, 'SDATE') # 39 date - yyyy/mm/dd
variable_record <- function() {
##################################################
# variable_record -- Parse one variable record
#
# Return a either a vector of components,
# or NULL if the dictionary entry corresponded to
# "continuation of a string var"
##################################################
# read variable type code
TYPECODE <- readBin(sav, integer())
# if type is not -1, then record is for a numeric var or the
# first (and only) instance of a string var
if (TYPECODE != -1) {
# read label flag
HASLABEL <- readBin(sav, integer())
# read missing value format code
NMISSING <- readBin(sav, integer())
# read print format code 1st 3 bytes of Print Format
PDEC <- readBin(sav, integer(), size=1) # decimal places
PWID <- readBin(sav, integer(), size=1) # column width
PTYP <- readBin(sav, integer(), size=1) # format type
IGNORE <- readChar(sav, 1) # ignore 4th byte, always 0
# construct mnemonic with width (and dec.digits if non zero)
if (PDEC > 0) {
PRINTFMT <- substr(paste(ftype[PTYP], PWID,'.', PDEC, " ", sep=""),1,10)
} else {
PRINTFMT <- substr(paste(ftype[PTYP], PWID, " ", sep=""),1,10)
}
# !!!BUG: 'ftype[[PTYP]]' replaced by 'ftype[PTYP]'. Reason: ftype is not a list but a vector
# read write format code
# WRITEFMT <- readBin(sav, integer())
WDEC <- readBin(sav, integer(), size=1) # decimal places
WWID <- readBin(sav, integer(), size=1) # column width
WTYP <- readBin(sav, integer(), size=1) # format type
IGNORE <- readChar(sav, 1) # ignore 4th byte, always 0
# read varname
VARNAME <- readChar(sav, 8)
# read label length and label only if a label exists
VARLABEL <- ""
if (HASLABEL == 1) {
# read label length
LABELLEN <- readBin(sav, integer())
# round label len up to nearest multiple of 4 bytes
if (LABELLEN %% 4 != 0) {
LABELLEN <- 4 * ((LABELLEN %/% 4)+1)
}
# read label
VARLABEL <- readChar(sav, LABELLEN)
}
# read missing values only if present
MISSING1 <- NA
MISSING2 <- NA
MISSING3 <- NA
if (NMISSING != 0) {
# read each missing value (double)
# NMISSING negative means values are a range
if (abs(NMISSING) >= 1) {
MISSING1 <- readBin(sav, double())
}
if (abs(NMISSING) >= 2) {
MISSING2 <- readBin(sav, double())
}
if (abs(NMISSING) >= 3) {
MISSING3 <- readBin(sav, double())
}
}
result <- NULL
result <- c(result, VARNAME)
result <- c(result, PRINTFMT)
result <- c(result, VARLABEL)
result <- c(result, NMISSING)
result <- c(result, MISSING1)
result <- c(result, MISSING2)
result <- c(result, MISSING3)
return(result)
} else { # if TYPECODE is -1, record is a continuation of a string var
# read and ignore the next 24 bytes
IGNORE <- readChar(sav, 24)
return()
}
}
# check file signature, then read & ignore rest of fixed portion of header
if (readChar(sav, 4) != "$FL2") {
print("This file does not appear to be an SPSS SAV file.")
return()
}
IGNORE <- readChar(sav, 172)
version <- substr(IGNORE, 1, 60)
version <- unlist(strsplit(version," "))
version <- grep("^[0-9]+\\.[0-9]+\\.[0-9]+$", version, value=TRUE) ## XX.XX.XX
version <- as.double(unlist(strsplit(version,"\\."))[1])
if (length(version)==0) version<-11 #
# process all variable definitions, building up the following vectors
varname <- NULL
printfmt <- NULL
varlabel <- NULL
nmiss <- NULL
missing1 <- NULL
missing2 <- NULL
missing3 <- NULL
longname <- NULL # data present for spss version >= 12...
rectype <- readBin(sav, integer())
while (rectype == 2) {
v <- variable_record()
if (length(v) != 0) {
varname <- c(varname,v[1])
printfmt <- c(printfmt,v[2])
varlabel <- c(varlabel,v[3])
nmiss <- c(nmiss, as.integer(v[4]))
missing1 <- c(missing1, v[5])
missing2 <- c(missing2, v[6])
missing3 <- c(missing3, v[7])
}
rectype <- readBin(sav, integer())
}
index<-grep("-",printfmt)
aux<-as.double(unlist(lapply(strsplit(printfmt[index],"-"),function(x) x[2])))
printfmt[index]<-paste("A",256-aux,sep="")
if (version>=12){
# >>>
#cat('rectype', rectype, '\n')
# check for a value label set / variable index pair (rectypes 3 & 4)
while (rectype==3) {
elemcount= readBin(sav, integer()) # number of labels defined
while (elemcount > 0) {
#cat(' 3:', elemcount)
elemcount <- elemcount - 1
IGNORE <- readChar(sav, 8)
elemsize <- (readBin(sav, integer(), size=1) %/% 8)
IGNORE <- readChar(sav, 7)
while (elemsize > 0) { # may need to skip extra words if labelstring longer than 7
elemsize <- elemsize - 1
IGNORE <- readChar(sav, 8)
}
}
rectype <- readBin(sav, integer())
elemcount <- readBin(sav, integer())
if (rectype != 4) {
stop('\n\nEXPECTED RECORD TYPE 4 NOT FOUND.\n')
} else {
while (elemcount > 0) {
#cat(' 4:', elemcount)
elemcount <- elemcount - 1
IGNORE <- readChar(sav, 4)
}
rectype <- readBin(sav, integer())
}
#cat('\nNext rectype', rectype, '\n')
}
# Check for presence of Type 7 records.
# Type 7 records allow later versions of SPSS to write files containing
# dictionary information that earlier releases do not expect. These records
# consist of 4 integers followed by an array of data elements, the
# 4 integers provide (in this order):
# Record Type Code (7)
# Subtype code
# Data element length (eg. 1=char, 4=integer, 8=double, etc)
# Number of elements of that length which follow
# Data array of indicated length (meaning depending on Subtype)
#
# Specifically we look for a record of Type 7, SubType 13 (SPSS version >= 12).
# It contains a string of the form SHORTNAME=LongName<TAB> SHORTNAME=LongName<TAB> ...
# In other words the equivalent long variable names (allowing mixed case)
# associated with the short uppercase only names specified earlier (Type 2 recs)
#
while (rectype == 7) {
subtype <- readBin(sav, integer())
elemsize <- readBin(sav, integer())
elemcount<- readBin(sav, integer())
# save long var name info to vector 'longname', ie the text following an
# "=" sign, up to but not including a TAB char. Note that we DO NOT save the
# shortname part.
if (subtype == 13 && elemsize==1 ) {
TEMP <- NULL
while (elemcount > 0) {
elemcount <- elemcount-1
ch <- readChar(sav, 1)
if (ch != "=" && ch != "\t") {
TEMP<- paste(TEMP,ch,sep="")
}
if (ch == "=") { # = sign indicates start of long name value
TEMP <- NULL
}
if (ch == '\t') { # Tab indicates end of long name
longname <- c(longname, TEMP) #
TEMP <- NULL
}
}
longname <- c(longname, TEMP)
} else {
IGNORE <- readChar(sav, (elemsize*elemcount))
}
rectype <- readBin(sav, integer())
}
} else longname=varname
varname <- gsub(" ","",varname)
printfmt <- gsub(" ","",printfmt)
longname <- gsub(" ","",longname)
varlabel<-trim(varlabel)
dict <- cbind(varname,printfmt,nmiss,missing1,missing2, missing3, varlabel, longname)
dict <- apply(dict,2,as.character)
close(sav)
return(dict)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.