#' @title Read in sas datasets
#' @name sas.get
#'
#' @import chron
#' @import foreign
#' @export
## $Id$
sas.get <-
function(libraryName,
member,
variables = character(0),
ifs = character(0),
format.library = libraryName,
id,
dates. = c("sas","yymmdd","yearfrac","yearfrac2"),
keep.log = TRUE,
log.file = "_temp_.log",
macro = sas.get.macro,
data.frame.out = existsFunction("data.frame"),
clean.up = FALSE,
quiet = FALSE,
temp = tempfile("SaS"),
formats=TRUE,
recode=formats,
special.miss=FALSE,
sasprog="sas",
as.is=.5,
check.unique.id=TRUE,
force.single=FALSE,
pos,
uncompress=FALSE,
defaultencoding="latin1",
var.case="lower")
{
if(force.single) stop('force.single does not work under R')
dates. <- match.arg(dates.)
fexists <- function(name) {
w <- file.exists(name)
attr(w, 'which') <- name[w]
w
}
file.is.dir <- function(name) {
isdir <- file.info(name)$isdir
isdir && !is.na(isdir)
}
file.is.readable <- function(name) file.access(name,4)==0
fileShow <- function(x) file.show(x)
if(recode) formats <- TRUE
if(missing(formats) || formats) {
## ***** Next line begins mod from Mike Kattan edits 11 Sep 97
## Redone FEH 22Oct00
no.format <- all(!fexists(file.path(format.library,
c('formats.sc2','formats.sct','formats.sct01','formats.sas7bcat'))))
if(no.format) {
if((!missing(formats) && formats) || (!missing(recode) && recode))
warning(paste(paste(format.library,
"/formats.sc? or formats.sas7bcat",sep = ""),
" not found. Formatting ignored. \n"))
formats <- recode <- FALSE
}
## ***** End Mike Kattan edits 11 Sep 97
}
## 5 Changes here from Claudie Berger <claudie@osteo1.ri.mgh.mcgill.ca> 19feb00
## Allows work on sas v7.
sasin <- paste(temp, ".3.sas", sep = "")
sasout1 <- paste(temp, ".1.sas", sep = "")
sasout2 <- paste(temp, ".2.sas", sep = "")
sasout3 <- paste(temp, ".4.sas", sep = "")
sasout4 <- paste(temp, ".5.sas", sep = "")
nvariables <- length(variables)
if(nvariables>0) {
if(any(jdup <- duplicated(variables)))
stop(paste("duplicate variables requested: ", variables[jdup]))
}
varstring <- paste(variables, collapse = "\n ")
ifs <- paste("'",paste(ifs, collapse = ";\n "),"'",sep="")
if(length(sasin) != 1)
stop("Illegal temporary file name")
temp.files <- c(sasin, sasout1, sasout2, sasout3, sasout4)
if(!keep.log)
temp.files <- c(temp.files, log.file)
if(clean.up)
on.exit(unlink(temp.files))
##on.exit(sys(paste("rm -f", paste(temp.files, collapse = " "))))
## 4oct03
if(missing(member))
stop("SAS member name is required")
if(missing(libraryName))
stop("SAS library name is required")
## Encoding added by Reinhold Koch 24Jan14 <reinhold.koch@roche.com>
cat("%LET DEFAULTE=", defaultencoding, ";\n", sep="", file=sasin)
cat(macro, sep="\n", file=sasin, append=TRUE)
sasds.suffix <- c('sd2','sd7','ssd01','ssd02','ssd03','ssd04','sas7bdat')
## 22Oct00
if(libraryName == "") libraryName <- "."
if(!file.is.dir(libraryName))
stop(paste(sep = "", "library, \"", libraryName,
"\", is not a directory"))
unix.file <- file.path(libraryName, paste(member, sasds.suffix, sep="."))
if(uncompress) {
if(any(fe <- fexists(paste(unix.file,".gz",sep=""))))
system(paste("gunzip ", attr(fe,'which'),'.gz',sep=''))
else if(any(fe <- fexists(paste(unix.file,".Z",sep=""))))
system(paste("uncompress ",attr(fe,'which'),'.Z',sep=''))
}
if(!any(fe <- fexists(unix.file))) {
stop(paste(sep = "", "Unix file, \"",
paste(unix.file,collapse=' '),
"\", does not exist"))
} else {
file.name <- attr(fe,'which')
if(!file.is.readable(file.name)) {
stop(paste(sep = "",
"You do not have read permission for Unix file, \"",
file.name, "\"")) # 22Oct00
}
}
cat("libname temp '", libraryName, "';\n", file = sasin, append = TRUE,
sep = "")
## format.library should contain formats.sct containing user defined
## formats used by this dataset. It must be present.
cat("libname library '", format.library, "';\n", file = sasin,
append = TRUE, sep = "")
cat("%sas_get(temp.", member, ",\n",
" ", sasout1, ",\n",
" ", sasout2, ",\n",
" ", sasout3, ",\n",
" ", sasout4, ",\n",
" dates=", dates., ",\n",
" vars=", varstring, ",\n",
" ifs=", ifs, ",\n",
" formats=", as.integer(formats), "\n,",
" specmiss=", as.integer(special.miss), ");\n",
file = sasin, append = TRUE, sep = "")
status <- system(paste(shQuote(sasprog), shQuote(sasin), "-log",
shQuote(log.file)), intern=FALSE)
## 24nov03 added output=F
if(status != 0) {
if(!quiet && fexists(log.file)) fileShow(log.file) ## 4oct03
stop(paste("SAS job failed with status", status))
}
##
## Read in the variable information
##
if(!(fexists(sasout1) && fexists(sasout2))) {
if(!quiet)
fileShow(log.file) ## 4oct03
stop("SAS output files not found")
}
vars <-
scan(sasout1, list(name = "", type = 0, length = 0,
format = "", label = "", n = 0),
multi.line = FALSE, sep = "\022",
flush=TRUE, comment.char='', quote='')
## Thanks Don MacQueen for scan fix for R
nvar <- length(vars$name)
if(nvar == 0) {
if(!quiet)
fileShow(log.file) ## 4oct03
stop("First SAS output is empty")
}
nrow <- vars$n[1] #n is the same for each variable
## Read the data in
## We try to be clever about the variable type. If SAS is character
## use char of course. If is numeric and length >4, use double. If
## numeric and length <4, use single. We could also use the format to
## choose further, if it consists of a number followed by a "."
## can we safely assume integer.
##
type <- ifelse(vars$type == 2, "character(nrow)",
ifelse(force.single, ##28Mar01
"single(nrow)", "double(nrow)"))
##BILL: I corrected the macro so the following isn't needed:
## get rid of trailing blank on names
## vars$name <- unix("sed 's/ $//'", vars$name)
inlist <- paste("\"", vars$name, "\"=", type,
sep = "", collapse = ", ")
inlist <- parse(text = paste("list(", inlist, ")"))
## Inlist would now be the size of the final data structure, if I had
## evaluated it.
## Read the data
ds <-
scan(sasout2, eval(inlist), sep = "\022", multi.line = FALSE,
flush=TRUE, comment.char='', quote='')
if(length(ds) < nvariables) {
m <- variables[is.na(match(variables, names(ds)))]
if(length(m) > 0) {
warning(paste(length(m),
"requested variables did not exist:",
paste("\"", m, "\"", sep = "", collapse = " "),
"\n\t(use sas.contents())"))
}
}
format <- vars$format
format[format=='$'] <- ' ' # 1Mar00
label <- vars$label
name <- vars$name
esasout3 <- formats && fexists(sasout3) #added formats && 1/20/93
if(recode && !esasout3) recode <- FALSE
FORMATS <- NULL
if(formats && esasout3) {
FORMATS <- dget(sasout3)
if(length(FORMATS)==0) {
FORMATS <- NULL;
recode <- FALSE
}
}
smiss <- NULL
if(special.miss && fexists(sasout4))
smiss <-
scan(sasout4, list(name="", code="", obs=integer(1)),
multi.line=FALSE, flush=TRUE, sep="\022",
comment.char='', quote='')
sasdateform <- c("date","mmddyy","yymmdd","ddmmyy","yyq","monyy",
"julian","qtr","weekdate","weekdatx","weekday","month")
dateform <-
list(as.name("ddmmmyy"),"m/d/y","y/m/d","d/m/y",as.name("ddmmmyy"),
"mon year",as.name("ddmmmyy"),"mon",as.name("ddmmmyy"),
as.name("ddmmmyy"), as.name("ddmmmyy"),"m")
sastimeform <- c("hhmm","hour","mmss","time")
timeform <- c("h:m","h","m:s","h:m:s")
sasdatetimeform <- c("datetime","tod")
datetimeform <- list(list(as.name("ddmmmyy"),"h:m:s"), c("m/d/y"," "))
z <- "%02d%b%Y"
dateform4 <-
c(z,"%02m/%02d/%Y","%Y/%02m/%02d","%02d/%02m/%Y", z,"%02m %Y",
z,"%02m", z, z, z,"%02m")
timeform4 <- c("%02H:%02M","%02H","%02M:%02S","%02H:%02M:%02S")
datetimeform4 <- c("%02d%b%Y %02h:%02m:%02s","%02m/%02d/%Y")
## Don MacQueen
days.to.adj <- as.numeric(difftime(ISOdate(1970,1,1,0,0,0) ,
ISOdate(1960,1,1,0,0,0), 'days'))
secs.to.adj <- days.to.adj*24*60*60
for(i in 1:nvar) {
atr <- list()
dsi <- ds[[i]]
fname <- format[i]
rec <- FALSE
if(fname!=" ") {
ff <- fname
if(dates.=="sas" & (m <- match(fname,sasdateform,0)) >0) {
##look for partial dates
dd <- dsi-floor(dsi)
ddn <- !is.na(dd)
if(any(ddn) && any(dd[ddn]!=0)) {
ll <- 1:length(dd)
atr$partial.date <-
list(month=ll[dd==.5],day=ll[dd==.25],both=ll[dd==.75])
atr$imputed <- ll[dd!=0]
dsi <- floor(dsi)
}
dsi <- importConvertDateTime(dsi, 'date', 'sas',
form=dateform[m])
if(length(atr$imputed))
attr(dsi,'class') <- c("impute",attr(dsi,'class'))
ff <- NULL
} else {
if((m <- match(fname,sastimeform,0)) >0) {
dsi <- importConvertDateTime(dsi, 'time', 'sas',
form=timeform[m])
ff <- NULL
} else if((m <- match(fname,sasdatetimeform,0))>0) {
dsi <- importConvertDateTime(dsi, 'datetime', 'sas',
form=datetimeform[m])
ff <- NULL
}
}
atr$format <- ff
if(recode & length(g <- FORMATS[[fname]])) {
labs <- g$labels
if(!is.logical(recode)) {
labs <- if(recode==1) paste(g$values,":",labs,sep="")
else paste(labs,"(",g$values,")",sep="")
}
dsi <- factor(dsi, g$values, labs)
atr$sas.codes <- g$values
rec <- TRUE
}
}
if(data.frame.out && !rec && vars$type[i]==2 &&
((is.logical(as.is) && !as.is) ||
(is.numeric(as.is) && length(unique(dsi)) < as.is*length(dsi))))
dsi <- factor(dsi, exclude="") #exclude added 5Mar93
## For data frames, char. var usually factors
if(label[i]!=" ")
label(dsi) <- label[i] #atr$label <- label[i]
if(length(smiss$name)) {
j <- smiss$name==name[i]
if(any(j)) {
atr$special.miss <-
list(codes=smiss$code[j],obs=smiss$obs[j])
attr(dsi,'class') <- c("special.miss",attr(dsi,'class'))
}
}
if(!is.null(atr))
attributes(dsi) <- c(attributes(dsi),atr)
if(missing(pos))
ds[[i]] <- dsi
else
assign(name[i], dsi, pos=pos)
}
if(!missing(pos))
return(structure(pos, class="pos"))
atr <- list()
if(missing(id)) {
if(data.frame.out)
atr$row.names <- as.character(1:nrow)
} else {
idname <- id
jj <- match(idname, names(ds), 0)
if(any(jj==0))
stop(paste("id variable(s) not in dataset:",
paste(idname[jj==0],collapse=" ")))
if(length(idname)==1) {
id <- ds[[idname]] #Need since not use data.frame
} else {
id <- as.character(ds[[idname[1]]])
for(jj in 2:length(idname))
id <- paste(id, as.character(ds[[idname[jj]]]))
}
if(check.unique.id) {
dup <- duplicated(id)
if(any(dup))
warning(paste("duplicate IDs:",
paste(id[dup], collapse=" ")))
}
if(data.frame.out)
atr$row.names <- as.character(id)
else atr$id <- id
}
if(var.case=="lower"){
names(ds)=tolower(names(ds))
}
if(var.case=="upper"){
names(ds)=toupper(names(ds))
}
if(!is.null(FORMATS))
atr$formats <- FORMATS
if(data.frame.out)
atr$class <- "data.frame"
attributes(ds) <- c(attributes(ds),atr)
ds
}
importConvertDateTime <-
function(x, type=c('date','time','datetime'),
input=c('sas','spss','dataload'), form) {
type <- match.arg(type)
input <- match.arg(input)
if(input != 'sas' && type != 'date')
stop('only date variables are support for spss, dataload')
adjdays <- c(sas=3653, spss=141428, dataload=135080)[input]
## 1970-1-1 minus 1960-1-1, 1582-10-14, or 1600-3-1
origin <- c(sas='1960-01-01', spss='1582-10-14', dataload='1600-03-01')[input]
if(input=='spss') x <- x/86400
switch(type,
date = structure(x - adjdays, class='Date'),
time = {
## Don MacQueen 3Apr02
z <- structure(x, class=c('POSIXt','POSIXct'))
f <- format(z, tz='GMT')
z <- as.POSIXct(format(z, tz='GMT'), tz='')
structure(z, class=c('timePOSIXt','POSIXt','POSIXct'))},
datetime = as.POSIXct(x, origin=origin, tz='GMT'))
# chron((x - adjdays*86400)/86400,
# out.format=c(dates='day mon year', times='h:m:s'))})
}
## Don MacQueen 3Apr02
## slightly modified copy of format.POSIXct() from R base
format.timePOSIXt <- function (x, format = "%H:%M:%S", tz = "",
usetz = FALSE, ...) {
if (!inherits(x, c("timePOSIXt","POSIXct"))) stop("wrong class")
class(x) <- class(x)[-1]
structure(format.POSIXlt(as.POSIXlt(x, tz), format, usetz, ...),
names = names(x))
}
print.timePOSIXt <- function(x, ...) print(format(x, ...))
##if(!.R.) {
## Output format routine needed by chron for usual SAS date format
ddmmmyy <- function(x)
{
y <- month.day.year(trunc(unclass(x)), attr(x,"origin"))
yr <- y$year
m <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct",
"Nov","Dec")[y$month]
ifelse(yr<1900 | yr>=2000, paste(y$day,m,yr,sep=""),
paste(y$day,m,yr-1900,sep=""))
}
## Functions to handle special.miss class
is.special.miss <- function(x, code)
{
sm <- attr(x, "special.miss")
if(!length(sm))
return(rep(FALSE, length(x)))
if(missing(code)) {
z <- rep(FALSE, length(x))
z[sm$obs] <- TRUE
} else {
z <- rep(FALSE, length(x))
z[sm$obs[sm$codes==code]] <- TRUE
}
z
}
"[.special.miss" <- function(x, ..., drop=FALSE)
{
ats <- attributes(x)
ats$dimnames <- NULL
ats$dim <- NULL
ats$names <- NULL
attr(x,'class') <- NULL
y <- x[..., drop = drop]
if(length(y) == 0)
return(y)
k <- seq(along=x)
names(k) <- names(x)
k <- k[...]
attributes(y) <- c(attributes(y), ats)
smiss <- attr(y, "special.miss")
codes <- rep("ZZ",length(x))
codes[smiss$obs] <- smiss$codes
codes <- codes[...]
which <- codes!="ZZ"
if(sum(which)) attr(y,"special.miss") <-
list(obs=seq(along=k)[codes!="ZZ"],codes=codes[codes!="ZZ"])
else {
attr(y,"special.miss") <- NULL
attr(y,'class') <- attr(y,'class')[attr(y,'class') != "special.miss"]
if(length(attr(y,'class'))==0)
attr(y,'class') <- NULL
}
y
}
format.special.miss <- function(x, ...)
{
w <-
if(is.factor(x))
as.character(x)
else {
cl <- attr(x,'class');
cl <- cl[cl!="special.miss"]
if(length(cl)) {
attr(x,'class') <- cl;
format(x, ...)
} else format.default(x, ...)
}
sm <- attr(x, "special.miss")
names(w) <- names(x)
if(!length(sm))
return(w)
w[sm$obs] <- sm$codes
attr(w,"label") <- attr(w,"special.miss") <- attr(w,"class") <- NULL
w
}
print.special.miss <- function(x, ...)
{
sm <- attr(x, "special.miss")
if(!length(sm)) {
print.default(x)
return(invisible())
}
w <- format.special.miss(x)
print.default(w, quote=FALSE)
invisible()
}
sas.codes <- function(object) attr(object, "sas.codes")
code.levels <- function(object) {
if(length(cod <- attr(object,"sas.codes")))
levels(object) <- paste(cod,":",levels(object),sep="")
object
}
as.data.frame.special.miss <- function(x, row.names = NULL, optional = FALSE, ...)
{
nrows <- length(x)
if(is.null(row.names)) {
## the next line is not needed for the 1993 version of data.class and is
## included for compatibility with 1992 version
if(length(row.names <- names(x)) == nrows &&
!any(duplicated(row.names))) {
}
else if(optional)
row.names <- character(nrows)
else row.names <- as.character(1:nrows)
}
value <- list(x)
if(!optional)
names(value) <- deparse(substitute(x))[[1]]
structure(value, row.names=row.names, class='data.frame')
}
## val{nval}=compress(value)||"" was =value 23mar04
sas.get.macro <-
c("/* Macro sas_get (modified by F. Harrell 30Jan90, Bill Dunlap Dec90, FH Mar92,",
"\t\t\tFH Apr95 (extend LENGTH smiss))",
" Sets up for conversion of SAS dataset to S dataset.",
" Arguments:", "\tdataset - name of SAS dataset",
"\ttemp1\t- Name of temporary dataset to contain data dictionar (unquoted)",
"\t\t default=/tmp/file.1",
"\ttemp2\t- Name of temporary dataset to contain ASCII version of SAS",
"\t\t dataset (unquoted)", "\t\t default=/tmp/file.2",
"\ttemp3 - Name of temporary dataset to contain ASCII file with S",
"\t\t program to store format values and labels",
"\ttemp4 - Name of temporary dataset to contain ASCII file with",
"\t\t locations of special missing values",
"\tdates\t- SAS to store date variables in SAS format ( # days from 1/1/60)",
"\t\t (default)",
"\t\t- YEARFRAC to store as days from 1/1/1900, divided by 365.25",
"\t\t- YEARFRAC2 to store as year + fraction of current year",
"\t\t- YYMMDD to store as numeric YYMMDD",
"\tvars - list of variable in dataset that you want returned to S",
" (unquoted, separate variable names with spaces) If empty,",
" then return all variables.",
" ifs - sequence of SAS subsetting if statements, (unquoted,",
" separated by semicolons).",
"\tformats - 0 (default) - do not create file on temp3 containing S",
"\t\t statements to store format values and labels, 1 do create",
"\tspecmiss- 0 (default). Set to 1 to write a data file on temp4 with",
"\t\t the fields: variable name, special missing value code,",
"\t\t observation number",
"\tdefencod - default encoding of dataset if it does not specify",
" */",
"%macro sas_get(dataset, temp1, temp2, temp3, temp4, dates=SAS, vars=, ifs=, ",
"\tformats=0, specmiss=0, defencod=&DEFAULTE);",
"OPTIONS NOFMTERR;",
"%LET DSID=%SYSFUNC(open(&dataset,i));",
"%LET ENCODE=%SCAN(%SYSFUNC(ATTRC(&DSID,ENCODING)),1);",
"%IF &ENCODE=Default %THEN %LET dataset=&dataset(encoding=&defencod);",
"%IF %QUOTE(&temp1)= %THEN %LET temp1=/tmp/file.1;",
"%IF %QUOTE(&temp2)= %THEN %LET temp2=/tmp/file.2;",
"%IF %QUOTE(&temp3)= %THEN %LET temp3=/tmp/file.3;",
"%IF %QUOTE(&temp4)= %THEN %LET temp4=/tmp/file.4;",
## Next line had %QUOTE(&ifs),1,\"'\" 31oct02
"%LET dates=%UPCASE(&dates);", "%LET ifs=%SCAN(%QUOTE(&ifs),1,'');",
"%LET _s_=_sav_;",
"/* BILL: Can these 2 subsets be combined into one pass of the data? -Frank*/",
"/* Subset by observation first */", "%IF %QUOTE(&ifs)^= %THEN %DO;",
" data _osub_ ;", " set &dataset ;", " &ifs ;",
" %LET dataset=_osub_ ;", " %END;", "/* Then subset by variable */",
"%IF &vars^= %THEN %DO;", " data _vsub_ ;", " set &dataset ;",
" keep &vars ;", " %LET dataset=_vsub_ ;", " %END;",
"proc contents data=&dataset out=&_s_(KEEP=name type length label format nobs ",
" varnum) noprint; ", "%IF &formats=1 %THEN %DO;",
" PROC FORMAT LIBRARY=LIBRARY CNTLOUT=f(KEEP=fmtname type start end label);",
" DATA f; SET f; RETAIN n 0; n+1; IF type=\"C\" THEN fmtname=\"$\"||fmtname;",
" PROC SORT DATA=f OUT=f(DROP=n); BY fmtname n; ",
" *Sort by n instead of start for numerics so 13 sorts after 2;",
" *Dont consider formats containing ANY range of values;",
" *Dont consider formats that dont have at least one non-missing (if",
" numeric) starting value. This gets rid of formats that are used",
" only to label special missing values;",
" DATA f2; SET f; BY fmtname; RETAIN anyrange 0 anynmiss 0;",
" IF FIRST.fmtname THEN DO;anyrange=0;anynmiss=0;END;",
" IF start^=end THEN anyrange=1;",
" IF TYPE=\"C\" THEN anynmiss=1; ",
" ELSE IF (start+0)>. THEN anynmiss=1;",
" IF LAST.fmtname & anynmiss & ^anyrange THEN OUTPUT; KEEP fmtname;",
" DATA f; MERGE f f2(IN=in2); BY fmtname; IF in2;",
" IF TYPE=\"N\" THEN DO; IF (start+0)>.; *S cannot handle special missings;",
" END;", " RENAME fmtname=format start=value; DROP end;",
" PROC SORT DATA=&_s_(KEEP=format) OUT=sform; BY format;",
" DATA sform; SET sform; BY format; IF LAST.format;",
" DATA f; MERGE sform(IN=in1) f(IN=in2); BY format; ",
" IF in1 & in2;",
" *This keeps formats ever used by any variable;",
" DATA _NULL_; SET f END=_eof_; BY format;",
" ARRAY val{*} $ 16 val1-val500; ARRAY lab{*} $ 40 lab1-lab500; ",
" RETAIN done 0 nform 0 nval 0 val1-val500 \" \" lab1-lab500 \" \" bk -1; ",
" FILE \"&temp3\" LRECL=4096;", " IF FIRST.format THEN DO;",
" IF ^done THEN PUT 'list(' @@; done=1;",
" nform=nform+1; nval=0;",
" format=TRANSLATE(format,\".abcdefghijklmnopqrstuvwxyz\",",
" \"_ABCDEFGHIJKLMNOPQRSTUVWXYZ\");",
" IF nform=1 THEN PUT '\"' format +bk '\"=list(' @@;",
" ELSE PUT ', \"' format +bk '\"=list(' @@;", " END;",
" nval=nval+1; ",
" IF nval>500 THEN DO; ERROR \">500 format values not allowed\";ABORT ABEND;",
" END;", ' val{nval}=compress(value)||""; lab{nval}=label; ',
" IF LAST.format THEN DO;", " PUT \"values=c(\" @@; ",
" DO i=1 TO nval; IF i>1 THEN PUT \",\" @@;",
" IF type=\"N\" THEN PUT val{i} +bk @@;",
" ELSE PUT '\"' val{i} +bk '\"' @@;", " END;",
" PUT \"),labels=c(\" @@;",
" DO i=1 TO nval; IF i>1 THEN PUT \",\" @@;",
" PUT '\"' lab{i} +bk '\"' @@;", " END;",
" PUT \"))\";", " END;",
" IF _eof_ THEN PUT \")\";", " %END;",
"PROC SORT DATA=&_s_;BY varnum;", "data _null_;", " set &_s_ end=eof;",
" FILE \"&temp1\"; RETAIN _bk_ -1;", " if _n_ = 1 then do;",
"%IF &specmiss=0 %THEN %LET ofile=_NULL_; ",
"%ELSE %LET ofile=smiss(KEEP=vname val obs);",
" put \"data &ofile; set &dataset end=eof;\";",
" put ' file \"&temp2\" RECFM=D LRECL=4096;';",
" put \" retain __delim 18 _bk_ -1 obs 0; LENGTH _xx_ $ 20 obs 5;obs+1; \";",
"%IF &specmiss=1 %THEN %DO;",
" put \"LENGTH vname $ 8 val $ 1;\"; %END;", " end;",
" IF type=2 THEN DO;", " PUT 'FORMAT ' name ';' @;",
" PUT 'IF ' name '=\" \" THEN PUT __delim IB1. @;';",
"/* $char added F.H. 24Mar92, dropped +_bk_ before __delim */",
"/* $CHAR. removed FEH 2Aug92, added null FORMAT above, added back +_bk_ */",
" PUT 'ELSE PUT ' name '+_bk_ __delim IB1. @;';", " END;",
" ELSE DO; ", " PUT 'IF ' name '<=.Z THEN _xx_=\"NA\";' @;",
" PUT 'ELSE _xx_=LEFT(PUT(' @;", " format=UPCASE(format);",
" IF format=\"DATE\"|format=\"MMDDYY\"|format=\"YYMMDD\"|",
"format=\"DDMMYY\"|format=\"YYQ\"|format=\"MONYY\"|format=\"JULIAN\" THEN DO;",
" %IF &dates=SAS %THEN", " PUT name \",BEST18.)\";",
" %ELSE %IF &dates=YYMMDD %THEN", " PUT name \",YYMMDD6.)\";",
" %ELSE %IF &dates=YEARFRAC %THEN",
" PUT \"(\" name \"-MDY(1,1,1900))/365.25,7.3)\";",
" %ELSE %IF &dates=YEARFRAC2 %THEN %DO;",
" PUT \"YEAR(\" name \")-1900+(\" name \"-MDY(1,1,YEAR(\" name \")))/\" @;",
" PUT \"(MDY(12,31,YEAR(\" name \"))-MDY(1,1,YEAR(\" name \"))+1),7.3)\";",
" %END;", " ;", " END;\t",
" ELSE DO;PUT name \",BEST18.)\" @;END;",
" PUT '); PUT _xx_ +_bk_ __delim IB1. @;'; *Added +_bk_ 2Aug92;",
"%IF &specmiss=1 %THEN %DO;",
" put 'IF .A<=' name '<=.Z THEN DO;",
" vname=\"' name +_bk_ '\"; val=put(' name ',1.); OUTPUT; END;';",
" %END;", " END;", "if eof then PUT 'PUT; RUN;';", "run;",
"%include \"&temp1\";", "data _null_; set &_s_;",
" retain __delim 18 _bk_ -1; ", " file \"&temp1\" LRECL=4096;",
" format=TRANSLATE(format,\".abcdefghijklmnopqrstuvwxyz\",",
" \"_ABCDEFGHIJKLMNOPQRSTUVWXYZ\");",
" put name +_bk_ __delim IB1. type +_bk_ __delim IB1. length +_bk_ __delim IB1.",
" format +_bk_ __delim IB1. label +_bk_ __delim IB1. nobs +_bk_ __delim IB1.;",
"run;", "%IF &specmiss=1 %THEN %DO;",
" PROC SORT DATA=smiss OUT=smiss;BY vname val obs;",
" DATA _NULL_; SET smiss;FILE \"&temp4\" RECFM=D LRECL=30;",
" RETAIN _bk_ -1 __delim 18;",
" vname=TRANSLATE(vname,\".abcdefghijklmnopqrstuvwxyz\",",
"\t\t \"_ABCDEFGHIJKLMNOPQRSTUVWXYZ\");",
" PUT vname +_bk_ __delim IB1. val +_bk_ __delim IB1. obs +_bk_ __delim IB1.;",
" RUN;", " %END;", "%mend sas_get;")
cleanup.import <-
function(obj, labels=NULL, lowernames=FALSE,
force.single=TRUE, force.numeric=TRUE,
rmnames=TRUE,
big=1e20, sasdict,
print=prod(dimobj) > 5e5,
datevars=NULL, datetimevars=NULL,
dateformat='%F', fixdates=c('none','year'),
charfactor=FALSE)
{
fixdates <- match.arg(fixdates)
nam <- names(obj)
dimobj <- dim(obj)
nv <- length(nam)
if(!missing(sasdict))
{
sasvname <- makeNames(sasdict$NAME)
if(any(w <- nam %nin% sasvname))
stop(paste('The following variables are not in sasdict:',
paste(nam[w],collapse=' ')))
saslabel <- structure(as.character(sasdict$LABEL),
names=as.character(sasvname))
labels <- saslabel[nam]
names(labels) <- NULL
}
if(length(labels) && length(labels) != dimobj[2])
stop('length of labels does not match number of variables')
if(lowernames)
names(obj) <- casefold(nam)
if(print)
cat(dimobj[2],'variables; Processing variable:')
for(i in 1:dimobj[2])
{
if(print) cat(i,'')
x <- obj[[i]];
modif <- FALSE
if(length(dim(x)))
next
if(rmnames)
{
if(length(attr(x,'names')))
{
attr(x,'names') <- NULL
modif <- TRUE
} else if(length(attr(x,'.Names')))
{
attr(x,'.Names') <- NULL
modif <- TRUE
}
}
if(length(attr(x,'Csingle'))) {
attr(x,'Csingle') <- NULL
modif <- TRUE
}
if(length(c(datevars,datetimevars)) &&
nam[i] %in% c(datevars,datetimevars) &&
!all(is.na(x))) {
if(!(is.factor(x) || is.character(x)))
stop(paste('variable',nam[i],
'must be a factor or character variable for date conversion'))
x <- as.character(x)
## trim leading and trailing white space
x <- sub('^[[:space:]]+','',sub('[[:space:]]+$','', x))
xt <- NULL
if(nam[i] %in% datetimevars) {
xt <- gsub('.* ([0-9][0-9]:[0-9][0-9]:[0-9][0-9])','\\1',x)
xtnna <- setdiff(xt, c('',' ','00:00:00'))
if(!length(xtnna)) xt <- NULL
x <- gsub(' [0-9][0-9]:[0-9][0-9]:[0-9][0-9]','',x)
}
if(fixdates != 'none') {
if(dateformat %nin% c('%F','%y-%m-%d','%m/%d/%y','%m/%d/%Y'))
stop('fixdates only supported for dateformat %F %y-%m-%d %m/%d/%y %m/%d/%Y')
x <- switch(dateformat,
'%F' =gsub('^([0-9]{2})-([0-9]{1,2})-([0-9]{1,2})', '20\\1-\\2-\\3',x),
'%y-%m-%d'=gsub('^[0-9]{2}([0-9]{2})-([0-9]{1,2})-([0-9]{1,2})', '\\1-\\2-\\3',x),
'%m/%d/%y'=gsub('^([0-9]{1,2})/([0-9]{1,2})/[0-9]{2}([0-9]{2})', '\\1/\\2/\\3',x),
'%m/%d/%Y'=gsub('^([0-9]{1,2})/([0-9]{1,2})/([0-9]{2})$','\\1/\\2/20\\3',x))
}
x <- if(length(xt)) {
require('chron')
cform <- if(dateformat=='%F') 'y-m-d'
else gsub('%','',tolower(dateformat))
chron(x, xt, format=c(dates=cform,times='h:m:s'))
}
else as.Date(x, format=dateformat)
modif <- TRUE
}
if(length(labels)) {
label(x) <- labels[i]
modif <- TRUE
}
if(force.numeric && length(lev <- levels(x))) {
if(all.is.numeric(lev)) {
labx <- attr(x,'label')
x <- as.numeric(as.character(x))
label(x) <- labx
modif <- TRUE
}
}
if(storage.mode(x) == 'double') {
xu <- unclass(x)
j <- is.infinite(xu) | is.nan(xu) | abs(xu) > big
if(any(j,na.rm=TRUE)) {
x[j] <- NA
modif <- TRUE
if(print)
cat('\n')
cat(sum(j,na.rm=TRUE),'infinite values set to NA for variable',
nam[i],'\n')
}
isdate <- testDateTime(x)
if(force.single && !isdate) {
allna <- all(is.na(x))
if(allna) {
storage.mode(x) <- 'integer'
modif <- TRUE
}
if(!allna) {
notfractional <- !any(floor(x) != x, na.rm=TRUE)
if(max(abs(x),na.rm=TRUE) <= (2^31-1) && notfractional) {
storage.mode(x) <- 'integer'
modif <- TRUE
}
}
}
}
if(charfactor && is.character(x)) {
if(length(unique(x)) < .5*length(x)) {
x <- sub(' +$', '', x) # remove trailing blanks
x <- factor(x, exclude='')
modif <- TRUE
}
}
if(modif) obj[[i]] <- x
NULL
}
if(print) cat('\n')
if(!missing(sasdict)) {
sasat <- sasdict[1,]
attributes(obj) <- c(attributes(obj),
sasds=as.character(sasat$MEMNAME),
sasdslabel=as.character(sasat$MEMLABEL))
}
obj
}
upData <- function(object, ...,
rename=NULL, drop=NULL, keep=NULL,
labels=NULL, units=NULL, levels=NULL,
force.single=TRUE, lowernames=FALSE, caplabels=FALSE,
moveUnits=FALSE, charfactor=FALSE, print=TRUE) {
upfirst <- function(txt) gsub("(\\w)(\\w*)", "\\U\\1\\L\\2", txt, perl=TRUE)
n <- nrow(object)
if(!length(n)) {
x <- object[[1]]
d <- dim(x)
n <- if(length(d)) d[1]
else length(x)
}
rnames <- row.names(object)
if(lowernames)
names(object) <- casefold(names(object))
no <- names(object)
if(print) cat('Input object size:\t',object.size(object),'bytes;\t',
length(no),'variables\n')
## The following is targeted at R workspaces exported from StatTransfer
al <- attr(object, 'var.labels')
if(length(al)) {
if(caplabels) al <- upfirst(al)
for(i in 1:length(no))
if(al[i] != '') label(object[[i]]) <- al[i]
attr(object, 'var.labels') <- NULL
if(missing(force.single)) force.single <- FALSE
} else if(caplabels) {
for(i in 1:length(no))
if(length(la <- attr(object[[i]], 'label')))
attr(object[[i]], 'label') <- upfirst(la)
}
if(moveUnits)
for(i in 1:length(no)) {
z <- object[[i]]
lab <- attr(z,'label')
if(!length(lab) || length(attr(z,'units')))
next
paren <- length(grep('\\(.*\\)',lab))
brack <- length(grep('\\[.*\\]',lab))
if(paren+brack == 0)
next
if(print) cat('Label for',no[i],'changed from',lab,'to ')
u <- if(paren)regexpr('\\(.*\\)',lab)
else regexpr('\\[.*\\]',lab)
len <- attr(u,'match.length')
un <- substring(lab, u+1, u+len-2)
lab <- substring(lab, 1, u-1)
if(substring(lab, nchar(lab), nchar(lab)) == ' ')
lab <- substring(lab, 1, nchar(lab)-1) # added 2nd char above 8jun03
if(print) cat(lab,'\n\tunits set to ',un,'\n',sep='')
attr(z,'label') <- lab
attr(z,'units') <- un
object[[i]] <- z
}
if(length(rename)) {
nr <- names(rename)
if(length(nr)==0 || any(nr==''))
stop('the list or vector specified in rename must specify variable names')
for(i in 1:length(rename)) {
if(nr[i] %nin% no)
stop(paste('unknown variable name:',nr[i]))
if(print) cat('Renamed variable\t', nr[i], '\tto', rename[[i]], '\n')
}
no[match(nr, no)] <- unlist(rename)
names(object) <- no
}
z <- substitute(list(...))
if(length(z) > 1) {
z <- z[-1]
vn <- names(z)
if(!length(vn) || any(vn==''))
stop('variables must all have names')
for(i in 1:length(z)) {
v <- vn[i]
if(v %in% no && print)
cat('Modified variable\t',v,'\n')
else {
if(print) cat('Added variable\t\t', v,'\n')
no <- c(no, v)
}
x <- eval(z[[i]], object)
d <- dim(x)
lx <- if(length(d))d[1]
else length(x)
if(lx != n) {
if(lx == 1)
warning(paste('length of ',v,
' is 1; will replicate this value.',sep=''))
else {
f <- find(v)
if(length(f) && print) cat('Variable',v,'found in',
paste(f,collapse=' '),'\n')
stop(paste('length of ',v,' (',lx, ')\n',
'does not match number of rows in object (',
n,')',sep=''))
}
}
## If x is factor and is all NA, user probably miscoded. Add
## msg.
if(is.factor(x) && all(is.na(x)))
warning(paste('Variable ',v,'is a factor with all values NA.\n',
'Check that the second argument to factor() matched the original levels.\n',
sep=''))
object[[v]] <- x
}
}
if(force.single) {
sm <- sapply(object, storage.mode)
if(any(sm=='double'))
for(i in 1:length(sm)) {
if(sm[i]=='double') {
x <- object[[i]]
if(testDateTime(x) || is.matrix(x))
next
if(all(is.na(x)))
storage.mode(object[[i]]) <- 'integer'
else {
notfractional <- !any(floor(x) != x, na.rm=TRUE)
if(notfractional && max(abs(x),na.rm=TRUE) <= (2^31-1))
storage.mode(object[[i]]) <- 'integer'
}
}
}
}
if(charfactor) {
g <- function(z) {
if(!is.character(z)) return(FALSE)
length(unique(z)) < .5*length(z)
}
mfact <- sapply(object, g)
if(any(mfact))
for(i in (1:length(mfact))[mfact]) {
x <- sub(' +$', '', object[[i]]) # remove trailing blanks
object[[i]] <- factor(x, exclude='')
}
}
if(length(drop) && length(keep)) stop('cannot specify both drop and keep')
if(length(drop)) {
if(print) {
if(length(drop) == 1)
cat('Dropped variable\t',drop,'\n')
else
cat('Dropped variables\t', paste(drop,collapse=','), '\n')
}
s <- drop %nin% no
if(any(s))
warning(paste('The following variables in drop= are not in object:',
paste(drop[s], collapse=' ')))
no <- no[no %nin% drop]
object <- object[no]
}
if(length(keep)) {
if(print) {
if(length(keep) == 1)
cat('Kept variable\t', keep, '\n')
else
cat('Kept variables\t', paste(keep, collapse=','), '\n')
}
s <- keep %nin% no
if(any(s))
warning(paste('The following variables in keep= are not in object:',
paste(keep[s], collapse=' ')))
no <- no[no %in% keep]
object <- object[no]
}
if(length(levels)) {
if(!is.list(levels))
stop('levels must be a list')
nl <- names(levels)
s <- nl %nin% no
if(any(s)) {
warning(paste('The following variables in levels= are not in object:',
paste(nl[s], collapse=' ')))
nl <- nl[! s]
}
for(n in nl) {
if(! is.factor(object[[n]]))
object[[n]] <- as.factor(object[[n]])
levels(object[[n]]) <- levels[[n]]
## levels[[nn]] will usually be a list; S+ invokes merge.levels
}
}
if(length(labels)) {
nl <- names(labels)
if(!length(nl)) stop('elements of labels were unnamed')
s <- nl %nin% no
if(any(s)) {
warning(paste('The following variables in labels= are not in object:',
paste(nl[s], collapse=' ')))
nl <- nl[!s]
}
for(n in nl)
label(object[[n]]) <- labels[[n]]
}
if(length(units)) {
nu <- names(units)
s <- nu %nin% no
if(any(s)) {
warning(paste('The following variables in units= are not in object:',
paste(nu[s], collapse=' ')))
nu <- nu[!s]
}
for(n in nu)
attr(object[[n]],'units') <- units[[n]]
}
if(print) cat('New object size:\t',object.size(object),'bytes;\t',
length(no),'variables\n')
object
}
dataframeReduce <- function(data, fracmiss=1, maxlevels=NULL,
minprev=0, print=TRUE)
{
g <- function(x, fracmiss, maxlevels, minprev)
{
if(is.matrix(x))
{
f <- mean(is.na(x %*% rep(1,ncol(x))))
return(if(f > fracmiss)
paste('fraction missing>',fracmiss,sep='') else '')
}
h <- function(a, b)
if(a=='') b else if(b=='') a else paste(a, b, sep=';')
f <- mean(is.na(x))
x <- x[!is.na(x)]
n <- length(x)
r <- if(f > fracmiss)
paste('fraction missing>',fracmiss,sep='') else ''
if(is.character(x)) x <- factor(x)
if(length(maxlevels) && is.factor(x) &&
length(levels(x)) > maxlevels)
return(h(r, paste('categories>',maxlevels,sep='')))
s <- ''
if(is.factor(x) || length(unique(x))==2)
{
tab <- table(x)
if((min(tab) / n) < minprev)
{
if(is.factor(x))
{
x <- combine.levels(x, minlev=minprev)
s <- 'grouped categories'
if(length(levels(x)) < 2)
s <- paste('prevalence<', minprev, sep='')
}
else s <- paste('prevalence<', minprev, sep='')
}
}
h(r, s)
}
h <- sapply(data, g, fracmiss, maxlevels, minprev)
if(all(h=='')) return(data)
if(print)
{
cat('\nVariables Removed or Modified\n\n')
print(data.frame(Variable=names(data)[h!=''],
Reason=h[h!=''], row.names=NULL, check.names=FALSE))
cat('\n')
}
s <- h=='grouped categories'
if(any(s)) for(i in which(s))
data[[i]] <- combine.levels(data[[i]], minlev=minprev)
if(any(h != '' & !s)) data <- data[h=='' | s]
data
}
spss.get <- function(file, lowernames=FALSE,
datevars=NULL,
use.value.labels=TRUE,
to.data.frame=TRUE,
max.value.labels=Inf,
force.single=TRUE, allow=NULL, charfactor=FALSE) {
require('foreign')
if(length(grep('http://', file))) {
tf <- tempfile()
download.file(file, tf, mode='wb', quiet=TRUE)
file <- tf
}
w <- read.spss(file, use.value.labels=use.value.labels,
to.data.frame=to.data.frame,
max.value.labels=max.value.labels)
a <- attributes(w)
vl <- a$variable.labels
nam <- a$names
nam <- makeNames(a$names, unique=TRUE, allow=allow)
if(lowernames) nam <- casefold(nam)
names(w) <- nam
lnam <- names(vl)
if(length(vl))
for(i in 1:length(vl)) {
n <- lnam[i]
lab <- vl[i]
if(lab != '' && lab != n) label(w[[i]]) <- lab
}
attr(w, 'variable.labels') <- NULL
if(force.single || length(datevars) || charfactor)
for(v in nam) {
x <- w[[v]]
changed <- FALSE
if(v %in% datevars) {
x <- importConvertDateTime(x, 'date', 'spss')
changed <- TRUE
} else if(all(is.na(x))) {
storage.mode(x) <- 'integer'
changed <- TRUE
} else if(!(is.factor(x) || is.character(x))) {
if(all(is.na(x))) {
storage.mode(x) <- 'integer'
changed <- TRUE
} else if(max(abs(x),na.rm=TRUE) <= (2^31-1) &&
all(floor(x) == x, na.rm=TRUE)) {
storage.mode(x) <- 'integer'
changed <- TRUE
}
} else if(charfactor && is.character(x)) {
if(length(unique(x)) < .5*length(x))
{
x <- sub(' +$', '', x) # remove trailing blanks
x <- factor(x, exclude='')
changed <- TRUE
}
}
if(changed) w[[v]] <- x
}
w
}
sasxport.get <- function(file, force.single=TRUE,
method=c('read.xport','dataload','csv'),
formats=NULL, allow=NULL, out=NULL,
keep=NULL, drop=NULL, as.is=0.5, FUN=NULL) {
method <- match.arg(method)
if(length(out) && method!='csv')
stop('out only applies to method="csv"')
if(method != 'csv')
require('foreign') || stop('foreign package is not installed')
rootsoftware <- if(method=='dataload')'dataload'
else 'sas'
sasdateform <-
toupper(c("date","mmddyy","yymmdd","ddmmyy","yyq","monyy",
"julian","qtr","weekdate","weekdatx","weekday","month"))
sastimeform <- toupper(c("hhmm","hour","mmss","time"))
sasdatetimeform <- toupper(c("datetime","tod"))
if(length(grep('http://', file))) {
tf <- tempfile()
download.file(file, tf, mode='wb', quiet=TRUE)
file <- tf
}
dsinfo <-
if(method == 'csv') lookupSASContents(file)
else lookup.xport(file)
whichds <-
if(length(keep))
keep
else
setdiff(names(dsinfo), c(drop,'_CONTENTS_','_contents_'))
ds <- switch(method,
read.xport= read.xport(file),
dataload = read.xportDataload(file, whichds),
csv = if(!length(out))
readSAScsv(file, dsinfo, whichds))
if(method=='read.xport' && (length(keep) | length(drop)))
ds <- ds[whichds]
## PROC FORMAT CNTLOUT= dataset present?
fds <- NULL
if(!length(formats)) {
fds <- sapply(dsinfo, function(x)
all(c('FMTNAME','START','END','MIN','MAX','FUZZ')
%in% x$name))
fds <- names(fds)[fds]
if(length(fds) > 1) {
warning('transport file contains more than one PROC FORMAT CNTLOUT= dataset; using only the first')
fds <- fds[1]
}
}
finfo <- NULL
if(length(formats) || length(fds)) {
finfo <-
if(length(formats))
formats
else if(length(out))
readSAScsv(file, dsinfo, fds)
else ds[[fds]]
## Remove leading $ from char format names
## fmtname <- sub('^\\$','',as.character(finfo$FMTNAME))
fmtname <- as.character(finfo$FMTNAME)
finfo <- split(finfo[c('START','END','LABEL')], fmtname)
finfo <- lapply(finfo,
function(f)
{
rb <- function(a)
{ # remove leading + trailing blanks
a <- sub('[[:space:]]+$', '', as.character(a))
sub('^[[:space:]]+', '', a)
}
st <- rb(f$START)
en <- rb(f$END)
lab <- rb(f$LABEL)
##j <- is.na(st) | is.na(en)
## st %in% c('','.','NA') | en %in% c('','.','NA')
j <- is.na(st) | is.na(en) | st == '' | en == ''
if(any(j)) {
warning('NA in code in FORMAT definition; removed')
st <- st[!j]; en <- en[!j]; lab <- lab[!j]
}
if(!all(st==en))
return(NULL)
list(value = all.is.numeric(st, 'vector'),
label = lab)
})
}
## Number of non-format datasets
nods <- length(whichds)
nds <- nods - (length(formats) == 0 && length(finfo) > 0)
which.regular <- setdiff(whichds, fds)
dsn <- tolower(which.regular)
if((nds > 1) && !length(out)) {
res <- vector('list', nds)
names(res) <- gsub('_','.',dsn)
}
if(length(FUN)) {
funout <- vector('list', length(dsn))
names(funout) <- gsub('_','.',dsn)
}
possiblyConvertChar <- if(method=='read.xport')
(is.logical(as.is) && as.is) ||
(is.numeric(as.is) && as.is < 1) else
(is.logical(as.is) && !as.is) ||
(is.numeric(as.is) && as.is > 0)
## reverse logic because read.xport always converts characters to factors
j <- 0
for(k in which.regular) {
j <- j + 1
cat('Processing SAS dataset', k, '\t ')
w <-
if(length(out))
readSAScsv(file, dsinfo, k)
else if(nods==1)
ds
else ds[[k]]
cat('.')
if(!length(w)) {
cat('Empty dataset', k, 'ignored\n')
next
}
nam <- tolower(makeNames(names(w), allow=allow))
names(w) <- nam
dinfo <- dsinfo[[k]]
fmt <- sub('^\\$','',dinfo$format)
lab <- dinfo$label
ndinfo <- tolower(makeNames(dinfo$name, allow=allow))
names(lab) <- names(fmt) <- ndinfo
for(i in 1:length(w)) {
changed <- FALSE
x <- w[[i]]
fi <- fmt[nam[i]]; names(fi) <- NULL
if(fi != '' && length(finfo) && (fi %in% names(finfo))) {
f <- finfo[[fi]]
if(length(f)) { ## may be NULL because had a range in format
x <- factor(x, f$value, f$label)
attr(x, 'format') <- fi
changed <- TRUE
}
}
if(is.numeric(x)) {
if(fi %in% sasdateform) {
x <- importConvertDateTime(x, 'date', rootsoftware)
changed <- TRUE
} else if(fi %in% sastimeform) {
x <- importConvertDateTime(x, 'time', rootsoftware)
changed <- TRUE
} else if(fi %in% sasdatetimeform) {
x <- importConvertDateTime(x, 'datetime', rootsoftware)
changed <- TRUE
} else if(force.single) {
if(all(is.na(x))) {
storage.mode(x) <- 'integer'
changed <- TRUE
} else if(max(abs(x),na.rm=TRUE) <= (2^31-1) &&
all(floor(x) == x, na.rm=TRUE)) {
storage.mode(x) <- 'integer'
changed <- TRUE
}
}
} else if(method=='read.xport' && possiblyConvertChar && is.factor(x)) {
if((is.logical(as.is) && as.is) ||
(is.numeric(as.is) && length(unique(x)) >= as.is*length(x))) {
x <- as.character(x)
changed <- TRUE
}
} else if(possiblyConvertChar && is.character(x)) {
if((is.logical(as.is) && !as.is) ||
(is.numeric(as.is) && length(unique(x)) < as.is*length(x))) {
x <- factor(x, exclude='')
changed <- TRUE
}
}
lz <- lab[nam[i]]
if(lz != '') {
names(lz) <- NULL
label(x) <- lz
changed <- TRUE
}
if(changed)
w[[i]] <- x
}
cat('.\n')
if(length(out)) {
nam <- gsub('_','.',dsn[j])
assign(nam, w)
## ugly, but a way to get actual data frame name into first
## argument of save( )
eval(parse(text=paste('save(',nam,', file="',
paste(out, '/', nam,'.rda',sep=''),
'", compress=TRUE)',sep='')))
if(length(FUN) && length(w))
funout[[nam]] <- FUN(w)
remove(nam)
} else if(nds > 1)
res[[j]] <- w
}
if(length(out)) {
names(dsinfo) <- gsub('_','.',tolower(names(dsinfo)))
if(length(FUN))
attr(dsinfo, 'FUN') <- funout
invisible(dsinfo)
} else if(nds > 1)
res
else w
}
## Use dataload program to create a structure like read.xport does
read.xportDataload <- function(file, dsnames) {
outf <- substring(tempfile(tmpdir=''),2)
file.copy(file, paste(tempdir(),outf,sep='/'))
curwd <- getwd()
on.exit(setwd(curwd))
setwd(tempdir())
n <- length(dsnames)
w <- vector('list', n); names(w) <- dsnames
for(a in dsnames) {
status <- system(paste('dataload', outf, 'zzzz.rda', a),
intern=FALSE)
if(status==0) {
load('zzzz.rda')
names(zzzz) <- makeNames(names(zzzz))
w[[a]] <- zzzz
}
}
w
}
utils::globalVariables(c("NOBS", "memname", "memlabel"))
## Read _contents_.csv and store it like lookup.xport output
lookupSASContents <- function(sasdir) {
w <- read.csv(paste(sasdir,'_contents_.csv',sep='/'), as.is=TRUE)
z <- tapply(w$NOBS, w$MEMNAME, function(x)x[1])
if(any(z == 0)) {
cat('\nDatasets with 0 observations ignored:\n')
print(names(z)[z == 0], quote=FALSE)
w <- subset(w, NOBS > 0)
}
w$TYPE <- ifelse(w$TYPE==1, 'numeric', 'character')
names(w) <- tolower(names(w))
unclass(split(subset(w,select=-c(memname,memlabel)), w$memname))
}
## Read all SAS csv export files and store in a list
readSAScsv <- function(sasdir, dsinfo, dsnames=names(dsinfo)) {
sasnobs <- sapply(dsinfo, function(x)x$nobs[1])
multi <- length(dsnames) > 1
if(multi) {
w <- vector('list', length(dsnames))
names(w) <- dsnames
}
for(a in dsnames) {
z <- read.csv(paste(sasdir,'/',a,'.csv', sep=''),
as.is=TRUE, blank.lines.skip=FALSE,
comment.char="")
importedLength <- length(z[[1]])
if(importedLength != sasnobs[a])
cat('\nError: NOBS reported by SAS (',sasnobs[a],') for dataset ',
a,' is not the same as imported length (', importedLength,
')\n', sep='')
if(multi)
w[[a]] <- z
}
if(multi)
w
else z
}
csv.get <- function(file, lowernames=FALSE, datevars=NULL, datetimevars=NULL,
dateformat='%F', fixdates=c('none','year'),
comment.char = "", autodates=TRUE, allow=NULL,
charfactor=FALSE,
sep=',', skip=0, vnames=NULL, labels=NULL, ...){
fixdates <- match.arg(fixdates)
if(length(vnames))
vnames <- scan(file, what=character(0), skip=vnames-1, nlines=1,
sep=sep, quiet=TRUE)
if(length(labels))
labels <- scan(file, what=character(0), skip=labels-1, nlines=1,
sep=sep, quiet=TRUE)
w <- if(length(vnames))
read.csv(file, check.names=FALSE, comment.char=comment.char,
header=FALSE, col.names=vnames, skip=skip, sep=sep, ...)
else read.csv(file, check.names=FALSE, comment.char=comment.char,
sep=sep, skip=skip, ...)
n <- nam <- names(w)
m <- makeNames(n, unique=TRUE, allow=allow)
if(length(labels)) n <- labels
if(lowernames)
m <- casefold(m)
changed <- any(m != nam)
if(changed)
names(w) <- m
if(autodates) {
tmp <- w
names(tmp) <- NULL
for(i in 1:length(tmp)) {
if(! is.character(tmp[[1]]))
next
}
}
cleanup.import(w,
labels=if(length(labels))labels else if(changed)n else NULL,
datevars=datevars, datetimevars=datetimevars,
dateformat=dateformat,
fixdates=fixdates, charfactor=charfactor)
}
sasdsLabels <- function(file)
{
w <- scan(file, sep='\n', what='', quiet=TRUE)
i <- grep('Data Set Name:', w)
if(!length(i))
return(NULL)
n <- tolower(sub('.*\\.([A-Z0-9\\_]*)[[:space:]]+.*','\\1',w[i]))
w <- gsub('\t','',w)
labs <- ifelse(nchar(w[i-1])==0,w[i-2],w[i-1])
names(labs) <- n
labs
}
label <- function(x, default=NULL, ...) UseMethod("label")
label.default <- function(x, default=NULL, units=FALSE, plot=FALSE,
grid=FALSE, ...)
{
if(length(default) > 1)
stop("the default string cannot be of length greater then one")
at <- attributes(x)
lab <- at$label
if(length(default) && (!length(lab) || lab==''))
lab <- default
un <- at$units
labelPlotmath(lab,
if(units) un else NULL,
plotmath=plot, grid=grid)
}
label.Surv <- function(x, default=NULL, units=FALSE,
plot=FALSE, grid=FALSE,
type=c('any', 'time', 'event'), ...)
{
type <- match.arg(type)
if(length(default) > 1)
stop("the default string cannot be of length greater then one")
at <- attributes(x)
lab <- at$label
ia <- at$inputAttributes
if((! length(lab) || lab == '') && length(ia)) {
poss <- switch(type,
any = c(ia$event$label, ia$time2$label, ia$time$label),
time = c( ia$time2$label, ia$time$label),
event = ia$event$label )
for(lb in poss)
if(! length(lab) && lb != '') lab <- lb
}
if(length(default) && (!length(lab) || lab=='')) lab <- default
un <- NULL
if(units) {
un <- at$units
if(! length(un) && length(ia)) {
un <- ia$time2$units
if(! length(un)) un <- ia$time$units
}
}
labelPlotmath(lab, un,
plotmath=plot, grid=grid)
}
label.data.frame <- function(x, default=NULL, self=FALSE, ...) {
if(self) {
label.default(x)
} else {
if(length(default) > 0 && length(default) != length(x)) {
stop('length of default must same as x')
} else if(length(default) == 0) {
default <- list(default)
}
labels <- mapply(FUN=label, x=x, default=default, MoreArgs=list(self=TRUE), USE.NAMES=FALSE)
names(labels) <- names(x)
return(labels)
}
}
labelPlotmath <- function(label, units=NULL, plotmath=TRUE, grid=FALSE)
{
if(!length(label)) label <- ''
if(!length(units) || (length(units)==1 && is.na(units))) units <- ''
g <-
if(plotmath) function(x, y=NULL, xstyle=NULL, ystyle=NULL)
{
h <- function(w, style=NULL)
if(length(style))
paste(style,'(',w,')',sep='')
else
w
tryparse <- function(z, original)
{
p <- try(parse(text=z), silent=TRUE)
if(is.character(p)) original else p
}
if(!length(y))
return(tryparse(h(plotmathTranslate(x), xstyle), x))
w <- paste('list(',h(plotmathTranslate(x), xstyle), ',',
h(plotmathTranslate(y), ystyle), ')', sep='')
tryparse(w, paste(x, y))
} else function(x, y=NULL, ...) if(length(y)) paste(x,y) else x
if(units=='') g(label)
else if(label=='') g(units)
else if(plotmath)
g(label, units, ystyle='scriptstyle')
else paste(label,' [',units,']',sep='')
}
plotmathTranslate <- function(x)
{
if(length(grep('paste', x))) return(x)
specials <- c(' ','%','_')
spec <- FALSE
for(s in specials)
if(length(grep(s,x)))
spec <- TRUE
if(spec) x <- paste('paste("',x,'")',sep='')
else if(substring(x,1,1)=='/') x <- paste('phantom()', x, sep='')
x
}
labelLatex <- function(x=NULL, label='', units='', size='smaller[2]',
hfill=FALSE, bold=FALSE, default='', double=FALSE) {
if(length(x)) {
if(label == '') label <- label(x)
if(units == '') units <- units(x)
}
if(default == '' && length(x)) default <- deparse(substitute(x))
if(label == '') return(default)
label <- latexTranslate(label)
bs <- if(double) '\\\\' else '\\'
if(bold) label <- paste('{', bs, 'textbf ', label, '}', sep='')
if(units != '') {
units <- latexTranslate(units)
if(length(size) && size != '')
units <- paste('{', bs, size, ' ', units, '}', sep='')
if(hfill) units <- paste(bs, 'hfill ', units, sep='')
else
units <- paste(' ', units, sep='')
label <- paste(label, units, sep='')
}
label
}
"label<-" <- function(x, ..., value) UseMethod("label<-")
##From Bill Dunlap, StatSci 15Mar95:
"label<-.default" <- function(x, ..., value)
{
if(is.list(value)) {
stop("cannot assign a list to be a object label")
}
if(length(value) != 1L) {
stop("value must be character vector of length 1")
}
attr(x, 'label') <- value
if('labelled' %nin% class(x)) {
class(x) <- c('labelled', class(x))
}
return(x)
}
## } else function(x, ..., value)
## {
## ## Splus 5.x, 6.x
## ## oldClass(x) <- unique(c('labelled', oldClass(x),
## ## if(is.matrix(x))'matrix'))
## attr(x,'label') <- value
## return(x)
## }
"label<-.data.frame" <- function(x, self=TRUE, ..., value) {
if(!is.data.frame(x)) {
stop("x must be a data.frame")
}
if(missing(self) && is.list(value)) {
self <- FALSE
}
if(self) {
xc <- class(x)
xx <- unclass(x)
label(xx) <- value
class(xx) <- xc
return(xx)
} else {
if(length(value) != length(x)) {
stop("value must have the same length as x")
}
for (i in seq(along.with=x)) {
label(x[[i]]) <- value[[i]]
}
}
return(x)
}
"[.labelled"<- function(x, ...) {
tags <- valueTags(x)
x <- NextMethod("[")
valueTags(x) <- tags
x
}
"print.labelled"<- function(x, ...) {
x.orig <- x
u <- attr(x,'units')
if(length(u))
attr(x,'units') <- NULL # so won't print twice
cat(attr(x, "label"),
if(length(u))
paste('[', u, ']', sep=''),
"\n")
attr(x, "label") <- NULL
class(x) <-
if(length(class(x))==1 && class(x)=='labelled')
NULL
else
class(x)[class(x) != 'labelled']
## next line works around print bug
if(!length(attr(x,'class')))
attr(x,'class') <- NULL
NextMethod("print")
invisible(x.orig)
}
as.data.frame.labelled <- as.data.frame.vector
Label <- function(object, ...) UseMethod("Label")
Label.data.frame <- function(object, file='', append=FALSE, ...)
{
nn <- names(object)
for(i in 1:length(nn)) {
lab <- attr(object[[nn[i]]],'label')
lab <- if(length(lab)==0) '' else lab
cat("label(",nn[i],")\t<- '",lab,"'\n",
append=if(i==1)
append
else
TRUE,
file=file, sep='')
}
invisible()
}
reLabelled <- function(object)
{
for(i in 1:length(object))
{
x <- object[[i]]
lab <- attr(x, 'label')
cl <- class(x)
if(length(lab) && !any(cl=='labelled')) {
class(x) <- c('labelled',cl)
object[[i]] <- x
}
}
object
}
llist <- function(..., labels=TRUE)
{
dotlist <- list(...)
lname <- names(dotlist)
name <- vname <- as.character(sys.call())[-1]
for(i in 1:length(dotlist))
{
vname[i] <-
if(length(lname) && lname[i]!='')
lname[i]
else
name[i]
## R barked at setting vname[i] to NULL
lab <- vname[i]
if(labels)
{
lab <- attr(dotlist[[i]],'label')
if(length(lab) == 0)
lab <- vname[i]
}
label(dotlist[[i]]) <- lab
}
names(dotlist) <- vname[1:length(dotlist)]
dotlist
}
combineLabels <- function(...)
{
w <- list(...)
labs <- sapply(w[[1]], label)
lw <- length(w)
if(lw > 1) for(j in 2:lw)
{
lab <- sapply(w[[j]], label)
lab <- lab[lab != '']
if(length(lab)) labs[names(lab)] <- lab
}
labs[labs != '']
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.