makessa <-
function(formula,data=NULL,type=NULL,nknots=NULL,rparm=NA,
lambdas=NULL,skip.iter=TRUE,se.fit=FALSE,rseed=1234,
gcvopts=NULL,knotcheck=TRUE,gammas=NULL,weights=NULL,
random=NULL,remlalg=c("FS","NR","EM","none"),
remliter=500,remltol=10^-4,remltau=NULL){
###### Makes Smoothing Spline Anova models
###### Nathaniel E. Helwig (helwig@umn.edu)
###### Last modified: January 30, 2017
### get initial info
mf <- match.call()
m <- match(c("formula","data"),names(mf),0L)
mf <- mf[c(1L, m)]
mf[[1L]] <- as.name("model.frame")
mf <- eval(mf, parent.frame()) # mf contains response and predictor info
mt <- attr(mf, "terms") # mt contains model info and terms
et <- attr(mt,"factors") # et is effects table
mfdim <- dim(et) # dim of effects table
ndpts <- dim(mf)[1] # total number of data points
nxvar <- mfdim[1]-1L # number of predictors
nterm <- mfdim[2] # number of model terms
xynames <- row.names(et)
xnames <- xynames[2:(nxvar+1L)]
tnames <- colnames(et)
if(any(colSums(et>0L)>3L)){stop("Four-way (and higher-order) interactions are not supported.")}
### check response
yvar <- as.matrix(model.response(mf, "numeric")+0.0) # response variable
if(ncol(yvar)>1){stop("Response must be unidimensional (vector).")}
if(nrow(yvar)!=ndpts){stop("Response vector must have same length as predictors.")}
ysm <- sum(yvar)
nunewr <- ndpts
fweights <- rep.int(1L,ndpts)
yty <- crossprod(yvar)
### check random
ZtZy <- reinfo <- remlopts <- NULL
if(!is.null(random)){
if(class(random)!="formula"){stop("Input 'random' must be a formula.")}
remlalg <- remlalg[1]
if(any(remlalg==c("FS","NR","EM","none"))==FALSE){stop("Input 'remlalg' must be 'FS', 'NR', 'EM', or 'none'.")}
remliter <- as.integer(remliter[1])
if(remliter<1) stop("Input 'remliter' must be a positive integer.")
remltol <- as.numeric(remltol[1])
if(remltol<=0) stop("Input 'remltol' must be a postive scalar.")
reinfo <- getRandom(formula=random,ndpts=ndpts,data=data)
ZtZy <- makeZtZ(reinfo[[1]],reinfo[[2]],reinfo[[3]],yvar)
ntau <- length(ZtZy$rdm)
if(is.null(remltau)){
remltau <- rep(1,ntau)
} else {
if(length(remltau)!=ntau | any(remltau<=0)){stop("Input 'remltau' must contain positive initial parameter values (one for each variance component).")}
}
remlopts <- list(iter=remliter,tol=remltol,itau=remltau)
}
### check gcvopts
if(is.null(gcvopts[1])==FALSE){
gmatch <- match(c("maxit","gcvtol","alpha"),names(gcvopts))
if(any(is.na(gmatch))){stop("Invalid input for 'gcvopts'.")}
gcvopts$maxit <- as.integer(gcvopts$maxit[1])
gcvopts$gcvtol <- gcvopts$gcvtol[1]
gcvopts$alpha <- gcvopts$alpha[1]
ugcv <- unlist(gcvopts)
if(any(ugcv<=0)){stop("Invalid input for 'gcvopts'.")}
if(gcvopts$maxit<1L){stop("Invalid input for gcvopts$maxit")}
}
### check formula
testform <- strsplit(as.character(formula),":")
for(k in 1:length(testform)){
if(length(testform[[k]])>1L){stop("Formula syntax 'x1:x2' is not supported (i.e., main effects must accompany interactions). \n Use 'x1*x2' to include interaction with main effects.")}
}
### check weights
if(is.null(weights[1])==FALSE){
weights <- as.numeric(weights)
if(length(weights)!=ndpts){stop("Weights vector must have same length as response.")}
if(any(weights<=0)){stop("Weights must be positive.")}
if(is.na(rparm[1])==FALSE & min(weights)<=0.005){stop("Minimum weight is too small (less than 0.005).")}
wtchk <- TRUE
} else {
weights <- 1
wtchk <- FALSE
}
### check order of things for nonparametric predictors
if(any(type=="prm")){stop("Parametric predictors are not supported. Use 'bigssp' function.")}
if(nxvar>1){
snidx <- match(xnames,names(type))
if(any(is.na(snidx))){stop('Variable names in "type" must match variable names in "formula".')}
type <- type[snidx]
if(is.na(rparm[1])==FALSE){
rnidx <- match(xnames,names(rparm))
if(any(is.na(rnidx))){stop('Variable names in "rparm" must match variable names in "formula".')}
rparm <- rparm[rnidx]
}
if(is.null(gammas[1])==FALSE){
gnidx <- match(xnames,names(gammas))
if(any(is.na(gnidx))){stop('Variable names in "gammas" must match variable names in "formula".')}
gammas <- unlist(gammas[gnidx])
if(any(gammas<=0)){stop('Input "gammas" must be nonnegative smoothing parameters.')}
}
} else{
type <- type[[1]]
if(is.null(type)){type="cub"}
rparm <- rparm[[1]]
}
### collect predictors
xvars <- vector("list",nxvar)
for(jj in 1:nxvar){xvars[[jj]] <- mf[,jj+1L]}
rm(mf)
names(xvars) <- xnames
### check predictor types
flvls <- xrng <- vector("list",nxvar)
xdim <- vector("integer",nxvar)
for(k in 1:nxvar){
if(any(type[[k]]==c("lin","cub","cub0","per","tps"))){
xvars[[k]] <- as.matrix(xvars[[k]]+0.0)
xdim[k] <- ncol(xvars[[k]])
if(type[[k]]=="tps"){
if(xdim[k]>3){stop(paste("Too many predictors for",xnames[k]))}
} else{
if(xdim[k]>1){stop(paste("Too many predictors for",xnames[k]))}
}
if(!is.na(rparm[1])){xrng[[k]]=apply(xvars[[k]],2,range)}
flvls[[k]] <- NA
} else if (type[[k]]=="ord"){
xvars[[k]] <- as.ordered(xvars[[k]])
flvls[[k]] <- levels(xvars[[k]])
xvars[[k]] <- matrix(as.integer(xvars[[k]]))
xrng[[k]] <- matrix(c(1,length(flvls[[k]])),2,1)
xdim[k] <- 1L
} else if (type[[k]]=="nom"){
xvars[[k]] <- as.factor(xvars[[k]])
flvls[[k]] <- levels(xvars[[k]])
xvars[[k]] <- matrix(as.integer(xvars[[k]]))
xrng[[k]] <- matrix(c(1,length(flvls[[k]])),2,1)
xdim[k] <- 1L
} else{
stop('You must set type to either "nom", "ord", "lin", "cub", "cub0", "per", or "tps" for each effect in formula.')
}
} # end for(k in 1:nxvar)
### check knots and random seed
if(is.null(nknots)){
if(nxvar==1L){
if(xdim[1]==1L){nknots <- 30} else if(xdim[1]==2L){nknots <- 100} else {nknots <- 200}
} else if(nxvar==2L) {
if(any(xdim>1)){nknots <- 200} else {nknots <- 100}
} else {nknots <- 300} # end if(nxvar==1L)
}
if(!is.null(rseed) && length(nknots)==1L){set.seed(rseed)}
theknots <- vector("list",nxvar)
if(length(nknots)>1){
kidx <- as.integer(nknots)
nknots <- length(kidx)
if(any(kidx<1L) || any(kidx>nunewr)){stop("Input 'nknots' out of data index range.")}
} else {kidx <- NA}
### check for rounding
if(is.na(rparm[1])){
for(k in 1:nxvar){
if(any(type[[k]]==c("lin","cub","cub0","per"))){
xrng[[k]] <- range(xvars[[k]])
xvars[[k]] <- (xvars[[k]]-xrng[[k]][1])/(xrng[[k]][2]-xrng[[k]][1])
}
if(length(kidx)>1){for(k in 1:nxvar){theknots[[k]] <- as.matrix(xvars[[k]][kidx,])}}
} # end for(k in 1:nxvar)
xorig <- yorig <- NA
} else{
# check rparms
rpall <- NULL
for(k in 1:nxvar){rpall <- c(rpall,rparm[[k]])}
for(k in 1:length(rpall)){
rplog <- log(c(rpall[k],rpall[k]/2,rpall[k]/5),base=10)
rpchk <- rep(FALSE,3)
for(jj in 1:3){rpchk[jj] <- (rplog[jj]==as.integer(rplog[jj]))}
if(!any(rpchk)){stop("Must set input 'rparm' such that rparm=a*(10^-b) with a in {1,2,5} and b>=1 (integer).")}
}
# save original variables
xorig <- xvars
yorig <- yvar
# get rounded point indices
gvec <- matrix(1,ndpts,1)
kconst <- 1
for(k in 1:nxvar){
if(type[[k]]=="nom"){
gvec <- gvec + kconst*(xvars[[k]]-1L)
kconst <- kconst*xrng[[k]][2]
} else if(type[[k]]=="ord"){
gvec <- gvec + kconst*(xvars[[k]]-1L)
kconst <- kconst*xrng[[k]][2]
} else if(type[[k]]=="tps"){
if(length(rparm[[k]])!=xdim[k]){
rprep <- rep(rparm[[k]][1],xdim[k])
if(nxvar==1L){rparm <- list(rprep)} else {rparm[[k]] <- rprep}
}
rxrng <- xrng[[k]]
for(j in 1:xdim[k]){
rxrng[,j] <- round(rxrng[,j]/rparm[[k]][j])*rparm[[k]][j]
gvec <- gvec + kconst*round((xvars[[k]][,j]-rxrng[1,j])/rparm[[k]][j])
kconst <- kconst*round(1+(rxrng[2,j]-rxrng[1,j])/rparm[[k]][j])
xvars[[k]][,j] <- as.matrix(round(xvars[[k]][,j]/rparm[[k]][j]))*rparm[[k]][j]
}
} else{
gidx <- round(((xvars[[k]]-xrng[[k]][1])/(xrng[[k]][2]-xrng[[k]][1]))/rparm[[k]])
gvec <- gvec + kconst*gidx
kconst <- kconst*round(1+1/rparm[[k]])
xvars[[k]] <- as.matrix(gidx*rparm[[k]]); rm(gidx)
}
} # end for(k in 1:nxvar)
# account for weights
if(wtchk){
rmw <- round(min(weights)/.01)*.01
gvec <- gvec + kconst*round((weights-rmw)/.01)
weights <- round(weights/.01)*.01
}
# get unique points, frequencies, and sums
gvec <- as.factor(gvec)
glindx <- split(cbind(1:ndpts,yvar),gvec)
if(is.na(kidx[1])==FALSE){for(k in 1:nxvar){theknots[[k]] <- as.matrix(xvars[[k]][kidx,])}}
fs <- matrix(unlist(lapply(glindx,unifqsum)),ncol=3,byrow=TRUE)
for(k in 1:nxvar){xvars[[k]] <- as.matrix(xvars[[k]][fs[,1],])}
yvar <- fs[,2]
fweights <- fs[,3]
nunewr <- length(yvar)
if(wtchk){weights <- weights[fs[,1]]}
} # end if(is.na(rparm[1]))
### get knot indices
if(length(kidx)==1L){
nknots <- as.integer(min(c(nunewr,nknots)))
if(nknots<1){stop("Input 'nknots' must be positive integer.")}
kidx <- sample(nunewr,nknots,prob=(fweights/ndpts))
for(k in 1:nxvar){theknots[[k]] <- as.matrix(xvars[[k]][kidx,])}
}
### check knots
if(knotcheck){
matknots <- NULL
for(k in 1:nxvar){matknots <- cbind(matknots,theknots[[k]])}
matknots <- unique(matknots)
if(nrow(matknots)<nknots){
if(nxvar>1){
csdim <- c(0,cumsum(xdim))
for(k in 1:nxvar){theknots[[k]] <- as.matrix(matknots[,(csdim[k]+1):(csdim[k]+xdim[k])])}
} else {theknots[[1]] <- as.matrix(matknots)}
nknots <- nrow(matknots)
}
}
### make marginal reproducing kernel matrices
rks <- makerkm(xvars,type,theknots,xrng)
### collect output
ylist <- list(yvar)
names(ylist) <- xynames[1]
uidx <- NULL
if(!is.null(random)){if(is.na(rparm[1])){uidx <- 1:ndpts} else{uidx <- as.integer(gvec)}}
ssamk <- list(et=et,n=c(nunewr,ndpts),xvars=c(xvars,ylist),nxvar=nxvar,yty=yty,
fweights=fweights,type=type,xdim=xdim,theknots=theknots,nknots=nknots,
lambdas=lambdas,rks=rks[1:4],gcvopts=gcvopts,xorig=xorig,yorig=yorig,
se.fit=se.fit,skip.iter=skip.iter,ysm=ysm,rparm=rparm,xrng=xrng,
flvls=flvls,tpsinfo=rks$tpsinfo,formula=formula,gammas=gammas,weights=weights,
uidx=uidx,ZtZy=ZtZy,reinfo=reinfo,random=random,remlalg=remlalg,remlopts=remlopts)
class(ssamk) <- "makessa"
return(ssamk)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.