# R/mgcv.r In mgcv: Mixed GAM Computation Vehicle with Automatic Smoothness Estimation

#### Documented in concurvityfixDependenceformula.gamfull.scoregamgam.outergam.sidegam.vcompinfluence.gaminitial.spinterpret.gammagicmagic.post.procmodel.matrix.gammrootpclspen.edfpredict.gamprint.anova.gamprint.summary.gampsum.chisqresiduals.gamrigRrankslanczossp.vcovvcov.gam

##  R routines for the package mgcv (c) Simon Wood 2000-2016
##  With contributions from Henric Nilsson

Rrank <- function(R,tol=.Machine$double.eps^.9) { ## Finds rank of upper triangular matrix R, by estimating condition ## number of upper rank by rank block, and reducing rank until this is ## acceptably low... assumes R pivoted m <- nrow(R) rank <- min(m,ncol(R)) ok <- TRUE while (ok) { Rcond <- .C(C_R_cond,R=as.double(R),r=as.integer(m),c=as.integer(rank), work=as.double(rep(0,4*m)),Rcond=as.double(1))$Rcond
if (Rcond*tol<1) ok <- FALSE else rank <- rank - 1
}
rank
}

slanczos <- function(A,k=10,kl=-1,tol=.Machine$double.eps^.5,nt=1) { ## Computes truncated eigen decomposition of symmetric A by ## Lanczos iteration. If kl < 0 then k largest magnitude ## eigenvalues returned, otherwise k highest and kl lowest. ## Eigenvectors are always returned too. ## set.seed(1);n <- 1000;A <- matrix(runif(n*n),n,n);A <- A+t(A);er <- slanczos(A,10) ## um <- eigen(A,symmetric=TRUE);ind <- c(1:5,(n-5+1):n) ## range(er$values-um$values[ind]);range(abs(er$vectors)-abs(um$vectors[,ind])) ## It seems that when k (or k+kl) is beyond 10-15% of n ## then you might as well use eigen(A,symmetric=TRUE), but the ## extra cost is the expensive accumulation of eigenvectors. ## Should re-write whole thing using LAPACK routines for eigenvectors. if (tol<=0||tol>.01) stop("silly tolerance supplied") k <- round(k);kl <- round(kl) if (k<0) stop("argument k must be positive.") m <- k + max(0,kl) n <- nrow(A) if (m<1) return(list(values=rep(0,0),vectors=matrix(0,n,0),iter=0)) if (n != ncol(A)) stop("A not square") if (m>n) stop("Can not have more eigenvalues than nrow(A)") oo <- .C(C_Rlanczos,A=as.double(A),U=as.double(rep(0,n*m)),D=as.double(rep(0,m)), n=as.integer(n),m=as.integer(k),ml=as.integer(kl),tol=as.double(tol),nt=as.integer(nt)) list(values = oo$D,vectors = matrix(oo$U,n,m),iter=oo$n)
}

rig <- function(n,mean,scale) {
## inverse gaussian deviates generated by algorithm 5.7 of
## Gentle, 2003. scale = 1/lambda.
if (length(n)>1) n <- length(n)
x <- y <- rnorm(n)^2
mys <- mean*scale*y
mu <- 0*y + mean ## y is there to ensure mu is a vector
mu2 <- mu^2;
ind <- mys < .Machine$double.eps^-.5 ## cut off for tail computation x[ind] <- mu[ind]*(1 + 0.5*(mys[ind] - sqrt(mys[ind]*4+mys[ind]^2))) x[!ind] <- mu[!ind]/mys[!ind] ## tail term (derived from Taylor of sqrt(1+eps) etc) #my <- mean*y; sc <- 0*y + scale #ind <- my > 1 ## cancellation error can be severe, without splitting #x[!ind] <- mu[!ind]*(1 + 0.5*sc[!ind]*(my[!ind] - sqrt(4*my[!ind]/sc[!ind] + my[!ind]^2))) ## really the sqrt in the next term should be expanded beyond first order and then ## worked on - otherwise too many exact zeros? #x[ind] <- pmax(0,mu[ind]*(1+my[ind]*.5*sc[ind]*(1-sqrt(1+ 4/(sc[ind]*my[ind]))))) ind <- runif(n) > mean/(mean+x) x[ind] <- mu2[ind]/x[ind] x ## E(x) = mean; var(x) = scale*mean^3 } strip.offset <- function(x) # sole purpose is to take a model frame and rename any "offset(a.name)" # columns "a.name" { na <- names(x) for (i in 1:length(na)) { if (substr(na[i],1,7)=="offset(") na[i] <- substr(na[i],8,nchar(na[i])-1) } names(x) <- na x } pcls <- function(M) # Function to perform penalized constrained least squares. # Problem to be solved is: # # minimise ||W^0.5 (y - Xp)||^2 + p'Bp # subject to Ain p >= b & C p = "constant" # # where B = \sum_{i=1}^m \theta_i S_i and W=diag(w) # on entry this routine requires a list M, with the following elements: # M$X - the design matrix for this problem.
# M$p - a feasible initial parameter vector - note that this should not be set up to # lie exactly on all the inequality constraints - which can easily happen if M$p=0!
# M$y - response variable # M$w - weight vector: W= diag(M$w) # M$Ain - matrix of inequality constraints
# M$bin - b above # M$C  - fixed constraint matrix
# M$S - List of (minimal) penalty matrices # M$off - used for unpacking M$S # M$sp - array of theta_i's
# Ain, bin and p are not in the object needed to call mgcv....
#
{ nar<-c(length(M$y),length(M$p),dim(M$Ain)[1],dim(M$C)[1])
## sanity checking ...
if (nrow(M$X)!=nar[1]) stop("nrow(M$X) != length(M$y)") if (ncol(M$X)!=nar[2]) stop("ncol(M$X) != length(M$p)")
if (length(M$w)!=nar[1]) stop("length(M$w) != length(M$y)") if (nar[3]!=length(M$bin)) stop("nrow(M$Ain) != length(M$bin)")
if (nrow(M$Ain)>0) { if (ncol(M$Ain)!=nar[2]) stop("nrow(M$Ain) != length(M$p)")
res <- as.numeric(M$Ain%*%M$p) - as.numeric(M$bin) if (sum(res<0)>0) stop("initial parameters not feasible") res <- abs(res) if (sum(res<.Machine$double.eps^.5)>0)
warning("initial point very close to some inequality constraints")
res <- mean(res)
if (res<.Machine$double.eps^.5) warning("initial parameters very close to inequality constraints") } if (nrow(M$C)>0) if (ncol(M$C)!=nar[2]) stop("ncol(M$C) != length(M$p)") if (length(M$S)!=length(M$off)) stop("M$S and M$off have different lengths") if (length(M$S)!=length(M$sp)) stop("M$sp has different length to M$S and M$off")

# pack the S array for mgcv call
m<-length(M$S) Sa<-array(0,0);df<-0 if (m>0) for (i in 1:m) { Sa<-c(Sa,M$S[[i]])
df[i]<-nrow(M$S[[i]]) if (M$off[i]+df[i]-1>nar[2]) stop(gettextf("M$S[%d] is too large given M$off[%d]", i, i))
}
qra.exist <- FALSE
if (ncol(M$X)>nrow(M$X)) {
if (m>0) stop("Penalized model matrix must have no more columns than rows")
else { ## absorb M$C constraints qra <- qr(t(M$C))
j <- nrow(M$C);k <- ncol(M$X)
M$X <- t(qr.qty(qra,t(M$X))[(j+1):k,])
M$Ain <- t(qr.qty(qra,t(M$Ain))[(j+1):k,])
M$C <- matrix(0,0,0) M$p <- rep(0,ncol(M$X)) nar[2] <- length(M$p)
qra.exist <- TRUE
if  (ncol(M$X)>nrow(M$X)) stop("Model matrix not full column rank")
}
}
o<-.C(C_RPCLS,as.double(M$X),as.double(M$p),as.double(M$y),as.double(M$w),as.double(M$Ain),as.double(M$bin)
,as.double(M$C),as.double(Sa),as.integer(M$off),as.integer(df),as.double(M$sp), as.integer(length(M$off)),as.integer(nar))
p <- array(o[[2]],length(M$p)) if (qra.exist) p <- qr.qy(qra,c(rep(0,j),p)) p } ## pcls all_vars1 <- function(form) { ## version of all.vars that doesn't split up terms like x$y into x and y
vars <- all.vars(form)
vn <- all.names(form)
vn <- vn[vn%in%c(vars,"$","[[")] ## actual variable related names if ("[["%in%vn) stop("can't handle [[ in formula") ii <- which(vn%in%"$") ## index of '$' if (length(ii)) { ## assemble variable names vn1 <- if (ii[1]>1) vn[1:(ii[1]-1)] go <- TRUE k <- 1 while (go) { n <- 2; while(k<length(ii) && ii[k]==ii[k+1]-1) { k <- k + 1;n <- n + 1 } vn1 <- c(vn1,paste(vn[ii[k]+1:n],collapse="$"))
if (k==length(ii)) {
go <- FALSE
ind <- if (ii[k]+n<length(vn)) (ii[k]+n+1):length(vn) else rep(0,0)
} else {
k <- k +  1
ind <- if (ii[k-1]+n<ii[k]-1) (ii[k-1]+n+1):(ii[k]-1) else rep(0,0)
}
vn1 <- c(vn1,vn[ind])
}
} else vn1 <- vn
vn1
} ## all_vars1

interpret.gam0 <- function(gf,textra=NULL,extra.special=NULL)
# interprets a gam formula of the generic form:
#   y~x0+x1+x3*x4 + s(x5)+ s(x6,x7) ....
# and returns:
# 1. a model formula for the parametric part: pf (and pfok indicating whether it has terms)
# 2. a list of descriptors for the smooths: smooth.spec
# this is function does the work, and is called by in interpret.gam
# 'textra' is optional text to add to term labels
# 'extra.special' is label of extra smooth within formula.
{ p.env <- environment(gf) # environment of formula
tf <- terms.formula(gf,specials=c("s","te","ti","t2",extra.special)) # specials attribute indicates which terms are smooth

terms <- attr(tf,"term.labels") # labels of the model terms
nt <- length(terms) # how many terms?

if (attr(tf,"response") > 0) {  # start the replacement formulae
response <- as.character(attr(tf,"variables")[2])
} else {
response <- NULL
}
sp <- attr(tf,"specials")$s # array of indices of smooth terms tp <- attr(tf,"specials")$te    # indices of tensor product terms
tip <- attr(tf,"specials")$ti # indices of tensor product pure interaction terms t2p <- attr(tf,"specials")$t2   # indices of type 2 tensor product terms
zp <- if (is.null(extra.special)) NULL else attr(tf,"specials")[[extra.special]]
off <- attr(tf,"offset") # location of offset in formula

## have to translate sp, tp, tip, t2p (zp) so that they relate to terms,
## rather than elements of the formula...
vtab <- attr(tf,"factors") # cross tabulation of vars to terms
if (length(sp)>0) for (i in 1:length(sp)) {
ind <- (1:nt)[as.logical(vtab[sp[i],])]
sp[i] <- ind # the term that smooth relates to
}
if (length(tp)>0) for (i in 1:length(tp)) {
ind <- (1:nt)[as.logical(vtab[tp[i],])]
tp[i] <- ind # the term that smooth relates to
}
if (length(tip)>0) for (i in 1:length(tip)) {
ind <- (1:nt)[as.logical(vtab[tip[i],])]
tip[i] <- ind # the term that smooth relates to
}
if (length(t2p)>0) for (i in 1:length(t2p)) {
ind <- (1:nt)[as.logical(vtab[t2p[i],])]
t2p[i] <- ind # the term that smooth relates to
}
if (length(zp)>0) for (i in 1:length(zp)) {
ind <- (1:nt)[as.logical(vtab[zp[i],])]
zp[i] <- ind # the term that smooth relates to
} ## re-referencing is complete

k <- kt <- kti <- kt2 <- ks <- kz <- kp <- 1 # counters for terms in the 2 formulae
len.sp <- length(sp)
len.tp <- length(tp)
len.tip <- length(tip)
len.t2p <- length(t2p)
len.zp <- length(zp)
ns <- len.sp + len.tp + len.tip + len.t2p + len.zp# number of smooths
pav <- av <- rep("",0)
smooth.spec <- list()
#mgcvat <- "package:mgcv" %in% search() ## is mgcv in search path?
if (nt) for (i in 1:nt) { # work through all terms
if (k <= ns&&((ks<=len.sp&&sp[ks]==i)||(kt<=len.tp&&tp[kt]==i)||(kz<=len.zp&&zp[kz]==i)||
(kti<=len.tip&&tip[kti]==i)||(kt2<=len.t2p&&t2p[kt2]==i))) { # it's a smooth
## have to evaluate in the environment of the formula or you can't find variables
## supplied as smooth arguments, e.g. k <- 5;gam(y~s(x,k=k)), fails,
## but if you don't specify namespace of mgcv then stuff like
## loadNamespace('mgcv'); k <- 10; mgcv::interpret.gam(y~s(x,k=k)) fails (can't find s)
## following may supply namespace of mgcv explicitly if not on search path...
## If 's' etc are masked then we can fail even if mgcv on search path, hence paste
## of explicit mgcv reference into first attempt...

st <- try(eval(parse(text=paste("mgcv::",terms[i],sep="")),envir=p.env),silent=TRUE)
if (inherits(st,"try-error")) st <-
eval(parse(text=terms[i]),enclos=p.env,envir=mgcvns)

if (!is.null(textra)) { ## modify the labels on smooths with textra
pos <- regexpr("(",st$lab,fixed=TRUE)[1] st$label <- paste(substr(st$label,start=1,stop=pos-1),textra, substr(st$label,start=pos,stop=nchar(st$label)),sep="") } smooth.spec[[k]] <- smooth.info(st) ## smooth.info supplies any extra specification info for class if (ks<=len.sp&&sp[ks]==i) ks <- ks + 1 else # counts s() terms if (kt<=len.tp&&tp[kt]==i) kt <- kt + 1 else # counts te() terms if (kti<=len.tip&&tip[kti]==i) kti <- kti + 1 else # counts ti() terms if (kt2<=len.t2p&&t2p[kt2]==i) kt2 <- kt2 + 1 # counts t2() terms else kz <- kz + 1 k <- k + 1 # counts smooth terms } else { # parametric av[kp] <- terms[i] ## element kp on rhs of parametric kp <- kp+1 # counts parametric terms } } if (!is.null(off)) { ## deal with offset av[kp] <- as.character(attr(tf,"variables")[1+off]) kp <- kp+1 } pf <- paste(response,"~",paste(av,collapse=" + ")) if (attr(tf,"intercept")==0) { pf <- paste(pf,"-1",sep="") if (kp>1) pfok <- 1 else pfok <- 0 } else { pfok <- 1;if (kp==1) { pf <- paste(pf,"1"); } } fake.formula <- pf if (length(smooth.spec)>0) for (i in 1:length(smooth.spec)) { nt <- length(smooth.spec[[i]]$term)
ff1 <- paste(smooth.spec[[i]]$term[1:nt],collapse="+") fake.formula <- paste(fake.formula,"+",ff1) if (smooth.spec[[i]]$by!="NA") {
fake.formula <- paste(fake.formula,"+",smooth.spec[[i]]$by) av <- c(av,smooth.spec[[i]]$term,smooth.spec[[i]]$by) } else av <- c(av,smooth.spec[[i]]$term)
}
fake.formula <- as.formula(fake.formula,p.env)
if (length(av)) {
pred.formula <- as.formula(paste("~",paste(av,collapse="+")))
pav <- all.vars(pred.formula) ## trick to strip out 'offset(x)' etc...
pred.formula <- reformulate(pav,env=p.env)
} else  pred.formula <- ~1
ret <- list(pf=as.formula(pf,p.env),pfok=pfok,smooth.spec=smooth.spec,
fake.formula=fake.formula,response=response,fake.names=av,
pred.names=pav,pred.formula=pred.formula)
#environment(ret$fake.formula) <- environment(ret$pred.formula) <- p.env
class(ret) <- "split.gam.formula"
ret
} ## interpret.gam0

interpret.gam <- function(gf,extra.special=NULL) {
## wrapper to allow gf to be a list of formulae or
## a single formula. This facilitates general penalized
## likelihood models in which several linear predictors
## may be involved...
##
## The list syntax is as follows. The first formula must have a response on
## the lhs, rather than labels. For m linear predictors, there
## must be m 'base formulae' in linear predictor order. lhs labels will
## be ignored in a base formula. Empty base formulae have '-1' on rhs.
## Further formulae have labels up to m labels 1,...,m on the lhs, in a
## syntax like this: 3 + 5 ~ s(x), which indicates that the same s(x)
## should be added to both linear predictors 3 and 5.
## e.g. A bivariate normal model with common expected values might be
## list(y1~-1,y2~-1,1+2~s(x)), whereas if the second component was contaminated
## by something else we might have list(y1~-1,y2~s(v)-1,1+2~s(x))
##
## For a list argument, this routine returns a list of split.formula objects
## with an extra field "lpi" indicating the linear predictors to which each
## contributes...
if (is.list(gf)) {
d <- length(gf)
p.env <- environment(gf[[1]])
## make sure all formulae have a response, to avoid
## problems with parametric sub formulae of the form ~1
#if (length(gf[[1]])<3) stop("first formula must specify a response")
resp <- gf[[1]][2]

ret <- list()
pav <- av <- rep("",0)
nlp <- 0 ## count number of linear predictors (may be different from number of formulae)
for (i in 1:d) {
textra <- if (i==1) NULL else paste(".",i-1,sep="") ## modify smooth labels to identify to predictor

lpi <- getNumericResponse(gf[[i]]) ## get linear predictors to which this applies, if explicit
if (length(lpi)==1) warning("single linear predictor indices are ignored")
if (length(lpi)>0) gf[[i]][[2]] <- NULL else { ## delete l.p. labels from formula response
nlp <- nlp + 1;lpi <- nlp ## this is base formula for l.p. number nlp
}
ret[[i]] <- interpret.gam0(gf[[i]],textra,extra.special=extra.special)
ret[[i]]$lpi <- lpi ## record of the linear predictors to which this applies ## make sure all parametric formulae have a response, to avoid ## problems with parametric sub formulae of the form ~1 respi <- rep("",0) ## no extra response terms if (length(ret[[i]]$pf)==2) {
ret[[i]]$pf[3] <- ret[[i]]$pf[2];ret[[i]]$pf[2] <- resp respi <- rep("",0) } else if (i>1) respi <- ret[[i]]$response ## extra response terms
av <- c(av,ret[[i]]$fake.names,respi) ## accumulate all required variable names pav <- c(pav,ret[[i]]$pred.names) ## predictors only
}
av <- unique(av) ## strip out duplicate variable names
pav <- unique(pav)
if (length(av)>0) {
## work around - reformulate with response = "log(x)" will treat log(x) as a name,
## not the call it should be...
fff <- formula(paste(ret[[1]]$response,"~ .")) ret$fake.formula <- reformulate(av,response=ret[[1]]$response,env=p.env) ret$fake.formula[[2]] <- fff[[2]] ## fix messed up response
} else ret$fake.formula <- ret[[1]]$fake.formula ## create fake formula containing all variables
ret$pred.formula <- if (length(pav)>0) reformulate(pav) else ~1 ## predictor only formula environment(ret$pred.formula) <- p.env
ret$response <- ret[[1]]$response
ret$nlp <- nlp ## number of linear predictors for (i in 1:d) if (max(ret[[i]]$lpi)>nlp||min(ret[[i]]$lpi)<1) stop("linear predictor labels out of range") class(ret) <- "split.gam.formula" return(ret) } else interpret.gam0(gf,extra.special=extra.special) } ## interpret.gam fixDependence <- function(X1,X2,tol=.Machine$double.eps^.5,rank.def=0,strict=FALSE)
# model matrix X2 may be linearly dependent on X1. This
# routine finds which columns of X2 should be zeroed to
# fix this. If rank.def>0 then it is taken as the known degree
# of dependence of X2 on X1 and tol is ignored.
{ qr1 <- qr(X1,LAPACK=TRUE)
R11 <- abs(qr.R(qr1)[1,1])
r<-ncol(X1);n<-nrow(X1)
if (strict) { ## only delete columns of X2 individually dependent on X1
## Project columns of X2 into space of X1 and look at difference
## to orignal X2 to check for deficiency...
QtX2 <- qr.qty(qr1,X2)
QtX2[-(1:r),] <- 0
mdiff <- colMeans(abs(X2 - qr.qy(qr1,QtX2)))
if (rank.def>0) ind <- (1:ncol(X2))[rank(mdiff) <= rank.def] else
ind <- (1:ncol(X2))[mdiff < R11*tol]
if (length(ind)<1) ind <- NULL
} else { ## make X2 full rank given X1
QtX2 <- qr.qty(qr1,X2)[(r+1):n,] # Q'X2
qr2 <- qr(QtX2,LAPACK=TRUE)
R <- qr.R(qr2)
# now final diagonal block of R may be zero, indicating rank
# deficiency.
r0 <- r <- nrow(R)
if (rank.def > 0 && rank.def <= nrow(R)) r0 <- r - rank.def else ## degree of rank def known
while (r0>0 && mean(abs(R[r0:r,r0:r]))< R11*tol) r0 <- r0 -1 ## compute rank def
r0 <- r0 + 1
if (r0>r) return(NULL) else
ind <- qr2$pivot[r0:r] # the columns of X2 to zero in order to get independence } ind } ## fixDependence augment.smX <- function(sm,nobs,np) { ## augments a smooth model matrix with a square root penalty matrix for ## identifiability constraint purposes. ns <- length(sm$S) ## number of penalty matrices
if (ns==0) { ## nothing to do
return(rbind(sm$X,matrix(0,np,ncol(sm$X))))
}
ind <- colMeans(abs(sm$S[[1]]))!=0 sqrmaX <- mean(abs(sm$X[,ind]))^2
alpha <- sqrmaX/mean(abs(sm$S[[1]][ind,ind])) St <- sm$S[[1]]*alpha
if (ns>1) for (i in 2:ns) {
ind <- colMeans(abs(sm$S[[i]]))!=0 alpha <- sqrmaX/mean(abs(sm$S[[i]][ind,ind]))
St <- St +  sm$S[[i]]*alpha } rS <- mroot(St,rank=ncol(St)) ## get sqrt of penalty X <- rbind(sm$X,matrix(0,np,ncol(sm$X))) ## create augmented model matrix X[nobs+sm$p.ind,] <- t(rS) ## add in
X ## scaled augmented model matrix
} ## augment.smX

gam.side <- function(sm,Xp,tol=.Machine$double.eps^.5,with.pen=FALSE) # works through a list of smooths, sm, aiming to identify nested or partially # nested terms, and impose identifiability constraints on them. # Xp is the parametric model matrix. It is needed in order to check whether # there is a constant (or equivalent) in the model. If there is, then this needs # to be included when working out side constraints, otherwise dependencies can be # missed. # Note that with.pen is quite extreme, since you then pretty much only pick # up dependencies in the null spaces { if (!with.pen) { ## check that's possible and reset if not! with.pen <- nrow(Xp) < ncol(Xp) + sum(unlist(lapply(sm,function(x) ncol(x$X))))
}
m <- length(sm)
if (m==0) return(sm)
v.names<-array("",0);maxDim<-1
for (i in 1:m) { ## collect all term names and max smooth dim'
vn <- sm[[i]]$term ## need to include by variables in names if (sm[[i]]$by!="NA") vn <- paste(vn,sm[[i]]$by,sep="") ## need to distinguish levels of factor by variables... if (!is.null(sm[[i]]$by.level))  vn <- paste(vn,sm[[i]]$by.level,sep="") sm[[i]]$vn <- vn ## use this record to identify variables from now
v.names <- c(v.names,vn)
if (sm[[i]]$dim > maxDim) maxDim <- sm[[i]]$dim
}
lv <- length(v.names)
v.names <- unique(v.names)
if (lv == length(v.names)) return(sm) ## no repeats => no nesting

## Only get this far if there is nesting.
## Need to test for intercept or equivalent in Xp
intercept <- FALSE
if (ncol(Xp)) {
## first check columns directly...
if (sum(apply(Xp,2,sd)<.Machine$double.eps^.75)>0) intercept <- TRUE else { ## no constant column, so need to check span of Xp... f <- rep(1,nrow(Xp)) ff <- qr.fitted(qr(Xp),f) if (max(abs(ff-f))<.Machine$double.eps^.75) intercept <- TRUE
}
}

sm.id <- as.list(v.names)
names(sm.id) <- v.names
for (i in 1:length(sm.id)) sm.id[[i]]<-array(0,0)
sm.dim <- sm.id
for (d in 1:maxDim) {
for (i in 1:m) {
if (sm[[i]]$dim==d&&sm[[i]]$side.constrain) for (j in 1:d) { ## work through terms
term<-sm[[i]]$vn[j] a <- sm.id[[term]] la <- length(a)+1 sm.id[[term]][la] <- i ## record smooth i.d. for this variable sm.dim[[term]][la] <- d ## ... and smooth dim. } } } ## so now each unique variable name has an associated array of ## the smooths of which it is an argument, arranged in ascending ## order of dimension. Smooths for which side.constrain==FALSE are excluded. if (maxDim==1) warning("model has repeated 1-d smooths of same variable.") ## Now set things up to enable term specific model matrices to be ## augmented with square root penalties, on the fly... if (with.pen) { k <- 1 for (i in 1:m) { ## create parameter indices for each term k1 <- k + ncol(sm[[i]]$X) - 1
sm[[i]]$p.ind <- k:k1 k <- k1 + 1 } np <- k-1 ## number of penalized parameters } nobs <- nrow(sm[[1]]$X) ## number of observations

for (d in 1:maxDim) { ## work up through dimensions
for (i in 1:m) { ## work through smooths
if (sm[[i]]$dim == d&&sm[[i]]$side.constrain) { ## check for nesting
if (with.pen) X1 <- matrix(c(rep(1,nobs),rep(0,np)),nobs+np,as.integer(intercept)) else
X1 <- matrix(1,nobs,as.integer(intercept))
X1comp <- rep(0,0) ## list of components of X1 to avoid duplication
for (j in 1:d) { ## work through variables
b <- sm.id[[sm[[i]]$vn[j]]] # list of smooths dependent on this variable k <- (1:length(b))[b==i] ## locate current smooth in list if (k>1) for (l in 1:(k-1)) if (!b[l] %in% X1comp) { ## collect X columns X1comp <- c(X1comp,b[l]) ## keep track of components to avoid adding same one twice if (with.pen) { ## need to use augmented model matrix in testing if (is.null(sm[[b[l]]]$Xa)) sm[[b[l]]]$Xa <- augment.smX(sm[[b[l]]],nobs,np) X1 <- cbind(X1,sm[[b[l]]]$Xa)
} else X1 <- cbind(X1,sm[[b[l]]]$X) ## penalties not considered } } ## Now X1 contains columns for all lower dimensional terms if (ncol(X1)==as.integer(intercept)) ind <- NULL else { if (with.pen) { if (is.null(sm[[i]]$Xa)) sm[[i]]$Xa <- augment.smX(sm[[i]],nobs,np) ind <- fixDependence(X1,sm[[i]]$Xa,tol=tol)
} else ind <- fixDependence(X1,sm[[i]]$X,tol=tol) } ## ... the columns to zero to ensure independence if (!is.null(ind)) { sm[[i]]$X <- sm[[i]]$X[,-ind] ## work through list of penalty matrices, applying constraints... nsmS <- length(sm[[i]]$S)
if (nsmS>0) for (j in nsmS:1) { ## working down so that dropping is painless
sm[[i]]$S[[j]] <- sm[[i]]$S[[j]][-ind,-ind]
if (sum(sm[[i]]$S[[j]]!=0)==0) rank <- 0 else rank <- qr(sm[[i]]$S[[j]],tol=tol,LAPACK=FALSE)$rank sm[[i]]$rank[j] <- rank ## replace previous rank with new rank
if (rank == 0) { ## drop the penalty
sm[[i]]$rank <- sm[[i]]$rank[-j]
sm[[i]]$S[[j]] <- NULL sm[[i]]$S.scale <- sm[[i]]$S.scale[-j] if (!is.null(sm[[i]]$L)) sm[[i]]$L <- sm[[i]]$L[-j,,drop=FALSE]
}
} ## penalty matrices finished
## Now we need to establish null space rank for the term
mi <- length(sm[[i]]$S) if (mi>0) { St <- sm[[i]]$S[[1]]/norm(sm[[i]]$S[[1]],type="F") if (mi>1) for (j in 1:mi) St <- St + sm[[i]]$S[[j]]/norm(sm[[i]]$S[[j]],type="F") es <- eigen(St,symmetric=TRUE,only.values=TRUE) sm[[i]]$null.space.dim <- sum(es$values<max(es$values)*.Machine$double.eps^.75) } ## rank found if (!is.null(sm[[i]]$L)) {
ind <- as.numeric(colSums(sm[[i]]$L!=0))!=0 sm[[i]]$L <- sm[[i]]$L[,ind,drop=FALSE] ## retain only those sps that influence something! } sm[[i]]$df <- ncol(sm[[i]]$X) attr(sm[[i]],"del.index") <- ind ## Now deal with case in which prediction constraints differ from fit constraints if (!is.null(sm[[i]]$Xp)) { ## need to get deletion indices under prediction parameterization
## Note that: i) it doesn't matter what the identifiability con on X1 is
##            ii) the degree of rank deficiency can't be changed by an identifiability con
if (with.pen) {
smi <- sm[[i]]  ## clone smooth
smi$X <- smi$Xp ## copy prediction Xp to X slot.
smi$S <- smi$Sp ## and make sure penalty parameterization matches.
Xpa <- augment.smX(smi,nobs,np)
ind <- fixDependence(X1,Xpa,rank.def=length(ind))
} else ind <- fixDependence(X1,sm[[i]]$Xp,rank.def=length(ind)) sm[[i]]$Xp <- sm[[i]]$Xp[,-ind,drop=FALSE] attr(sm[[i]],"del.index") <- ind ## over-writes original } } sm[[i]]$vn <- NULL
} ## end if (sm[[i]]$dim == d) } ## end i in 1:m loop } if (with.pen) for (i in 1:m) { ## remove working variables that were added sm[[i]]$p.ind <- NULL
if (!is.null(sm[[i]]$Xa)) sm[[i]]$Xa <- NULL
}
sm
} ## gam.side

clone.smooth.spec <- function(specb,spec) {
## produces a version of base smooth.spec, specb', but with
## the variables relating to spec'. Used by gam.setup' in handling
## check dimensions same...
if (specb$dim!=spec$dim) stop("id' linked smooths must have same number of arguments")
## Now start cloning...
if (inherits(specb,c("tensor.smooth.spec","t2.smooth.spec"))) { ##te' or t2' generated base smooth.spec
specb$term <- spec$term
specb$label <- spec$label
specb$by <- spec$by
k <- 1
for (i in 1:length(specb$margin)) { if (is.null(spec$margin)) { ## sloppy user -- have to construct margin info...
for (j in 1:length(specb$margin[[i]]$term)) {
specb$margin[[i]]$term[j] <- spec$term[k] k <- k + 1 } specb$margin[[i]]$label <- "" } else { ## second term was at least te'/t2', so margin cloning is easy specb$margin[[i]]$term <- spec$margin[[i]]$term specb$margin[[i]]$label <- spec$margin[[i]]$label specb$margin[[i]]$xt <- spec$margin[[i]]$xt } } } else { ## s' generated case specb$term <- spec$term specb$label <- spec$label specb$by <- spec$by specb$xt <- spec$xt ## don't generally know what's in here => don't clone } specb ## return clone } ## clone.smooth.spec parametricPenalty <- function(pterms,assign,paraPen,sp0) { ## routine to process any penalties on the parametric part of the model. ## paraPen is a list whose items have names corresponding to the ## term.labels in pterms. Each list item may have named elements ## L, rank and sp. All other elements should be penalty coefficient matrices. S <- list() ## penalty matrix list off <- rep(0,0) ## offset array rank <- rep(0,0) ## rank array sp <- rep(0,0) ## smoothing param array full.sp.names <- rep("",0) ## names for sp's multiplying penalties (not underlying) L <- matrix(0,0,0) k <- 0 tind <- unique(assign) ## unique term indices n.t <- length(tind) if (n.t>0) for (j in 1:n.t) if (tind[j]>0) { term.label <- attr(pterms[tind[j]],"term.label") P <- paraPen[[term.label]] ## get any penalty information for this term if (!is.null(P)) { ## then there is information ind <- (1:length(assign))[assign==tind[j]] ## index of coefs involved here Li <- P$L;P$L <- NULL spi <- P$sp;P$sp <- NULL ranki <- P$rank;P$rank <- NULL ## remaining terms should be penalty matrices... np <- length(P) if (!is.null(ranki)&&length(ranki)!=np) stop("rank' has wrong length in paraPen'") if (np) for (i in 1:np) { ## unpack penalty matrices, offsets and ranks k <- k + 1 S[[k]] <- P[[i]] off[k] <- min(ind) ## index of first coef penalized by this term if ( ncol(P[[i]])!=nrow(P[[i]])||nrow(P[[i]])!=length(ind)) stop(" a parametric penalty has wrong dimension") if (is.null(ranki)) { ev <- eigen(S[[k]],symmetric=TRUE,only.values=TRUE)$values
rank[k] <- sum(ev>max(ev)*.Machine$double.eps*10) ## estimate rank } else rank[k] <- ranki[i] } ## now deal with L matrices if (np) { ## only do this stuff if there are any penalties! if (is.null(Li)) Li <- diag(np) if (nrow(Li)!=np) stop("L has wrong dimension in paraPen'") L <- rbind(cbind(L,matrix(0,nrow(L),ncol(Li))), cbind(matrix(0,nrow(Li),ncol(L)),Li)) ind <- (length(sp)+1):(length(sp)+ncol(Li)) ind2 <- (length(sp)+1):(length(sp)+nrow(Li)) ## used to produce names for full sp array if (is.null(spi)) { sp[ind] <- -1 ## auto-initialize } else { if (length(spi)!=ncol(Li)) stop("sp' dimension wrong in paraPen'") sp[ind] <- spi } ## add smoothing parameter names.... if (length(ind)>1) names(sp)[ind] <- paste(term.label,ind-ind[1]+1,sep="") else names(sp)[ind] <- term.label if (length(ind2)>1) full.sp.names[ind2] <- paste(term.label,ind2-ind2[1]+1,sep="") else full.sp.names[ind2] <- term.label } } ## end !is.null(P) } ## looped through all terms if (k==0) return(NULL) if (!is.null(sp0)) { if (length(sp0)<length(sp)) stop("sp' too short") sp0 <- sp0[1:length(sp)] sp[sp<0] <- sp0[sp<0] } ## S is list of penalty matrices, off[i] is index of first coefficient penalized by each S[[i]] ## sp is array of underlying smoothing parameter (-ve to estimate), L is matrix mapping log ## underlying smoothing parameters to log smoothing parameters, rank[i] is the rank of S[[i]]. list(S=S,off=off,sp=sp,L=L,rank=rank,full.sp.names=full.sp.names) } ## parametricPenalty getNumericResponse <- function(form) { ## takes a formula for which the lhs contains numbers, but no variable ## names, and returns an array of those numbers. For example if 'form' ## is '1+26~s(x)', this will return the numeric vector c(1,26). ## A zero length vector is returned if the lhs contains no numbers, ## or contains variable names. ## This is useful for setting up models in which ## multiple linear predictors share terms. The lhs numbers can then ## indicate which linear predictors the rhs will contribute to. ## if the response contains variables or there is no ## response then return nothing... if (length(form)==2||length(all.vars(form[[2]]))>0) return(rep(0,0)) ## deparse turns lhs into a string; strsplit extracts the characters ## corresponding to numbers; unlist deals with the fact that deparse ## will split long lines resulting in multiple list items from ## strsplit; as.numeric converts the numbers; na.omit drops NAs ## resulting from "" elements; unique & round are obvious... round(unique(na.omit(as.numeric(unlist(strsplit(deparse(form[[2]]), "[^0-9]+")))))) } ## getNumericResponse olid <- function(X,nsdf,pstart,flpi,lpi) { ## X is a model matrix, made up of nf=length(nsdf) column blocks. ## The ith block starts at column pstart[i] and its first nsdf[i] ## columns are unpenalized. X is used to define nlp=length(lpi) ## linear predictors. lpi[[i]] gives the columns of X used in the ## ith linear predictor. flpi[j] gives the linear predictor(s) ## to which the jth block of X belongs. The problem is that the ## unpenalized blocks need not be identifiable when used in combination. ## This function returns a vector dind of columns of X to drop for ## identifiability, along with modified lpi, pstart and nsdf vectors. nlp <- length(lpi) ## number of linear predictors n <- nrow(X) nf <- length(nsdf) ## number of formulae blocks Xp <- matrix(0,n*nlp,sum(nsdf)) start <- 1 ii <- 1:n tind <- rep(0,0) ## complete index of all parametric columns in X ## create a block matrix, Xp, with the same identifiability properties as ## unpenalized part of model... for (i in 1:nf) { stop <- start - 1 + nsdf[i] if (stop>=start) { ind <- pstart[i] + 1:nsdf[i] - 1 for (k in flpi[[i]]) { Xp[ii+(k-1)*n,start:stop] <- X[,ind] } tind <- c(tind,ind) start <- start + nsdf[i] } } ## rank deficiency of Xp will reveal number of redundant parametric ## terms, and a pivoted QR will reveal which to drop to restore ## full rank... qrx <- qr(Xp,LAPACK=TRUE,tol=0.0) ## unidentifiable columns get pivoted to final cols r <- Rrank(qr.R(qrx)) ## get rank from R factor of pivoted QR if (r==ncol(Xp)) { ## full rank, all fine, drop nothing dind <- rep(0,0) } else { ## reduced rank, drop some columns dind <- tind[sort(qrx$pivot[(r+1):ncol(X)],decreasing=TRUE)] ## columns to drop
## now we need to adjust nsdf, pstart and lpi
for (d in dind) { ## working down through drop indices
## following commented out code is useful should it ever prove necessary to
## adjust pstart and nsdf, but at present these are only used in prediction,
## and it is cleaner to leave them unchanged, and simply drop using dind during prediction.
#k <- if (d>=pstart[nf]) nlp else which(d >= pstart[1:(nf-1)] & d < pstart[2:nf])
#nsdf[k] <- nsdf[k] - 1 ## one less unpenalized column in this block
#if (k<nf) pstart[(k+1):nf] <-  pstart[(k+1):nf] - 1 ## later block starts move down 1
for (i in 1:nlp) {
k <- which(d == lpi[[i]])
if (length(k)>0) lpi[[i]] <- lpi[[i]][-k] ## drop row
k <- which(lpi[[i]]>d)
if (length(k)>0) lpi[[i]][k] <- lpi[[i]][k] - 1 ## close up
}
} ## end of drop index loop
}
list(dind=dind,lpi=lpi) ##,pstart=pstart,nsdf=nsdf)
} ## olid

gam.setup.list <- function(formula,pterms,
data=stop("No data supplied to gam.setup"),knots=NULL,sp=NULL,
scale.penalty=TRUE,paraPen=NULL,gamm.call=FALSE,drop.intercept=NULL,apply.by=TRUE,modCon=0) {
## version of gam.setup for when gam is called with a list of formulae,
## specifying several linear predictors...
## key difference to gam.setup is an attribute to the model matrix, "lpi", which is a list
## of column indices for each linear predictor
if (!is.null(paraPen)) stop("paraPen not supported for multi-formula models")
if (!absorb.cons) stop("absorb.cons must be TRUE for multi-formula models")
d <- length(pterms) ## number of formulae
if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, d)
if (length(drop.intercept) != d) stop("length(drop.intercept) should be equal to number of model formulas")

lp.overlap <- if (formula$nlp<d) TRUE else FALSE ## predictors share terms G <- gam.setup(formula[[1]],pterms[[1]], data,knots,sp,min.sp,H,absorb.cons,sparse.cons,select, idLinksBases,scale.penalty,paraPen,gamm.call,drop.intercept[1],apply.by=apply.by,list.call=TRUE,modCon=modCon) G$pterms <- pterms

G$offset <- list(G$offset)
#G$contrasts <- list(G$contrasts)
G$xlevels <- list(G$xlevels)
G$assign <- list(G$assign)
used.sp <- length(G$lsp0) if (!is.null(sp)&&used.sp>0) sp <- sp[-(1:used.sp)] ## need to strip off already used sp's if (!is.null(min.sp)&&nrow(G$L)>0) min.sp <- min.sp[-(1:nrow(G$L))] ## formula[[1]] always relates to the base formula of the first linear predictor... flpi <- lpi <- list() for (i in 1:formula$nlp) lpi[[i]] <- rep(0,0)
lpi[[1]] <- 1:ncol(G$X) ## lpi[[j]] is index of cols for jth linear predictor flpi[[1]] <- formula[[1]]$lpi ## used in identifiability testing by olid, later

pof <- ncol(G$X) ## counts the model matrix columns produced so far pstart <- rep(0,d) ## indexes where parameteric columns start in each formula block of X pstart[1] <- 1 if (d>1) for (i in 2:d) { if (is.null(formula[[i]]$response)) {  ## keep gam.setup happy
formula[[i]]$response <- formula$response
mv.response <- FALSE
} else mv.response <- TRUE
#spind <- if (is.null(sp)) 1 else (length(G$S)+1):length(sp) formula[[i]]$pfok <- 1 ## empty formulae OK here!
um <- gam.setup(formula[[i]],pterms[[i]],
data,knots,sp,min.sp,#sp[spind],min.sp[spind],
H,absorb.cons,sparse.cons,select,
used.sp <- length(um$lsp0) if (!is.null(sp)&&used.sp>0) sp <- sp[-(1:used.sp)] ## need to strip off already used sp's if (!is.null(min.sp)&&nrow(um$L)>0) min.sp <- min.sp[-(1:nrow(um$L))] flpi[[i]] <- formula[[i]]$lpi
for (j in formula[[i]]$lpi) { ## loop through all l.p.s to which this term contributes lpi[[j]] <- c(lpi[[j]],pof + 1:ncol(um$X)) ## add these cols to lpi[[j]]
##lpi[[i]] <- pof + 1:ncol(um$X) ## old code } if (mv.response) G$y <- cbind(G$y,um$y)
if (i>formula$nlp&&!is.null(um$offset)) {
stop("shared offsets not allowed")
}
G$offset[[i]] <- um$offset
#G$contrasts[[i]] <- um$contrasts
if (!is.null(um$contrasts)) G$contrasts <- c(G$contrasts,um$contrasts)
G$xlevels[[i]] <- um$xlevels
G$assign[[i]] <- um$assign
G$rank <- c(G$rank,um$rank) pstart[i] <- pof+1 G$X <- cbind(G$X,um$X) ## extend model matrix
## deal with the smooths...
k <- G$m if (um$m) for (j in 1:um$m) { um$smooth[[j]]$first.para <- um$smooth[[j]]$first.para + pof um$smooth[[j]]$last.para <- um$smooth[[j]]$last.para + pof k <- k + 1 G$smooth[[k]] <- um$smooth[[j]] } ## L, S and off... ks <- length(G$S)
M <- length(um$S) if (!is.null(um$L)||!is.null(G$L)) { if (is.null(G$L)) G$L <- diag(1,nrow=ks) if (is.null(um$L)) um$L <- diag(1,nrow=M) G$L <- rbind(cbind(G$L,matrix(0,nrow(G$L),ncol(um$L))),cbind(matrix(0,nrow(um$L),ncol(G$L)),um$L))
}

G$off <- c(G$off,um$off+pof) if (M) for (j in 1:M) { ks <- ks + 1 G$S[[ks]] <- um$S[[j]] } G$m <- G$m + um$m ## number of smooths
##G$nsdf <- G$nsdf + um$nsdf ## or list?? G$nsdf[i] <- um$nsdf if (!is.null(um$P)||!is.null(G$P)) { if (is.null(G$P)) G$P <- diag(1,nrow=pof) k <- ncol(um$X)
if (is.null(um$P)) um$P <- diag(1,nrow=k)
G$P <- rbind(cbind(G$P,matrix(0,pof,k)),cbind(matrix(0,k,pof),um$P)) } G$cmX <- c(G$cmX,um$cmX)
if (um$nsdf>0) um$term.names[1:um$nsdf] <- paste(um$term.names[1:um$nsdf],i-1,sep=".") G$term.names <- c(G$term.names,um$term.names)
G$lsp0 <- c(G$lsp0,um$lsp0) G$sp <- c(G$sp,um$sp)
pof <- ncol(G$X) } ## formula loop end ## If there is overlap then there is a danger of lack of identifiability of the ## parameteric terms, especially if there are factors present in shared components. ## The following code deals with this possibility... if (lp.overlap) { rt <- olid(G$X,G$nsdf,pstart,flpi,lpi) if (length(rt$dind)>0) { ## then columns have to be dropped
warning("dropping unidentifiable parametric terms from model",call.=FALSE)
G$X <- G$X[,-rt$dind] ## drop cols G$cmX <- G$cmX[-rt$dind]
G$term.names <- G$term.names[-rt$dind] ## adjust indexing in smooth list, noting that coefs of smooths ## are never dropped by dind for (i in 1:length(G$smooth)) {
k <- sum(rt$dind < G$smooth[[i]]$first.para) G$smooth[[i]]$first.para <- G$smooth[[i]]$first.para - k G$smooth[[i]]$last.para <- G$smooth[[i]]$last.para - k } for (i in 1:length(G$off)) G$off[i] <- G$off[i] - sum(rt$dind < G$off[i])
## replace various indices with updated versions...
# pstart <- rt$pstart; G$nsdf <- rt$nsdf ## these two only needed in predict.gam - cleaner to leave unchanged lpi <- rt$lpi
attr(G$nsdf,"drop.ind") <- rt$dind ## store drop index
}
}
attr(lpi,"overlap") <- lp.overlap
attr(G$X,"lpi") <- lpi attr(G$nsdf,"pstart") <- pstart ##unlist(lapply(lpi,min))

## assemble a global indicator array for non-linear parameters...
G$g.index <- rep(FALSE,ncol(G$X))
n.sp0 <- 0
if (length(G$smooth)) for (i in 1:length(G$smooth)) {
if (!is.null(G$smooth[[i]]$g.index)) G$g.index[G$smooth[[i]]$first.para:G$smooth[[i]]$last.para] <- G$smooth[[i]]$g.index n.sp <- length(G$smooth[[i]]$S) if (n.sp) { G$smooth[[i]]$first.sp <- n.sp0 + 1 n.sp0 <- G$smooth[[i]]$last.sp <- n.sp0 + n.sp } } if (!any(G$g.index)) G$g.index <- NULL G } ## gam.setup.list gam.setup <- function(formula,pterms, data=stop("No data supplied to gam.setup"),knots=NULL,sp=NULL, min.sp=NULL,H=NULL,absorb.cons=TRUE,sparse.cons=0,select=FALSE,idLinksBases=TRUE, scale.penalty=TRUE,paraPen=NULL,gamm.call=FALSE,drop.intercept=FALSE, diagonal.penalty=FALSE,apply.by=TRUE,list.call=FALSE,modCon=0) ## set up the model matrix, penalty matrices and auxilliary information about the smoothing bases ## needed for a gam fit. ## elements of returned object: ## * m - number of smooths ## * min.sp - minimum smoothing parameters ## * H supplied H matrix ## * pearson.extra, dev.extra, n.true --- entries to hold these quantities ## * pterms - terms object for parametric terms ## * intercept TRUE if intercept present ## * offset - the model offset ## * nsdf - number of strictly parameteric coefs ## * contrasts ## * xlevels - records levels of factors ## * assign - indexes which parametric model matrix columns map to which term in pterms ## * smooth - list of smooths ## * S - penalties (non-zero block only) ## * off - first coef penalized by each element of S ## * cmX - col mean of X ## * P - maps parameters in fit constraint parameterization to those in prediction parameterization ## * X - model matrix ## * sp ## * rank ## * n.paraPen ## * L ## * lsp0 ## * y - response ## * C - constraint matrix - only if absorb.cons==FALSE ## * n - dim(y) ## * w - weights ## * term.names ## * nP { # split the formula if the object being passed is a formula, otherwise it's already split if (inherits(formula,"split.gam.formula")) split <- formula else if (inherits(formula,"formula")) split <- interpret.gam(formula) else stop("First argument is no sort of formula!") if (length(split$smooth.spec)==0) {
if (split$pfok==0) stop("You've got no model....") m <- 0 } else m <- length(split$smooth.spec) # number of smooth terms

G <- list(m=m,min.sp=min.sp,H=H,pearson.extra=0,
dev.extra=0,n.true=-1,pterms=pterms) ## dev.extra gets added to deviance if REML/ML used in gam.fit3

if (is.null(attr(data,"terms"))) # then data is not a model frame
mf <- model.frame(split$pf,data,drop.unused.levels=FALSE) # must be false or can end up with wrong prediction matrix! else mf <- data # data is already a model frame G$intercept <-  attr(attr(mf,"terms"),"intercept")>0

## get any model offset. Complicated by possibility of offsets in multiple formulae...
if (list.call) {
offi <- attr(pterms,"offset")
if (!is.null(offi)) {
G$offset <- mf[[names(attr(pterms,"dataClasses"))[offi]]] } } else G$offset <- model.offset(mf)   # get any model offset including from offset argument

if (!is.null(G$offset)) G$offset <- as.numeric(G$offset) # construct strictly parametric model matrix.... if (drop.intercept) attr(pterms,"intercept") <- 1 ## ensure there is an intercept to drop X <- model.matrix(pterms,mf) if (drop.intercept) { ## some extended families require intercept to be dropped xat <- attributes(X);ind <- xat$assign>0 ## index of non intercept columns
X <- X[,ind,drop=FALSE] ## some extended families need to drop intercept
xat$assign <- xat$assign[ind];xat$dimnames[[2]]<-xat$dimnames[[2]][ind];
xat$dim[2] <- xat$dim[2]-1;attributes(X) <- xat
G$intercept <- FALSE } rownames(X) <- NULL ## save memory G$nsdf <- ncol(X)
G$contrasts <- attr(X,"contrasts") G$xlevels <- .getXlevels(pterms,mf)
G$assign <- attr(X,"assign") # used to tell which coeffs relate to which pterms ## now deal with any user supplied penalties on the parametric part of the model... PP <- parametricPenalty(pterms,G$assign,paraPen,sp)
if (!is.null(PP)) { ## strip out supplied sps already used
ind <- 1:length(PP$sp) if (!is.null(sp)) sp <- sp[-ind] if (!is.null(min.sp)) { PP$min.sp <- min.sp[ind]
min.sp <- min.sp[-ind]
}
}

# next work through smooth terms (if any) extending model matrix.....

G$smooth <- list() G$S <- list()

if (gamm.call) { ## flag that this is a call from gamm --- some smoothers need to know!
if (m>0) for (i in 1:m) attr(split$smooth.spec[[i]],"gamm") <- TRUE } if (m>0 && idLinksBases) { ## search smooth.spec[[]] for terms linked by common id's id.list <- list() ## id information list for (i in 1:m) if (!is.null(split$smooth.spec[[i]]$id)) { id <- as.character(split$smooth.spec[[i]]$id) if (length(id.list)&&id%in%names(id.list)) { ## it's an existing id ni <- length(id.list[[id]]$sm.i) ## number of terms so far with this id
id.list[[id]]$sm.i[ni+1] <- i ## adding smooth.spec index to this id's list ## clone smooth.spec from base smooth spec.... base.i <- id.list[[id]]$sm.i[1]

split$smooth.spec[[i]] <- clone.smooth.spec(split$smooth.spec[[base.i]],
split$smooth.spec[[i]]) ## add data for this term to the data list for basis setup... temp.term <- split$smooth.spec[[i]]$term ## note cbind deliberate in next line, as construction will handle matrix argument ## correctly... for (j in 1:length(temp.term)) id.list[[id]]$data[[j]] <- cbind(id.list[[id]]$data[[j]], get.var(temp.term[j],data,vecMat=FALSE)) } else { ## new id id.list[[id]] <- list(sm.i=i) ## start the array of indices of smooths with this id id.list[[id]]$data <- list()
## need to collect together all data for which this basis will be used,
## for basis setup...
term <- split$smooth.spec[[i]]$term
for (j in 1:length(term)) id.list[[id]]$data[[j]] <- get.var(term[j],data,vecMat=FALSE) } ## new id finished } } ## id.list complete G$off<-array(0,0)
first.para<-G$nsdf+1 sm <- list() newm <- 0 if (m>0) for (i in 1:m) { # idea here is that terms are set up in accordance with information given in split$smooth.spec
# appropriate basis constructor is called depending on the class of the smooth
# constructor returns penalty matrices model matrix and basis specific information

id <- split$smooth.spec[[i]]$id
if (is.null(id)||!idLinksBases) { ## regular evaluation
sml <- smoothCon(split$smooth.spec[[i]],data,knots,absorb.cons,scale.penalty=scale.penalty, null.space.penalty=select,sparse.cons=sparse.cons, diagonal.penalty=diagonal.penalty,apply.by=apply.by,modCon=modCon) } else { ## it's a smooth with an id, so basis setup data differs from model matrix data names(id.list[[id]]$data) <- split$smooth.spec[[i]]$term ## give basis data suitable names
sml <- smoothCon(split$smooth.spec[[i]],id.list[[id]]$data,knots,
absorb.cons,n=nrow(data),dataX=data,scale.penalty=scale.penalty,
null.space.penalty=select,sparse.cons=sparse.cons,
diagonal.penalty=diagonal.penalty,apply.by=apply.by,modCon=modCon)
}
#for (j in 1:length(sml)) {
#  newm <- newm + 1
#  sm[[newm]] <- sml[[j]]
#}
ind <- 1:length(sml)
sm[ind+newm] <- sml[ind]
newm <- newm + length(sml)
}

G$m <- m <- newm ## number of actual smooths ## at this stage, it is neccessary to impose any side conditions required ## for identifiability if (m>0) { sm <- gam.side(sm,X,tol=.Machine$double.eps^.5)
if (!apply.by) for (i in 1:length(sm)) { ## restore any by-free model matrices
if (!is.null(sm[[i]]$X0)) { ## there is a by-free matrix to restore ind <- attr(sm[[i]],"del.index") ## columns, if any to delete sm[[i]]$X <- if (is.null(ind)) sm[[i]]$X0 else sm[[i]]$X0[,-ind,drop=FALSE]
}
}
}

## The matrix, L, mapping the underlying log smoothing parameters to the
## log of the smoothing parameter multiplying the S[[i]] must be
## worked out...
idx <- list() ## idx[[id]]$c contains index of first col in L relating to id L <- matrix(0,0,0) lsp.names <- sp.names <- rep("",0) ## need a list of names to identify sps in global sp array if (m>0) for (i in 1:m) { id <- sm[[i]]$id
## get the L matrix for this smooth...
length.S <- if (is.null(sm[[i]]$updateS)) length(sm[[i]]$S) else sm[[i]]$n.sp ## deals with possibility of non-linear penalty Li <- if (is.null(sm[[i]]$L)) diag(length.S) else sm[[i]]$L if (length.S > 0) { ## there are smoothing parameters to name if (length.S == 1) lspn <- sm[[i]]$label else {
Sname <- names(sm[[i]]$S) lspn <- if (is.null(Sname)) paste(sm[[i]]$label,1:length.S,sep="") else
paste(sm[[i]]$label,Sname,sep="") ## names for all sp's } spn <- lspn[1:ncol(Li)] ## names for actual working sps } ## extend the global L matrix... if (is.null(id)||is.null(idx[[id]])) { ## new id' if (!is.null(id)) { ## create record in idx' idx[[id]]$c <- ncol(L)+1   ## starting column in L for this id'
idx[[id]]$nc <- ncol(Li) ## number of columns relating to this id' } L <- rbind(cbind(L,matrix(0,nrow(L),ncol(Li))), cbind(matrix(0,nrow(Li),ncol(L)),Li)) if (length.S > 0) { ## there are smoothing parameters to name sp.names <- c(sp.names,spn) ## extend the sp name vector lsp.names <- c(lsp.names,lspn) ## extend full.sp name vector } } else { ## it's a repeat id => shares existing sp's L0 <- matrix(0,nrow(Li),ncol(L)) if (ncol(Li)>idx[[id]]$nc) {
stop("Later terms sharing an id' can not have more smoothing parameters than the first such term")
}
L0[,idx[[id]]$c:(idx[[id]]$c+ncol(Li)-1)] <- Li
L <- rbind(L,L0)
if (length.S > 0) { ## there are smoothing parameters to name
lsp.names <- c(lsp.names,lspn) ## extend full.sp name vector
}
}
}

## create the model matrix...

Xp <- NULL ## model matrix under prediction constraints, if given
if (m>0) for (i in 1:m) {
n.para <- ncol(sm[[i]]$X) # define which elements in the parameter vector this smooth relates to.... sm[[i]]$first.para<-first.para
first.para <- first.para+n.para
sm[[i]]$last.para <- first.para-1 ## termwise offset handling ... Xoff <- attr(sm[[i]]$X,"offset")
if (!is.null(Xoff)) {
if (is.null(G$offset)) G$offset <- Xoff
else G$offset <- G$offset + Xoff
}
## model matrix accumulation ...

## alternative version under alternative constraint first (prediction only)
if (is.null(sm[[i]]$Xp)) { if (!is.null(Xp)) Xp <- cbind2(Xp,sm[[i]]$X)
} else {
if (is.null(Xp)) Xp <- X
Xp <- cbind2(Xp,sm[[i]]$Xp);sm[[i]]$Xp <- NULL
}
## now version to use for fitting ...
X <- cbind2(X,sm[[i]]$X);sm[[i]]$X<-NULL

G$smooth[[i]] <- sm[[i]] } if (is.null(Xp)) { G$cmX <- colMeans(X) ## useful for componentwise CI construction
} else {
G$cmX <- colMeans(Xp) ## transform from fit params to prediction params... ## G$P <- qr.coef(qr(Xp),X) ## old code assumes always full rank!!

qrx <- qr(Xp,LAPACK=TRUE)
R <- qr.R(qrx)
p <- ncol(R)
rank <- Rrank(R) ## rank of Xp/R
QtX <- qr.qty(qrx,X)[1:rank,]
if (rank<p) { ## rank deficient
R <- R[1:rank,]
qrr <- qr(t(R),tol=0)
R <- qr.R(qrr)
G$P <- forwardsolve(t(R),QtX) } else { G$P <- backsolve(R,QtX)
}
if (rank<p) {
G$P <- qr.qy(qrr,rbind(G$P,matrix(0,p-rank,p)))
}
G$P[qrx$pivot,] <- G$P } ## cmX relates to computation of CIs incorportating uncertainty about the mean ## It may make more sense to incorporate all uncertainty about the mean, ## rather than just the uncertainty in the fixed effects mean. This means ## incorporating the mean of random effects and unconstrained smooths. Hence ## comment out the following. #if (G$nsdf>0) G$cmX[-(1:G$nsdf)] <- 0 ## zero the smooth parts here
#else G$cmX <- G$cmX * 0
G$X <- X;rm(X) n.p <- ncol(G$X)
# deal with penalties

## min.sp must be length nrow(L) to make sense
## sp must be length ncol(L) --- need to partition
## L into columns relating to free log smoothing parameters,
## and columns, L0, corresponding to values supplied in sp.
## lsp0 = L0%*%log(sp[sp>=0]) [need to fudge sp==0 case by
## setting log(0) to log(effective zero) computed case-by-case]

## following deals with supplied and estimated smoothing parameters...
## first process the sp' array supplied to gam'...

if (!is.null(sp)) { # then user has supplied fixed smoothing parameters
ok <- TRUE
if (length(sp) < ncol(L)) {
warning("Supplied smoothing parameter vector is too short - ignored.")
ok <- FALSE
}
if (sum(is.na(sp))) {
warning("NA's in supplied smoothing parameter vector - ignoring.")
ok <- FALSE
}
} else ok <- FALSE
G$sp <- if (ok) sp[1:ncol(L)] else rep(-1,ncol(L)) names(G$sp) <- sp.names

## now work through the smooths searching for any sp' elements
## supplied in s' or te' terms.... This relies on idx' created
## above...

k <- 1 ## current location in sp' array
if (m>0) for (i in 1:m) {
id <- sm[[i]]$id if (is.null(sm[[i]]$L)) Li <- diag(length(sm[[i]]$S)) else Li <- sm[[i]]$L
if (is.null(id)) { ## it's a smooth without an id
spi <- sm[[i]]$sp if (!is.null(spi)) { ## sp supplied in s' or te' if (length(spi)!=ncol(Li)) stop("incorrect number of smoothing parameters supplied for a smooth term") G$sp[k:(k+ncol(Li)-1)] <- spi
}
k <- k + ncol(Li)
} else { ## smooth has an id
spi <- sm[[i]]$sp if (is.null(idx[[id]]$sp.done)) { ## not already dealt with these sp's
if (!is.null(spi)) { ## sp supplied in s' or te'
if (length(spi)!=ncol(Li)) stop("incorrect number of smoothing parameters supplied for a smooth term")
G$sp[idx[[id]]$c:(idx[[id]]$c+idx[[id]]$nc-1)] <- spi
}
idx[[id]]$sp.done <- TRUE ## only makes sense to use supplied sp' from defining term k <- k + idx[[id]]$nc
}
}
} ## finished processing sp' vectors supplied in s' or te' terms

## copy initial sp's back into smooth objects, so there is a record of
## fixed and free...
k <- 1
if (length(idx)) for (i in 1:length(idx)) idx[[i]]$sp.done <- FALSE if (m>0) for (i in 1:m) { ## work through all smooths id <- sm[[i]]$id
if (!is.null(id)) { ## smooth with id
if (idx[[id]]$nc>0) { ## only copy if there are sp's G$smooth[[i]]$sp <- G$sp[idx[[id]]$c:(idx[[id]]$c+idx[[id]]$nc-1)] } if (!idx[[id]]$sp.done) { ## only update k on first encounter with this smooth
idx[[id]]$sp.done <- TRUE k <- k + idx[[id]]$nc
}

} else { ## no id, just work through sp
if (is.null(sm[[i]]$L)) nc <- length(sm[[i]]$S) else nc <- ncol(sm[[i]]$L) if (nc>0) G$smooth[[i]]$sp <- G$sp[k:(k+nc-1)]
k <- k + nc
}
} ## now all elements of G$smooth have a record of initial sp. if (!is.null(min.sp)) { # then minimum s.p.'s supplied if (length(min.sp)<nrow(L)) stop("length of min.sp is wrong.") if (nrow(L)>0) min.sp <- min.sp[1:nrow(L)] if (sum(is.na(min.sp))) stop("NA's in min.sp.") if (sum(min.sp<0)) stop("elements of min.sp must be non negative.") } k.sp <- 0 # count through sp and S G$rank <- array(0,0)
if (m>0) for (i in 1:m) {
sm<-G$smooth[[i]] if (length(sm$S)>0)
for (j in 1:length(sm$S)) { # work through penalty matrices k.sp <- k.sp+1 G$off[k.sp] <- sm$first.para G$S[[k.sp]] <- sm$S[[j]] G$rank[k.sp]<-sm$rank[j] if (!is.null(min.sp)) { if (is.null(H)) H<-matrix(0,n.p,n.p) H[sm$first.para:sm$last.para,sm$first.para:sm$last.para] <- H[sm$first.para:sm$last.para,sm$first.para:sm$last.para]+min.sp[k.sp]*sm$S[[j]]
}
}
}

## need to modify L, lsp.names, G$S, G$sp, G$rank and G$off to include any penalties
## on parametric stuff, at this point....
if (!is.null(PP)) { ## deal with penalties on parametric terms
L <- rbind(cbind(L,matrix(0,nrow(L),ncol(PP$L))), cbind(matrix(0,nrow(PP$L),ncol(L)),PP$L)) G$off <- c(PP$off,G$off)
G$S <- c(PP$S,G$S) G$rank <- c(PP$rank,G$rank)
G$sp <- c(PP$sp,G$sp) lsp.names <- c(PP$full.sp.names,lsp.names)
G$n.paraPen <- length(PP$off)
if (!is.null(PP$min.sp)) { ## deal with minimum sps if (is.null(H)) H <- matrix(0,n.p,n.p) for (i in 1:length(PP$S)) {
ind <- PP$off[i]:(PP$off[i]+ncol(PP$S[[i]])-1) H[ind,ind] <- H[ind,ind] + PP$min.sp[i] * PP$S[[i]] } } ## min.sp stuff finished } else G$n.paraPen <- 0

## Now remove columns of L and rows of sp relating to fixed
## smoothing parameters, and use removed elements to create lsp0

fix.ind <- G$sp>=0 if (sum(fix.ind)) { lsp0 <- G$sp[fix.ind]
ind <- lsp0==0 ## find the zero s.p.s
ef0 <- indi <- (1:length(ind))[ind]
if (length(indi)>0) for (i in 1:length(indi)) {
## find "effective zero" to replace each zero s.p. with
ii <- G$off[i]:(G$off[i]+ncol(G$S[[i]])-1) ef0[i] <- norm(G$X[,ii],type="F")^2/norm(G$S[[i]],type="F")*.Machine$double.eps*.1
}
lsp0[!ind] <- log(lsp0[!ind])
lsp0[ind] <- log(ef0) ##log(.Machine$double.xmin)*1000 ## zero fudge lsp0 <- as.numeric(L[,fix.ind,drop=FALSE]%*%lsp0) L <- L[,!fix.ind,drop=FALSE] G$sp <- G$sp[!fix.ind] } else {lsp0 <- rep(0,nrow(L))} G$H <- H

if (ncol(L)==nrow(L)&&!sum(L!=diag(ncol(L)))) L <- NULL ## it's just the identity

G$L <- L;G$lsp0 <- lsp0
names(G$lsp0) <- lsp.names ## names of all smoothing parameters (not just underlying) if (absorb.cons==FALSE) { ## need to accumulate constraints G$C <- matrix(0,0,n.p)
if (m>0) {
for (i in 1:m) {
if (is.null(G$smooth[[i]]$C)) n.con<-0
else n.con<- nrow(G$smooth[[i]]$C)
C <- matrix(0,n.con,n.p)
C[,G$smooth[[i]]$first.para:G$smooth[[i]]$last.para]<-G$smooth[[i]]$C
G$C <- rbind(G$C,C)
G$smooth[[i]]$C <- NULL
}
rm(C)
}
} ## absorb.cons == FALSE

G$y <- drop(data[[split$response]])
ydim <- dim(G$y) if (!is.null(ydim)&&length(ydim)<2) dim(G$y) <- NULL

##data[[deparse(split$full.formula[[2]],backtick=TRUE)]] G$n <- nrow(data)

if (is.null(data$"(weights)")) G$w <- rep(1,G$n) else G$w <- data$"(weights)" ## Create names for model coefficients... if (G$nsdf > 0) term.names <- colnames(G$X)[1:G$nsdf] else term.names<-array("",0)
n.smooth <- length(G$smooth) ## create coef names, if smooth has any coefs, and create a global indicator of non-linear parameters ## g.index, if needed n.sp0 <- 0 if (n.smooth) for (i in 1:n.smooth) { k <- 1 jj <- G$smooth[[i]]$first.para:G$smooth[[i]]$last.para if (G$smooth[[i]]$df > 0) for (j in jj) { term.names[j] <- paste(G$smooth[[i]]$label,".",as.character(k),sep="") k <- k+1 } n.sp <- length(G$smooth[[i]]$S) if (n.sp) { ## record sp this relates to in full sp vector G$smooth[[i]]$first.sp <- n.sp0 + 1 n.sp0 <- G$smooth[[i]]$last.sp <- n.sp0 + n.sp } if (!is.null(G$smooth[[i]]$g.index)) { if (is.null(G$g.index)) G$g.index <- rep(FALSE,n.p) G$g.index[jj] <- G$smooth[[i]]$g.index
}

}
G$term.names <- term.names ## Deal with non-linear parameterizations... G$pP <- PP ## return paraPen object, if present

G
} ## gam.setup

formula.gam <- function(x, ...)
# formula.lm and formula.glm reconstruct the formula from x$terms, this is # problematic because of the way mgcv handles s() and te() terms { x$formula
}

BC <- function(y,lambda=1) {
y <- if (lambda==0) log(y) else (y^lambda-1)/lambda
}

corBC <- function(lambda,y,mu) {
n <- length(y)
qn <- qnorm((1:n-.5)/n)
-cor(qn,sort(BC(y,lambda)-BC(mu,lambda)))
}

neico4 <- function(nei,dd,dd1) {
## nei is neighbourhood structure. dd are leave one out perturbations.
## dd1 the same for a different residual
## This routine computes the most basic covariance
## matrix estimate, based on observed correlation within neighbourhoods
## and assumption of zero correlation without. Somehow it is a testement
## to my extreme slowness that it took me 6 months to come up with this
## skull-thumpingly obvious solution having tried every other hare brained
## scheme first.
n <- length(nei$i) i1 <- 0 W <- dd*0 ## dbeta/dy (y-mu) for (i in 1:n) { i0 <- i1+1 i1 <- nei$m[i]
ii <- nei$k[i0:i1] ## neighbours of i W[nei$i[i],] <-  W[nei$i[i],] + colSums(dd[ii,,drop=FALSE]) } V <- t(dd1)%*%W } ## neico4 pdef <- function(V,eps = .Machine$double.eps^.9) {
## find nearest pdf matrix to symmetric matrix V
ev <- eigen(V,symmetric=TRUE)
thresh <- max(abs(ev$values))*eps ii <- ev$values<thresh
if (any(ii)) {
ev$values[ii] <- thresh V <- ev$vectors %*% (ev$values*t(ev$vectors))
}
V
} ## pdef

gam.outer <- function(lsp,fscale,family,control,method,optimizer,criterion,scale,gamma,G,start=NULL,nei=NULL,...)
# function for smoothing parameter estimation by outer optimization. i.e.
# P-IRLS scheme iterated to convergence for each trial set of smoothing
# parameters.
# MAJOR STEPS:
#  1. Call appropriate smoothing parameter optimizer, and extract fitted model
#    object'
#  2. Call gam.fit3.post.proc' to get parameter covariance matrices, edf etc to
{ if (is.na(optimizer[2])) optimizer[2] <- "newton"
if (!optimizer[2]%in%c("newton","bfgs","nlm","optim")) stop("unknown outer optimization method.")

if (length(lsp)==0) { ## no sp estimation to do -- run a fit instead
optimizer[2] <- "no.sps" ## will cause gam2objective to be called, below
}
nbGetTheta <- substr(family$family[1],1,17)=="Negative Binomial" && length(family$getTheta())>1
if (nbGetTheta) stop("Please provide a single value for theta or use nb to estimate it")

## some preparations for the other methods, which all use gam.fit3...

family <- fix.family.var(family)
if (method%in%c("REML","ML","P-REML","P-ML")) family <- fix.family.ls(family)

if (optimizer[1]=="efs"&& optimizer[2] != "no.sps" ) { ## experimental extended efs
##warning("efs is still experimental!")
if (inherits(family,"general.family")) {
object <- efsud(x=G$X,y=G$y,lsp=lsp,Sl=G$Sl,weights=G$w,offset=G$offxset,family=family, control=control,Mp=G$Mp,start=start)
} else {
family <- fix.family.ls(family)
object <- efsudr(x=G$X,y=G$y,lsp=lsp,Eb=G$Eb,UrS=G$UrS,weights=G$w,family=family,offset=G$offset,
start=start,
U1=G$U1, intercept = TRUE,scale=scale,Mp=G$Mp,control=control,n.true=G$n.true,...) } object$gcv.ubre <- object$REML } else if (optimizer[2]=="newton"||optimizer[2]=="bfgs"){ ## the gam.fit3 method if (optimizer[2]=="bfgs") b <- bfgs(lsp=lsp,X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,L=G$L,lsp0=G$lsp0,offset=G$offset,U1=G$U1,Mp = G$Mp,
family=family,weights=G$w,control=control,gamma=gamma,scale=scale,conv.tol=control$newton$conv.tol, maxNstep= control$newton$maxNstep,maxSstep=control$newton$maxSstep,maxHalf=control$newton$maxHalf, printWarn=FALSE,scoreType=criterion,null.coef=G$null.coef,start=start,
pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,nei=nei,...) else
b <- newton(lsp=lsp,X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,L=G$L,lsp0=G$lsp0,offset=G$offset,U1=G$U1,Mp=G$Mp, family=family,weights=G$w,control=control,gamma=gamma,scale=scale,conv.tol=control$newton$conv.tol,
maxNstep= control$newton$maxNstep,maxSstep=control$newton$maxSstep,maxHalf=control$newton$maxHalf,
printWarn=FALSE,scoreType=criterion,null.coef=G$null.coef,start=start, pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl, edge.correct=control$edge.correct,,nei=nei,...)

object <- b$object object$REML <- object$REML1 <- object$REML2 <-
object$GACV <- object$D2 <- object$P2 <- object$UBRE2 <- object$trA2 <- object$GACV1 <- object$GACV2 <- object$GCV2 <- object$D1 <- object$P1 <- NULL
object$sp <- as.numeric(exp(b$lsp))
object$gcv.ubre <- b$score
b <- list(conv=b$conv,iter=b$iter,grad=b$grad,hess=b$hess,score.hist=b$score.hist) ## return info object$outer.info <- b
} else { ## methods calling gam.fit3
args <- list(X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,offset=G$offset,U1=G$U1,Mp=G$Mp,family=family, weights=G$w,control=control,scoreType=criterion,gamma=gamma,scale=scale,
L=G$L,lsp0=G$lsp0,null.coef=G$null.coef,n.true=G$n.true,Sl=G$Sl,start=start,nei=nei) if (optimizer[2]=="nlm") { b <- nlm(gam4objective, lsp, typsize = lsp, fscale = fscale, stepmax = control$nlm$stepmax, ndigit = control$nlm$ndigit, gradtol = control$nlm$gradtol, steptol = control$nlm$steptol, iterlim = control$nlm$iterlim, check.analyticals=control$nlm$check.analyticals, args=args,...) lsp <- b$estimate

} else if (optimizer[2]=="optim") {
b<-optim(par=lsp,fn=gam2objective,gr=gam2derivative,method="L-BFGS-B",control=
list(fnscale=fscale,factr=control$optim$factr,lmm=min(5,length(lsp))),args=args,...)
lsp <- b$par } else b <- NULL obj <- gam2objective(lsp,args,printWarn=TRUE,...) # final model fit, with warnings object <- attr(obj,"full.fit") attr(obj,"full.fit") <- NULL object$gcv.ubre <- obj
object$outer.info <- b object$sp <- exp(lsp)
} # end of methods calling gam.fit2

if (scale>0) {
object$scale.estimated <- FALSE; object$scale <- scale} else {
object$scale <- object$scale.est;object$scale.estimated <- TRUE } object$control <- control
object$method <- method if (inherits(family,"general.family")) { mv <- gam.fit5.post.proc(object,G$Sl,G$L,G$lsp0,G$S,G$off,gamma)
## object$coefficients <- Sl.initial.repara(G$Sl,object$coefficients,inverse=TRUE) } else mv <- gam.fit3.post.proc(G$X,G$L,G$lsp0,G$S,G$off,object,gamma)

object[names(mv)] <- mv
if (!is.null(nei)) {
if (isTRUE(all.equal(sort(nei$i),1:nrow(G$X))) && length(nei$mi)==nrow(G$X) && criterion=="NCV") { ## each point predicted on its own
loocv <- length(nei$k)==length(nei$i) && isTRUE(all.equal(nei$i,nei$k)) && length(nei$m) == length(nei$k) ## leave one out CV,
if (is.logical(nei$jackknife)&&nei$jackknife) loocv <- TRUE ## straight jackknife requested
if (nei$jackknife < 0) nei$jackknife <- TRUE ## signal cov matrix calc
} else {
loocv <- FALSE
if (nei$jackknife < 0) nei$jackknife <- FALSE
}
}
if (!is.null(nei)&&(criterion!="NCV"||nei$jackknife)) { ## returning NCV when other criterion used for sp selection, or computing perturbations if (!is.null(nei$QNCV)&&nei$GNCV) family$qapprox <- TRUE
if (is.null(family$qapprox)) family$qapprox <- FALSE
lsp <- if (is.null(G$L)) log(object$sp) + G$lsp0 else G$L%*%log(object$sp)+G$lsp0
if (object$scale.estimated && criterion %in% c("REML","ML","EFS")) lsp <- lsp[-length(lsp)] ## drop log scale estimate if (is.null(nei$gamma)) nei$gamma <- 1 ## a major application of this NCV is to select gamma - so it must not itself change with gamma! if (criterion!="NCV") nei$jackknife <- FALSE ## no cov matrix stuff.
if (nei$jackknife) { n <- length(nei$i)
nei1 <- if (loocv) nei else list(i=1:n,mi=1:n,m=1:n,k=1:n) ## set up for LOO CV or requested straight jackknife
nei1$jackknife <- 10 ## signal that cross-validated beta perturbations are required } else nei1 <- nei ## called with another criteria b <- gam.fit3(x=G$X, y=G$y, sp=lsp,Eb=G$Eb,UrS=G$UrS, offset = G$offset,U1=G$U1,Mp=G$Mp,family = family,weights=G$w,deriv=0, control=control,gamma=nei$gamma,
scale=scale,printWarn=FALSE,start=start,scoreType="NCV",null.coef=G$null.coef, pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,nei=nei1,...) object$NCV <- as.numeric(b$NCV) if (nei$jackknife) { ## need to compute direct cov matrix estimate
dd <- attr(b$NCV,"dd") ## leave-one-out parameter perturbations if (!loocv) { ## need to account for assumed independence structure rsd0 <- residuals.gam(object) ## compute cross validated residuals... eta.cv <- attr(object$gcv.ubre,"eta.cv")
fitted0 <- object$fitted.values if (inherits(object$family,"general.family")) {
for (j in 1:ncol(eta.cv)) {
object$fitted.values[,j] <- if (j <= length(G$offset)&&!is.null(G$offset[[j]])) family$linfo[[j]]$linkinv(eta.cv[,j]+G$offset[[j]]) else
family$linfo[[j]]$linkinv(eta.cv[,j])
}
} else {
object$fitted.values <- object$family$linkinv(eta.cv+G$offset)
}
rsd <- residuals.gam(object)
object$fitted.values <- fitted0 ii <- !is.finite(rsd);if (any(ii)) rsd[ii] <- rsd0[ii] dd1 <- dd*rsd/rsd0 ## correct to avoid underestimation (over-estimation if CV residuals used alone) Vcv <- pdef(neicov(dd1,nei)) #*n/(n-sum(object$edf)) ## cross validated V (too large)
V0 <- pdef(neicov(dd,nei))  #*n/(n-sum(object$edf)) ## direct V (too small) Vj <- (Vcv+V0)/2 ## combination less bad alpha <- max(sum(diag(Vj))/sum(diag(object$Ve)),1) ## inverse learning rate - does lower limit make sense?
alpha1 <- max(sum(Vj*object$Ve)/sum(object$Ve^2),1)
Vcv <- Vcv + (object$Vp-object$Ve)*alpha1 ## bias correct conservative (too large)
Vj <- Vj + (object$Vp-object$Ve)*alpha ## bias correct
attr(Vj,"Vcv") <- Vcv ## conservative as attribute
} else { ## LOO or straight jackknife requested
Vj <- pdef(crossprod(dd)) ## straight jackknife is fine.
alpha <- sum(diag(Vj))/sum(diag(object$Ve)) ## inverse learning rate (do not impose lower limit) Vj <- Vj + (object$Vp-object$Ve)*alpha ## bias correct } attr(object$Ve,"Vp") <- object$Vp ## keep original attr(object$Ve,"inv.learn") <- alpha
object$Vp <- Vj } } ## note: use of the following (Vc) in place of Vp appears to mess up p-values for smooths, ## but doesn't change r.e. p-values of course. #if (!is.null(mv$Vc)) object$Vc <- mv$Vc
#if (!is.null(mv$edf2)) object$edf2 <- mv$edf2 #object$Vp <- mv$Vb #object$V.sp <- mv$V.sp #object$hat<-mv$hat #object$Ve <- mv$Ve #object$edf<-mv$edf #object$edf1 <- mv$edf1 ##object$F <- mv$F ## DoF matrix --- probably not needed #object$R <- mv$R ## qr.R(sqrt(W)X) object$aic <- object$aic + 2*sum(object$edf)
object$nsdf <- G$nsdf
object$K <- object$D1 <-  object$D2 <- object$P <-  object$P1 <- object$P2 <- object$dw.drho <- object$GACV <-  object$GACV1 <- object$GACV2 <-  object$REML <- object$REML1 <-  object$REML2 <- object$GCV<-object$GCV1<- object$GCV2 <- object$UBRE <-object$UBRE1 <- object$UBRE2 <- object$trA <-
object$trA1<- object$trA2 <- object$alpha <- object$alpha1 <- object$scale.est <- NULL object$sig2 <- object$scale object } ## gam.outer get.null.coef <- function(G,start=NULL,etastart=NULL,mustart=NULL,...) { ## Get an estimate of the coefs corresponding to maximum reasonable deviance... y <- G$y
weights <- G$w nobs <- G$n ## ignore codetools warning!!
##start <- etastart <- mustart <- NULL
family <- G$family eval(family$initialize) ## have to do this to ensure y numeric
y <- as.numeric(y)
mum <- mean(y)+0*y
etam <- family$linkfun(mum) null.coef <- qr.coef(qr(G$X),etam)
null.coef[is.na(null.coef)] <- 0;
## get a suitable function scale for optimization routines
null.scale <- sum(family$dev.resids(y,mum,weights))/nrow(G$X)
list(null.coef=null.coef,null.scale=null.scale)
}

estimate.gam <- function (G,method,optimizer,control,in.out,scale,gamma,start=NULL,nei=NULL,...) {
## Do gam estimation and smoothness selection...

if (!is.null(G$family$method) && !method %in% G$family$method) method <- G$family$method[1]

if (method %in% c("QNCV","NCV")||!is.null(nei)) {
optimizer <- c("outer","bfgs")
if (method=="QNCV") { method <- "NCV";G$family$qapprox <- TRUE } else G$family$qapprox <- FALSE
if (is.null(nei)) nei <- list(i=1:G$n,mi=1:G$n,m=1:G$n,k=1:G$n) ## LOOCV
if (is.null(nei$k)||is.null(nei$m)) nei$k <- nei$m <- nei$mi <- nei$i <- 1:G$n if (is.null(nei$i)) if (length(nei$m)==G$n) nei$mi <- nei$i <- 1:G$n else stop("unclear which points NCV neighbourhoods belong to") if (length(nei$mi)!=length(nei$m)) stop("for NCV number of dropped and predicted neighbourhoods must match") nei$m <- round(nei$m); nei$mi <- round(nei$mi); nei$k <- round(nei$k); nei$i <- round(nei$i); if (min(nei$i)<1||max(nei$i>G$n)||min(nei$k)<1||max(nei$k>G$n)) stop("nei indexes non-existent points") if (nei$m[1]<1||max(nei$m)>length(nei$k)||length(nei$m)<2||any(diff(nei$m)<1)) stop('nei$m faulty') if (nei$mi[1]<1||max(nei$mi)>length(nei$i)||length(nei$mi)<2||any(diff(nei$mi)<1)) stop('nei$mi faulty') if (is.null(nei$jackknife)) nei$jackknife <- -1 } if (inherits(G$family,"extended.family")) { ## then there are some restrictions...
if (!(method%in%c("REML","ML","NCV"))) method <- "REML"
if (inherits(G$family,"general.family")) { if (!(method%in%c("REML","NCV"))||optimizer[1]=="efs") method <- "REML" if (method=="NCV"&&is.null(G$family$ncv)) { warning("family lacks a Neighbourhood Cross Validation method") method <- "REML" } G$Sl <- Sl.setup(G) ## prepare penalty sequence

G$X <- Sl.initial.repara(G$Sl,G$X,both.sides=FALSE) ## re-parameterize accordingly if (!is.null(start)) start <- Sl.initial.repara(G$Sl,start,inverse=FALSE,both.sides=FALSE)

## make sure optimizer appropriate for available derivatives
if (!is.null(G$family$available.derivs)) {
if (G$family$available.deriv==1 && optimizer[1]!="efs")  optimizer <- c("outer","bfgs")
if (G$family$available.derivs==0) optimizer <- "efs"
}
}
}

if (!optimizer[1]%in%c("outer","efs")) stop("unknown optimizer")
if (optimizer[1]=="efs") method <- "REML"
if (!method%in%c("GCV.Cp","GACV.Cp","REML","P-REML","ML","P-ML","NCV")) stop("unknown smoothness selection criterion")
G$family <- fix.family(G$family)
G$rS <- mini.roots(G$S,G$off,ncol(G$X),G$rank) reml <- method%in%c("REML","P-REML","ML","P-ML","NCV") Ssp <- totalPenaltySpace(G$S,G$H,G$off,ncol(G$X)) G$Eb <- Ssp$E ## balanced penalty square root for rank determination purposes G$U1 <- cbind(Ssp$Y,Ssp$Z) ## eigen space basis
G$Mp <- ncol(Ssp$Z) ## null space dimension
G$UrS <- list() ## need penalty matrices in overall penalty range space... if (length(G$S)>0) for (i in 1:length(G$S)) G$UrS[[i]] <- t(Ssp$Y)%*%G$rS[[i]] else i <- 0
if (!is.null(G$H)) { ## then the sqrt fixed penalty matrix H is needed for (RE)ML G$UrS[[i+1]] <- t(Ssp$Y)%*%mroot(G$H)
}

# is outer looping needed ?
outer.looping <- (!G$am ||reml||method=="GACV.Cp"||method=="NCV"||!is.null(nei)) ## sort out exact sp selection criterion to use fam.name <- G$family$family[1] if (scale==0) { ## choose criterion for performance iteration if (fam.name == "binomial"||fam.name == "poisson") G$sig2<-1 #ubre
else G$sig2 <- -1 #gcv } else {G$sig2 <- scale}

if (reml||method=="NCV") { ## then RE(ML) selection, but which variant?
criterion <- method
if (fam.name == "binomial"||fam.name == "poisson") scale <- 1
if (inherits(G$family,"extended.family") && scale <=0) { scale <- if (is.null(G$family$scale)) 1 else G$family$scale } } else { if (scale==0) { if (fam.name=="binomial"||fam.name=="poisson") scale <- 1 #ubre else scale <- -1 #gcv } if (scale > 0) criterion <- "UBRE" else { if (method=="GCV.Cp") criterion <- "GCV" else criterion <- "GACV" } } if (substr(fam.name,1,17)=="Negative Binomial") { scale <- 1; ## no choice if (method%in%c("GCV.Cp","GACV.Cp")) criterion <- "UBRE" } ## Reset P-ML or P-REML in known scale parameter cases if (scale>0) { if (method=="P-ML") criterion <- method <- "ML" else if (method=="P-REML") criterion <- method <- "REML" } family <- G$family; nb.fam.reset <- FALSE

if (substr(G$family$family[1],1,17)=="Negative Binomial") { ## initialize sensibly
scale <- G$sig2 <- 1 G$family <- negbin(max(family$getTheta()),link=family$link)
nb.fam.reset <- TRUE
}

## extended family may need to manipulate G...

if (!is.null(G$family$preinitialize)) {
if (inherits(G$family,"general.family")) { Gmod <- G$family$preinitialize(G) ## modifies some elements of G for (gnam in names(Gmod)) G[[gnam]] <- Gmod[[gnam]] ## copy these into G } else { ## extended family - usually just initializes theta and possibly y if (!is.null(attr(G$family$preinitialize,"needG"))) attr(G$family,"G") <- G ## more complicated
pini <- G$family$preinitialize(G$y,G$family)
attr(G$family,"G") <- NULL if (!is.null(pini$family)) G$family <- pini$family
if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta)
if (!is.null(pini$y)) G$y <- pini$y } } if (length(G$sp)>0) lsp2 <- log(initial.spg(G$X,G$y,G$w,G$family,G$S,G$rank,G$off, offset=G$offset,L=G$L,lsp0=G$lsp0,E=G$Eb,...)) else lsp2 <- rep(0,0) if (!outer.looping) { ## additive GCV/UBRE object <- am.fit(G,control=control,gamma=gamma,...) lsp <- log(object$sp)
} else {
if (!is.null(in.out)) { # initial s.p.s and scale provided
ok <- TRUE ## run a few basic checks
if (is.null(in.out$sp)||is.null(in.out$scale)) ok <- FALSE
if (length(in.out$sp)!=length(G$sp)) ok <- FALSE
if (!ok) stop("in.out incorrect: see documentation")
lsp <- log(in.out$sp) } else { lsp <- lsp2 } if (nb.fam.reset) G$family <- family ## restore, in case manipulated for negative binomial

## Get an estimate of the coefs corresponding to maximum reasonable deviance,
## and an estimate of the function scale, suitable for optimizers that need this.
## Doesn't make sense for general families that have to initialize coefs directly.

null.stuff  <- if (inherits(G$family,"general.family")) list() else { if (is.null(G$family$get.null.coef)) get.null.coef(G,...) else G$family$get.null.coef(G,...) } scale.as.sp <- (criterion%in%c("REML","ML")||(criterion=="NCV"&&inherits(G$family,"extended.family")))&&scale<=0

if (scale.as.sp) { ## log(scale) to be estimated as a smoothing parameter
if (is.null(in.out)) {
log.scale <- log(null.stuff$null.scale/10) } else { log.scale <- log(in.out$scale)
}
## extend G$L, if present... if (!is.null(G$L)) {
G$L <- cbind(rbind(G$L,rep(0,ncol(G$L))),c(rep(0,nrow(G$L)),1))
}
if (!is.null(G$lsp0)) G$lsp0 <- c(G$lsp0,0) } ## check if there are extra parameters to estimate if (inherits(G$family,"extended.family")&&!inherits(G$family,"general.family")&&G$family$n.theta>0) { th0 <- G$family$getTheta() ## additional (initial) parameters of likelihood nth <- length(th0) nlsp <- length(lsp) ind <- 1:nlsp + nth ## only used if nlsp>0 lsp <- c(th0,lsp) ## append to start of lsp ## extend G$L, G$lsp0 if present... if (!is.null(G$L)&&nth>0) {
L <- rbind(cbind(diag(nth),matrix(0,nth,ncol(G$L))), cbind(matrix(0,nrow(G$L),nth),G$L)) #sat <- attr(G$L,"scale")
G$L <- L #attr(G$L,"scale") <- sat
#attr(G$L,"not.sp") <- nth ## first not.sp params are not smoothing params } if (!is.null(G$lsp0)) G$lsp0 <- c(th0*0,G$lsp0)
} else nth <- 0

G$null.coef <- null.stuff$null.coef

object <- gam.outer(lsp,fscale=null.stuff$null.scale, ##abs(object$gcv.ubre)+object$sig2/length(G$y),
family=G$family,control=control,criterion=criterion,method=method, optimizer=optimizer,scale=scale,gamma=gamma,G=G,start=start,nei=nei,...) if (scale.as.sp) object$sp <- object$sp[-length(object$sp)] ## drop scale estimate from sp array

if (inherits(G$family,"extended.family")&&nth>0) object$sp <- object$sp[-(1:nth)] ## drop theta params } ## finished outer looping ## correct null deviance if there's an offset [Why not correct calc in gam.fit/3???].... if (!inherits(G$family,"extended.family")&&G$intercept&&any(G$offset!=0)) object$null.deviance <- glm(object$y~offset(G$offset),family=object$family,weights=object$prior.weights)$deviance

object$method <- if (method=="NCV"&&G$family$qapprox) "QNCV" else criterion object$smooth<-G$smooth names(object$edf) <- G$term.names names(object$edf1) <- G$term.names if (inherits(family,"general.family")) { object$coefficients <- Sl.initial.repara(G$Sl,object$coefficients,inverse=TRUE)
}

## extended family may need to manipulate fit object. Code
## will need to include the following line if G$X used... ## G$X <- Sl.initial.repara(G$Sl,G$X,inverse=TRUE,cov=FALSE,both.sides=FALSE)
if (!is.null(G$family$postproc)) {
if (inherits(G$family,"general.family")) eval(G$family$postproc) else { posr <- G$family$postproc(family=object$family,y=G$y,prior.weights=object$prior.weights,
fitted=object$fitted.values,linear.predictors=object$linear.predictors,offset=G$offset, intercept=G$intercept)
if (!is.null(posr$family)) object$family$family <- posr$family
if (!is.null(posr$deviance)) object$deviance <- posr$deviance if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance
}
}

if (!is.null(G$P)) { ## matrix transforming from fit to prediction parameterization object$coefficients <- as.numeric(G$P %*% object$coefficients)
object$Vp <- G$P %*% object$Vp %*% t(G$P)
object$Ve <- G$P %*% object$Ve %*% t(G$P)
rownames(object$Vp) <- colnames(object$Vp) <- G$term.names rownames(object$Ve) <- colnames(object$Ve) <- G$term.names
}
names(object$coefficients) <- G$term.names

object
} ## end estimate.gam

variable.summary <- function(pf,dl,n) {
## routine to summarize all the variables in dl, which is a list
## containing raw input variables to a model (i.e. no functions applied)
## pf is a formula containing the strictly parametric part of the
## model for the variables in dl. A list is returned, with names given by
## the variables. For variables in the parametric part, then the list elements
## may be:
## * a 1 column matrix with elements set to the column medians, if variable
##   is a matrix.
## * a 3 figure summary (min,median,max) for a numeric variable.
## * a factor variable, with the most commonly occuring factor (all levels)
## --- classes are as original data type, but anything not numeric, factor or matrix
## is coerced to numeric.
## For non-parametric variables, any matrices are coerced to numeric, otherwise as
## parametric.
## medians in the above are always observed values (to deal with variables coerced to
## factors in the model formulae in a nice way).
## variables with less than n' entries are discarded
v.n <- length(dl)
## if (v.n) for (i in 1:v.n) if (length(dl[[i]])<n) dl[[i]] <- NULL

v.name <- v.name1 <- names(dl)
if (v.n) ## need to strip out names of any variables that are too short.
{ k <- 0 ## counter for retained variables
for (i in 1:v.n) if (length(dl[[i]])>=n) {
k <- k+1
v.name[k] <- v.name1[i] ## save names of variables of correct length
}
if (k>0) v.name <- v.name[1:k] else v.name <- rep("",k)
}

## v.name <- names(dl)    ## the variable names
p.name <- all.vars(pf[-2]) ## variables in parametric part (not response)
vs <- list()
v.n <- length(v.name)
if (v.n>0) for (i in 1:v.n) {
if (v.name[i]%in%p.name) para <- TRUE else para <- FALSE ## is variable in the parametric part?

if (para&&is.matrix(dl[[v.name[i]]])&&ncol(dl[[v.name[i]]])>1) { ## parametric matrix --- a special case
x <- matrix(apply(dl[[v.name[i]]],2,quantile,probs=0.5,type=3,na.rm=TRUE),1,ncol(dl[[v.name[i]]])) ## nearest to median entries
} else { ## anything else
x <- dl[[v.name[i]]]
if (is.character(x)) x <- as.factor(x)
if (is.factor(x)) {
x <- x[!is.na(x)]
lx <- levels(x)
freq <- tabulate(x)
ii <- min((1:length(lx))[freq==max(freq)])
x <- factor(lx[ii],levels=lx)
} else {
x <- as.numeric(x)
x <- c(min(x,na.rm=TRUE),as.numeric(quantile(x,probs=.5,type=3,na.rm=TRUE)) ,max(x,na.rm=TRUE)) ## 3 figure summary
}
}
vs[[v.name[i]]] <- x
}
vs
} ## end variable.summary

nanei <- function(nb,k) {
## nb is an NCV neighbourhood defining list, k an array of points to drop.
## this function adjusts nb to remove the dropped points and adjust the
## indices accordingly, so that the structure works with a data frame
## from which rows in k have been dropped.
if (!length(k)) return()
if (is.null(nb$k)||is.null(nb$m)||is.null(nb$mi)||is.null(nb$i)) stop("full nei list needed if data incomplete")
## first work on dropped folds...
kk <- which(nb$k %in% k) ## position of dropped in nb$k
## adjust m for the fact that points are to be dropped...
nb$m <- nb$m - cumsum(tabulate(findInterval(kk-1,nb$m)+1,nbins=length(nb$m)))
ii <- which(diff(c(0,nb$m))==0) ## identify zero length folds if (length(ii)) nb$m <- nb$m[-ii] ## drop zero length folds nb$k <- nb$k[-kk] ## drop the elements of k nb$k <- nb$k - findInterval(nb$k,sort(k)) ## and shift indices to account for dropped

## now the prediction folds...
kk <- which(nb$i %in% k) ## position of dropped in nb$i
## adjust mi for the fact that points are to be dropped...
nb$mi <- nb$mi - cumsum(tabulate(findInterval(kk-1,nb$mi)+1,nbins=length(nb$m)))
ii <- which(diff(c(0,nb$mi))==0) ## identify zero length folds if (length(ii)) nb$mi <- nb$mi[-ii] ## drop zero length folds nb$i <- nb$i[-kk] ## drop the elements of k nb$i <- nb$i - findInterval(nb$i,sort(k)) ## and shift indices to account for dropped
nb
} ## nanei

## don't be tempted to change to control=list(...) --- messes up passing on other stuff via ...

gam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action,offset=NULL,
method="GCV.Cp",optimizer=c("outer","newton"),control=list(),#gam.control(),
scale=0,select=FALSE,knots=NULL,sp=NULL,min.sp=NULL,H=NULL,gamma=1,fit=TRUE,
paraPen=NULL,G=NULL,in.out=NULL,drop.unused.levels=TRUE,drop.intercept=NULL,
nei=NULL,discrete=FALSE,...) {
## Routine to fit a GAM to some data. The model is stated in the formula, which is then
## interpreted to figure out which bits relate to smooth terms and which to parametric terms.
## Basic steps:
## 1. Formula is split up into parametric and non-parametric parts,
##    and a fake formula constructed to be used to pick up data for
##    model frame. pterms "terms" object(s) created for parametric
##    components, model frame created along with terms object.
## 2. 'gam.setup' called to do most of basis construction and other
##    elements of model setup.
## 3. 'estimate.gam' is called to estimate the model. This performs further
##    pre- and post- fitting steps and calls either 'gam.fit' (performance
##    iteration) or 'gam.outer' (default method). 'gam.outer' calls the actual
##    smoothing parameter optimizer ('newton' by default) and then any post
##    processing. The optimizer calls 'gam.fit3/4/5' to estimate the model
##    coefficients and obtain derivatives w.r.t. the smoothing parameters.
## 4. Finished 'gam' object assembled.
control <- do.call("gam.control",control)
if (is.null(G) && discrete) { ## get bam to do the setup
cl <- match.call() ## NOTE: check all arguments more carefully
cl[[1]] <- quote(bam)
cl$fit = FALSE G <- eval(cl,parent.frame()) ## NOTE: cl probaby needs modifying in G to work properly (with fit=FALSE reset?? also below??) } if (is.null(G)) { ## create model frame..... gp <- interpret.gam(formula) # interpret the formula cl <- match.call() # call needed in gam object for update to work mf <- match.call(expand.dots=FALSE) mf$formula <- gp$fake.formula mf$family <- mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$min.sp<-mf$H<-mf$select <- mf$drop.intercept <- mf$nei <- mf$gamma<-mf$method<-mf$fit<-mf$paraPen<-mf$G<-mf$optimizer <- mf$in.out <- mf$discrete <- mf$...<-NULL
mf$drop.unused.levels <- drop.unused.levels mf[[1]] <- quote(stats::model.frame) ## as.name("model.frame") pmf <- mf mf <- eval(mf, parent.frame()) # the model frame now contains all the data if (nrow(mf)<2) stop("Not enough (non-NA) data to do anything meaningful") terms <- attr(mf,"terms") if (!is.null(nei)) { ## check if data dropped k <- attr(mf,"na.action") if (!is.null(k)) { ## need to adjust nei for dropped data nei <- nanei(nei,as.numeric(k)) } } ## summarize the *raw* input variables ## note can't use get_all_vars here -- buggy with matrices vars <- all_vars1(gp$fake.formula[-2]) ## drop response here
inp <- parse(text = paste("list(", paste(vars, collapse = ","),")"))

## allow a bit of extra flexibility in what data' is allowed to be (as model.frame actually does)
if (!is.list(data)&&!is.data.frame(data)) data <- as.data.frame(data)

dl <- eval(inp, data, parent.frame())
names(dl) <- vars ## list of all variables needed
var.summary <- variable.summary(gp$pf,dl,nrow(mf)) ## summarize the input data rm(dl) ## save space ## pterms are terms objects for the parametric model components used in ## model setup - don't try obtaining by evaluating pf in mf - doesn't ## work in general (e.g. with offset)... if (is.list(formula)) { ## then there are several linear predictors environment(formula) <- environment(formula[[1]]) ## e.g. termplots needs this pterms <- list() tlab <- rep("",0) for (i in 1:length(formula)) { pmf$formula <- gp[[i]]$pf pterms[[i]] <- attr(eval(pmf, parent.frame()),"terms") tlabi <- attr(pterms[[i]],"term.labels") if (i>1&&length(tlabi)>0) tlabi <- paste(tlabi,i-1,sep=".") tlab <- c(tlab,tlabi) } attr(pterms,"term.labels") <- tlab ## labels for all parametric terms, distinguished by predictor } else { ## single linear predictor case pmf$formula <- gp$pf pmf <- eval(pmf, parent.frame()) # pmf contains all data for parametric part pterms <- attr(pmf,"terms") ## pmf only used for this } if (is.character(family)) family <- eval(parse(text=family)) if (is.function(family)) family <- family() if (is.null(family$family)) stop("family not recognized")

if (family$family[1]=="gaussian" && family$link=="identity") am <- TRUE
else am <- FALSE

if (!control$keepData) rm(data) ## save space ## check whether family requires intercept to be dropped... # drop.intercept <- if (is.null(family$drop.intercept) || !family$drop.intercept) FALSE else TRUE # drop.intercept <- as.logical(family$drop.intercept)
if (is.null(family$drop.intercept)) { ## family does not provide information lengthf <- if (is.list(formula)) length(formula) else 1 if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, lengthf) else { drop.intercept <- rep(drop.intercept,length=lengthf) ## force drop.intercept to correct length if (sum(drop.intercept)) family$drop.intercept <- drop.intercept ## ensure prediction works
}
} else drop.intercept <- as.logical(family$drop.intercept) ## family overrides argument if (inherits(family,"general.family")&&!is.null(family$presetup)) eval(family$presetup) gsname <- if (is.list(formula)) "gam.setup.list" else "gam.setup" G <- do.call(gsname,list(formula=gp,pterms=pterms, data=mf,knots=knots,sp=sp,min.sp=min.sp, H=H,absorb.cons=TRUE,sparse.cons=0,select=select, idLinksBases=control$idLinksBases,scale.penalty=control$scalePenalty, paraPen=paraPen,drop.intercept=drop.intercept)) G$var.summary <- var.summary
G$family <- family if ((is.list(formula)&&(is.null(family$nlp)||family$nlp!=gp$nlp))||
(!is.list(formula)&&!is.null(family$npl)&&(family$npl>1))) stop("incorrect number of linear predictors for family")

G$terms<-terms; G$mf<-mf;G$cl<-cl; G$am <- am

if (is.null(G$offset)) G$offset<-rep(0,G$n) G$min.edf <- G$nsdf ## -dim(G$C)[1]
if (G$m) for (i in 1:G$m) G$min.edf<-G$min.edf+G$smooth[[i]]$null.space.dim

G$formula <- formula G$pred.formula <- gp$pred.formula environment(G$formula)<-environment(formula)
} else { ## G not null
if (!is.null(sp)&&any(sp>=0)) { ## request to modify smoothing parameters
if (is.null(G$L)) G$L <- diag(length(G$sp)) if (length(sp)!=ncol(G$L)) stop('length of sp must be number of free smoothing parameters in original model')
ind <- sp>=0 ## which smoothing parameters are now fixed
spind <- log(sp[ind]);
spind[!is.finite(spind)] <- -30 ## set any zero parameters to effective zero
G$lsp0 <- G$lsp0 + drop(G$L[,ind,drop=FALSE] %*% spind) ## add fix to lsp0 G$L <- G$L[,!ind,drop=FALSE] ## drop the cols of G G$sp <- rep(-1,ncol(G$L)) } } if (!fit) { class(G) <- "gam.prefit" return(G) } ## if (ncol(G$X)>nrow(G$X)) warning("Model has more coefficients than data") ##stop("Model has more coefficients than data") G$conv.tol <- control$mgcv.tol # tolerence for mgcv G$max.half <- control$mgcv.half # max step halving in Newton update mgcv object <- estimate.gam(G,method,optimizer,control,in.out,scale,gamma,nei=nei,...) if (!is.null(G$L)) {
object$full.sp <- as.numeric(exp(G$L%*%log(object$sp)+G$lsp0))
names(object$full.sp) <- names(G$lsp0)
}
names(object$sp) <- names(G$sp)
object$paraPen <- G$pP
object$formula <- G$formula
## store any lpi attribute of G$X for use in predict.gam... if (is.list(object$formula)) attr(object$formula,"lpi") <- attr(G$X,"lpi")
object$var.summary <- G$var.summary
object$cmX <- G$cmX ## column means of model matrix --- useful for CIs
object$model<-G$mf # store the model frame
object$na.action <- attr(G$mf,"na.action") # how to deal with NA's
object$control <- control object$terms <- G$terms object$pred.formula <- G$pred.formula attr(object$pred.formula,"full") <- reformulate(all.vars(object$terms)) object$pterms <- G$pterms object$assign <- G$assign # applies only to pterms object$contrasts <- G$contrasts object$xlevels <- G$xlevels object$offset <- G$offset if (!is.null(G$Xcentre)) object$Xcentre <- G$Xcentre
if (control$keepData) object$data <- data
object$df.residual <- nrow(G$X) - sum(object$edf) object$min.edf <- G$min.edf if (G$am&&!(method%in%c("REML","ML","P-ML","P-REML"))) object$optimizer <- "magic" else object$optimizer <- optimizer
object$call <- G$cl # needed for update() to work
class(object) <- c("gam","glm","lm")
if (is.null(object$deviance)) object$deviance <- sum(residuals(object,"deviance")^2)
names(object$gcv.ubre) <- method ## The following lines avoid potentially very large objects in hidden environments being stored ## with fitted gam objects. The downside is that functions like 'termplot' that rely on searching in ## the environment of the formula can fail... environment(object$formula) <- environment(object$pred.formula) <- environment(object$terms) <- environment(object$pterms) <- .GlobalEnv if (!is.null(object$model))  environment(attr(object$model,"terms")) <- .GlobalEnv if (!is.null(attr(object$pred.formula,"full"))) environment(attr(object$pred.formula,"full")) <- .GlobalEnv object } ## gam print.gam<-function (x,...) # default print function for gam objects { print(x$family)
cat("Formula:\n")
if (is.list(x$formula)) for (i in 1:length(x$formula)) print(x$formula[[i]]) else print(x$formula)
n.smooth<-length(x$smooth) if (n.smooth==0) cat("Total model degrees of freedom",sum(x$edf),"\n")
else
{ edf<-0
cat("\nEstimated degrees of freedom:\n")
for (i in 1:n.smooth)
edf[i]<-sum(x$edf[x$smooth[[i]]$first.para:x$smooth[[i]]$last.para]) edf.str <- format(round(edf,digits=4),digits=3,scientific=FALSE) for (i in 1:n.smooth) { cat(edf.str[i]," ",sep="") if (i%%7==0) cat("\n") } cat(" total =",round(sum(x$edf),digits=2),"\n")
}
if (!is.null(x$method)&&!(x$method%in%c("PQL","lme.ML","lme.REML")))
cat("\n",x$method," score: ",x$gcv.ubre,"     ",sep="")
if (!is.null(x$rank) && x$rank< length(x$coefficients)) cat("rank: ",x$rank,"/",length(x$coefficients),sep="") cat("\n") invisible(x) } ## print.gam gam.control <- function (nthreads=1,ncv.threads=1,irls.reg=0.0,epsilon = 1e-7, maxit = 200, mgcv.tol=1e-7,mgcv.half=15,trace =FALSE, rank.tol=.Machine$double.eps^0.5,
nlm=list(),optim=list(),newton=list(),#outerPIsteps=0,
keepData=FALSE,scale.est="fletcher",edge.correct=FALSE)
# Control structure for a gam.
# irls.reg is the regularization parameter to use in the GAM fitting IRLS loop.
# epsilon is the tolerance to use in the IRLS MLE loop. maxit is the number
# of IRLS iterations to use. mgcv.tol is the tolerance to use in the mgcv call within each IRLS. mgcv.half is the
# number of step halvings to employ in the mgcv search for the optimal GCV score, before giving up
# on a search direction. trace turns on or off some de-bugging information.
# rank.tol is the tolerance to use for rank determination
# outerPIsteps was the number of performance iteration steps used to intialize
#                         outer iteration (unused for a while)
{   scale.est <- match.arg(scale.est,c("fletcher","pearson","deviance"))
if (!is.logical(edge.correct)&&(!is.numeric(edge.correct)||edge.correct<0)) stop(
"edge.correct must be logical or a positive number")
if (!is.numeric(irls.reg) || irls.reg <0.0) stop("IRLS regularizing parameter must be a non-negative number.")
if (!is.numeric(epsilon) || epsilon <= 0)
stop("value of epsilon must be > 0")
if (!is.numeric(maxit) || maxit <= 0)
stop("maximum number of iterations must be > 0")
if (rank.tol<0||rank.tol>1)
{ rank.tol=.Machine$double.eps^0.5 warning("silly value supplied for rank.tol: reset to square root of machine precision.") } # work through nlm defaults if (is.null(nlm$ndigit)||nlm$ndigit<2) nlm$ndigit <- max(2,ceiling(-log10(epsilon)))
nlm$ndigit <- round(nlm$ndigit)
ndigit <- floor(-log10(.Machine$double.eps)) if (nlm$ndigit>ndigit) nlm$ndigit <- ndigit if (is.null(nlm$gradtol)) nlm$gradtol <- epsilon*10 nlm$gradtol <- abs(nlm$gradtol) ## note that nlm will stop after hitting stepmax 5 consecutive times ## hence should not be set too small ... if (is.null(nlm$stepmax)||nlm$stepmax==0) nlm$stepmax <- 2
nlm$stepmax <- abs(nlm$stepmax)
if (is.null(nlm$steptol)) nlm$steptol <- 1e-4
nlm$steptol <- abs(nlm$steptol)
if (is.null(nlm$iterlim)) nlm$iterlim <- 200
nlm$iterlim <- abs(nlm$iterlim)
## Should be reset for a while anytime derivative code altered...
if (is.null(nlm$check.analyticals)) nlm$check.analyticals <- FALSE
nlm$check.analyticals <- as.logical(nlm$check.analyticals)

# and newton defaults
if (is.null(newton$conv.tol)) newton$conv.tol <- 1e-6
if (is.null(newton$maxNstep)) newton$maxNstep <- 5
if (is.null(newton$maxSstep)) newton$maxSstep <- 2
if (is.null(newton$maxHalf)) newton$maxHalf <- 30
if (is.null(newton$use.svd)) newton$use.svd <- FALSE

# and optim defaults
if (is.null(optim$factr)) optim$factr <- 1e7
optim$factr <- abs(optim$factr)
if (efs.tol<=0) efs.tol <- .1

trace = trace, mgcv.tol=mgcv.tol,mgcv.half=mgcv.half,
rank.tol=rank.tol,nlm=nlm,
optim=optim,newton=newton,#outerPIsteps=outerPIsteps,
keepData=as.logical(keepData[1]),scale.est=scale.est,edge.correct=edge.correct)

} ## gam.control

mgcv.get.scale<-function(Theta,weights,good,mu,mu.eta.val,G)
# Get scale implied by current fit and trial -ve binom Theta, I've used
# mu and mu.eta.val used in fit rather than implied by it....
{ variance<- negbin(Theta)$variance w<-sqrt(weights[good]*mu.eta.val[good]^2/variance(mu)[good]) wres<-w*(G$y-G$X%*%G$p)
sum(wres^2)/(G$n-sum(G$edf))
}

mgcv.find.theta<-function(Theta,T.max,T.min,weights,good,mu,mu.eta.val,G,tol)
# searches for -ve binomial theta between given limits to get scale=1
{ scale<-mgcv.get.scale(Theta,weights,good,mu,mu.eta.val,G)
T.hi<-T.low<-Theta
while (scale<1&&T.hi<T.max)
{ T.hi<-T.hi*2
T.hi<-min(T.hi,T.max)
scale<-mgcv.get.scale(T.hi,weights,good,mu,mu.eta.val,G)
}
if (all.equal(T.hi,T.max)==TRUE && scale<1) return(T.hi)
T.low<-T.hi
while (scale>=1&&T.low>T.min)
{ T.low<-T.low/2
T.low<-max(T.low,T.min)
scale<-mgcv.get.scale(T.low,weights,good,mu,mu.eta.val,G)
}
if (all.equal(T.low,T.min)==TRUE && scale>1) return(T.low)
# (T.low,T.hi) now brackets scale=1.
while (abs(scale-1)>tol)
{ Theta<-(T.low+T.hi)/2
scale<-mgcv.get.scale(Theta,weights,good,mu,mu.eta.val,G)
if (scale<1) T.low<-Theta
else T.hi<-Theta
}
Theta
}

full.score <- function(sp,G,family,control,gamma,...)
# function suitable for calling from nlm in order to polish gam fit
# so that actual minimum of score is found in generalized cases
{ .Deprecated(msg="Internal mgcv function full.score is no longer used and will be removed soon.")
if (is.null(G$L)) { G$sp<-exp(sp);
} else {
G$sp <- as.numeric(exp(G$L%*%sp + G$lsp0)) } # set up single fixed penalty.... q<-NCOL(G$X)
if (is.null(G$H)) G$H<-matrix(0,q,q)
for (i in 1:length(G$S)) { j<-ncol(G$S[[i]])
off1<-G$off[i];off2<-off1+j-1 G$H[off1:off2,off1:off2]<-G$H[off1:off2,off1:off2]+G$sp[i]*G$S[[i]] } G$S<-list() # have to reset since length of this is used as number of penalties
G$L <- NULL xx<-gam.fit(G,family=family,control=control,gamma=gamma,...) res <- xx$gcv.ubre.dev
attr(res,"full.gam.object")<-xx
res
} ## full.score

am.fit <- function (G, #start = NULL, etastart = NULL,
#mustart = NULL,# family = gaussian(),
control = gam.control(),gamma=1,...) {
## fit additive model using 'magic' returning gam type object.

family <- gaussian()
intercept<-G$intercept n <- nobs <- NROW(G$y) ## n just there to keep codetools happy
y <- G$y # original data X <- G$X # original design matrix
if (ncol(G$X) == 0) stop("Model seems to contain no terms") olm <- G$am   # only need 1 iteration as it's a pure additive model.

# obtain average element sizes for the penalties
n.S <- length(G$S) if (n.S>0) { S.size <- rep(0,n.S) for (i in 1:n.S) S.size[i] <- mean(abs(G$S[[i]]))
}
weights<-G$w # original weights n.score <- sum(weights!=0) ## n to use in GCV score (i.e. don't count points with no influence) offset<-G$offset

variance <- family$variance;dev.resids <- family$dev.resids
aic <- family$aic if (NCOL(y) > 1) stop("y must be univariate unless binomial") scale <- G$sig2

msp <- G$sp magic.control<-list(tol=G$conv.tol,step.half=G$max.half,#maxit=control$maxit+control$globit, rank.tol=control$rank.tol)

## solve the penalized LS problem ...
good <- weights > 0
G$y <- y[good] - offset[good] G$X <- X[good,,drop=FALSE]
G$w <- sqrt(weights[good]) ## note magic assumes sqrt weights mr <- magic(G$y,G$X,msp,G$S,G$off,L=G$L,lsp0=G$lsp0,G$rank,G$H,matrix(0,0,ncol(G$X)),
G$w,gamma=gamma,G$sig2,G$sig2<0, ridge.parameter=control$irls.reg,control=magic.control,n.score=n.score,nthreads=control$nthreads) coef <- mr$b;msp <- mr$sp;G$sig2 <- mr$scale;G$gcv.ubre <- mr$score; if (any(!is.finite(coef))) warning(gettextf("Non-finite coefficients at iteration")) mu <- eta <- drop(X %*% coef + offset) dev <- sum(dev.resids(y, mu, weights)) residuals <- rep(NA, nobs) residuals[good] <- G$y - (mu - offset)[good]

wtdmu <- if (intercept) sum(weights * y)/sum(weights) else offset
nulldev <- sum(dev.resids(y, wtdmu, weights))
n.ok <- nobs - sum(weights == 0)
nulldf <- n.ok - as.integer(intercept)

mv <- magic.post.proc(G$X,mr,w=G$w^2)
G$Vp<-mv$Vb;G$hat<-mv$hat;
G$Ve <- mv$Ve # frequentist cov. matrix
G$edf<-mv$edf
G$conv<-mr$gcv.info
G$sp<-msp rank<-G$conv$rank ## use MLE of scale in Gaussian case - best estimate otherwise. dev1 <- if (scale>0) scale*sum(weights) else dev aic.model <- aic(y, n, mu, weights, dev1) + 2 * sum(G$edf)
if (scale < 0) { ## deviance based GCV
gcv.ubre.dev <- n.score*dev/(n.score-gamma*sum(G$edf))^2 } else { # deviance based UBRE, which is just AIC gcv.ubre.dev <- dev/n.score + 2 * gamma * sum(G$edf)/n.score - G$sig2 } list(coefficients = as.vector(coef), residuals = residuals, fitted.values = mu, family = family,linear.predictors = eta, deviance = dev, null.deviance = nulldev, iter = 1, weights = weights, prior.weights = weights, df.null = nulldf, y = y, converged = TRUE,sig2=G$sig2,edf=G$edf,edf1=mv$edf1,hat=G$hat, R=mr$R,
boundary = FALSE,sp = G$sp,nsdf=G$nsdf,Ve=G$Ve,Vp=G$Vp,rV=mr$rV,mgcv.conv=G$conv,
gcv.ubre=G$gcv.ubre,aic=aic.model,rank=rank,gcv.ubre.dev=gcv.ubre.dev,scale.estimated = (scale < 0)) } ## am.fit gam.fit <- function (G, start = NULL, etastart = NULL, mustart = NULL, family = gaussian(), control = gam.control(),gamma=1, fixedSteps=(control$maxit+1),...)
# fitting function for a gam, modified from glm.fit.
# note that smoothing parameter estimates from one irls iterate are carried over to the next irls iterate
# unless the range of s.p.s is large enough that numerical problems might be encountered (want to avoid
# completely flat parts of gcv/ubre score). In the latter case autoinitialization is requested.
# fixedSteps < its default causes at most fixedSteps iterations to be taken,
# without warning if convergence has not been achieved. This is useful for
# obtaining starting values for outer iteration.
{   .Deprecated(msg="Internal mgcv function gam.fit is no longer used - see gam.fit3.")
intercept<-G$intercept conv <- FALSE n <- nobs <- NROW(G$y) ## n just there to keep codetools happy
nvars <- NCOL(G$X) # check this needed y<-G$y # original data
X<-G$X # original design matrix if (nvars == 0) stop("Model seems to contain no terms") olm <- G$am   # only need 1 iteration as it's a pure additive model.
if (!olm)  .Deprecated(msg="performance iteration with gam is deprecated, use bam instead")
find.theta<-FALSE # any supplied -ve binomial theta treated as known, G$sig2 is scale parameter if (substr(family$family[1],1,17)=="Negative Binomial")
{ Theta <- family$getTheta() if (length(Theta)==1) { ## Theta fixed find.theta <- FALSE G$sig2 <- 1
} else {
if (length(Theta)>2)
warning("Discrete Theta search not available with performance iteration")
Theta <- range(Theta)
T.max <- Theta[2]          ## upper search limit
T.min <- Theta[1]          ## lower search limit
Theta <- sqrt(T.max*T.min) ## initial value
find.theta <- TRUE
}
nb.link<-family$link # negative.binomial family, there's a choise of links } # obtain average element sizes for the penalties n.S<-length(G$S)
if (n.S>0)
{ S.size<-0
for (i in 1:n.S) S.size[i]<-mean(abs(G$S[[i]])) } weights<-G$w # original weights

n.score <- sum(weights!=0) ## n to use in GCV score (i.e. don't count points with no influence)

offset<-G$offset variance <- family$variance;dev.resids <- family$dev.resids aic <- family$aic
linkinv <- family$linkinv;linkfun <- family$linkfun;mu.eta <- family$mu.eta if (!is.function(variance) || !is.function(linkinv)) stop("illegal family' argument") valideta <- family$valideta
if (is.null(valideta))
valideta <- function(eta) TRUE
validmu <- family$validmu if (is.null(validmu)) validmu <- function(mu) TRUE if (is.null(mustart)) # new from version 1.5.0 { eval(family$initialize)}
else
{ mukeep <- mustart
eval(family\$initialize)
mustart <- mukeep
}
if (NCOL(y) > 1)
stop("y must be univariate unless binomial")

coefold <- NULL</