## Nothing
## code for fast REML computation. key feature is that first and
## second derivatives come at no increase in leading order
## computational cost, relative to evaluation!
## (c) Simon N. Wood, 2010-2019
singleStrans <- function(S,rank=NULL,ldet=FALSE) {
## transform single penalty matrix to partial identity using Cholesky
## t(D)%*%S%*%D transforms to a partial identity (rank rank)
## if ldet==TRUE then the generalized determinant of S is also returned
## Returns list(D,Di,rank,ldet): D is the transform, Di its inverse, so
## crossprod(Di[1:rank,]) recovers the original penalty S.
## Method: pivoted Cholesky R'R = S[piv,piv]; D is the row-scattered
## inverse factor and Di the column-scattered factor.
Ri <- R <- suppressWarnings(chol(S,pivot=TRUE))
k <- ncol(S)
if (is.null(rank)) {
rank <- Rrank(R)
normS <- norm(S)
## refine the rank estimate: drop trailing rows of R while the leading
## rank-1 rows still reproduce the trailing block of S to tight tolerance.
## NOTE(review): comparison uses the unpivoted S block here, whereas the
## analogous check in ldetSt compares against the pivoted Sd[piv,piv] -
## confirm this is intended.
while (rank>1&&max(abs(crossprod(R[1:(rank-1),rank:k,drop=FALSE])-S[rank:k,rank:k,drop=FALSE]))
<.Machine$double.eps^.75*normS) rank <- rank - 1
}
piv <- attr(R,"pivot")
if (rank<k) {
## augment R to full rank so it is invertible: zero the junk rows, then
## place 1s on the trailing diagonal (identity on the null space part)
R[(rank+1):k,] <- 0
diag(R)[(rank+1):k] <- 1
}
## compute the log determinant if required...
ldet <- if (ldet) 2*sum(log(diag(chol(tcrossprod(R[1:rank,]))))) else 0
Ri[piv,] <- solve(R) ## D: pivot-scattered rows of R^{-1}
R[,piv] <- R ## Di: pivot-scattered columns of R (inverse of D)
list(D=Ri,Di=R,rank=rank,ldet=ldet)
} ## singleStrans
iniStrans <- function(S,rank = NULL,trans.ldet=FALSE) {
## let a block penalty matrix be \sum_i \lambda_i S_i, and of rank r.
## This routine finds a reparameterization such that the Si are confined to
## the initial r by r block.
## Arguments: S - list of component penalty matrices; rank - known rank of
## the total penalty (estimated if NULL); trans.ldet - if TRUE return the
## correction to add to log|sum_i S_i| to recover the log generalized
## determinant in the original parameterization.
## Returns list(S,T,Ti,rank,trans.ldet): T maps original to transformed
## parameterization, Ti is its inverse.
St <- S[[1]]/norm(S[[1]]); m <- length(S)
if (m>1) for (i in 2:m) St <- St + S[[i]]/norm(S[[i]])
R <- suppressWarnings(chol(St,pivot=TRUE))
p <- nrow(St)
if (is.null(rank)) {
rank <- Rrank(R)
normS <- norm(St) ## direct check that rank not over-estimated
## NOTE(review): check compares against the unpivoted St block, while the
## analogous check in ldetSt uses the pivoted version - confirm intended.
while (rank>1&&max(abs(crossprod(R[1:(rank-1),rank:p,drop=FALSE])-St[rank:p,rank:p,drop=FALSE]))
<.Machine$double.eps^.75*normS) rank <- rank - 1
}
piv <- attr(R,"pivot")
ipiv <- piv; ipiv[piv] <- 1:p
if (rank==p) { ## nothing to do - penalty is full rank
## BUG FIX: 'rank' must also be returned here - callers (e.g. Sl.setup)
## read tr$rank (and form 1:tr$rank), which failed for full rank blocks
## when the early return omitted it.
return(list(S=S,T=diag(p),Ti=diag(p),rank=rank,trans.ldet=0))
}
ind <- (rank+1):p
R[ind,ind] <- diag(length(ind)) ## augment factor to full rank so invertible
# S1 <- S # save original for checking
for (i in 1:m) { ## transform each component and truncate to range space
S[[i]] <- forwardsolve(t(R),t(forwardsolve(t(R),S[[i]][piv,piv])))[1:rank,1:rank]
S[[i]] <- (S[[i]] + t(S[[i]]))*.5 ## force exact symmetry
}
T <- backsolve(R,diag(p))[ipiv,]
#range((t(T)%*%S1[[1]]%*%T)[1:rank,1:rank]-S[[1]])
#range(T%*%R[,ipiv]-diag(p))
## So T will map original S to transformed S, and R[,ipiv] is inverse transform.
## now get the correction to be added to the log|sum_i S_i| to get log generalized
## determinant in orginal parameterization...
tldet <- if (trans.ldet) 2*sum(log(diag(chol(tcrossprod(R[1:rank,]))))) else 0
list(S=S,T=T,Ti=R[,ipiv],rank=rank,trans.ldet=tldet)
} ## iniStrans
Sl.setup <- function(G,cholesky=FALSE,no.repara=FALSE,sparse=FALSE) {
## Sets up a list representing a block diagonal penalty matrix.
## from the object produced by `gam.setup'.
## Uses only pivoted Cholesky if cholesky==TRUE.
## flags all blocks as not to be reparameterized if no.repara=TRUE,
## in which case initial reparameterization is only used to
## compute log generalized determinant, but not otherwise.
## Return object is a list, Sl, with an element for each block.
## For block, b, Sl[[b]] is a list with the following elements
## * repara - should re-parameterization be applied to model matrix etc
## usually false if non-linear in coefs
## * start, stop: start:stop indexes the parameters of this block
## * S a list of penalty matrices for the block (dim = stop-start+1)
## - If length(S)==1 then this will be an identity penalty.
## - Otherwise it is a multiple penalty, and an rS list of square
## root penalty matrices will be added. S (if repara) and rS (always)
## will be projected into range space of total penalty matrix.
## If cholesky==TRUE then rS contains the projected S if !repara and otherwise
## rS not returned (since S contains same thing).
## * rS sqrt penalty matrices if it's a multiple penalty and cholesky==FALSE.
## projected penalty if cholesky==TRUE and !repara. NULL otherwise.
## * D a reparameterization matrix for the block
## - Applies to cols/params from start:stop.
## - If numeric then X[,start:stop]%*%diag(D) is repara X[,start:stop],
## b.orig = D*b.repara
## - If matrix then X[,start:stop]%*%D is repara X[,start:stop],
## b.orig = D%*%b.repara
## * Di is inverse of D, but is only supplied if D is not orthogonal, or
## diagonal.
## The penalties in Sl are in the same order as those in G
## Also returns attribute "E" a square root of the well scaled total
## penalty, suitable for rank deficiency testing, and attribute "lambda"
## the corresponding smoothing parameters.
##if (!is.null(G$H)) stop("paraPen min sp not supported")
## --- Phase 1: gather penalty blocks from paraPen terms and smooths ---
Sl <- list()
b <- 1 ## block counter
if (G$n.paraPen) { ## Have to proccess paraPen stuff first
off <- unique(G$off[1:G$n.paraPen]) ## unique offset lists relating to paraPen
for (i in 1:length(off)) { ## loop over blocks
ind <- (1:G$n.paraPen)[G$off[1:G$n.paraPen]%in%off[i]] ## terms in same block
if (length(ind)>1) { ## additive block
nr <- 0;for (k in 1:length(ind)) nr <- max(nr,nrow(G$S[[ind[k]]])) ## get block size
## now fill Sl[[b]]$S, padding out any penalties that are not "full size"
Sl[[b]] <- list()
Sl[[b]]$S <- list()
St <- matrix(0,nr,nr) ## accumulate a total matrix for rank determination
for (k in 1:length(ind)) { ## work through all penalties for this block
nk <- nrow(G$S[[ind[k]]])
if (nr>nk) { ## have to pad out this one
Sl[[b]]$S[[k]] <- matrix(0,nr,nr)
Sl[[b]]$S[[k]][1:nk,1:nk] <- G$S[[ind[k]]]
} else Sl[[b]]$S[[k]] <- G$S[[ind[[k]]]]
St <- St + Sl[[b]]$S[[k]]
}
## NOTE(review): 'off' is the vector of unique offsets but is indexed here
## by the term index ind[1] rather than i - confirm these coincide (all
## terms in this block share offset off[i]).
Sl[[b]]$start <- off[ind[1]]
Sl[[b]]$stop <- Sl[[b]]$start + nr - 1
Sl[[b]]$lambda <- rep(1,length(ind)) ## dummy at this stage
Sl[[b]]$repara <- FALSE
} else { ## singleton
Sl[[b]] <- list(start=off[ind], stop=off[ind]+nrow(G$S[[ind]])-1,
rank=G$rank[ind],S=list(G$S[[ind]]))
Sl[[b]]$S <- list(G$S[[ind]])
Sl[[b]]$lambda <- 1 ## dummy at this stage
Sl[[b]]$repara <- !no.repara ## allow repara unless completely turned off
} ## finished singleton
Sl[[b]]$linear <- TRUE
b <- b + 1
} ## finished this block
} ## finished paraPen
## now work through the smooths....
if (length(G$smooth)) for (i in 1:length(G$smooth)) {
if (!is.null(G$smooth[[i]]$fixed)&&G$smooth[[i]]$fixed) m <- 0 else
m <- length(G$smooth[[i]]$S)
if (m>0) {
Sl[[b]] <- list()
Sl[[b]]$nl.reg <- if (is.null(G$smooth[[i]]$nl.reg)||G$smooth[[i]]$nl.reg<=0) NULL else G$smooth[[i]]$nl.reg ## any fixed regularizer for non-linear coefs
Sl[[b]]$start <- G$smooth[[i]]$first.para
Sl[[b]]$stop <- G$smooth[[i]]$last.para
## if the smooth has a g.index field it indicates non-linear params,
## in which case re-parameterization will usually break the model.
## Or global supression of reparameterization may be requested...
# Sl[[b]]$repara <- if (is.null(G$smooth[[i]]$g.index) && !no.repara) TRUE else FALSE
Sl[[b]]$repara <- G$smooth[[i]]$repara && !no.repara
if (!is.null(G$smooth[[i]]$updateS)) { ## then this block is nonlinear in smoothing parameters
Sl[[b]]$repara <-FALSE
Sl[[b]]$linear <- FALSE
labs <- c("inisp","updateS","AS","AdS","ldS","St","n.sp","nlinfo")
Sl[[b]][labs] <- G$smooth[[i]][labs] ## copy the non-linear interface functions
Sl[[b]]$lambda <- rep(0,Sl[[b]]$n.sp) ## dummy
} else Sl[[b]]$linear <- TRUE
}
if (m==0) {} else ## fixed block
if (m==1) { ## singleton
Sl[[b]]$rank <- G$smooth[[i]]$rank
Sl[[b]]$S <- G$smooth[[i]]$S
Sl[[b]]$lambda <- 1
b <- b + 1
} else { ## additive block...
## first test whether block can *easily* be split up into singletons
## easily here means no overlap in penalties
Sl[[b]]$S <- G$smooth[[i]]$S
Sl[[b]]$lambda <- rep(1,m)
if (Sl[[b]]$linear) {
nb <- nrow(Sl[[b]]$S[[1]])
sbdiag <- sbStart <- sbStop <- rep(NA,m)
ut <- upper.tri(Sl[[b]]$S[[1]],diag=FALSE)
## overlap testing requires the block ranges
for (j in 1:m) { ## get block range for each S[[j]]
sbdiag[j] <- sum(abs(Sl[[b]]$S[[j]][ut]))==0 ## is penalty diagonal??
ir <- range((1:nb)[rowSums(abs(Sl[[b]]$S[[j]]))>0])
sbStart[j] <- ir[1];sbStop[j] <- ir[2] ## individual ranges
}
split.ok <- TRUE
for (j in 1:m) { ## test for overlap
itot <- rep(FALSE,nb)
if (all(sbdiag)) { ## it's all diagonal - can allow interleaving
for (k in 1:m) if (j!=k) itot[diag(Sl[[b]]$S[[k]])!=0] <- TRUE
if (sum(itot[diag(Sl[[b]]$S[[j]])!=0])>0) { ## no good, some overlap detected
split.ok <- FALSE; break
}
} else { ## not diagonal - really need overlapping blocks
for (k in 1:m) if (j!=k) itot[sbStart[k]:sbStop[k]] <- TRUE
if (sum(itot[sbStart[j]:sbStop[j]])>0) { ## no good, some overlap detected
split.ok <- FALSE; break
}
}
}
} else split.ok <- FALSE
if (split.ok) { ## can split this block into m separate singleton blocks
for (j in 1:m) {
Sl[[b]] <- list()
ind <- sbStart[j]:sbStop[j]
Sl[[b]]$S <- list(G$smooth[[i]]$S[[j]][ind,ind,drop=FALSE])
Sl[[b]]$start <- G$smooth[[i]]$first.para + sbStart[j]-1
Sl[[b]]$stop <- G$smooth[[i]]$first.para + sbStop[j]-1
Sl[[b]]$rank <- G$smooth[[i]]$rank[j]
Sl[[b]]$lambda <- 1 ## dummy here
Sl[[b]]$repara <- !no.repara ## signals ok to linearly reparameterize, now check this is really ok...
if (!is.null(G$smooth[[i]]$g.index)) { ## then some parameters are non-linear - can't re-param
if (any(G$smooth[[i]]$g.index[ind])) Sl[[b]]$repara <- FALSE
}
Sl[[b]]$linear <- TRUE ## linear in smoothing params - assumed always true for multi-blocks
b <- b + 1
}
} else { ## not possible to split
Sl[[b]]$S <- G$smooth[[i]]$S
b <- b + 1 ## next block!!
} ## additive block finished
} ## additive block finished
}
## At this stage Sl contains the penalties, identified as singletons or
## multiple S blocks. Now the blocks need re-parameterization applied.
## Singletons need to be transformed to identity penalties, while
## multiples need to be projected into total penalty range space.
## --- Phase 2: per-block reparameterization and assembly of root E ---
if (length(Sl)==0) return(Sl) ## nothing to do
np <- ncol(G$X)
## In dense case E is a dense matrix and its block get filled in. In sparse
## case E is a list of lists of rows (i), cols (j) and elements (x) defining each
## block. So E$i[[b]],E$j[[b]],E$x[[b]] defines block b...
E <- if (sparse) list(i=list(),j=list(),x=list()) else matrix(0,np,np) ## well scaled square root penalty
lambda <- rep(0,0)
## NOTE: computing transforms for repara=FALSE blocks and then not using them
## looks wasteful - remove this??
for (b in 1:length(Sl)) { ## once more into the blocks, dear friends...
if (!Sl[[b]]$linear) { ## nonlinear term
## never re-parameterized, but need contribution to penalty square root, E
Sl[[b]] <- Sl[[b]]$updateS(Sl[[b]]$lambda,Sl[[b]])
lambda <- c(lambda,Sl[[b]]$lambda)
if (sparse) {
## E0 <- as(Sl[[b]]$St(Sl[[b]],1)$E,"dgTMatrix") deprecated
E0 <- as(as(as(Sl[[b]]$St(Sl[[b]],1)$E, "dMatrix"), "generalMatrix"), "TsparseMatrix")
## TsparseMatrix slots i,j are 0-based, so adding start gives 1-based file indices
E$i[[b]] <- E0@i + Sl[[b]]$start
E$j[[b]] <- E0@j + Sl[[b]]$start
E$x[[b]] <- E0@x
} else { ## dense
ind <- Sl[[b]]$start:Sl[[b]]$stop
E[ind,ind] <- Sl[[b]]$St(Sl[[b]],1)$E ## add to the square root
}
} else if (length(Sl[[b]]$S)==1) { ## then we have a singleton
if (sum(abs(Sl[[b]]$S[[1]][upper.tri(Sl[[b]]$S[[1]],diag=FALSE)]))==0) { ## S diagonal
## Reparameterize so that S has 1's or zero's on diagonal
## In new parameterization smooth specific model matrix is X%*%diag(D)
## ind indexes penalized parameters from this smooth's set.
D <- diag(Sl[[b]]$S[[1]])
ind <- D > 0 ## index penalized elements
Sl[[b]]$rank <- sum(ind)
Sl[[b]]$ldet <- if (cholesky) sum(log(D[ind])) else 0
D[ind] <- 1/sqrt(D[ind]);D[!ind] <- 1 ## X' = X%*%diag(D)
Sl[[b]]$D <- D; Sl[[b]]$ind <- ind
} else { ## S is not diagonal
if (cholesky&&is.null(Sl[[b]]$nl.reg)) { ## use Cholesky based reparameterization
tr <- singleStrans(Sl[[b]]$S[[1]],Sl[[b]]$rank,ldet=!Sl[[b]]$repara)
ind <- rep(FALSE,ncol(tr$D))
ind[1:tr$rank] <- TRUE
Sl[[b]]$D <- tr$D
Sl[[b]]$Di <- tr$Di
Sl[[b]]$rank <- tr$rank
Sl[[b]]$ldet=tr$ldet
} else { ## use eigen based re-parameterization
es <- eigen(Sl[[b]]$S[[1]],symmetric=TRUE)
U <- es$vectors;D <- es$values
if (is.null(Sl[[b]]$rank)) { ## need to estimate rank
Sl[[b]]$rank <- sum(D>.Machine$double.eps^.8*max(D))
}
## non-cholesky stabilized method code ignores the log det
## of transform as it cancels between the two log det terms...
Sl[[b]]$ldet <- 0 ## sum(log(D[1:Sl[[b]]$rank]))
ind <- rep(FALSE,length(D))
ind[1:Sl[[b]]$rank] <- TRUE ## index penalized elements
if (is.null(Sl[[b]]$nl.reg)) {
D[ind] <- 1/sqrt(D[ind]);D[!ind] <- 1
Sl[[b]]$D <- t(D*t(U)) ## D <- U%*%diag(D)
Sl[[b]]$Di <- t(U)/D
} else {
Sl[[b]]$repara <- FALSE ## should be FALSE anyway to get here
Sl[[b]]$ev <- D ## store the penalty eigenvalues to allow penalized ldet computation
Sl[[b]]$U <- U ## store the eigen-vectors to allow computation of regularized penalty square root
}
}
## so if X is smooth model matrix X%*%D is re-parameterized form
## and t(D)%*%Sl[[b]]$S[[1]]%*%D is the reparameterized penalty
## -- a partial identity matrix.
## Di is the inverse of D and crossprod(Di[1:rank,]) is the original
## penalty matrix
Sl[[b]]$ind <- ind
}
## add penalty square root into E
if (Sl[[b]]$repara) { ## then it is just the identity
ind <- (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind]
if (sparse) {
E$j[[b]] <- E$i[[b]] <- ind
E$x[[b]] <- ind*0 + 1
} else { ## dense
diag(E)[ind] <- 1
}
lambda <- c(lambda,1) ## record corresponding lambda
} else { ## need scaled root penalty in *original* parameterization
#D <- Sl[[b]]$Di[1:Sl[[b]]$rank,]
D <- if (is.null(Sl[[b]]$nl.reg)) Sl[[b]]$Di[1:Sl[[b]]$rank,] else
sqrt(Sl[[b]]$ev+Sl[[b]]$nl.reg)*t(Sl[[b]]$U)
D.norm <- norm(D); D <- D/D.norm
if (sparse) {
## D <- as(D,"dgTMatrix") deprecated
D <- as(as(as(D, "dMatrix"), "generalMatrix"), "TsparseMatrix")
E$i[[b]] <- D@i + Sl[[b]]$start
E$j[[b]] <- D@j + Sl[[b]]$start
E$x[[b]] <- D@x
} else {
indc <- Sl[[b]]$start:(Sl[[b]]$start+ncol(D)-1)
indr <- Sl[[b]]$start:(Sl[[b]]$start+nrow(D)-1)
E[indr,indc] <- D
}
lambda <- c(lambda,1/D.norm^2)
}
} else { ## multiple S block
## must be in range space of total penalty...
Sl[[b]]$ind <- rep(FALSE,ncol(Sl[[b]]$S[[1]]))
if (cholesky) {
tr <- iniStrans(Sl[[b]]$S,Sl[[b]]$rank,trans.ldet=!Sl[[b]]$repara)
if (!Sl[[b]]$repara) Sl[[b]]$rS <- list()
for (i in 1:length(tr$S)) {
if (Sl[[b]]$repara) Sl[[b]]$S[[i]] <- tr$S[[i]] else
Sl[[b]]$rS[[i]] <- tr$S[[i]] ## only need to store here if !repara
}
ind <- 1:tr$rank
Sl[[b]]$rank <- tr$rank
Sl[[b]]$D <- tr$T
Sl[[b]]$Di <- tr$Ti
Sl[[b]]$ldet = tr$trans.ldet
} else {
Sl[[b]]$ldet = 0 ## this is pure orthogonal transform
Sl[[b]]$rS <- list() ## needed for adaptive re-parameterization
S <- Sl[[b]]$S[[1]]
for (j in 2:length(Sl[[b]]$S)) S <- S + Sl[[b]]$S[[j]] ## scaled total penalty
es <- eigen(S,symmetric=TRUE);U <- es$vectors; D <- es$values
Sl[[b]]$D <- U
if (is.null(Sl[[b]]$rank)) { ## need to estimate rank
Sl[[b]]$rank <- sum(D>.Machine$double.eps^.8*max(D))
}
ind <- 1:Sl[[b]]$rank
for (j in 1:length(Sl[[b]]$S)) { ## project penalties into range space of total penalty
bob <- t(U[,ind])%*%Sl[[b]]$S[[j]]%*%U[,ind]
bob <- (t(bob) + bob)/2 ## avoid over-zealous chol sym check
if (Sl[[b]]$repara) { ## otherwise want St and E in original parameterization
Sl[[b]]$S[[j]] <- bob
}
Sl[[b]]$rS[[j]] <- mroot(bob,Sl[[b]]$rank)
}
}
#Sl[[b]]$ind <- rep(FALSE,ncol(Sl[[b]]$S[[1]]))
Sl[[b]]$ind[ind] <- TRUE ## index penalized within sub-range
## now compute well scaled sqrt
S.norm <- norm(Sl[[b]]$S[[1]])
St <- Sl[[b]]$S[[1]]/S.norm
lambda <- c(lambda,1/S.norm)
for (j in 2:length(Sl[[b]]$S)) {
S.norm <- norm(Sl[[b]]$S[[j]])
St <- St + Sl[[b]]$S[[j]]/S.norm
lambda <- c(lambda,1/S.norm)
}
St <- (t(St) + St)/2 ## avoid over-zealous chol sym check
St <- t(mroot(St,Sl[[b]]$rank))
if (sparse) {
## St <- as(St,"dgTMatrix") - deprecated
St <- as(as(as(St, "dMatrix"), "generalMatrix"), "TsparseMatrix")
E$i[[b]] <- St@i + Sl[[b]]$start
E$j[[b]] <- St@j + Sl[[b]]$start
E$x[[b]] <- St@x
} else { ## dense
indc <- Sl[[b]]$start:(Sl[[b]]$start+ncol(St)-1)
indr <- Sl[[b]]$start:(Sl[[b]]$start+nrow(St)-1)
E[indr,indc] <- St
}
}
} ## re-para finished
if (sparse) E <- sparseMatrix(i=unlist(E$i),j=unlist(E$j),x=unlist(E$x),dims=c(np,np))
attr(Sl,"E") <- E ## E'E = scaled total penalty
attr(Sl,"lambda") <- lambda ## smoothing parameters corresponding to E
attr(Sl,"cholesky") <- cholesky ## store whether this is Cholesky based or not
Sl ## the penalty list
} ## end of Sl.setup
Sl.Sb <- function(Sl,rho,beta) {
## Computes S %*% beta, where S is the total penalty matrix implied by the
## block list Sl and the log smoothing parameters rho. Assumes the initial
## re-parameterization has been applied, so that singleton penalties are
## multiples of the identity, while multi-S blocks use their stored S
## matrices. Smoothing parameter ordering matches Sl.addS.
  res <- beta * 0 ## accumulator for S %*% beta
  isp <- 1 ## smoothing parameter counter
  nblock <- length(Sl)
  if (nblock>0) for (jb in 1:nblock) {
    pind <- (Sl[[jb]]$start:Sl[[jb]]$stop)[Sl[[jb]]$ind] ## penalized coef indices
    nS <- length(Sl[[jb]]$S)
    if (nS==1) { ## singleton - penalty is a multiple of the identity
      res[pind] <- res[pind] + exp(rho[isp]) * beta[pind]
      isp <- isp + 1
    } else for (js in 1:nS) { ## multi-S block - use stored component penalties
      res[pind] <- res[pind] + exp(rho[isp]) * (Sl[[jb]]$S[[js]] %*% beta[pind])
      isp <- isp + 1
    }
  }
  res
} ## Sl.Sb
Sl.rSb <- function(Sl,rho,beta) {
## Computes vector 'a' containing all terms rS %*% beta stacked end to end.
## The sum of squares of 'a' is bSb, but 'a' is linear in beta. Assumes the
## initial re-parameterization has been applied, so that singleton
## penalties are multiples of the identity, while multi-S blocks use their
## stored square roots rS. Ordering matches Sl.addS.
  nblock <- length(Sl)
  ## first pass: total length of the stacked result...
  total <- 0
  if (nblock>0) for (jb in 1:nblock) {
    pind <- (Sl[[jb]]$start:Sl[[jb]]$stop)[Sl[[jb]]$ind]
    total <- total + length(Sl[[jb]]$S)*length(pind)
  }
  a <- rep(0,total)
  pos <- 0 ## current write offset within 'a'
  isp <- 1 ## smoothing parameter counter
  if (nblock>0) for (jb in 1:nblock) {
    pind <- (Sl[[jb]]$start:Sl[[jb]]$stop)[Sl[[jb]]$ind]
    npind <- length(pind)
    if (length(Sl[[jb]]$S)==1) { ## singleton - multiple of identity
      a[pos + 1:npind] <- beta[pind] * exp(rho[isp]/2)
      isp <- isp + 1; pos <- pos + npind
    } else for (js in 1:length(Sl[[jb]]$S)) { ## multi-S block
      a[pos + 1:npind] <- exp(rho[isp]/2) * (beta[pind] %*% Sl[[jb]]$rS[[js]])
      isp <- isp + 1; pos <- pos + npind
    }
  }
  a
} ## Sl.rSb
Sl.inirep <- function(Sl,X,l=0,r=0,nt=1) {
## Re-parameterize X using initial Sl reparameterization info.
## l,r = -2,-1,0,1,2. 0 is do not apply, negative to apply inverse transform
## Di (t(D) if no Di indicating D orthogonal),
## positive for transform D, 1 for transform, 2 for its transpose.
## i.e. on the left: l==1 D%*%X, l==2 t(D)%*%X, l==-1 Di%*%X, l==-2 t(Di)%*%X,
## and correspondingly on the right of X for r.
## Aim is for simpler and cleaner than Sl.initial.repara
## NOTE: nt (threads) is accepted for interface compatibility but unused here.
if (length(Sl)==0 || (!l && !r)) return(X) ## nothing to do (also guards empty Sl)
if (is.matrix(X)) {
for (b in 1:length(Sl)) if (Sl[[b]]$repara) {
nuDi <- is.null(Sl[[b]]$Di) ## no stored Di => D orthogonal, so Di = t(D)
ind <- Sl[[b]]$start:Sl[[b]]$stop
if (l) X[ind,] <- if (l == 1) Sl[[b]]$D%*%X[ind,,drop=FALSE] else if (l == 2) t(Sl[[b]]$D)%*%X[ind,,drop=FALSE] else
if (l == -1) { if (nuDi) t(Sl[[b]]$D)%*%X[ind,,drop=FALSE] else Sl[[b]]$Di%*%X[ind,,drop=FALSE] } else
{ if (nuDi) Sl[[b]]$D%*%X[ind,,drop=FALSE] else t(Sl[[b]]$Di)%*%X[ind,,drop=FALSE]}
## BUG FIX: right hand transform must be selected by 'r', not 'l'...
if (r) X[,ind] <- if (r == 1) X[,ind,drop=FALSE]%*%Sl[[b]]$D else if (r == 2) X[,ind,drop=FALSE]%*%t(Sl[[b]]$D) else
if (r == -1) { if (nuDi) X[,ind,drop=FALSE]%*%t(Sl[[b]]$D) else X[,ind,drop=FALSE]%*%Sl[[b]]$Di} else
{ if (nuDi) X[,ind,drop=FALSE]%*%Sl[[b]]$D else X[,ind,drop=FALSE]%*%t(Sl[[b]]$Di)}
}
} else { ## it's a vector
for (b in 1:length(Sl)) if (Sl[[b]]$repara) {
ind <- Sl[[b]]$start:Sl[[b]]$stop; nuDi <- is.null(Sl[[b]]$Di)
if (l) X[ind] <- if (l == 1) Sl[[b]]$D%*%X[ind] else if (l == 2) t(Sl[[b]]$D)%*%X[ind] else
if (l == -1) { if (nuDi) t(Sl[[b]]$D)%*%X[ind] else Sl[[b]]$Di%*%X[ind] } else
{ if (nuDi) Sl[[b]]$D%*%X[ind] else t(Sl[[b]]$Di)%*%X[ind] }
## BUG FIX: as above, select the right hand transform by 'r', not 'l'...
if (r) X[ind] <- if (r == 1) X[ind]%*%Sl[[b]]$D else if (r == 2) X[ind]%*%t(Sl[[b]]$D) else
if (r == -1) { if (nuDi) X[ind]%*%t(Sl[[b]]$D) else X[ind]%*%Sl[[b]]$Di } else
{ if (nuDi) X[ind]%*%Sl[[b]]$D else X[ind]%*%t(Sl[[b]]$Di)}
}
}
X
} ## Sl.inirep
Sl.initial.repara <- function(Sl,X,inverse=FALSE,both.sides=TRUE,cov=TRUE,nt=1) {
## Routine to apply initial Sl re-parameterization to model matrix X,
## or, if inverse==TRUE, to apply inverse re-para to parameter vector
## or cov matrix. If inverse is TRUE and both.sides=FALSE then
## re-para only applied to rhs, as appropriate for a choleski factor.
## If both.sides==FALSE, X is a vector and inverse==FALSE then X is
## taken as a coefficient vector (so re-para is inverse of that for model
## matrix...)
## nt is the number of threads used for the larger products (via pmmult).
## Only blocks flagged repara==TRUE are transformed; D may be a matrix or
## a vector (diagonal transform) - see Sl.setup documentation.
if (length(Sl)==0) return(X) ## nothing to do
if (inverse) { ## apply inverse re-para
if (is.matrix(X)) {
if (cov) { ## then it's a covariance matrix
for (b in 1:length(Sl)) if (Sl[[b]]$repara) {
ind <- Sl[[b]]$start:Sl[[b]]$stop
if (is.matrix(Sl[[b]]$D)) {
## D V D' for a covariance matrix (rhs always, lhs if both.sides)
if (both.sides) X[ind,] <- if (nt==1) Sl[[b]]$D%*%X[ind,,drop=FALSE] else
pmmult(Sl[[b]]$D,X[ind,,drop=FALSE],FALSE,FALSE,nt=nt)
X[,ind] <- if (nt==1) X[,ind,drop=FALSE]%*%t(Sl[[b]]$D) else
pmmult(X[,ind,drop=FALSE],Sl[[b]]$D,FALSE,TRUE,nt=nt)
} else { ## Diagonal D
X[,ind] <- t(Sl[[b]]$D * t(X[,ind,drop=FALSE]))
if (both.sides) X[ind,] <- Sl[[b]]$D * X[ind,,drop=FALSE]
}
}
} else { ## regular matrix: need to use Di
for (b in 1:length(Sl)) if (Sl[[b]]$repara) {
ind <- Sl[[b]]$start:Sl[[b]]$stop
if (is.matrix(Sl[[b]]$D)) {
## no stored Di implies D is orthogonal, so its inverse is t(D)
Di <- if(is.null(Sl[[b]]$Di)) t(Sl[[b]]$D) else Sl[[b]]$Di
if (both.sides) X[ind,] <- if (nt==1) t(Di)%*%X[ind,,drop=FALSE] else
pmmult(Di,X[ind,,drop=FALSE],TRUE,FALSE,nt=nt)
X[,ind] <- if (nt==1) X[,ind,drop=FALSE]%*%Di else
pmmult(X[,ind,drop=FALSE],Di,FALSE,FALSE,nt=nt)
} else { ## Diagonal D
Di <- 1/Sl[[b]]$D
X[,ind] <- t(Di * t(X[,ind,drop=FALSE]))
if (both.sides) X[ind,] <- Di * X[ind,,drop=FALSE]
}
}
}
} else { ## it's a parameter vector
for (b in 1:length(Sl)) if (Sl[[b]]$repara) {
ind <- Sl[[b]]$start:Sl[[b]]$stop
if (is.matrix(Sl[[b]]$D)) X[ind] <- Sl[[b]]$D%*%X[ind] else
X[ind] <- Sl[[b]]$D*X[ind]
}
}
} else for (b in 1:length(Sl)) if (Sl[[b]]$repara) { ## model matrix re-para
ind <- Sl[[b]]$start:Sl[[b]]$stop
if (is.matrix(X)) {
if (is.matrix(Sl[[b]]$D)) {
if (both.sides) X[ind,] <- if (nt==1) t(Sl[[b]]$D)%*%X[ind,,drop=FALSE] else
pmmult(Sl[[b]]$D,X[ind,,drop=FALSE],TRUE,FALSE,nt=nt)
X[,ind] <- if (nt==1) X[,ind,drop=FALSE]%*%Sl[[b]]$D else
pmmult(X[,ind,drop=FALSE],Sl[[b]]$D,FALSE,FALSE,nt=nt)
} else {
if (both.sides) X[ind,] <- Sl[[b]]$D * X[ind,,drop=FALSE]
X[,ind] <- t(Sl[[b]]$D*t(X[,ind,drop=FALSE])) ## X[,ind]%*%diag(Sl[[b]]$D)
}
} else {
if (both.sides) { ## signalling vector to be treated like model matrix X...
if (is.matrix(Sl[[b]]$D)) X[ind] <- t(Sl[[b]]$D)%*%X[ind] else
X[ind] <- Sl[[b]]$D*X[ind]
} else { ## both.sides == FALSE is just a signal that X is a parameter vector
## coefficients transform with the inverse of the model matrix transform
if (is.matrix(Sl[[b]]$D)) X[ind] <-
if (is.null(Sl[[b]]$Di)) t(Sl[[b]]$D)%*%X[ind] else Sl[[b]]$Di%*%X[ind] else
X[ind] <- X[ind]/Sl[[b]]$D
}
}
}
X
} ## end Sl.initial.repara
ldetSblock <- function(rS,rho,deriv=2,root=FALSE,nt=1) {
## finds derivatives wrt rho of log|S| where
## S = sum_i tcrossprod(rS[[i]])*exp(rho[i])
## when S is full rank +ve def and no
## reparameterization is required....
## rS is a list of square root penalties, rho the log smoothing parameters,
## nt the number of threads for the parallel linear algebra (pcrossprod,
## pchol, pforwardsolve). Returns log|S| (det), its gradient (det1) and
## Hessian (det2) wrt rho, and E: a root with crossprod(E)=S if root==TRUE,
## otherwise S itself.
lam <- exp(rho)
S <- pcrossprod(rS[[1]],trans=TRUE,nt=nt)*lam[1]
##tcrossprod(rS[[1]])*lam[1] ## parallel
p <- ncol(S)
m <- length(rS)
if (m > 1) for (i in 2:m) S <- S + pcrossprod(rS[[i]],trans=TRUE,nt=nt)*lam[i]
## S <- S + tcrossprod(rS[[i]])*lam[i] ## parallel
if (!root) E <- S ## when no root requested, E returned is just total S
d <- diag(S);d[d<=0] <- 1;d <- sqrt(d)
S <- t(S/d)/d ## diagonally pre-condition
R <- if (nt>1) pchol(S,nt) else suppressWarnings(chol(S,pivot=TRUE))
piv <- attr(R,"pivot")
r <- attr(R,"rank")
if (r<p) R[(r+1):p,(r+1):p] <- 0 ## fix chol bug
if (root) {
rp <- piv;rp[rp] <- 1:p ## reverse pivot
E <- t(t(R[,rp])*d) ## unpivot and undo pre-conditioning so crossprod(E)=S
}
if (r<p) { ## rank deficiency
R <- R[1:r,1:r]
piv <- piv[1:r]
}
RrS <- list();dS1 <- rep(0,m);dS2 <- matrix(0,m,m)
## use dlog|S|/drhoi = lam_i tr(S^{-1}S_i) = tr(R^{-T}rS[[i]]rS[[i]]R^{-1} etc...
for (i in 1:m) {
RrS[[i]] <- pforwardsolve(R,rS[[i]][piv,]/d[piv],nt=nt) ## note R transposed internally - unlike forwardsolve!!
dS1[i] <- sum(RrS[[i]]^2)*lam[i] ## dlog|S|/drhoi
if (deriv==2) {
RrS[[i]] <- pcrossprod(RrS[[i]],trans=TRUE,nt=nt)
#tcrossprod(RrS[[i]]) ## parallel
for (j in 1:i) {
dS2[i,j] <- dS2[j,i] <- -sum(RrS[[i]]*RrS[[j]])*lam[i]*lam[j]
}
dS2[i,i] <- dS2[i,i] + dS1[i]
}
}
## det includes the pre-conditioning correction via log(d[piv])
list(det = 2*sum(log(diag(R))+log(d[piv])),det1=dS1,det2=dS2,E=E)
} ## ldetSblock
ginv1 <- function(a) {
## Cholesky based generalized inverse and log determinant of a +ve
## semi-definite matrix 'a'. Diagonally pre-conditions 'a', takes a
## pivoted Cholesky of the scaled matrix and returns:
## * inv - the generalized inverse of a
## * ldet - HALF the generalized log determinant of a
## * E - a root such that crossprod(E) reproduces a (to numerical rank)
  eps <- norm(a)*.Machine$double.eps^.8
  w <- diag(a+eps)^-.5 ## diagonal pre-conditioning weights
  R <- suppressWarnings(chol(t(w*t(w*a)),pivot=TRUE)) ## chol of scaled a
  pv <- attr(R,"pivot")
  rnk <- Rrank(R) ## numerical rank of the factor
  gi <- a*0
  E <- R[1:rnk,]*0
  E[,pv] <- R[1:rnk,] ## unpivot the columns of the rank rnk part of R
  E <- t(t(E)/w) ## undo the column pre-conditioning
  pv <- pv[1:rnk]
  gi[pv,pv] <- chol2inv(R[1:rnk,1:rnk])
  gi <- t(t(w*gi)*w) ## scale rows and columns back
  ## half log det, corrected for the pre-conditioning scaling...
  list(inv = gi, ldet = sum(log(diag(R)[1:rnk])) - sum(log(w)), E = E)
} ## ginv1
ldetSt <- function(S,lam,deriv=0,repara=TRUE) {
## completely Cholesky based computation of the determinant of
## an additive block. Assumes initial reparameterization using
## cholesky approach in Sl.setup.
## Formally, St, the sum of S[[i]]*lam[i] must be full rank.
## The S[[i]] are not all full rank (or this is pointless)
## Returns a transformed St which can be used for stable
## determinant evaluation by QR or Cholesky *without pivoting*
## --- pivoting the Cholesky will mess up the very structure
## that ensures stable computation. Also returns the matrix
## mapping original St to its transformed version, and the
## corresponding inverse transform. The returned S is such that
## the sum of S[[i]]*lam[i] is *exactly* the transformed St.
## Set repara to FALSE to get determinant computations without
## stabilizing transforms.
## NOTE: If T is the transformation matrix, log|T| is ommited from
## returned log|S| as if will cancel with log|X'X+S| in
## LAML. Can be re-instated by commenting in below.
dominant.set <- function(nos,nD) {
## on entry 'nD' is the index of the current set and 'nos' the
## norms for these. On exit 'D' contains the indices (from nD)
## of the dominant terms, and nD the remainder...
#j <- min(which(nos==max(nos))) ## single selector
j <- which(nos>max(nos)*1e-5)
D <- nD[j]; nD <- nD[-j]
# cat("Dset =",j,"\n")
return(list(D=D,nD=nD))
} ## dominant.set
Rldet0 <- 0
m <- length(S)
nD <- 1:m ## index of terms not yet dealt with
a <- 1 ## starting row/col
p <- ncol(S[[1]]) ## dimension
T <- Ti <- diag(p)
## repeatedly strip out the currently dominant terms, transforming the
## trailing block so each stage works on what remains...
while (repara&&length(nD)>1 && a <= p) {
## get indices of dominant terms and remainder
nos <- rep(0,length(nD)) ## for dominance determining norms
j <- 1
for (i in nD) { nos[j] <- norm(S[[i]][a:p,a:p,drop=FALSE])*lam[i]; j <- j + 1}
ds <- dominant.set(nos,nD)
nD <- ds$nD ## not dominant set
D <- ds$D ## dominant set
## Form the dominant term and its pivoted Cholesky
k <- p-a+1 ## current block dimension
Sd <- matrix(0,k,k)
for (i in D) Sd <- Sd + lam[i] * S[[i]][a:p,a:p,drop=FALSE]
R <- suppressWarnings(chol(Sd,pivot=TRUE))
rank <- min(Rrank(R),attr(R,"rank"))
piv <- attr(R,"pivot")
ipiv <- piv; ipiv[piv] <- 1:k
Sp <- Sd[piv,piv,drop=FALSE]; normS <- norm(Sp)
## more expensive refinement of rank...
while (rank>1&&max(abs(crossprod(R[1:(rank-1),rank:k,drop=FALSE])-Sp[rank:k,rank:k,drop=FALSE]))<.Machine$double.eps^.75*normS) rank <- rank - 1
if (rank < k) {
ind <- (rank+1):k
R[ind,ind] <- diag(length(ind)) ## augment to full rank k
}
## Apply transform to components of D, suppressing definite machine zeroes
for (i in D) {
S[[i]][,a:p] <- t(forwardsolve(t(R),t(S[[i]][,a:p,drop=FALSE][,piv,drop=FALSE]))) ## SRi
S[[i]][a:p,] <- forwardsolve(t(R),S[[i]][a:p,,drop=FALSE][piv,,drop=FALSE]) ## Ri'S
## note: 'ind' here is only reached when rank<k, so it was set above
if (rank < k) S[[i]][a:p,a:p][ind,] <- S[[i]][a:p,a:p][,ind] <- 0
}
## Apply transform to components of nD
for (i in nD) {
S[[i]][,a:p] <- t(forwardsolve(t(R),t(S[[i]][,a:p,drop=FALSE][,piv,drop=FALSE]))) ## SRi
S[[i]][a:p,] <- forwardsolve(t(R),S[[i]][a:p,,drop=FALSE][piv,,drop=FALSE]) ## Ri'S
}
## Update the total transform matrix, its log determinant and inverse...
Rldet0 <- Rldet0 + sum(log(diag(R)))
## Accumulate T such that |sum_i lam_i*S_i| = |T' sum_i lam_i * St_i T|
## St_i being transformed versions...
Ti[,a:p] <- t(forwardsolve(t(R),t(Ti[,a:p,drop=FALSE][,piv,drop=FALSE]))) ## this is inverse
T[a:p,] <- R %*% T[a:p,,drop=FALSE][piv,,drop=FALSE]
a <- a + rank
} ## finished transforming
## compute the log determinant
St <- matrix(0,p,p)
for (i in 1:m) St <- St + lam[i]*S[[i]]
if (repara) {
E <- R1 <- chol(St)
Rldet <- sum(log(diag(R1))) # + Rldet ## note: no log|T| - cancels in REML
} else { ## use stabilized generalized inverse
gi <- ginv1(St)
E <- gi$E
Rldet <- gi$ldet #+ Rldet ## note: no log|T| - cancels in REML
}
det1 <- det2 <- NULL
if (deriv>0) {
## R1 becomes the inverse of the (transformed) total penalty; R1 exists
## from the repara branch above, gi from the non-repara branch
R1 <- if (repara) chol2inv(R1) else gi$inv
det1 <- lam*0
for (i in 1:m) det1[i] <- sum(R1*S[[i]]*lam[i]) ## lam_i * tr(St^{-1} S_i)
}
if (deriv>1) {
SiS <- list()
det2 <- matrix(0,m,m)
for (i in 1:m) {
SiS[[i]] <- R1 %*% S[[i]]
for (j in 1:i) det2[i,j] <- det2[j,i] <- -sum(SiS[[i]]*t(SiS[[j]]))*lam[i]*lam[j]
det2[i,i] <- det2[i,i] + det1[i]
}
}
## kappa(St) returned as a condition number diagnostic for the transformed total
list(det=2*Rldet,det0=2*Rldet0,T=T,S=S,Ti=Ti,det1=det1,det2 = det2,St=St,E=E,kappa=kappa(St))
} ## ldetSt
ldetS <- function(Sl,rho,fixed,np,root=FALSE,repara=TRUE,nt=1,deriv=2,sparse=FALSE) {
## Get log generalized determinant of S stored blockwise in an Sl list.
## If repara=TRUE multi-term blocks will be re-parameterized using gam.reparam, and
## a re-parameterization object supplied in the returned object.
## rho contains log smoothing parameters, fixed is an array indicating whether they
## are fixed (or free). np is the number of coefficients. root indicates
## whether or not to return E, and sparse whethr or not it should be sparse.
## Returns: Sl, with modified rS terms, if needed and rho added to each block
## rp, a re-parameterization list
## E a total penalty square root such that E'E = S_tot (if root==TRUE)
## ldetS,ldetS1,ldetS2 the value, grad vec and Hessian
n.deriv <- sum(!fixed)
k.deriv <- k.sp <- k.rp <- 1
ldS <- 0
d1.ldS <- rep(0,n.deriv)
d2.ldS <- matrix(0,n.deriv,n.deriv)
cholesky <- attr(Sl,"cholesky")
rp <- list() ## reparameterization list
## In dense case E is a dense matrix and its block get filled in. In sparse
## case E is a list of lists of rows (i), cols (j) and elements (x) defining each
## block. So E$i[[b]],E$j[[b]],E$x[[b]] defines block b...
if (root) { E <- if (sparse) list(i=list(),j=list(),x=list()) else matrix(0,np,np) } else E <- NULL
if (length(Sl)>0) for (b in 1:length(Sl)) { ## work through blocks
if (!Sl[[b]]$linear) { ## non-linear block
ind <- k.sp + 1:Sl[[b]]$n.sp - 1 ## smoothing param index
Sl[[b]] <- Sl[[b]]$updateS(rho[ind],Sl[[b]]) ## update the block with current params
nldS <- Sl[[b]]$ldS(Sl[[b]],deriv) ## get the log determinant and any derivatives
ldS <- ldS + nldS$ldS
nldS$ldS1 <- nldS$ldS1[!fixed[ind]] ## discard fixed param derivatives
nderiv <- length(nldS$ldS1)
if (nderiv) d1.ldS[k.deriv+1:nderiv-1] <- nldS$ldS1
if (deriv>1) stop("second derivs not available for non-linear penalties")
k.deriv <- k.deriv + nderiv
k.sp <- k.sp + Sl[[b]]$n.sp
Sl[[b]]$lambda <- rho[ind] ## not really used in non-linear interface
if (root) if (sparse) {
## E0 <- as(Sl[[b]]$St(Sl[[b]],1)$E,"dgTMatrix") - deprecated
E0 <- as(as(as(Sl[[b]]$St(Sl[[b]],1)$E, "dMatrix"), "generalMatrix"), "TsparseMatrix")
E$i[[b]] <- E0@i + Sl[[b]]$start
E$j[[b]] <- E0@j + Sl[[b]]$start
E$x[[b]] <- E0@x
} else {
ind <- Sl[[b]]$start:Sl[[b]]$stop
E[ind,ind] <- Sl[[b]]$St(Sl[[b]],1)$E
}
} else if (length(Sl[[b]]$S)==1) { ## linear singleton
ldS <- ldS + Sl[[b]]$ldet ## initial repara log det correction for this block
ldS <- ldS + if (is.null(Sl[[b]]$nl.reg)) rho[k.sp] * Sl[[b]]$rank else
sum(log(exp(rho[k.sp])*Sl[[b]]$ev + Sl[[b]]$nl.reg))
if (!fixed[k.sp]) {
d1.ldS[k.deriv] <- if (is.null(Sl[[b]]$nl.reg)) Sl[[b]]$rank else
sum(exp(rho[k.sp])*Sl[[b]]$ev/(exp(rho[k.sp])*Sl[[b]]$ev + Sl[[b]]$nl.reg))
k.deriv <- k.deriv + 1
}
if (root) {
## insert diagonal from block start to end
if (Sl[[b]]$repara) {
ind <- (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind]
if (sparse) {
E$i[[b]] <- E$j[[b]] <- ind; E$x[[b]] <- ind*0 + exp(rho[k.sp]*.5)
} else {
diag(E)[ind] <- exp(rho[k.sp]*.5) ## sqrt smoothing param
}
} else { ## root has to be in original parameterization...
if (sparse) {
## dgTMatrix is triplet form, which makes combining easier...
#D <- if (is.null(Sl[[b]]$nl.reg)) as(Sl[[b]]$Di[1:Sl[[b]]$rank,]* exp(rho[k.sp]*.5),"dgTMatrix") else
# as(sqrt(Sl[[b]]$ev*exp(rho[k.sp])+Sl[[b]]$nl.reg)*t(Sl[[b]]$U),"dgTMatrix")
D <- if (is.null(Sl[[b]]$nl.reg)) as(as(as(Sl[[b]]$Di[1:Sl[[b]]$rank,]* exp(rho[k.sp]*.5), "dMatrix"),
"generalMatrix"), "TsparseMatrix") else as(as(as(sqrt(Sl[[b]]$ev*exp(rho[k.sp])+Sl[[b]]$nl.reg)*
t(Sl[[b]]$U) , "dMatrix"), "generalMatrix"), "TsparseMatrix")
E$i[[b]] <- D@i + Sl[[b]]$start
E$j[[b]] <- D@j + Sl[[b]]$start
E$x[[b]] <- D@x
} else {
D <- if (is.null(Sl[[b]]$nl.reg)) Sl[[b]]$Di[1:Sl[[b]]$rank,]* exp(rho[k.sp]*.5) else
sqrt(Sl[[b]]$ev*exp(rho[k.sp])+Sl[[b]]$nl.reg)*t(Sl[[b]]$U)
indc <- Sl[[b]]$start:(Sl[[b]]$start+ncol(D)-1)
indr <- Sl[[b]]$start:(Sl[[b]]$start+nrow(D)-1)
E[indr,indc] <- D
}
}
}
Sl[[b]]$lambda <- exp(rho[k.sp])
k.sp <- k.sp + 1
} else { ## linear multi-S block
ldS <- ldS + Sl[[b]]$ldet ## initial repara log det correction for this block
m <- length(Sl[[b]]$S) ## number of components for this block
ind <- k.sp:(k.sp+m-1) ## index for smoothing parameters
## call gam.reparam to deal with this block
## in a stable way...
if (cholesky) {
grp <- if (Sl[[b]]$repara) ldetSt(Sl[[b]]$S,lam=exp(rho[ind]),deriv=deriv,repara) else
ldetSt(Sl[[b]]$rS,lam=exp(rho[ind]),deriv=deriv,repara)
} else {
grp <- if (repara) gam.reparam(Sl[[b]]$rS,lsp=rho[ind],deriv=deriv) else
ldetSblock(Sl[[b]]$rS,rho[ind],deriv=deriv,root=root,nt=nt)
grp$det0 <- 0
}
Sl[[b]]$lambda <- exp(rho[ind])
## If stabilizing repara not applied to coefs, then need to correct log det
## for transform, as it won't cancel, hence grp$det0...
ldS <- ldS + grp$det + if (Sl[[b]]$repara) 0 else grp$det0
## next deal with the derivatives...
grp$det1 <- grp$det1[!fixed[ind]] ## discard derivatives for fixed components
grp$det2 <- if (deriv>1) grp$det2[!fixed[ind],!fixed[ind]] else 0 ##NULL
nd <- length(grp$det1)
if (nd>0) { ## then not all sp's are fixed
dind <- k.deriv:(k.deriv+nd-1)
d1.ldS[dind] <- grp$det1
d2.ldS[dind,dind] <- grp$det2
k.deriv <- k.deriv + nd
}
## now store the reparameterization information
if (repara) {
## note that Ti is equivalent to Qs...
rp[[k.rp]] <- if (cholesky) list(block =b,ind = (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind],T=grp$T,
Ti=grp$Ti,repara=Sl[[b]]$repara) else list(block =b,ind = (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind],
Qs = grp$Qs,repara=Sl[[b]]$repara)
k.rp <- k.rp + 1
for (i in 1:m) {
Sl[[b]]$Srp[[i]] <- if (cholesky) Sl[[b]]$lambda[i]*grp$S[[i]] else
Sl[[b]]$lambda[i]*grp$rS[[i]]%*%t(grp$rS[[i]])
}
}
k.sp <- k.sp + m
if (Sl[[b]]$repara) {
if (root) { ## unpack the square root E'E
if (sparse) {
# E0 <- as(grp$E,"dgTMatrix") deprecated
E0 <- as(as(as(grp$E, "dMatrix"), "generalMatrix"), "TsparseMatrix")
E$i[[b]] <- E0@i + Sl[[b]]$start
E$j[[b]] <- E0@j + Sl[[b]]$start
E$x[[b]] <- E0@x
} else {
ic <- Sl[[b]]$start:(Sl[[b]]$start+ncol(grp$E)-1)
ir <- Sl[[b]]$start:(Sl[[b]]$start+nrow(grp$E)-1)
E[ir,ic] <- grp$E
}
Sl[[b]]$St <- crossprod(grp$E)
} else {
## gam.reparam always returns root penalty in E, but
## ldetSblock returns penalty in E if root==FALSE
if (cholesky) Sl[[b]]$St <- grp$St else
Sl[[b]]$St <- if (repara) crossprod(grp$E) else grp$E
}
} else { ## square root block and St need to be in original parameterization...
Sl[[b]]$St <- Sl[[b]]$lambda[1]*Sl[[b]]$S[[1]]
for (i in 2:m) {
Sl[[b]]$St <- Sl[[b]]$St + Sl[[b]]$lambda[i]*Sl[[b]]$S[[i]]
}
if (root) {
Eb <- t(mroot(Sl[[b]]$St,Sl[[b]]$rank))
if (sparse) {
# Eb <- as(Eb,"dgTMatrix") - deprecated
Eb <- as(as(as(Eb, "dMatrix"), "generalMatrix"), "TsparseMatrix")
E$i[[b]] <- Eb@i + Sl[[b]]$start
E$j[[b]] <- Eb@j + Sl[[b]]$start
E$x[[b]] <- Eb@x
} else {
indc <- Sl[[b]]$start:(Sl[[b]]$start+ncol(Eb)-1)
indr <- Sl[[b]]$start:(Sl[[b]]$start+nrow(Eb)-1)
E[indr,indc] <- Eb
}
}
}
} ## end of multi-S block branch
} ## end of block loop
if (root) E <- if (sparse) sparseMatrix(i=unlist(E$i),j=unlist(E$j),x=unlist(E$x),dims=c(np,np)) else
E[rowSums(abs(E))!=0,,drop=FALSE] ## drop zero rows.
list(ldetS=ldS,ldet1=d1.ldS,ldet2=d2.ldS,Sl=Sl,rp=rp,E=E)
} ## end ldetS
Sl.addS <- function(Sl,A,rho) {
## Routine to add total penalty to matrix A. Sl is smooth penalty
## list from Sl.setup, so initial reparameterizations have taken place,
## and should have already been applied to A using Sl.initial.repara.
## rho is the vector of log smoothing parameters, one per penalty term
## across all blocks. Returns A + sum_k exp(rho[k]) S_k, each S_k padded
## into the coefficient positions indexed by its block.
## NOTE: C_mgcv_madi updates its first argument in place (see the
## commented-out pure R equivalents below for what each call does),
## hence the explicit copy of A at the top.
k <- 1 ## smoothing parameter counter, running across all blocks
A <- A*1 ## force a copy to be made so that A not modified in calling env!!
if (length(Sl)>0) for (b in 1:length(Sl)) {
ind <- (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind] ## coefs penalized by this block
if (length(Sl[[b]]$S)==1) { ## singleton - penalty is identity times sp in this repara
B <- exp(rho[k]);diag <- -1 ## diag==-1: scalar B added to diag(A)[ind] (see below)
dummy <- .Call(C_mgcv_madi,A,B,ind,diag) ## modifies A in place
## diag(A)[ind] <- diag(A)[ind] + exp(rho[k]) ## penalty is identity times sp
k <- k + 1
} else { ## multi-S block: add each scaled penalty matrix into A[ind,ind]
for (j in 1:length(Sl[[b]]$S)) {
B <- exp(rho[k]) * Sl[[b]]$S[[j]]; diag <- 0 ## diag==0: full matrix B added (see below)
.Call(C_mgcv_madi,A,B,ind,diag) ## modifies A in place
## A[ind,ind] <- A[ind,ind] + exp(rho[k]) * Sl[[b]]$S[[j]]
k <- k + 1
}
}
}
A
} ## Sl.addS
Sl.addS0 <- function(Sl,A,rho) {
## Routine to add total penalty to matrix A. Sl is smooth penalty
## list from Sl.setup, so initial reparameterizations have taken place,
## and should have already been applied to A using Sl.initial.repara.
## rho is the vector of log smoothing parameters across all blocks.
## Inefficient pure R prototype of Sl.addS; returns A + sum_k exp(rho[k]) S_k.
## FIX: loop now uses seq_along, so an empty Sl returns A unchanged
## instead of failing on the 1:0 index sequence (Sl.addS guards this
## case with an explicit length check).
k <- 1 ## smoothing parameter counter, running across all blocks
for (b in seq_along(Sl)) {
  ind <- (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind] ## coefs penalized by this block
  if (length(Sl[[b]]$S)==1) { ## singleton
    diag(A)[ind] <- diag(A)[ind] + exp(rho[k]) ## penalty is identity times sp
    k <- k + 1
  } else { ## multi-S block: add each scaled penalty matrix into A[ind,ind]
    for (j in seq_along(Sl[[b]]$S)) {
      A[ind,ind] <- A[ind,ind] + exp(rho[k]) * Sl[[b]]$S[[j]]
      k <- k + 1
    }
  }
}
A
} ## Sl.addS0
Sl.repa <- function(rp,X,l=0,r=0) {
## Apply the re-parameterizations stored in rp to X.
## l and r select the transform applied on the left/right of X:
## 0 = do not apply; 1 = D; 2 = t(D); -1 = Di (inverse); -2 = t(Di).
## If b' is the transformed and b the original parameter vector then
## b' = D b and Di b' = b. When a block stores T, D = T and Di = Ti;
## otherwise D = t(Qs) and Di = Qs.
## Aim is for simpler and cleaner than Sl.repara.
  if (length(rp)==0) return(X)
  ## select the transform matrix for block info rpi and code in {-2,-1,1,2}...
  pick <- function(rpi,code) {
    if (is.null(rpi$Qs)) { ## T/Ti stored directly
      if (code==-2) t(rpi$Ti) else if (code==-1) rpi$Ti else
      if (code==2) t(rpi$T) else rpi$T
    } else { ## Qs stored: D = t(Qs), Di = Qs
      if (code==-2) t(rpi$Qs) else if (code==-1) rpi$Qs else
      if (code==2) rpi$Qs else t(rpi$Qs)
    }
  }
  for (i in seq_along(rp)) if (rp[[i]]$repara) {
    ii <- rp[[i]]$ind ## rows/cols of X transformed by this block
    if (l) { ## left transform
      M <- pick(rp[[i]],l)
      if (is.matrix(X)) X[ii,] <- M %*% X[ii,] else X[ii] <- M %*% X[ii]
    }
    if (r) { ## right transform
      M <- pick(rp[[i]],r)
      if (is.matrix(X)) X[,ii] <- X[,ii] %*% M else X[ii] <- t(X[ii]) %*% M
    }
  }
  X
} ## Sl.repa
Sl.repara <- function(rp,X,inverse=FALSE,both.sides=TRUE) {
## Apply (or undo) the blockwise re-parameterization from ldetS to X.
## If X is a matrix it is treated as a model matrix (forward) or a
## covariance matrix (inverse); if X is a vector it is treated as a
## parameter vector. beta_trans = Ti beta_original; T is inverse of Ti.
## If inverse==TRUE the inverse re-para is applied to parameter vector
## or cov matrix X; both.sides==FALSE suppresses the left-hand
## application in the cov matrix case.
  if (length(rp)==0) return(X)
  Xmat <- is.matrix(X)
  for (b in seq_along(rp)) {
    if (!rp[[b]]$repara) next ## block not reparameterized - nothing to do
    ii <- rp[[b]]$ind ## coefficient positions for this block
    useQ <- !is.null(rp[[b]]$Qs) ## Qs stored, or T/Ti pair?
    if (inverse) {
      A <- if (useQ) rp[[b]]$Qs else rp[[b]]$Ti
      if (Xmat) { ## X is a cov matrix: A X A'
        if (both.sides) X[ii,] <- A %*% X[ii,,drop=FALSE]
        X[,ii] <- X[,ii,drop=FALSE] %*% t(A)
      } else X[ii] <- A %*% X[ii] ## parameter vector
    } else { ## forward re-para
      if (Xmat) { ## model matrix: post-multiply the block's columns
        X[,ii] <- if (useQ) X[,ii] %*% rp[[b]]$Qs else X[,ii] %*% rp[[b]]$Ti
      } else { ## parameter vector
        X[ii] <- if (useQ) t(rp[[b]]$Qs) %*% X[ii] else rp[[b]]$T %*% X[ii]
      }
    }
  }
  X
} ## end Sl.repara
Sl.mult <- function(Sl,A,k = 0,full=TRUE) {
## Sl contains the blocks of block diagonal penalty S.
## If k<=0 this routine forms S%*%A (the whole penalty applied to A).
## If k>0 then the routine forms S_k%*%A, zero padded
## if full==TRUE, but in smallest number of rows form otherwise.
## A may be a matrix or a vector. For repara blocks the (transformed)
## penalty acts on the subset of the block's coefficients selected by
## $ind; otherwise the original S matrices (or St) are used. nl.reg,
## when non-NULL, adds a further multiple of the identity to singleton
## penalties.
## FIX: in the k>0 non-linear branch below, the block's AS function is
## now called with the block Sl[[b]] as its second argument - matching
## the k<=0 branch above and the AdS calls in Sl.termMult - rather than
## the whole list Sl, which does not match AS's interface.
nb <- length(Sl) ## number of blocks
if (nb==0) return(A*0) ## no penalty at all
Amat <- is.matrix(A)
if (k<=0) { ## apply whole penalty
B <- A*0 ## accumulator: rows untouched by any block stay zero
for (b in 1:nb) { ## block loop
ind <- Sl[[b]]$start:Sl[[b]]$stop ## index of coeffs for this block
if (!Sl[[b]]$linear) { ## non-linear block - use its AS routine
if (Amat) B[ind,] <- t(Sl[[b]]$AS(t(A[ind,]),Sl[[b]])) else
B[ind] <- drop(Sl[[b]]$AS(A[ind],Sl[[b]]))
} else if (length(Sl[[b]]$S)==1) { ## singleton
if (Sl[[b]]$repara) { ## penalty is lambda times identity on ind subset
ind <- ind[Sl[[b]]$ind]
if (Amat) B[ind,] <- Sl[[b]]$lambda*A[ind,] else
B[ind] <- Sl[[b]]$lambda*A[ind]
} else { ## original penalty has to be applied
if (Amat) {
B[ind,] <- Sl[[b]]$lambda*Sl[[b]]$S[[1]] %*% A[ind,] + if (is.null(Sl[[b]]$nl.reg)) 0 else Sl[[b]]$nl.reg * A[ind,]
} else {
B[ind] <- Sl[[b]]$lambda*Sl[[b]]$S[[1]] %*% A[ind] + if (is.null(Sl[[b]]$nl.reg)) 0 else Sl[[b]]$nl.reg * A[ind]
}
}
} else { ## multi-S block - use pre-summed total penalty St
if (Sl[[b]]$repara) ind <- ind[Sl[[b]]$ind]
if (Amat) B[ind,] <- Sl[[b]]$St %*% A[ind,] else
B[ind] <- Sl[[b]]$St %*% A[ind]
}
} ## end of block loop
A <- B
} else { ## single penalty matrix selected
j <- 0 ## S counter across all blocks
for (b in 1:nb) { ## block loop
for (i in 1:length(Sl[[b]]$S)) { ## S loop within blocks
j <- j + 1
if (j==k) { ## found the requested term
if (!Sl[[b]]$linear || length(Sl[[b]]$S)==1) { ## singleton (or non-linear)
ind <- Sl[[b]]$start:Sl[[b]]$stop
if (Sl[[b]]$repara) { ## penalty is lambda times identity on ind subset
ind <- ind[Sl[[b]]$ind]
if (full) { ## return zero answer with all zeroes in place
B <- A*0
if (Amat) B[ind,] <- Sl[[b]]$lambda*A[ind,] else
B[ind] <- Sl[[b]]$lambda*A[ind]
A <- B
} else { ## strip zero rows from answer
A <- if (Amat) Sl[[b]]$lambda*A[ind,] else as.numeric(Sl[[b]]$lambda*A[ind])
}
} else if (Sl[[b]]$linear) { ## not reparameterized version, but linear
if (full) {
B <- A*0
if (Amat) {
B[ind,] <- Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind,] + if (is.null(Sl[[b]]$nl.reg)) 0 else Sl[[b]]$nl.reg * A[ind,]
} else {
B[ind] <- Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind] + if (is.null(Sl[[b]]$nl.reg)) 0 else Sl[[b]]$nl.reg * A[ind]
}
A <- B
} else {
A <- if (Amat) {
Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind,] + if (is.null(Sl[[b]]$nl.reg)) 0 else Sl[[b]]$nl.reg * A[ind,]
} else {
as.numeric(Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind]) + if (is.null(Sl[[b]]$nl.reg)) 0 else Sl[[b]]$nl.reg * A[ind]
}
}
} else { ## non-linear - apply block penalty via AS (see FIX note above)
if (full) {
B <- A*0
if (Amat) B[ind,] <- t(Sl[[b]]$AS(t(A[ind,]),Sl[[b]])) else
B[ind] <- drop(Sl[[b]]$AS(A[ind],Sl[[b]]))
A <- B
} else {
A <- if (Amat) t(Sl[[b]]$AS(t(A[ind,]),Sl[[b]])) else drop(Sl[[b]]$AS(A[ind],Sl[[b]]))
}
} ## non-linear
} else { ## multi-S block
ind <- if (Sl[[b]]$repara) (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind] else Sl[[b]]$start:Sl[[b]]$stop
if (full) { ## return zero answer with all zeroes in place
B <- A*0
if (Amat) {
B[ind,] <- if (is.null(Sl[[b]]$Srp)||!Sl[[b]]$repara) Sl[[b]]$lambda[i]*(Sl[[b]]$S[[i]]%*%A[ind,]) else
Sl[[b]]$Srp[[i]]%*%A[ind,]
} else {
B[ind] <- if (is.null(Sl[[b]]$Srp)||!Sl[[b]]$repara) Sl[[b]]$lambda[i]*(Sl[[b]]$S[[i]]%*%A[ind]) else
Sl[[b]]$Srp[[i]]%*%A[ind]
}
A <- B
} else { ## strip zero rows from answer
if (is.null(Sl[[b]]$Srp)||!Sl[[b]]$repara) {
A <- if (Amat) Sl[[b]]$lambda[i]*(Sl[[b]]$S[[i]]%*%A[ind,]) else
Sl[[b]]$lambda[i]*as.numeric(Sl[[b]]$S[[i]]%*%A[ind])
} else { ## Srp is the already lambda-scaled reparameterized penalty
A <- if (Amat) Sl[[b]]$Srp[[i]]%*%A[ind,] else as.numeric(Sl[[b]]$Srp[[i]]%*%A[ind])
}
}
}
break
}
} ## end of S loop
if (j==k) break
} ## end of block loop
}
A
} ## end Sl.mult
Sl.termMult <- function(Sl,A,full=FALSE,nt=1) {
## returns a list containing the derivative of the penalty w.r.t
## each smoothing/variance parameter multiplied by A. For linear terms
## this is just the product of each element S of Sl
## with A. If full==TRUE then the results include the zero rows
## otherwise these are stripped out, but in that case each element
## of the return object contains an "ind" attribute, indicating
## which rows of the full matrix it relates to.
## A may be a (possibly sparse) matrix or a vector; nt is the number
## of threads passed to pmmult for the dense matrix products.
Amat <- !is.null(dim(A)) #is.matrix(A) ## allow sparse matrices also
SA <- list() ## SA[[k]] will hold (dS/d rho_k) %*% A
k <- 0 ## component counter, running across all blocks
nb <- length(Sl) ## number of blocks
if (nb>0) for (b in 1:nb) { ## block loop
if (!Sl[[b]]$linear) { ## non-linear term - derivatives via its AdS routine
ind <- Sl[[b]]$start:Sl[[b]]$stop
for (i in 1:Sl[[b]]$n.sp) { ## loop over its parameters
k <- k + 1
if (full) { ## zero padded result
B <- A*0
if (Amat) B[ind,] <- t(Sl[[b]]$AdS(t(A[ind,,drop=FALSE]),Sl[[b]],i)) else
B[ind] <- drop(Sl[[b]]$AdS(A[ind],Sl[[b]],i))
SA[[k]] <- B
} else { ## stripped result, rows recorded in "ind" attribute
SA[[k]] <- if (Amat) t(Sl[[b]]$AdS(t(A[ind,,drop=FALSE]),Sl[[b]],i)) else
drop(Sl[[b]]$AdS(A[ind],Sl[[b]],i))
attr(SA[[k]],"ind") <- ind
}
}
} else if (length(Sl[[b]]$S)==1) { ## singleton
k <- k + 1
ind <- Sl[[b]]$start:Sl[[b]]$stop
if (Sl[[b]]$repara) { ## penalty is lambda times identity on the ind subset
ind <- ind[Sl[[b]]$ind]
if (full) { ## return zero answer with all zeroes in place
B <- A*0
if (Amat) B[ind,] <- Sl[[b]]$lambda*A[ind,,drop=FALSE] else
B[ind] <- Sl[[b]]$lambda*A[ind]
SA[[k]] <- B
} else { ## strip zero rows from answer
SA[[k]] <- if (Amat) Sl[[b]]$lambda*A[ind,,drop=FALSE] else
drop(Sl[[b]]$lambda*A[ind])
attr(SA[[k]],"ind") <- ind
}
} else { ## original penalty matrix must be used
if (full) { ## return zero answer with all zeroes in place
B <- A*0
if (Amat) B[ind,] <- Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind,,drop=FALSE] else
B[ind] <- Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind]
SA[[k]] <- B
} else { ## strip zero rows from answer
SA[[k]] <- if (Amat) Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind,,drop=FALSE] else
drop(Sl[[b]]$lambda*Sl[[b]]$S[[1]]%*%A[ind])
attr(SA[[k]],"ind") <- ind
}
}
} else { ## multi-S block
ind <- if (Sl[[b]]$repara) (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind] else Sl[[b]]$start:Sl[[b]]$stop
for (i in 1:length(Sl[[b]]$S)) { ## work through S terms
k <- k + 1
if (full) { ## return answer with all zeroes in place
B <- A*0
if (is.null(Sl[[b]]$Srp)||!Sl[[b]]$repara) { ## use original (lambda-scaled) S[[i]]
if (Amat) {
B[ind,] <- if (nt==1) Sl[[b]]$lambda[i]*(Sl[[b]]$S[[i]]%*%A[ind,,drop=FALSE]) else
Sl[[b]]$lambda[i]*pmmult(Sl[[b]]$S[[i]],A[ind,,drop=FALSE],nt=nt)
} else B[ind] <- Sl[[b]]$lambda[i]*(Sl[[b]]$S[[i]]%*%A[ind])
} else { ## Srp[[i]] is the already lambda-scaled reparameterized penalty
if (Amat) {
B[ind,] <- if (nt==1) Sl[[b]]$Srp[[i]]%*%A[ind,,drop=FALSE] else
pmmult(Sl[[b]]$Srp[[i]],A[ind,,drop=FALSE],nt=nt)
} else B[ind] <- Sl[[b]]$Srp[[i]]%*%A[ind]
}
SA[[k]] <- B
} else { ## strip zero rows from answer
if (is.null(Sl[[b]]$Srp)||!Sl[[b]]$repara) { ## use original (lambda-scaled) S[[i]]
if (Amat) {
SA[[k]] <- if (nt==1) Sl[[b]]$lambda[i]*(Sl[[b]]$S[[i]]%*%A[ind,,drop=FALSE]) else
Sl[[b]]$lambda[i]*pmmult(Sl[[b]]$S[[i]],A[ind,,drop=FALSE],nt=nt)
} else SA[[k]] <- Sl[[b]]$lambda[i]*as.numeric(Sl[[b]]$S[[i]]%*%A[ind])
} else { ## Srp[[i]] is the already lambda-scaled reparameterized penalty
if (Amat) {
SA[[k]] <- if (nt==1) Sl[[b]]$Srp[[i]]%*%A[ind,,drop=FALSE] else
pmmult(Sl[[b]]$Srp[[i]],A[ind,,drop=FALSE],nt=nt)
} else SA[[k]] <- as.numeric(Sl[[b]]$Srp[[i]]%*%A[ind])
}
attr(SA[[k]],"ind") <- ind
}
} ## end of S loop for block b
}
} ## end block loop
SA
} ## end Sl.termMult
d.detXXS <- function(Sl,PP,nt=1,deriv=2) {
## Derivatives of log|X'X+S| w.r.t. the log smoothing parameters,
## given unpivoted PP', where P is the inverse of R from the QR of the
## augmented model matrix. Returns list(d1,d2): the gradient and, when
## deriv==2, the Hessian (otherwise d2 is returned as a zero matrix).
  SPP <- Sl.termMult(Sl,PP,full=FALSE,nt=nt) ## SPP[[k]] is S_k PP', zero rows stripped
  nd <- length(SPP) ## number of derivative terms
  grad <- rep(0,nd); hess <- matrix(0,nd,nd)
  if (nd>0) for (a in 1:nd) {
    rows.a <- attr(SPP[[a]],"ind") ## rows of the full matrix SPP[[a]] occupies
    grad[a] <- sum(diag(SPP[[a]][,rows.a,drop=FALSE])) ## tr(S_a PP')
    if (deriv==2) {
      for (b in a:nd) { ## symmetric fill of -tr(S_a PP' S_b PP') terms
        rows.b <- attr(SPP[[b]],"ind")
        hess[a,b] <- hess[b,a] <- -sum(t(SPP[[a]][,rows.b,drop=FALSE])*SPP[[b]][,rows.a,drop=FALSE])
      }
      hess[a,a] <- hess[a,a] + grad[a] ## diagonal also picks up tr(S_a PP')
    }
  }
  list(d1=grad,d2=hess)
} ## end d.detXXS
Sl.ift <- function(Sl,R,X,y,beta,piv,rp) {
## function to obtain derivatives of \hat \beta by implicit differentiation
## and to use these directly to evaluate derivs of b'Sb and the RSS.
## piv and rp are the pivots and inverse pivots from the qr that produced R.
## rssj and bSbj only contain the terms that will not cancel in rssj + bSbj.
## Returns b'Sb, rss and their first/second derivatives w.r.t. the log
## smoothing parameters, plus d1b = d beta / d rho (columnwise).
beta <- beta[rp] ## unpivot
Sb <- Sl.mult(Sl,beta,k = 0) ## S beta, unpivoted
Skb <- Sl.termMult(Sl,beta,full=TRUE) ## S_k beta for each term, unpivoted
rsd <- (X%*%beta - y) ## residuals
#Xrsd <- t(X)%*%rsd ## X'Xbeta - X'y
nd <- length(Skb) ## number of derivative terms
np <- length(beta)
db <- matrix(0,np,nd) ## column i holds d beta / d rho_i
rss1 <- bSb1 <- rep(0,nd)
for (i in 1:nd) { ## compute the first derivatives
db[,i] <- -backsolve(R,forwardsolve(t(R),Skb[[i]][piv]))[rp] ## d beta/ d rho_i
## rss1[i] <- 0* 2 * sum(db[,i]*Xrsd) ## d rss / d rho_i
bSb1[i] <- sum(beta*Skb[[i]]) ## + 2 * sum(db[,i]*Sb) ## d b'Sb / d_rho_i
}
XX.db <- t(X)%*%(X%*%db) ## X'X db, needed for rss second derivatives
S.db <- Sl.mult(Sl,db,k=0) ## S db
rss2 <- bSb2 <- matrix(0,nd,nd)
for (k in 1:nd) { ## second derivative loop, symmetric fill
for (j in k:nd) {
## d2b <- (k==j)*db[,k] - backsolve(R,forwardsolve(t(R),Sk.db[[j]][piv,k]+Sk.db[[k]][piv,j]))[rp]
rss2[j,k] <- rss2[k,j] <- 2 * sum(db[,j]*XX.db[,k]) ## + 2 * sum(d2b*Xrsd)
bSb2[j,k] <- bSb2[k,j] <- (k==j)*sum(beta*Skb[[k]]) + 2*(sum(db[,k]*(Skb[[j]]+S.db[,j])) +
sum(db[,j]*Skb[[k]])) ## + 2 * (sum(d2b*Sb)
}
}
list(bSb=sum(beta*Sb),bSb1=bSb1,bSb2=bSb2,d1b=db,rss =sum(rsd^2),rss1=rss1,rss2=rss2)
} ## end Sl.ift
Sl.iftChol <- function(Sl,XX,R,d,beta,piv,nt=1) {
## function to obtain derivatives of \hat \beta by implicit differentiation
## and to use these directly to evaluate derivs of b'Sb and the RSS.
## R is the (possibly sparse) Cholesky factor of the diagonally
## preconditioned penalized Hessian, d the preconditioning diagonal and
## piv contains the pivots from the chol that produced R.
## rssj and bSbj only contain the terms that will not cancel in rssj + bSbj.
## If XX is NULL only bSb1 and db are computed and returned.
Sb <- Sl.mult(Sl,beta,k = 0) ## S beta, unpivoted
Skb <- Sl.termMult(Sl,beta,full=TRUE) ## S_k beta terms, unpivoted
nd <- length(Skb) ## number of derivatives
cd <- FALSE ## set TRUE only for interactive finite-difference checking below
if (cd) { ## check derivatives by finite differencing - debugging only
k <- 0
eps <- 1e-6
eeps <- exp(eps)
fdSk <- list()
for (b in 1:length(Sl)) {
if (Sl[[b]]$linear) {
ind <- 1:length(Sl[[b]]$S)
for (i in ind) { ## perturb each lambda multiplicatively and difference
Sl[[b]]$lambda[i] <- Sl[[b]]$lambda[i]*eeps
Sb1 <- Sl.mult(Sl,beta,k = 0)
Sl[[b]]$lambda[i] <- Sl[[b]]$lambda[i]/eeps ## restore
k <- k + 1
fdSk[[k]] <- (Sb1-Sb)/eps
}
} else { ## non-linear block - perturb via its updateS routine
ind <- 1:Sl[[b]]$n.sp
theta <- Sl[[b]]$lambda
for (i in ind) {
theta[i] <- theta[i] + eps
Sl[[b]] <- Sl[[b]]$updateS(theta,Sl[[b]])
theta[i] <- theta[i] - eps
Sb1 <- Sl.mult(Sl,beta,k = 0)
k <- k + 1
fdSk[[k]] <- (Sb1-Sb)/eps
}
Sl[[b]] <- Sl[[b]]$updateS(theta,Sl[[b]]) ## restore original parameters
}
}
plot(Skb[[1]],fdSk[[1]]) ## visual check: points should lie on a straight line
} ## if cd deriv check
np <- length(beta)
db <- matrix(0,np,nd) ## column i will hold d beta / d rho_i
rss1 <- bSb1 <- rep(0,nd)
if (nd==0) return(list(bSb=0,bSb1=rep(0,nd),bSb2=rep(0,nd),d1b=db,rss1=rss1,rss2=rss1)) ## nothing to differentiate
## alternative all in one code - matches loop results, but
## timing close to identical - modified for parallel exec
D <- matrix(unlist(Skb),length(beta),nd) ## D[,k] = S_k beta
bSb1 <- colSums(beta*D) ## d b'Sb / d rho_k = beta' S_k beta
if (inherits(R,c("dgCMatrix","dtCMatrix"))) { ## sparse R
Dd <- Diagonal(length(d),1/d) ## undo the diagonal preconditioning
db[piv,] <- -as(solve(R,solve(t(R),(Dd %*% D)[piv,])),"matrix")
db <- as(Dd %*% db,"matrix")
} else { ## dense R - parallel triangular solves
D1 <- .Call(C_mgcv_Rpforwardsolve,R,D[piv,]/d[piv],nt) ## note R transposed internally unlike forwardsolve
db[piv,] <- -.Call(C_mgcv_Rpbacksolve,R,D1,nt)/d[piv]
}
if (is.null(XX)) return(list(bSb1=bSb1,db=db)) ## return early - second derivs not needed
## XX.db <- XX%*%db
XX.db <- .Call(C_mgcv_pmmult2,XX,db,0,0,nt) ## parallel XX %*% db
S.db <- Sl.mult(Sl,db,k=0) ## S db
rss2 <- 2 * .Call(C_mgcv_pmmult2,db,XX.db,1,0,nt) ## 2 db' XX db
bSb2 <- diag(x=colSums(beta*D),nrow=nd) ## diagonal beta' S_k beta terms
bSb2 <- bSb2 + 2 * (.Call(C_mgcv_pmmult2,db,D+S.db,1,0,nt) + .Call(C_mgcv_pmmult2,D,db,1,0,nt))
list(bSb=sum(beta*Sb),bSb1=bSb1,bSb2=bSb2,
d1b=db ,rss1=rss1,rss2=rss2)
} ## end Sl.iftChol
Sl.fitChol <- function(Sl,XX,f,rho,yy=0,L=NULL,rho0=0,log.phi=0,phi.fixed=TRUE,
nobs=0,Mp=0,nt=c(1,1),tol=0,gamma=1) {
## given X'WX in XX and f=X'Wy solves the penalized least squares problem
## with penalty defined by Sl and rho, and evaluates a REML Newton step, the REML
## gradient and the estimated coefs bhat. If phi.fixed=FALSE then we need
## yy = y'Wy in order to get derivatives w.r.t. phi.
## L maps working to full log smoothing parameters: rho.full = L rho + rho0.
## nt[1] is the thread count for derivative computations, nt[2] for the
## Cholesky/inversion steps; tol is the convergence tolerance used to
## decide which parameters still need a Newton update.
## NOTE: with an optimized BLAS nt==1 is likely to be much faster than
## nt > 1
tol <- as.numeric(tol)
rho <- if (is.null(L)) rho + rho0 else L%*%rho + rho0
if (length(rho)<length(rho0)) rho <- rho0 ## ncol(L)==0 or length(rho)==0
## get log|S|_+ without stability transform...
fixed <- rep(FALSE,length(rho)) ## no parameters treated as fixed here
ldS <- ldetS(Sl,rho,fixed,np=ncol(XX),root=FALSE,repara=FALSE,nt=nt[1])
## now the Cholesky factor of the penalized Hessian...
#XXp <- XX+crossprod(ldS$E) ## penalized Hessian
XXp <- Sl.addS(Sl,XX,rho) ## X'WX + S formed directly, avoiding explicit root
d <- diag(XXp);ind <- d<=0 ## build diagonal preconditioner, guarding non-positive entries
d[ind] <- 1;d[!ind] <- sqrt(d[!ind])
#XXp <- t(XXp/d)/d ## diagonally precondition
R <- if (nt[2]>1) pchol(t(XXp/d)/d,nt[2]) else suppressWarnings(chol(t(XXp/d)/d,pivot=TRUE))
r <- min(attr(R,"rank"),Rrank(R)) ## estimated rank of penalized Hessian
p <- ncol(XXp)
piv <- attr(R,"pivot") #;rp[rp] <- 1:p
if (r<p) { ## drop rank deficient terms...
R <- R[1:r,1:r]
piv <- piv[1:r]
}
beta <- rep(0,p) ## dropped coefs stay at zero
beta[piv] <- backsolve(R,(forwardsolve(t(R),f[piv]/d[piv])))/d[piv] ## coef estimates
## get component derivatives based on IFT (noting that ldS$Sl has s.p.s updated to current)
dift <- Sl.iftChol(ldS$Sl,XX,R,d,beta,piv,nt=nt[1])
## now the derivatives of log|X'X+S|
PP <- matrix(0,p,p)
if (nt[2]>1) {
P <- pbsi(R,nt=nt[2],copy=TRUE) ## invert R
PP[piv,piv] <- pRRt(P,nt[2]) ## PP'
} else PP[piv,piv] <- chol2inv(R)
PP <- t(PP/d)/d ## undo preconditioning
ldetXXS <- 2*sum(log(diag(R))+log(d[piv])) ## log|X'X+S|
dXXS <- d.detXXS(ldS$Sl,PP,nt=nt[1]) ## derivs of log|X'X+S|
phi <- exp(log.phi) ## scale parameter
nrho <- length(rho)
## assemble REML gradient and Hessian w.r.t. the log smoothing parameters...
reml1 <- if (nrho==0) rep(0,0) else (dXXS$d1[!fixed] - ldS$ldet1 +
(dift$rss1[!fixed] + dift$bSb1[!fixed])/(phi*gamma))/2
reml2 <- if (nrho==0) matrix(0,0,0) else (dXXS$d2[!fixed,!fixed] - ldS$ldet2 +
(dift$rss2[!fixed,!fixed] + dift$bSb2[!fixed,!fixed])/(phi*gamma))/2
if (nrho==0&&phi.fixed) { ## need to return now - nothing else to do
return(list(beta=beta,grad=0,step=0,db=dift$d1b,PP=PP,R=R,piv=piv,rank=r, hess=reml2,ldetS=ldS$ldetS,ldetXXS=ldetXXS))
}
if (!phi.fixed) { ## append derivatives w.r.t. log(phi)
n <- length(reml1)
rss.bSb <- yy - sum(beta*f) ## use identity ||y-Xb|| + b'Sb = y'y - b'X'y (b is minimizer)
reml1[n+1] <- (-rss.bSb/(phi*gamma) + nobs/gamma - Mp)/2
d <- c(-(dift$rss1[!fixed] + dift$bSb1[!fixed]),rss.bSb)/(2*phi*gamma) ## mixed rho/phi derivatives
reml2 <- if (n>0) rbind(cbind(reml2,d[1:n]),d) else matrix(d,1,1)
if (!is.null(L)) L <- rbind(cbind(L,rep(0,nrow(L))),c(rep(0,ncol(L)),1)) ## extend L for phi
}
if (!is.null(L)) { ## transform derivatives to working parameter space
reml1 <- t(L)%*%reml1
reml2 <- t(L)%*%reml2%*%L
}
uconv.ind <- (abs(reml1) > tol)|(abs(diag(reml2))>tol) ## parameters still to update
hess <- reml2
grad <- reml1
if (length(grad)>0&&sum(uconv.ind)>0) {
if (sum(uconv.ind)!=ncol(reml2)) { ## restrict to unconverged subset
reml1 <- reml1[uconv.ind]
reml2 <- reml2[uconv.ind,uconv.ind]
}
er <- eigen(reml2,symmetric=TRUE)
er$values <- abs(er$values) ## flip negative eigenvalues - force +ve definite
me <- max(er$values)*.Machine$double.eps^.5
er$values[er$values<me] <- me ## guard against near-singularity
step <- rep(0,length(uconv.ind))
step[uconv.ind] <- -er$vectors%*%((t(er$vectors)%*%reml1)/er$values) ## Newton step
## limit the step length...
ms <- max(abs(step))
if (ms>4) step <- 4*step/ms
} else step <- 0
## return the coefficient estimate, the reml grad and the Newton step...
list(beta=beta,grad=grad,step=step,db=dift$d1b,PP=PP,R=R,piv=piv,rank=r,
hess=hess,ldetS=ldS$ldetS,ldetXXS=ldetXXS)
} ## Sl.fitChol
Sl.fit <- function(Sl,X,y,rho,fixed,log.phi=0,phi.fixed=TRUE,rss.extra=0,nobs=NULL,Mp=0,nt=1,gamma=1) {
## fits penalized regression model with model matrix X and
## initialised block diagonal penalty Sl to data in y, given
## log smoothing parameters rho.
## Returns coefs, reml score + grad and Hessian.
## fixed flags smoothing parameters treated as fixed (their derivatives
## are dropped); rss.extra is added to the residual sum of squares (e.g.
## from an initial QR reduction of X); Mp is the penalty null space
## dimension; gamma scales the RSS/sample-size terms of the REML score.
np <- ncol(X) ## number of parameters
n <- nrow(X)
phi <- exp(log.phi) ## scale parameter
if (is.null(nobs)) nobs <- n
## get log|S|_+ stably...
ldS <- ldetS(Sl,rho,fixed,np,root=TRUE,nt=nt)
## apply resulting stable re-parameterization to X...
X <- Sl.repara(ldS$rp,X)
## get pivoted QR decomp of augmented model matrix (in parallel if nt>1)
qrx <- if (nt>1) pqr2(rbind(X,ldS$E),nt=nt) else qr(rbind(X,ldS$E),LAPACK=TRUE)
rp <- qrx$pivot;rp[rp] <- 1:np ## reverse pivot vector
## find pivoted \hat beta...
R <- qr.R(qrx)
Qty0 <- qr.qty(qrx,c(y,rep(0,nrow(ldS$E))))
beta <- backsolve(R,Qty0)[1:np]
rss.bSb <- sum(Qty0[-(1:np)]^2) + rss.extra ## rss + b'Sb via the augmented residual
## get component derivatives based on IFT...
dift <- Sl.ift(ldS$Sl,R,X,y,beta,qrx$pivot,rp)
## and the derivatives of log|X'X+S|...
P <- pbsi(R,nt=nt,copy=TRUE) ## invert R
## P <- backsolve(R,diag(np))[rp,] ## invert R and row unpivot
## crossprod and unpivot (don't unpivot if unpivoting P above)
PP <- if (nt==1) tcrossprod(P)[rp,rp] else pRRt(P,nt)[rp,rp] ## PP'
ldetXXS <- 2*sum(log(abs(diag(R)))) ## log|X'X+S|
dXXS <- d.detXXS(ldS$Sl,PP,nt=nt) ## derivs of log|X'X+S|
## all ingredients are now in place to form REML score and
## its derivatives....
reml <- (rss.bSb/(phi*gamma) + (nobs/gamma-Mp)*log(2*pi*phi) + Mp*log(gamma) +
ldetXXS - ldS$ldetS)/2
reml1 <- (dXXS$d1[!fixed] - ldS$ldet1 + # dift$bSb1[!fixed]/phi)/2
(dift$rss1[!fixed] + dift$bSb1[!fixed])/(phi*gamma))/2
reml2 <- (dXXS$d2[!fixed,!fixed] - ldS$ldet2 + #dift$bSb2[!fixed,!fixed]/phi)/2
(dift$rss2[!fixed,!fixed] + dift$bSb2[!fixed,!fixed])/(phi*gamma))/2
## finally add in derivatives w.r.t. log.phi
if (!phi.fixed) {
n <- length(reml1)
reml1[n+1] <- (-rss.bSb/(phi*gamma) + nobs/gamma - Mp)/2
#d <- c(-(dift$bSb1[!fixed]),rss.bSb)/(2*phi)
d <- c(-(dift$rss1[!fixed] + dift$bSb1[!fixed]),rss.bSb)/(2*phi*gamma) ## mixed rho/phi derivatives
reml2 <- rbind(cbind(reml2,d[1:n]),d)
}
## following are de-bugging lines for testing derivatives of components...
#list(reml=ldetXXS,reml1=dXXS$d1,reml2=dXXS$d2)
#list(reml=ldS$ldetS,reml1=ldS$ldet1,reml2=ldS$ldet2)
#list(reml=dift$rss,reml1=dift$rss1,reml2=dift$rss2)
#list(reml=dift$bSb,reml1=dift$bSb1,reml2=dift$bSb2)
list(reml=as.numeric(reml),reml1=reml1,reml2=reml2,beta=beta[rp],PP=PP,
rp=ldS$rp,rss=dift$rss+rss.extra,nobs=nobs,d1b=dift$d1b)
} ## Sl.fit
fast.REML.fit <- function(Sl,X,y,rho,L=NULL,rho.0=NULL,log.phi=0,phi.fixed=TRUE,
rss.extra=0,nobs=NULL,Mp=0,conv.tol=.Machine$double.eps^.5,nt=1,gamma=1) {
## estimates log smoothing parameters rho, by optimizing fast REML
## using Newton's method. On input Sl is a block diagonal penalty
## structure produced by Sl.setup, while X is a model matrix
## reparameterized to correspond to any re-parameterization
## used in Sl. Both will have had been modified to drop any
## structurally un-identifiable coefficients.
## Note that lower bounds on smoothing parameters are not handled.
## FIX: the default for gamma was previously the self-referential
## 'gamma=gamma', which raises a "promise already under evaluation"
## error whenever the caller omits gamma (R evaluates default arguments
## lazily in the function's own frame). The neutral value 1, matching
## the defaults of Sl.fit and Sl.fitChol, is used instead; callers that
## supply gamma are unaffected.
maxNstep <- 5 ## maximum Newton step length on the log sp scale
if (is.null(nobs)) nobs <- nrow(X)
np <- ncol(X)
if (nrow(X) > np) { ## might as well do an initial QR step to shrink the problem
qrx <- if (nt>1) pqr2(X,nt=nt) else qr(X,LAPACK=TRUE)
rp <- qrx$pivot
rp[rp] <- 1:np ## reverse pivot vector
X <- qr.R(qrx)[,rp] ## unpivoted R replaces X
y <- qr.qty(qrx,y)
rss.extra <- rss.extra + sum(y[-(1:np)]^2) ## residual part of y carried as extra rss
y <- y[1:np]
}
if (is.null(L)) {
L <- diag(length(rho)) ## identity map - work directly with rho
if (is.null(rho.0)) rho.0 <- rep(0,nrow(L))
} else { ## convert intial s.p.s to account for L
if (is.null(rho.0)) rho.0 <- rep(0,nrow(L))
rho <- as.numeric(coef(lm(rho ~ L-1+offset(rho.0)))) ## least squares working params
}
fixed <- rep(FALSE,nrow(L)) ## no smoothing parameters fixed here
best <- Sl.fit(Sl,X,y,L%*%rho+rho.0,fixed,log.phi,phi.fixed,rss.extra,nobs,Mp,nt=nt,gamma=gamma)
## get a typical scale for the reml score...
reml.scale <- abs(best$reml) + best$rss/best$nobs
nr <- length(rho.0)
if (!phi.fixed) { ## optimize log.phi as well
rho <- c(rho,log.phi) ## append log.phi for fitting
rho.0 <- c(rho.0,0)
L <- rbind(cbind(L,L[,1]*0),c(L[1,]*0,1)) ## extend L with identity row/col for phi
}
grad <- as.numeric(t(L)%*% best$reml1)
hess <- t(L)%*% best$reml2%*%L
grad2 <- diag(hess)
## create and index for the unconverged...
## idea in following is only to exclude terms with zero first and second derivative
## from optimization, as it is only these that slow things down if included...
uconv.ind <- (abs(grad) > reml.scale*conv.tol*.1)|(abs(grad2)>reml.scale*conv.tol*.1)
## if all appear converged at this stage, then there is probably something wrong,
## but reset anyway to see if situation can be recovered. If we don't do this then
## need to abort immediately, otherwise fails trying to eigen a 0 by 0 matrix
if (sum(uconv.ind)==0) {
warning("Possible divergence detected in fast.REML.fit",call.=FALSE,immediate.=TRUE)
uconv.ind <- rep(TRUE,length(grad))
}
step.failed <- FALSE
for (iter in 1:200) { ## the Newton loop
## Work only with unconverged (much quicker under indefiniteness)
hess <- (t(L)%*% best$reml2%*%L)[uconv.ind,uconv.ind]
grad <- as.numeric(t(L)%*%best$reml1)[uconv.ind]
## check that Hessian is +ve def. Fix if not.
eh <- eigen(hess,symmetric=TRUE)
## flip negative eigenvalues to get +ve def...
ind <- eh$values < 0
eh$values[ind] <- -eh$values[ind]
## avoid indefiniteness by further manipulation...
thresh <- max(abs(eh$values))*.Machine$double.eps^.5
ind <- eh$values < thresh
eh$values[ind] <- thresh
## get the Newton direction, -ve inverse hessian times gradient
uc.step <- - eh$vectors%*%((t(eh$vectors)%*%grad)/eh$values)
## now make sure step is not too far...
ms <- max(abs(uc.step))
if (ms>maxNstep) uc.step <- maxNstep * uc.step/ms
step <- rep(0,length(uconv.ind)); ## full step (including converged)
step[uconv.ind] <- uc.step ## step includes converged
## try out the step...
rho1 <- L%*%(rho + step)+rho.0; if (!phi.fixed) log.phi <- rho1[nr+1]
trial <- Sl.fit(Sl,X,y,rho1[1:nr],fixed,log.phi,phi.fixed,rss.extra,nobs,Mp,nt=nt,gamma=gamma)
k <- 0 ## step halving counter
not.moved <- 0 ## count number of consecutive steps of essentially no change from best
while (trial$reml>best$reml) { ## step half until improvement or failure
## idea with the following is to count the number of consecutive step halvings
## without a numerically significant change from best$reml, since
## this is an early indicator of step failure.
if (trial$reml-best$reml < conv.tol*reml.scale) not.moved <- not.moved + 1 else not.moved <- 0
if (k==25||sum(rho != rho + step)==0||not.moved>3) { ## halved too often, or step numerically zero
step.failed <- TRUE
break
}
step <- step/2;k <- k + 1
rho1 <- L%*%(rho + step)+rho.0; if (!phi.fixed) log.phi <- rho1[nr+1]
trial <- Sl.fit(Sl,X,y,rho1[1:nr],fixed,log.phi,phi.fixed,rss.extra,nobs,Mp,nt=nt,gamma=gamma)
}
if (step.failed) break ## can get no further
#if ((k==35 && trial$reml>best$reml)||(sum(rho != rho + step)==0)) { ## step has failed
# step.failed <- TRUE
# break ## can get no further
#}
## At this stage the step has been successful.
## Need to test for convergence...
converged <- TRUE
grad <- as.numeric(t(L)%*%trial$reml1)
hess <- t(L)%*%trial$reml2%*%L;grad2 <- diag(hess)
## idea in following is only to exclude terms with zero first and second derivative
## from optimization, as it is only these that slow things down if included...
uconv.ind <- (abs(grad) > reml.scale*conv.tol*.1)|(abs(grad2)>reml.scale*conv.tol*.1)
## now do the convergence testing...
## First check gradiants...
if (any(abs(grad)>reml.scale*conv.tol)) converged <- FALSE ## any() rather than sum() truthiness
## Now check change in REML values
if (abs(best$reml-trial$reml)>reml.scale*conv.tol) {
if (converged) uconv.ind <- uconv.ind | TRUE ## otherwise can't progress
converged <- FALSE
}
best <- trial ## trial becomes current best.
rho <- rho + step ## and new log sp accepted.
if (converged) break ## ok done, leave loop
reml.scale <- abs(best$reml) + best$rss/best$nobs ## update for next iterate
} ## end of Newton loop
if (iter==200) warning("fast REML optimizer reached iteration limit")
if (step.failed) best$conv <- "step failed" else
if (iter==200) best$conv <- "no convergence in 200 iterations" else
best$conv <- "full convergence"
best$iter <- iter
best$outer.info <- list(conv = best$conv, iter = best$iter,grad = grad,hess = hess)
best$rho <- rho
best$rho.full <- as.numeric(L%*%rho+rho.0) ## full log sp vector (and log.phi if estimated)
best ## return the best fit (note that it will need post-processing to be useable)
} ## end fast.REML.fit
ident.test <- function(X,E,nt=1) {
## Identify structurally unidentifiable coefficients for the model with
## model matrix X and scaled sqrt penalty matrix E, using a pivoted QR
## decomposition of rbind(X,E) (X is Frobenius-normalized first so the
## two parts are on comparable scales). nt > 1 uses the parallel QR.
## Returns:
##   drop   - indices of unidentifiable coefs (integer(0) if none), so
##            X[,-drop] is the reduced model matrix when length(drop)>0.
##   undrop - indices of retained coefs: if beta.full is a zero vector
##            of original length then beta.full[undrop] <- beta
##            reinstates dropped coefs as zeroes.
  XE <- rbind(X/norm(X,type="F"),E)
  qrx <- if (nt>1) pqr2(XE,nt=nt) else qr(XE,LAPACK=TRUE) ## pivoted QR
  r <- Rrank(qr.R(qrx),tol=.Machine$double.eps^.75) ## numerical rank of R factor
  drop <- qrx$pivot[-(1:r)] ## pivots beyond rank r are unidentifiable
  undrop <- seq_len(ncol(X))
  if (length(drop)>0) undrop <- undrop[-drop]
  list(drop=drop,undrop=undrop)
} ## ident.test
Sl.drop <- function(Sl,drop,np) {
## Drop the coefficients indexed by 'drop' from the block diagonal penalty
## list Sl. np is the total number of coefficients before dropping.
## Each block's start/stop coef indices, penalized-coef index 'ind',
## penalty rank (and, for multi-S blocks, the S/rS matrices themselves)
## are adjusted; blocks whose penalty rank falls to zero are removed.
## The attribute "drop" on the returned list records what was dropped.
  if (length(drop)==0) return(Sl)
  keep <- rep(TRUE,np)
  keep[drop] <- FALSE ## logical indexing of retained coefs
  ## let b1 be coefs after dropping and b0 be full vector before.
  ## new.loc gives location in b1 of ith element in b0. If i is
  ## in drop then new.loc[i] is position of last b0[j] not dropped.
  ## i.e. for i not in drop, b0[i] = b1[new.loc[i]].
  ## These indices facilitate easy dropping from parts of blocks
  ## corresponding to coef indices in drop.
  cholesky <- attr(Sl,"cholesky") ## is setup all Cholesky based?
  new.loc <- cumsum(keep)
  dropped.blocks <- FALSE ## set TRUE if any block loses its whole penalty
  for (b in 1:length(Sl)) {
    ind <- (Sl[[b]]$start:Sl[[b]]$stop) ## this block's coef indices
    bdrop <- ind%in%drop ## which of this block's coefs are dropped?
    if (length(Sl[[b]]$S)==1) { ## singleton penalty block
      npd <- sum(bdrop[Sl[[b]]$ind]) ## number of penalized coefs dropped
      Sl[[b]]$ind <- Sl[[b]]$ind[!bdrop] ## retain not dropped
      Sl[[b]]$rank <- Sl[[b]]$rank - npd ## reduce rank by penalized dropped
      if (Sl[[b]]$rank <= 0) dropped.blocks <- TRUE
    } else { ## multi-S block
      kp <- !bdrop[Sl[[b]]$ind] ## what to keep within the penalty matrices
      npd <- sum(!kp) ## number of penalized coefs dropped
      Sl[[b]]$ind <- Sl[[b]]$ind[!bdrop] ## retain not dropped
      Sl[[b]]$rank <- Sl[[b]]$rank - npd ## reduce rank by penalized dropped
      if (Sl[[b]]$rank <= 0) dropped.blocks <- TRUE
      ## drop rows and cols from S, and rows (cols too if Cholesky) from rS
      for (i in 1:length(Sl[[b]]$S)) {
        if (length(Sl[[b]]$rS)) Sl[[b]]$rS[[i]] <- if (cholesky) Sl[[b]]$rS[[i]][kp,kp] else Sl[[b]]$rS[[i]][kp,]
        Sl[[b]]$S[[i]] <- Sl[[b]]$S[[i]][kp,kp]
      }
    }
    ## re-map block boundaries into the reduced coef vector...
    Sl[[b]]$start <- new.loc[Sl[[b]]$start]
    Sl[[b]]$stop <- new.loc[Sl[[b]]$stop]
  }
  if (dropped.blocks) { ## compact the list, removing rank 0 blocks
    ## BUG FIX: original code indexed Sl[[b]] here, using the stale loop
    ## variable left over from the loop above, so rank 0 blocks were never
    ## removed and the final block was duplicated. Use i, and truncate.
    new.b <- 1
    for (i in 1:length(Sl)) {
      if (Sl[[i]]$rank>0) {
        Sl[[new.b]] <- Sl[[i]]
        new.b <- new.b + 1
      }
    }
    ## remove now-redundant trailing entries ([[<- NULL preserves attributes)
    if (new.b <= length(Sl)) for (i in length(Sl):new.b) Sl[[i]] <- NULL
  }
  attr(Sl,"drop") <- drop
  Sl
} ## Sl.drop
Sl.Xprep <- function(Sl,X,nt=1) {
## Sl is a block diagonal penalty object from Sl.setup; X a model matrix.
## Applies the preliminary Sl reparameterization to X, tests for
## structural identifiability problems and drops unidentifiable
## parameters from both X and Sl.
## Returns: X (transformed/reduced), Sl (reduced), undrop (index
## reinstating dropped coefs - see ident.test), rank (total penalty
## rank) and Mp (total penalty null space dimension).
  X <- Sl.initial.repara(Sl,X,inverse=FALSE,both.sides=FALSE,cov=FALSE,nt=nt)
  id <- ident.test(X,attr(Sl,"E"),nt=nt) ## structural identifiability check
  if (length(id$drop)>0) { ## some coefs are structurally unidentifiable
    Sl <- Sl.drop(Sl,id$drop,ncol(X))
    X <- X[,-id$drop]
  }
  ## total penalty rank, summed over blocks...
  rank <- sum(vapply(Sl,function(b) b$rank,numeric(1)))
  list(X=X,Sl=Sl,undrop=id$undrop,rank=rank,Mp=ncol(X)-rank)
} ## end Sl.Xprep
Sl.postproc <- function(Sl,fit,undrop,X0,cov=FALSE,scale = -1,L,nt=1) {
## reverse the various fitting re-parameterizations, recovering the
## coefficient vector (and, if cov=TRUE, covariance matrices and
## effective degrees of freedom) on the original parameterization.
## X0 is the original model matrix before any re-parameterization
## or parameter dropping. Sl is also the original *before parameter
## dropping*. undrop maps reduced coefs back into the full vector.
## scale <= 0 means estimate the scale parameter from rss/residual df.
## L maps working log smoothing params to the full set (may be NULL).
np <- ncol(X0)
beta <- rep(0,np)
## undo Sl.repara, zero-padding dropped coefs, then undo the initial repara...
beta[undrop] <- Sl.repara(fit$rp,fit$beta,inverse=TRUE)
beta <- Sl.initial.repara(Sl,beta,inverse=TRUE,both.sides=TRUE,cov=TRUE,nt=nt)
if (cov) {
## fit$d1b holds d beta / d rho columns - transform each back too
d1b <- matrix(0,np,ncol(fit$d1b))
## following construction a bit ugly due to Sl.repara assumptions...
d1b[undrop,] <- t(Sl.repara(fit$rp,t(fit$d1b),inverse=TRUE,both.sides=FALSE))
for (i in 1:ncol(d1b)) d1b[,i] <-
Sl.initial.repara(Sl,as.numeric(d1b[,i]),inverse=TRUE,both.sides=TRUE,cov=TRUE,nt=nt) ## d beta / d rho matrix
## NOTE(review): fit$PP appears to be the unscaled Bayesian cov matrix
## (penalized inverse 'Hessian') in the fitting parameterization -
## confirm against Sl.fit. Transform it back to original coords...
PP <- matrix(0,np,np)
PP[undrop,undrop] <- Sl.repara(fit$rp,fit$PP,inverse=TRUE)
PP <- Sl.initial.repara(Sl,PP,inverse=TRUE,both.sides=TRUE,cov=TRUE,nt=nt)
#XPP <- crossprod(t(X0),PP)*X0
#hat <- rowSums(XPP);edf <- colSums(XPP)
XPP <- crossprod(t(X0),PP) ## X0 %*% PP
hat <- rowSums(XPP*X0) ## leverage: diag(X0 %*% PP %*% t(X0))
F <- crossprod(XPP,X0) ## PP %*% t(X0) %*% X0 - the 'smoother' edf matrix
edf <- diag(F) ## effective degrees of freedom per coef
edf1 <- 2*edf - rowSums(t(F)*F) ## alternative edf: diag(2F - FF)
## edf <- rowSums(PP*crossprod(X0)) ## diag(PP%*%(t(X0)%*%X0))
if (scale<=0) {
## estimate scale from residual sum of squares and residual df...
scale <- fit$rss/(fit$nobs - sum(edf))
}
Vp <- PP * scale ## cov matrix
## sp uncertainty correction...
if (!is.null(L)) d1b <- d1b%*%L ## map derivs to full sp vector
M <- ncol(d1b)
## rV'rV = pseudoinverse of the outer optimization Hessian, via its
## eigen-decomposition with non-positive eigenvalues truncated to zero...
ev <- eigen(fit$outer.info$hess,symmetric=TRUE)
ind <- ev$values <= 0
ev$values[ind] <- 0;ev$values[!ind] <- 1/sqrt(ev$values[!ind])
rV <- (ev$values*t(ev$vectors))[,1:M]
V.sp <- crossprod(rV) ## cov matrix of log smoothing params
attr(V.sp,"L") <- L
## first order correction term: t(d1b) %*% V.sp %*% d1b ...
Vc <- crossprod(rV%*%t(d1b))
Vc <- Vp + Vc ## Bayesian cov matrix with sp uncertainty
edf2 <- rowSums(Vc*crossprod(X0))/scale ## edf based on corrected cov
##bias <- as.numeric(beta-F%*%beta) ## estimate of smoothing bias in beta
## Ve = F %*% Vp is the frequentist cov matrix of the estimators...
return(list(beta=beta,Vp=Vp,Vc=Vc,Ve=F%*%Vp,V.sp=V.sp,edf=edf,edf1=edf1,edf2=edf2,hat=hat,F=F))
} else return(list(beta=beta))
} ## Sl.postproc
## USAGE SEQUENCE:
## 1. Use gam.setup to setup gam object, G, say, as usual
## 2. Call Sl.setup which uses info in G$smooth and G$paraPen
## to set up a block diagonal penalty structure, Sl, say
## 3. At this stage reweight the model matrix in G if needed
## (e.g. in IRLS) to get X
## 4. um <- Sl.Xprep(Sl,X) to deal with identifiability and re-para.
## 5. initial smoothing parameters from initial.sp(X,G$S,G$off),
## initial phi from, say variance of y over 10??
## 6. fit <- fast.REML.fit(um$Sl,um$X,G$y,rho,L=G$L,rho.0=G$lsp0,
## log.phi=log.phi,phi.fixed=FALSE/TRUE,Mp=um$Mp)
## 7. res <- Sl.postproc(Sl,fit,um$undrop,X,cov=TRUE), to get postproc
## stuff
## Notice: that only steps 3-7 are needed in an IRLS loop and cov=TRUE
## is only needed after convergence of such a loop.
## Warning: min.sp not handled by this approach.
## NOTE: the three lines below are website boilerplate accidentally captured
## with this source (not R code); commented out so the file parses.
## Any scripts or data that you put into this service are public.
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.