## routines for very large dataset generalized additive modelling.
## (c) Simon N. Wood 2009-2023
ls.size <- function(x) {
## If `x' is a list, return the sizes of its elements, in bytes, in a named vector,
## otherwise return the size of the object.
if (is.list(x)==FALSE) return(object.size(x))
xn <- names(x)
n <- length(x)
sz <- rep(-1,n)
for (i in 1:n) sz[i] <- object.size(x[[i]])
names(sz) <- xn
sz
} ## ls.size
rwMatrix <- function(stop,row,weight,X,trans=FALSE) {
## Routine to recombine the rows of a matrix X according to info in
## stop, row and weight. Consider the ith row of the output matrix
## ind <- 1:stop[i] if i==1 and ind <- (stop[i-1]+1):stop[i]
## otherwise. The ith output row is then the sum over ind of X[row[ind],]*weight[ind]
if (is.matrix(X)) { n <- nrow(X);p<-ncol(X);ok <- TRUE} else { n<- length(X);p<-1;ok<-FALSE}
stop <- stop - 1;row <- row - 1 ## R indices -> C indices
oo <-.C(C_rwMatrix,as.integer(stop),as.integer(row),as.double(weight),X=as.double(X),
as.integer(n),as.integer(p),trans=as.integer(trans),work=as.double(rep(0,n*p)))
if (ok) return(matrix(oo$X,n,p)) else
return(oo$X)
} ## rwMatrix
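## Pure R sketch of what the C_rwMatrix call above computes, for the
## trans=FALSE, matrix case (rwMatrix.ref is illustrative only, not part of
## mgcv). Output row i is the weighted sum of rows X[row[ind],] with weights
## weight[ind], ind as described above...
rwMatrix.ref <- function(stop,row,weight,X) {
  Y <- matrix(0,length(stop),ncol(X))
  prev <- 0
  for (i in seq_along(stop)) {
    ind <- (prev+1):stop[i]; prev <- stop[i]
    Y[i,] <- colSums(weight[ind]*X[row[ind],,drop=FALSE])
  }
  Y
} ## rwMatrix.ref (illustrative only)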
chol2qr <- function(XX,Xy,nt=1) {
## takes X'X and X'y and returns R and f
## equivalent to qr update.
op <- options(warn = -1) ## otherwise warns if +ve semidef
R <- if (nt) pchol(XX,nt=nt) else chol(XX,pivot=TRUE)
options(op)
p <- length(Xy)
ipiv <- piv <- attr(R,"pivot");ipiv[piv] <- 1:p
rank <- attr(R,"rank");ind <- 1:rank
if (rank<p) R[(rank+1):p,] <- 0 ## chol is buggy (R 3.1.0) with pivot=TRUE
f <- c(forwardsolve(t(R[ind,ind]),Xy[piv[ind]]),rep(0,p-rank))[ipiv]
R <- R[ipiv,ipiv]
list(R=R,f=f)
} ## chol2qr
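## Minimal consistency check for the above (chol2qr.check is illustrative
## only, not part of mgcv). Whether R and f come from a QR of X or from the
## Cholesky route above, they satisfy R'R = X'X and R'f = X'y, so for well
## conditioned X both quantities below should be around machine zero...
chol2qr.check <- function(n=100,p=5) {
  X <- matrix(rnorm(n*p),n,p); y <- rnorm(n)
  res <- chol2qr(crossprod(X),drop(t(X)%*%y))
  c(RtR = max(abs(crossprod(res$R)-crossprod(X))),
    Rtf = max(abs(t(res$R)%*%res$f-t(X)%*%y)))
} ## chol2qr.check (illustrative only)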
qr_update <- function(Xn,yn,R=NULL,f=rep(0,0),y.norm2=0,use.chol=FALSE,nt=1)
## Let X = QR and f = Q'y. This routine updates f and R
## when Xn is appended to X and yn appended to y. If R is NULL
## then an initial QR of Xn is performed. ||y||^2 is accumulated as well.
## If use.chol==TRUE then quicker but less stable accumulation of X'X and
## X'y is used. Results then need post processing, to get R = chol(X'X)
## and f = R^{-T} X'y (so that R'f = X'y).
## If nt>1 and use.chol=FALSE then parallel QR is used.
{ p <- ncol(Xn)
y.norm2 <- y.norm2+sum(yn*yn)
if (use.chol) {
if (is.null(R)) {
R <- crossprod(Xn)
fn <- as.numeric(t(Xn)%*%yn)
} else {
R <- R + crossprod(Xn)
fn <- f + as.numeric(t(Xn)%*%yn)
}
return(list(R=R,f=fn,y.norm2=y.norm2))
} else { ## QR update
if (!is.null(R)) {
Xn <- rbind(R,Xn)
yn <- c(f,yn)
}
qrx <- if (nt==1) qr(Xn,tol=0,LAPACK=TRUE) else pqr2(Xn,nt)
fn <- qr.qty(qrx,yn)[1:min(p,nrow(Xn))]
rp <- qrx$pivot;rp[rp] <- 1:p # reverse pivot
return(list(R = qr.R(qrx)[,rp],f=fn,y.norm2=y.norm2))
}
} ## qr_update
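## Sketch illustrating the update identity (qr_update.check is illustrative
## only, not part of mgcv): accumulating the factorization block by block
## agrees with factorizing the full X, in the sense that R'R = X'X,
## R'f = X'y and y.norm2 = ||y||^2...
qr_update.check <- function(n=60,p=4) {
  X <- matrix(rnorm(n*p),n,p); y <- rnorm(n)
  i1 <- 1:30; i2 <- 31:n
  up <- qr_update(X[i1,],y[i1])
  up <- qr_update(X[i2,],y[i2],up$R,up$f,up$y.norm2)
  c(RtR = max(abs(crossprod(up$R)-crossprod(X))),
    Rtf = max(abs(t(up$R)%*%up$f-t(X)%*%y)),
    yy = abs(up$y.norm2-sum(y^2)))
} ## qr_update.check (illustrative only)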
qr_up <- function(arg) {
## routine for parallel computation of the QR factorization of
## a large gam model matrix, suitable for calling with parLapply.
wt <- rep(0,0)
dev <- 0
eta <- arg$eta
efam <- !is.null(arg$family) ## extended family?
for (b in 1:arg$n.block) {
ind <- arg$start[b]:arg$stop[b]
X <- predict(arg$G,newdata=arg$mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
rownames(X) <- NULL
if (is.null(arg$coef)) eta1 <- arg$eta[ind] else eta[ind] <- eta1 <- drop(X%*%arg$coef) + arg$offset[ind]
mu <- arg$linkinv(eta1)
y <- arg$G$y[ind] ## arg$G$model[[arg$response]]
weights <- arg$G$w[ind]
if (efam) { ## extended family case
dd <- dDeta(y,mu,weights,theta=arg$theta,arg$family,0)
## note: no handling of infinities and wz case yet
w <- dd$EDeta2 * .5
z <- (eta1-arg$offset[ind]) - dd$Deta.EDeta2
good <- is.finite(z)&is.finite(w)
} else { ## regular exp fam case
mu.eta.val <- arg$mu.eta(eta1)
good <- (weights > 0) & (mu.eta.val != 0)
z <- (eta1 - arg$offset[ind]) + (y - mu)/mu.eta.val
w <- (weights * mu.eta.val^2)/arg$variance(mu)
}
w[!good] <- 0 ## drop if !good
#z[!good] <- 0 ## irrelevant
dev <- dev + if (efam) sum(arg$family$dev.resids(y,mu,weights,arg$theta)) else sum(arg$dev.resids(y,mu,weights))
wt <- c(wt,w)
z <- z[good];w <- w[good]
w <- sqrt(w)
## note assumption that nt=1 in following qr_update - i.e. each cluster node is strictly serial
if (b == 1) qrx <- qr_update(w*X[good,,drop=FALSE],w*z,use.chol=arg$use.chol)
else qrx <- qr_update(w*X[good,,drop=FALSE],w*z,qrx$R,qrx$f,qrx$y.norm2,use.chol=arg$use.chol)
rm(X);if(arg$gc.level>1) gc() ## X can be large: remove and reclaim
}
qrx$dev <- dev;qrx$wt <- wt;qrx$eta <- eta
if (arg$gc.level>1) { rm(arg,ind,mu,y,weights,mu.eta.val,good,z,w,wt);gc()}
qrx
} ## qr_up
compress.df <- function(dat,m=NULL) {
## Takes dataframe in dat and compresses it by rounding and duplicate
## removal. Returns index array as attribute. If covariates were matrices
## then the index is a matrix.
## For metric variables we first find the unique cases.
## If there are <= m of these then these are employed, otherwise
## rounding is used. Factors are always reduced to the number of
## levels present in the data. Idea is that this function is called
## with columns of dataframes corresponding to single smooths or marginals.
## Note that this uses random sampling, so random seed manipulation
## is typically used before calling to force exact repeatability.
d <- ncol(dat) ## number of variables to deal with
n <- nrow(dat) ## number of data/cases
if (is.null(m)) {
m <- if (d==1) 1000 else if (d==2) 100 else 25
} else if (d>1) m <- round(m^{1/d}) + 1
mf <- mm <- 1 ## total grid points for factor and metric
for (i in 1:d) if (is.factor(dat[,i])) {
mf <- mf * length(unique(as.vector(dat[,i])))
} else {
mm <- mm * m
}
if (is.matrix(dat[[1]])) { ## must replace matrix terms with vec(dat[[i]])
dat0 <- data.frame(as.vector(dat[[1]]))
if (d>1) for (i in 2:d) dat0[[i]] <- as.vector(dat[[i]])
names(dat0) <- names(dat)
dat <- dat0;rm(dat0)
}
xu <- uniquecombs(dat,TRUE)
if (nrow(xu)>mm*mf) { ## too many unique rows to use only unique
for (i in 1:d) if (!is.factor(dat[,i])) { ## round the metric variables
xl <- range(dat[,i])
xu <- seq(xl[1],xl[2],length=m)
dx <- xu[2]-xu[1]
kx <- round((dat[,i]-xl[1])/dx)+1
dat[,i] <- xu[kx] ## rounding the metric variables
}
xu <- uniquecombs(dat,TRUE)
}
k <- attr(xu,"index")
if (nrow(xu)==nrow(dat)) { ## might as well return original data
k <- 1:nrow(dat)
if (length(k)>n) k <- matrix(k,nrow=n) ## deal with matrix arguments
attr(dat,"index") <- k
return(dat)
}
## shuffle rows in order to avoid induced dependencies between discretized
## covariates (which can mess up gam.side)...
## Any RNG setting should be done in routine calling this one!!
ii <- sample(1:nrow(xu),nrow(xu),replace=FALSE) ## shuffling index
xu[ii,] <- xu ## shuffle rows of xu
k <- ii[k] ## correct k index accordingly
## ... finished shuffle
## if arguments were matrices, then return matrix index
if (length(k)>n) k <- matrix(k,nrow=n)
attr(xu,"index") <- k
xu
} ## compress.df
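## Round trip sketch (compress.demo is illustrative only, not part of mgcv):
## for a single metric covariate the compressed frame xu and its "index"
## attribute k satisfy xu$x[k] == dat$x, up to the rounding error of an m
## point grid when rounding was needed...
compress.demo <- function(n=2000,m=100) {
  dat <- data.frame(x=runif(n))
  xu <- compress.df(dat,m=m)
  k <- attr(xu,"index")
  c(n.unique = nrow(xu), max.err = max(abs(xu$x[k]-dat$x)))
} ## compress.demo (illustrative only)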
check.term <- function(term,rec) {
## utility function for discrete.mf. Checks whether variables in "term"
## have already been discretized, and if so whether this discretization
## can be re-used for the current "term". Stops if term already discretized
## but we can't re-use the discretization. Otherwise returns the index of the
## previous term's k index, or 0 if the term is not in the existing list.
ii <- which(rec$vnames%in%term)
if (length(ii)) { ## at least one variable already discretized
if (length(term)==rec$d[min(ii)]) { ## dimensions match previous discretization
if (sum(!(term%in%rec$vnames[ii]))) stop("bam can not discretize with this nesting structure")
else return(rec$ki[min(ii)]) ## all names match previous - return index of previous
} else stop("bam can not discretize with this nesting structure")
} else return(0) ## no match
} ## check.term
discrete.mf <- function(gp,mf,names.pmf,m=NULL,full=TRUE) {
## Attempt at a cleaner design, in which k.start and nr have an entry
## for each variable in mf. For jointly discretized covariates these
## will simply be identical for each covariate.
## discretize the covariates for the terms specified in smooth.spec
## smooth 'id' linking is not allowed. names.pmf gives the names of the parametric part
## of mf. If full is FALSE then the mf returned is a list where columns
## can be of different lengths.
## On exit...
## * mf is a model frame containing the unique discretized covariate
## values, in randomized order, padded to all be same length (if full=TRUE)
## * nr records the number of unique discretized covariate values
## for each variable in mf i.e. the number of rows before the padding starts -
## elements are labelled, corresponding to names in mf.
## * ks is a two column matrix. ks[i,1] is the first column in index
## matrix k for the corresponding element of mf, ks[i,2]-1 is the
## final column in k. row names match names of mf.
## * k is the index matrix. The ith record of the 1st column of the
## jth variable is in row k[i,ks[j,1]] of the corresponding
## column of mf.
## ... there is an element of nr and ks for each variable of
## mf. Variables are only discretized and stored in mf once.
## Model terms can be matched to elements of nr and ks based on the
## 'xnames' returned from terms2tensor, or the 'terms' component of
## smooth objects.
## ks has an extra row, k an extra column and nr an extra entry for
## any "(Intercept)"
## Note that the margins component of a tensor product smooth spec
## is used to establish which covariates of the smooth must be
## jointly discretized.
## some sub sampling here... want to set and restore RNG state used for this
## to ensure strict repeatability.
rngs <- temp.seed(8547) ## keep different to tps constructor!
mf0 <- list()
nk <- 0 ## count number of index vectors to avoid too much use of cbind
nlp <- if (is.null(gp$nlp)) 1 else sum(unlist(lapply(gp,inherits,"split.gam.formula")))
for (lp in 1:nlp) { ## loop over linear predictors
smooth.spec <- if (is.null(gp$nlp)) gp$smooth.spec else gp[[lp]]$smooth.spec
if (length(smooth.spec)>0) for (i in 1:length(smooth.spec)) nk <- nk + as.numeric(smooth.spec[[i]]$by!="NA") + length(smooth.spec[[i]]$term)
## if (inherits(smooth.spec[[i]],"tensor.smooth.spec")) length(smooth.spec[[i]]$margin) else 1
}
names.pmf <- names.pmf[names.pmf %in% names(mf)] ## drop names.pmf not in mf (usually response)
nk <- nk + length(names.pmf)
k <- matrix(0,nrow(mf),nk) ## each column is an index vector
ks <- matrix(NA,nk,2,dimnames=list(rep("",nk)))
ik <- 0 ## index counter i.e. counts marginal smooths and their indices
nr <- rep(0,nk) ## number of rows for marginal term
## structure to record terms already processed...
rec <- list(vnames = rep("",0), ## variable names
ki = rep(0,0), ## index of original index vector var relates to
d = rep(0,0)) ## dimension of terms involving this var
## loop through the terms discretizing the covariates...
ii <- 0
for (lp in 1:nlp) { ## loop over linear predictors
smooth.spec <- if (is.null(gp$nlp)) gp$smooth.spec else gp[[lp]]$smooth.spec
if (length(smooth.spec)>0) for (i in 1:length(smooth.spec)) { ## loop over smooths in this lp
nmarg <- if (inherits(smooth.spec[[i]],"tensor.smooth.spec")) length(smooth.spec[[i]]$margin) else 1
maxj <- if (smooth.spec[[i]]$by=="NA") nmarg else nmarg + 1
ii <- ii + 1 ## all smooths counter (ii==i if nlp = 1)
mi <- if (is.null(m)||length(m)==1) m else m[ii]
j <- 0
for (jj in 1:maxj) { ## loop through marginals
if (jj==1&&maxj!=nmarg) termi <- smooth.spec[[i]]$by else {
j <- j + 1
termi <- if (inherits(smooth.spec[[i]],"tensor.smooth.spec")) smooth.spec[[i]]$margin[[j]]$term else
smooth.spec[[i]]$term
}
ik.prev <- check.term(termi,rec) ## term already discretized?
if (ik.prev==0) { ## new discretization required
ik <- ik + 1 ## increment index counter
mfd <- compress.df(mf[termi],m=mi)
ki <- attr(mfd,"index")
ks[ik,1] <- if (ik==1) 1 else ks[ik-1,2]
if (is.matrix(ki)) {
ks[ik,2] <- ks[ik,1] + ncol(ki)
k <- cbind(k,matrix(0,nrow(k),ncol(ki)-1)) ## extend index matrix
ind <- ks[ik,1]:(ks[ik,2]-1)
k[,ind] <- ki
} else {
ks[ik,2] <- ks[ik,1] + 1
k[,ks[ik,1]] <- ki
}
names(nr)[ik] <- rownames(ks)[ik] <- names(mfd)[1]
nr[ik] <- nrow(mfd)
if (length(termi)>1) for (ii in 2:length(termi)) { ## duplicate index info for each jointly discretized variable
ik <- ik+1
ks[ik,] <- ks[ik-1,]
nr[ik] <- nr[ik-1]
names(nr)[ik] <- rownames(ks)[ik] <- names(mfd)[ii]
}
mf0 <- c(mf0,mfd)
## record variable discretization info, for later duplicate avoiding check...
d <- length(termi)
rec$vnames <- c(rec$vnames,termi)
rec$ki <- c(rec$ki,rep(ik,d)) ### NOTE: OK, as ki contents essentially unused
rec$d <- c(rec$d,rep(d,d))
} else { ## re-use an earlier discretization...
## nothing to do in fact
}
} ## end marginal jj loop
} ## term loop (i)
} ## linear predictor, lp, loop
## obtain parametric terms and...
## pad mf0 so that all rows are the same length
## padding is necessary if gam.setup is to be used for setup
if (full) {
maxr <- max(nr)
## If NA's caused rows to be dropped in mf, then they should
## also be dropped in pmf, otherwise we can end up with factors
## with more levels than unique observations, for example.
## The next couple of lines achieve this.
mfp <- mf[names.pmf] ## retain only variables needed in parametric part
## now discretize parametric covariates...
mi <- if (is.null(m)) m else max(m)
if (length(mfp)) for (i in 1:length(mfp)) {
ii <- which(rec$vnames %in% names.pmf[i])
ik.prev <- if (length(ii)>0) rec$ki[ii] else 0 ## term already discretized?
if (ik.prev==0) { ## new discretization needed (no need to record - no repeat vars within para)
## note that compress.df(mfp[i]) is set up to deal with matrices in the manner appropriate
## to the smooth summation convention, but not to matrices in the parametric part of the model
## hence the following workaround...
if (is.matrix(mfp[[i]])) {
mfd <- compress.df(mfp[[i]],m=mi)
mr <- nrow(mfd)
if (is.matrix(mfd)&&ncol(mfd)==1) mfd <- drop(mfd)
mf0[[names(mfp[i])]] <- mfd
} else {
mfd <- compress.df(mfp[i],m=mi);mf0 <- c(mf0,mfd)
mr <- length(mfd[[1]])
}
ki <- attr(mfd,"index")
ik <- ik + 1
ks[ik,1] <- if (ik==1) 1 else ks[ik-1,2]
ks[ik,2] <- ks[ik,1] + 1
k[,ks[ik,1]] <- ki
if (maxr<mr) maxr <- mr
nr[ik] <- mr
names(nr)[ik] <- rownames(ks)[ik] <- names(mfp[i])
} else { ## re-use previous discretization
## nothing needed
}
}
for (i in 1:length(mf0)) { ## pad out columns to be same length
if (is.matrix(mf0[[i]])) {
me <- nrow(mf0[[i]])
mf0[[i]] <- rbind(mf0[[i]],matrix(sample(mf0[[i]],(maxr-me)*ncol(mf0[[i]]),replace=TRUE),maxr-me,ncol(mf0[[i]])))
} else {
me <- length(mf0[[i]])
if (me < maxr) mf0[[i]][(me+1):maxr] <- sample(mf0[[i]],maxr-me,replace=TRUE)
}
}
## mf0 is the discretized model frame (actually a list), padded to have equal length rows
## now copy back into mf so terms unchanged
mf <- mf[sample(1:nrow(mf),maxr,replace=TRUE),,drop=FALSE]
for (na in names(mf0)) mf[[na]] <- mf0[[na]]
} else mf <- mf0
nr <- nr[names(mf)] ## same order for both mf and nr
## reset RNG to old state...
temp.seed(rngs)
## k can end up with too many columns if marginals themselves have
## multiple arguments, so drop these here...
ks <- ks[!is.na(ks[,1]),,drop=FALSE]
if (ncol(k)>max(ks)-1) k <- k[,1:(max(ks)-1),drop=FALSE]
k <- cbind(k,1) ## add an intercept index column
nk <- ncol(k)
ks <- rbind(ks,c(nk,nk+1)) ## add intercept row to ks
nr <- c(nr,1) ## and intercept entry to nr
names(nr)[length(nr)] <- rownames(ks)[nrow(ks)] <- "(Intercept)"
list(mf=mf,k=k,nr=nr,ks=ks)
} ## discrete.mf
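## Sketch of how the returned pieces fit together (discrete.col is
## illustrative only, not part of mgcv; assumes dk is a list returned by
## discrete.mf and v names a non-matrix covariate in dk$mf). The full length
## covariate column is recovered from the compact one via the index matrix;
## padding rows beyond dk$nr[v] are never addressed...
discrete.col <- function(dk,v) {
  j <- which(rownames(dk$ks)==v)[1]
  dk$mf[[v]][dk$k[,dk$ks[j,1]]]
} ## discrete.col (illustrative only)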
mini.mf <-function(mf,chunk.size) {
## takes a model frame and produces a representative subset of it, suitable for
## basis setup.
## first count the minimum number of rows required for representativeness
mn <- 0
for (j in 1:length(mf)) mn <- mn + if (is.factor(mf[[j]])) length(levels(mf[[j]])) else 2
if (chunk.size < mn) chunk.size <- mn
n <- nrow(mf)
if (n <= chunk.size) return(mf)
rngs <- temp.seed(66)
## randomly sample from original frame...
ind <- sample(1:n,chunk.size)
mf0 <- mf[ind,,drop=FALSE]
## ... now need to ensure certain sorts of representativeness
## work through elements collecting the rows containing
## max and min for each variable, and a random row for each
## factor level....
ind <- sample(1:n,n,replace=FALSE) ## randomized index for stratified sampling w.r.t. factor levels
fun <- function(X,fac,ind) ind[fac[ind]==X][1] ## stratified sampler
k <- 0
for (j in 1:length(mf)) if (is.numeric(mf0[[j]])) {
if (is.matrix(mf0[[j]])) { ## find row containing minimum
j.min <- min((1:n)[as.logical(rowSums(mf[[j]]==min(mf[[j]])))])
j.max <- min((1:n)[as.logical(rowSums(mf[[j]]==max(mf[[j]])))])
} else { ## vector
j.min <- min(which(mf[[j]]==min(mf[[j]])))
j.max <- min(which(mf[[j]]==max(mf[[j]])))
}
k <- k + 1; mf0[k,] <- mf[j.min,]
k <- k + 1; mf0[k,] <- mf[j.max,]
} else if (is.factor(mf[[j]])) { ## factor variable...
## randomly sample one row from each factor level...
find <- apply(X=as.matrix(levels(mf[[j]])),MARGIN=1,FUN=fun,fac=mf[[j]],ind=ind)
find <- find[is.finite(find)] ## in case drop.unused.levels==FALSE, so that there are levels without rows
nf <- length(find)
mf0[(k+1):(k+nf),] <- mf[find,]
k <- k + nf
}
temp.seed(rngs) ## reset RNG to initial state
mf0
} ## mini.mf
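## Quick check of the representativeness mini.mf aims for (mini.mf.check is
## illustrative only, not part of mgcv): the subset should retain each
## numeric variable's min and max, and a row for every factor level present...
mini.mf.check <- function(n=500) {
  mf <- data.frame(x=runif(n),f=factor(sample(letters[1:5],n,replace=TRUE)))
  mf0 <- mini.mf(mf,50)
  c(range.ok = all(range(mf0$x)==range(mf$x)),
    levels.ok = all(levels(mf$f)%in%unique(as.character(mf0$f))))
} ## mini.mf.check (illustrative only)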
bgam.fitd <- function (G, mf, gp ,scale , coef=NULL, etastart = NULL,
mustart = NULL, offset = rep(0, nobs),rho=0, control = gam.control(), intercept = TRUE,
gc.level=0,nobs.extra=0,npt=c(1,1),gamma=1,in.out=NULL,...) {
## This is a version of bgam.fit designed for use with discretized covariates.
## Difference to bgam.fit is that XWX, XWy and Xbeta are computed in C
## code using compressed versions of X. Parallelization of XWX formation
## is performed at the C level using openMP.
## Alternative fitting iteration using Cholesky only, including for REML.
## Basic idea is to take only one Newton step for parameters per iteration
## and to control the step length to ensure that at the end of the step we
## are not going uphill w.r.t. the REML criterion...
y <- G$y
weights <- G$w
conv <- FALSE
nobs <- nrow(mf)
offset <- G$offset
if (inherits(G$family,"extended.family")) { ## preinitialize extended family
efam <- TRUE
if (!is.null(G$family$preinitialize) && !is.null(attr(G$family$preinitialize,"needG"))) attr(G$family,"G") <- G
pini <- if (is.null(G$family$preinitialize)) NULL else G$family$preinitialize(y,G$family)
if (is.null(G$family$preinitialize)) attr(G$family,"G") <- NULL
if (!is.null(pini$family)) G$family <- pini$family
if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta)
if (!is.null(pini$y)) y <- pini$y
if (is.null(G$family$scale)) scale <- 1 else scale <- if (G$family$scale<0) scale else G$family$scale
scale1 <- scale
if (scale < 0) scale <- var(y) *.1 ## initial guess
} else efam <- FALSE
if (rho!=0) { ## AR1 error model
ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
sd <- -rho*ld ## sub diagonal
N <- nobs
## see rwMatrix() for how following are used...
ar.row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)]) ## index of rows to reweight
ar.weight <- c(1,rep(c(sd,ld),N-1)) ## row weights
ar.stop <- c(1,1:(N-1)*2+1) ## (stop[i-1]+1):stop[i] are the rows to reweight to get ith row
if (!is.null(mf$"(AR.start)")) { ## need to correct the start of new AR sections...
ii <- which(mf$"(AR.start)"==TRUE)
if (length(ii)>0) {
if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
ar.weight[ii*2-2] <- 0 ## zero sub diagonal
ar.weight[ii*2-1] <- 1 ## set leading diagonal to 1
}
}
} else {## AR setup complete
ar.row <- ar.weight <- ar.stop <- -1 ## signal no re-weighting
}
family <- G$family
additive <- if (family$family=="gaussian"&&family$link=="identity") TRUE else FALSE
linkinv <- family$linkinv;#dev.resids <- family$dev.resids
if (!efam) {
variance <- family$variance
mu.eta <- family$mu.eta
if (!is.function(variance) || !is.function(linkinv))
stop("'family' argument seems not to be a valid family object")
}
valideta <- family$valideta
if (is.null(valideta))
valideta <- function(eta) TRUE
validmu <- family$validmu
if (is.null(validmu))
validmu <- function(mu) TRUE
if (is.null(mustart)) {
eval(family$initialize)
}
else {
mukeep <- mustart
eval(family$initialize)
mustart <- mukeep
}
if (is.matrix(y)&&ncol(y)>1) stop("This family should not have a matrix response")
eta <- if (!is.null(etastart))
etastart
else family$linkfun(mustart)
mu <- linkinv(eta)
if (!(validmu(mu) && valideta(eta)))
stop("cannot find valid starting values: please specify some")
dev <- sum(family$dev.resids(y, mu, weights))*2 ## just to avoid converging at iter 1
conv <- FALSE
G$coefficients <- rep(0,ncol(G$X))
class(G) <- "gam"
## need to reset response and weights to post initialization values
## in particular to deal with binomial properly...
G$y <- y
G$w <- weights
Sl <- Sl.setup(G) ## setup block diagonal penalty object
rank <- 0
if (length(Sl)>0) for (b in 1:length(Sl)) rank <- rank + Sl[[b]]$rank
Mp <- ncol(G$X) - rank ## null space dimension
Nstep <- 0
if (efam) theta <- family$getTheta()
for (iter in 1L:control$maxit) { ## main fitting loop
devold <- dev
dev <- 0
if (iter==1||!additive) {
qrx <- list()
if (iter>1) {
## form eta = X%*%beta
eta <- Xbd(G$Xd,coef,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,G$drop) + offset
lsp.full <- G$lsp0
if (n.sp>0) lsp.full <- lsp.full + if (is.null(G$L)) lsp[1:n.sp] else G$L %*% lsp[1:n.sp]
rSb <- Sl.rSb(Sl,lsp.full,prop$beta) ## store S beta to allow rapid step halving
if (iter>2) {
rSb0 <- Sl.rSb(Sl,lsp.full,b0)
bSb0 <- sum(rSb0^2) ## penalty at start of beta step
## get deviance at step start, with current theta if efam
dev0 <- if (efam) sum(family$dev.resids(G$y,mu0,G$w,theta)) else
sum(family$dev.resids(G$y,mu0,G$w))
}
}
kk <- 1
repeat {
mu <- linkinv(eta)
dev <- if (efam) sum(family$dev.resids(G$y,mu,G$w,theta)) else
sum(family$dev.resids(G$y,mu,G$w))
if (iter>2) { ## coef step length control
bSb <- sum(rSb^2) ## penalty at end of beta step
if ((!is.finite(dev) || dev0 + bSb0 < dev + bSb) && kk < 30) { ## beta step not improving current pen dev
coef <- (coef0 + coef)/2 ## halve the step
rSb <- (rSb0 + rSb)/2
eta <- (eta0 + eta)/2
prop$beta <- (b0 + prop$beta)/2
kk <- kk + 1
} else break
} else break
}
if (iter>1) { ## save components of penalized deviance for step control
coef0 <- coef ## original para
eta0 <- eta
mu0 <- mu
b0 <- prop$beta ## beta repara
dev <- dev + sum(rSb^2) ## add penalty to deviance
} else reml <- dev ## for convergence checking
if (efam) { ## extended family
if (iter>1) { ## estimate theta
if (family$n.theta>0||scale1<0) theta <- estimate.theta(theta,family,y,mu,scale=scale1,wt=G$w,tol=1e-7)
if (!is.null(family$scale) && scale1<0) {
scale <- exp(theta[family$n.theta+1])
theta <- theta[1:family$n.theta]
}
family$putTheta(theta)
}
dd <- dDeta(y,mu,G$w,theta=theta,family,0)
## note: no handling of infinities and wz case yet
if (rho==0) {
w <- dd$Deta2 * .5
z <- (eta-offset) - dd$Deta.Deta2
} else { ## use fisher weights
w <- dd$EDeta2 * .5
z <- (eta-offset) - dd$Deta.EDeta2
}
good <- is.finite(z)&is.finite(w)
w[!good] <- 0 ## drop if !good
z[!good] <- 0 ## irrelevant
} else { ## exponential family
mu.eta.val <- mu.eta(eta)
good <- mu.eta.val != 0
mu.eta.val[!good] <- .1 ## irrelevant as weight is zero
z <- (eta - offset) + (G$y - mu)/mu.eta.val
w <- (G$w * mu.eta.val^2)/variance(mu)
}
qrx$y.norm2 <- if (rho==0) sum(w*z^2) else ## AR mod needed
sum(rwMatrix(ar.stop,ar.row,ar.weight,sqrt(w)*z,trans=FALSE)^2)
## form X'WX efficiently...
qrx$R <- XWXd(G$Xd,w,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,npt[1],G$drop,ar.stop,ar.row,ar.weight)
## form X'Wz efficiently...
qrx$f <- XWyd(G$Xd,w,z,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,G$drop,ar.stop,ar.row,ar.weight)
if(gc.level>1) gc()
## following reparameterizes X'X and f=X'y, according to initial reparameterization...
qrx$XX <- Sl.initial.repara(Sl,qrx$R,inverse=FALSE,both.sides=TRUE,cov=FALSE,nt=npt[1])
qrx$Xy <- Sl.initial.repara(Sl,qrx$f,inverse=FALSE,both.sides=TRUE,cov=FALSE,nt=npt[1])
G$n <- nobs
} else { ## end of if (iter==1||!additive)
dev <- qrx$y.norm2 - sum(coef*qrx$f) ## actually penalized deviance
}
if (control$trace)
message(gettextf("Deviance = %s Iterations - %d", dev, iter, domain = "R-mgcv"))
if (!is.finite(dev)) stop("Non-finite deviance")
## preparation for working model fit is ready, but need to test for convergence first
if (iter>2 && abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon && (scale>0 || abs(Nstep[n.sp+1])<control$epsilon*(abs(log.phi)+1))) {
conv <- TRUE
break
}
## use fast REML code
## block diagonal penalty object, Sl, set up before loop
if (iter==1) { ## need to get initial smoothing parameters
lambda.0 <- if (is.null(in.out)) initial.sp(qrx$R,G$S,G$off,XX=TRUE) else in.out$sp ## note that this uses the untransformed X'X in qrx$R
## convert initial s.p.s to account for L
lsp0 <- log(lambda.0) ## initial s.p.
if (!is.null(G$L)) lsp0 <-
if (ncol(G$L)>0) as.numeric(coef(lm(lsp0 ~ G$L-1+offset(G$lsp0)))) else rep(0,0)
n.sp <- length(lsp0)
}
## carry forward scale estimate if possible...
if (scale>0) log.phi <- log(scale) else {
if (iter==1) {
if (is.null(in.out)) {
if (is.null(coef)||qrx$y.norm2==0) lsp0[n.sp+1] <- log(var(as.numeric(G$y))*.05) else
lsp0[n.sp+1] <- log(qrx$y.norm2/(nobs+nobs.extra))
} else lsp0[n.sp+1] <- log(in.out$scale)
}
}
## get beta, grad and proposed Newton step...
repeat { ## Take a Newton step to update log sp and phi
lsp <- lsp0 + Nstep
if (scale<=0) log.phi <- lsp[n.sp+1]
prop <- Sl.fitChol(Sl,qrx$XX,qrx$Xy,rho=lsp[1:n.sp],yy=qrx$y.norm2,L=G$L,rho0=G$lsp0,log.phi=log.phi,
phi.fixed=scale>0,nobs=nobs,Mp=Mp,nt=npt,tol=abs(reml)*.Machine$double.eps^.5,gamma=gamma)
if (max(Nstep)==0) {
Nstep <- prop$step;lsp0 <- lsp;
break
} else { ## step length control
if (sum(prop$grad*Nstep)>dev*1e-7) Nstep <- Nstep/2 else {
Nstep <- prop$step;lsp0 <- lsp;break;
}
}
} ## end of sp update
coef <- Sl.initial.repara(Sl,prop$beta,inverse=TRUE,both.sides=FALSE,cov=FALSE)
if (any(!is.finite(coef))) {
conv <- FALSE
warning(gettextf("non-finite coefficients at iteration %d",
iter))
break
}
reml <- (dev/(exp(log.phi)*gamma) - prop$ldetS + prop$ldetXXS)/2
} ## end fitting iteration
if (!conv)
warning("algorithm did not converge")
eps <- 10 * .Machine$double.eps
if (family$family == "binomial") {
if (any(mu > 1 - eps) || any(mu < eps))
warning("fitted probabilities numerically 0 or 1 occurred")
}
if (family$family == "poisson") {
if (any(mu < eps))
warning("fitted rates numerically 0 occurred")
}
Mp <- G$nsdf
if (length(G$smooth)>1) for (i in 1:length(G$smooth)) Mp <- Mp + G$smooth[[i]]$null.space.dim
scale <- exp(log.phi)
reml <- (dev/(scale*gamma) - prop$ldetS + prop$ldetXXS + (length(y)/gamma-Mp)*log(2*pi*scale)+Mp*log(gamma))/2
if (rho!=0) { ## correct REML score for AR1 transform
df <- if (is.null(mf$"(AR.start)")) 1 else sum(mf$"(AR.start)")
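## the transform has weight 1 on the df rows starting new AR sections and ld
## on the remaining rows, so its log determinant is (nobs-df)*log(ld);
## subtracting this expresses the score on the untransformed data scale
## (with nobs/gamma in place of nobs, matching the gamma weighted score above)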
reml <- reml - (nobs/gamma-df)*log(ld)
}
if (ncol(prop$db)>0) for (i in 1:ncol(prop$db)) prop$db[,i] <- ## d beta / d rho matrix
Sl.initial.repara(Sl,as.numeric(prop$db[,i]),inverse=TRUE,both.sides=TRUE,cov=TRUE,nt=npt[1])
object <- list(db.drho=prop$db,
gcv.ubre=reml,mgcv.conv=conv,rank=prop$r,
scale.estimated = scale<=0,outer.info=NULL,
optimizer=c("perf","chol"))
object$coefficients <- coef
object$family <- family
## form linear predictor efficiently...
object$linear.predictors <- Xbd(G$Xd,coef,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,G$drop) + G$offset
object$fitted.values <- family$linkinv(object$linear.predictors)
if (efam) { ## deal with any post processing
if (!is.null(family$postproc)) {
posr <- family$postproc(family=object$family,y=G$y,prior.weights=G$w,
fitted=object$fitted.values,linear.predictors=object$linear.predictors,offset=G$offset,
intercept=G$intercept)
if (!is.null(posr$family)) object$family$family <- posr$family
if (!is.null(posr$deviance)) object$deviance <- posr$deviance
if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance
}
if (is.null(object$null.deviance)) object$null.deviance <- sum(family$dev.resids(G$y,weighted.mean(G$y,G$w),G$w,theta))
}
PP <- Sl.initial.repara(Sl,prop$PP,inverse=TRUE,both.sides=TRUE,cov=TRUE,nt=npt[1])
F <- pmmult(PP,qrx$R,FALSE,FALSE,nt=npt[1]) ##crossprod(PP,qrx$R) - qrx$R contains X'WX in this case
object$edf <- diag(F)
object$edf1 <- 2*object$edf - rowSums(t(F)*F)
lsp <- if (n.sp>0) lsp[1:n.sp] else rep(0,0)
object$sp <- exp(lsp)
object$full.sp <- if (is.null(G$L)) object$sp else exp(drop(G$L%*%lsp + G$lsp0))
object$sig2 <- object$scale <- scale
object$Vp <- PP * scale
object$Ve <- pmmult(F,object$Vp,FALSE,FALSE,nt=npt[1]) ## F%*%object$Vp
## sp uncertainty correction...
if (!is.null(G$L)) prop$db <- prop$db%*%G$L
M <- ncol(prop$db)
if (M>0) {
ev <- eigen(prop$hess,symmetric=TRUE)
ind <- ev$values <= 0
ev$values[ind] <- 0;ev$values[!ind] <- 1/sqrt(ev$values[!ind])
rV <- (ev$values*t(ev$vectors))[,1:M]
V.sp <- crossprod(rV)
attr(V.sp,"L") <- G$L
Vc <- pcrossprod(rV%*%t(prop$db),nt=npt[1])
} else {
Vc <- 0
V.sp <- NULL
}
Vc <- object$Vp + Vc ## Bayesian cov matrix with sp uncertainty
object$edf2 <- rowSums(Vc*qrx$R)/scale
object$Vc <- Vc
object$V.sp <- V.sp
object$outer.info <- list(grad = prop$grad,hess=prop$hess)
object$AR1.rho <- rho
object$R <- if (npt[2]>1) pchol(qrx$R,npt) else suppressWarnings(chol(qrx$R,pivot=TRUE)) ## latter much faster under optimized BLAS
piv <- attr(object$R,"pivot")
object$R[,piv] <- object$R
object$iter <- iter
object$wt <- w
object$y <- G$y
object$prior.weights <- G$w
rm(G);if (gc.level>0) gc()
object
} ## end bgam.fitd
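## Sketch verifying the AR1 construction used in bgam.fitd above (ar1.check
## is illustrative only, not part of mgcv): applying rwMatrix with the
## ar.stop/ar.row/ar.weight vectors to the identity yields the banded root
## inverse correlation L, so L C L' = I for C the AR1 correlation matrix...
ar1.check <- function(N=6,rho=0.7) {
  ld <- 1/sqrt(1-rho^2); sd <- -rho*ld
  ar.row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)])
  ar.weight <- c(1,rep(c(sd,ld),N-1))
  ar.stop <- c(1,1:(N-1)*2+1)
  L <- rwMatrix(ar.stop,ar.row,ar.weight,diag(N))
  C <- rho^abs(outer(1:N,1:N,"-")) ## AR1 correlation matrix
  max(abs(L%*%C%*%t(L)-diag(N))) ## should be around machine zero
} ## ar1.check (illustrative only)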
regular.Sb <- function(S,off,sp,beta) {
## form S %*% beta for a normal G list
a <- beta*0
if (length(S)>0) for (i in 1:length(S)) {
ind <- off[i] - 1 + 1:ncol(S[[i]])
a[ind] <- a[ind] + sp[i] * S[[i]] %*% beta[ind]
}
a
} ## regular.Sb
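## Dense equivalent sketch (regular.Sb.check is illustrative only, not part
## of mgcv): embedding each S[[i]] at offset off[i] in a p by p zero matrix
## and forming (sum_i sp[i]*S.full[[i]]) %*% beta gives the same result...
regular.Sb.check <- function(p=10) {
  S <- list(diag(3),matrix(c(2,1,1,2),2,2)); off <- c(2,6); sp <- c(.5,2)
  beta <- rnorm(p)
  St <- matrix(0,p,p)
  for (i in 1:length(S)) {
    ind <- off[i] - 1 + 1:ncol(S[[i]])
    St[ind,ind] <- St[ind,ind] + sp[i]*S[[i]]
  }
  max(abs(St%*%beta-regular.Sb(S,off,sp,beta)))
} ## regular.Sb.check (illustrative only)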
bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, coef=NULL,etastart = NULL,
mustart = NULL, offset = rep(0, nobs), control = gam.control(), intercept = TRUE,
cl = NULL,gc.level=0,use.chol=FALSE,nobs.extra=0,samfrac=1,npt=1,in.out=NULL,...) {
y <- G$y
weights <- G$w
conv <- FALSE
nobs <- nrow(mf)
offset <- G$offset
family <- G$family
## extended family may have non-standard y that requires careful subsetting (e.g. cnorm)
subsety <- if (is.null(G$family$subsety)) function(y,ind) y[ind] else G$family$subsety
if (inherits(G$family,"extended.family")) { ## preinitialize extended family
efam <- TRUE
pini <- if (is.null(G$family$preinitialize)) NULL else G$family$preinitialize(y,G$family)
if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta)
if (!is.null(pini$y)) y <- pini$y
if (is.null(G$family$scale)) scale <- 1 else scale <- if (G$family$scale<0) scale else G$family$scale
scale1 <-scale
if (scale < 0) scale <- var(y) *.1 ## initial guess
} else efam <- FALSE
G$family <- gaussian() ## needed if REML/ML used
G$family$drop.intercept <- family$drop.intercept ## needed in predict.gam
linkinv <- family$linkinv
if (!efam) {
variance <- family$variance
mu.eta <- family$mu.eta
if (!is.function(variance) || !is.function(linkinv))
stop("'family' argument seems not to be a valid family object")
}
dev.resids <- family$dev.resids
## aic <- family$aic
valideta <- family$valideta
if (is.null(valideta))
valideta <- function(eta) TRUE
validmu <- family$validmu
if (is.null(validmu))
validmu <- function(mu) TRUE
if (is.null(mustart)) {
eval(family$initialize)
}
else {
mukeep <- mustart
eval(family$initialize)
mustart <- mukeep
}
if (is.matrix(y)&&ncol(y)>1) stop("This family should not have a matrix response")
##coefold <- NULL
eta <- if (!is.null(etastart))
etastart
else family$linkfun(mustart)
mu <- linkinv(eta)
if (!(validmu(mu) && valideta(eta)))
stop("cannot find valid starting values: please specify some")
dev <- sum(dev.resids(y, mu, weights))*2 ## just to avoid converging at iter 1
conv <- FALSE
G$coefficients <- rep(0,ncol(G$X))
class(G) <- "gam"
## need to reset response and weights to post initialization values
## in particular to deal with binomial properly...
G$y <- y
G$w <- weights
## set up cluster for parallel computation...
if (!is.null(cl)&&inherits(cl,"cluster")) {
n.threads <- length(cl)
while(nobs/n.threads < ncol(G$X)) n.threads <- n.threads - 1
if (n.threads < length(cl)) {
warning("Too many cluster nodes to use all efficiently")
}
} else n.threads <- 1
if (n.threads>1) { ## set up thread argument lists
## number of obs per thread
nt <- rep(ceiling(nobs/n.threads),n.threads)
nt[n.threads] <- nobs - sum(nt[-n.threads])
arg <- list()
n1 <- 0
for (i in 1:n.threads) {
n0 <- n1+1;n1 <- n1+nt[i]
ind <- n0:n1 ## this thread's data block from mf
n.block <- nt[i]%/%chunk.size ## number of full sized blocks
stub <- nt[i]%%chunk.size ## size of end block
if (n.block>0) {
start <- (0:(n.block-1))*chunk.size+1
stop <- (1:n.block)*chunk.size
if (stub>0) {
start[n.block+1] <- stop[n.block]+1
stop[n.block+1] <- nt[i]
n.block <- n.block+1
}
} else {
n.block <- 1
start <- 1
stop <- nt[i]
}
arg[[i]] <- list(nobs= nt[i],start=start,stop=stop,n.block=n.block,
linkinv=linkinv,dev.resids=dev.resids,gc.level=gc.level,
mf = mf[ind,],
eta = eta[ind],offset = offset[ind],G = G,use.chol=use.chol)
if (efam) {
arg[[i]]$family <- family
} else {
arg[[i]]$mu.eta <- mu.eta
arg[[i]]$variance <- variance
}
arg[[i]]$G$w <- G$w[ind];arg[[i]]$G$model <- NULL
arg[[i]]$G$y <- subsety(G$y,ind) ##G$y[ind]
}
} else { ## single thread, requires single indices
## construct indices for splitting up model matrix construction...
n.block <- nobs%/%chunk.size ## number of full sized blocks
stub <- nobs%%chunk.size ## size of end block
if (n.block>0) {
start <- (0:(n.block-1))*chunk.size+1
stop <- (1:n.block)*chunk.size
if (stub>0) {
start[n.block+1] <- stop[n.block]+1
stop[n.block+1] <- nobs
n.block <- n.block+1
}
} else {
n.block <- 1
start <- 1
stop <- nobs
}
} ## single thread indices complete
conv <- FALSE
if (method=="fREML") Sl <- Sl.setup(G) ## setup block diagonal penalty object
if (efam) theta <- family$getTheta()
for (iter in 1L:control$maxit) { ## main fitting loop
## accumulate the QR decomposition of the weighted model matrix
devold <- dev
kk <- 0
repeat {
dev <- 0;wt <- rep(0,0)
if (n.threads == 1) { ## use original serial update code
wt <- G$y
for (b in 1:n.block) {
ind <- start[b]:stop[b]
X <- predict(G,newdata=mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
rownames(X) <- NULL
if (is.null(coef)) eta1 <- eta[ind] else eta[ind] <- eta1 <- drop(X%*%coef) + offset[ind]
mu <- linkinv(eta1)
y <- subsety(G$y,ind) #G$y[ind] ## G$model[[gp$response]] ## - G$offset[ind]
weights <- G$w[ind]
if (efam) { ## extended family case
dd <- dDeta(y,mu,weights,theta=theta,family,0)
## note: no handling of infinities and wz case yet
w <- dd$EDeta2 * .5
z <- (eta1-offset[ind]) - dd$Deta.EDeta2
good <- is.finite(z)&is.finite(w)
} else { ## regular exp fam case
mu.eta.val <- mu.eta(eta1)
good <- (weights > 0) & (mu.eta.val != 0)
z <- (eta1 - offset[ind]) + (y - mu)/mu.eta.val
w <- (weights * mu.eta.val^2)/variance(mu)
}
dev <- dev + if (efam) sum(dev.resids(y,mu,weights,theta)) else sum(dev.resids(y,mu,weights))
w[!good] <- 0 ## drop if !good
#z[!good] <- 0 ## irrelevant
wt[ind] <- w ## wt <- c(wt,w)
w <- w[good];z <- z[good]
w <- sqrt(w)
## note that QR may be parallel using npt>1, even under serial accumulation...
if (b == 1) qrx <- qr_update(w*X[good,,drop=FALSE],w*z,use.chol=use.chol,nt=npt)
else qrx <- qr_update(w*X[good,,drop=FALSE],w*z,qrx$R,qrx$f,qrx$y.norm2,use.chol=use.chol,nt=npt)
rm(X);if(gc.level>1) gc() ## X can be large: remove and reclaim
}
if (use.chol) { ## post proc to get R and f...
y.norm2 <- qrx$y.norm2
qrx <- chol2qr(qrx$R,qrx$f,nt=npt)
qrx$y.norm2 <- y.norm2
}
} else { ## use parallel accumulation
for (i in 1:length(arg)) {
arg[[i]]$coef <- coef
if (efam) arg[[i]]$theta <- theta
}
res <- parallel::parLapply(cl,arg,qr_up)
## now consolidate the results from the parallel threads...
if (use.chol) {
R <- res[[1]]$R;f <- res[[1]]$f;dev <- res[[1]]$dev
wt <- res[[1]]$wt;y.norm2 <- res[[1]]$y.norm2
eta <- res[[1]]$eta
for (i in 2:n.threads) {
R <- R + res[[i]]$R; f <- f + res[[i]]$f
wt <- c(wt,res[[i]]$wt);eta <- c(eta,res[[i]]$eta);
dev <- dev + res[[i]]$dev
y.norm2 <- y.norm2 + res[[i]]$y.norm2
}
qrx <- chol2qr(R,f,nt=npt)
qrx$y.norm2 <- y.norm2
} else { ## proper QR
R <- res[[1]]$R;f <- res[[1]]$f;dev <- res[[1]]$dev
wt <- res[[1]]$wt;y.norm2 <- res[[1]]$y.norm2; eta <- res[[1]]$eta
for (i in 2:n.threads) {
R <- rbind(R,res[[i]]$R); f <- c(f,res[[i]]$f)
wt <- c(wt,res[[i]]$wt);eta <- c(eta,res[[i]]$eta)
dev <- dev + res[[i]]$dev
y.norm2 <- y.norm2 + res[[i]]$y.norm2
}
## use parallel QR here if npt>1...
qrx <- if (npt>1) pqr2(R,npt) else qr(R,tol=0,LAPACK=TRUE)
f <- qr.qty(qrx,f)[1:ncol(R)]
rp <- qrx$pivot;rp[rp] <- 1:ncol(R) # reverse pivot
qrx <- list(R=qr.R(qrx)[,rp],f=f,y.norm2=y.norm2)
}
}
## if the routine has been called with only a random sample of the data, then
## R, f and ||y||^2 can be corrected to estimate the full versions...
qrx$R <- qrx$R/sqrt(samfrac)
qrx$f <- qrx$f/sqrt(samfrac)
qrx$y.norm2 <- qrx$y.norm2/samfrac
G$n <- nobs
rss.extra <- qrx$y.norm2 - sum(qrx$f^2)
if (control$trace)
message(gettextf("Deviance = %s Iterations - %d", dev, iter, domain = "R-mgcv"))
if (!is.finite(dev)) stop("Non-finite deviance")
## preparation for working model fit is ready, but need to test for convergence first
if (iter>2 && abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) {
conv <- TRUE
coef <- start
break
}
if (kk > 0) break ## already shrunk the step
## At this point it is worth checking that coef update actually improved the penalized
## deviance. If not try step halving, and redo the above once a suitable step has been
## found...
if (iter>2) { ## can test divergence
## need to compute penalty at start and end of step
if (efam) {
dev0 <- sum(dev.resids(G$y,linkinv(eta0),G$w,theta0)) ## depends on theta, which will have changed
dev1 <- sum(dev.resids(G$y,linkinv(eta),G$w,theta0)) ## depends on theta, which will have changed
} else { dev1 <- dev }
if (method=="fREML") {
pcoef <- fit$beta
Sb0 <- Sl.Sb(um$Sl,rho=log(object$full.sp),pcoef0)
Sb <- Sl.Sb(um$Sl,rho=log(object$full.sp),pcoef)
} else {
pcoef <- coef
full.sp <- if (is.null(object$full.sp)) object$sp else object$full.sp
Sb0 <- regular.Sb(G$S,G$off,full.sp,pcoef0)
Sb <- regular.Sb(G$S,G$off,full.sp,pcoef)
}
while (dev0 + sum(pcoef0*Sb0) < dev1 + sum(pcoef * Sb) && kk < 6) {
## shrink step ...
coef <- (coef0 + coef)/2
pcoef <- (pcoef0 + pcoef)/2
eta <- (eta0 + eta)/2
Sb <- (Sb0 + Sb)/2
## recompute deviance ...
dev <- if (efam) sum(dev.resids(G$y,linkinv(eta),G$w,theta)) else sum(dev.resids(G$y,linkinv(eta),G$w))
dev1 <- if (efam) sum(dev.resids(G$y,linkinv(eta),G$w,theta0)) else dev
kk <- kk + 1
}
}
if (kk == 0) break ## step was ok
} ## repeat
if (conv) break
if (iter>1) { ## store coef and eta for divergence checking
coef0 <- coef
if (efam) theta0 <- theta ## theta used for determining step
pcoef0 <- if (method=="fREML") fit$beta else coef
eta0 <- eta
dev0 <- dev
}
if (efam && iter>1) { ## estimate theta
if (family$n.theta>0||scale1<0) theta <- estimate.theta(theta,family,G$y,linkinv(eta),scale=scale1,wt=G$w,tol=1e-7)
if (!is.null(family$scale) && scale1<0) {
scale <- exp(theta[family$n.theta+1])
theta <- theta[1:family$n.theta]
}
family$putTheta(theta)
}
if (method=="GCV.Cp") {
fit <- magic(qrx$f,qrx$R,G$sp,G$S,G$off,L=G$L,lsp0=G$lsp0,rank=G$rank,
H=G$H,C=matrix(0,0,ncol(qrx$R)), ##C=G$C,
gamma=gamma,scale=scale,gcv=(scale<=0),
extra.rss=rss.extra,n.score=nobs+nobs.extra)
post <- magic.post.proc(qrx$R,fit,qrx$f*0+1)
} else if (method=="fREML") { ## use fast REML code
## block diagonal penalty object, Sl, set up before loop
um <- Sl.Xprep(Sl,qrx$R,nt=npt)
lambda.0 <- if (is.null(in.out)) initial.sp(qrx$R,G$S,G$off) else in.out$sp
lsp0 <- log(lambda.0) ## initial s.p.
## carry forward scale estimate if possible...
if (scale>0) log.phi <- log(scale) else {
if (iter>1) log.phi <- log(object$scale) else {
if (is.null(in.out)) {
if (is.null(coef)||qrx$y.norm2==0) log.phi <- log(var(as.numeric(G$y))*.05) else
log.phi <- log(qrx$y.norm2/(nobs+nobs.extra))
} else log.phi <- log(in.out$scale)
}
}
fit <- fast.REML.fit(um$Sl,um$X,qrx$f,rho=lsp0,L=G$L,rho.0=G$lsp0,
log.phi=log.phi,phi.fixed=scale>0,rss.extra=rss.extra,
nobs =nobs+nobs.extra,Mp=um$Mp,nt=npt,gamma=gamma)
res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=FALSE,L=G$L,nt=npt)
object <- list(coefficients=res$beta,db.drho=fit$d1b,
gcv.ubre=fit$reml,mgcv.conv=list(iter=fit$iter,
message=fit$conv),rank=ncol(um$X),
Ve=NULL,scale.estimated = scale<=0,outer.info=fit$outer.info,
optimizer=c("perf","newton"))
if (scale<=0) { ## get sp's and scale estimate
nsp <- length(fit$rho)
object$sig2 <- object$scale <- exp(fit$rho[nsp])
object$sp <- exp(fit$rho[-nsp])
nsp <- length(fit$rho.full)
object$full.sp <- exp(fit$rho.full[-nsp])
} else { ## get sp's
object$sig2 <- object$scale <- scale
object$sp <- exp(fit$rho)
object$full.sp <- exp(fit$rho.full)
}
class(object)<-c("gam")
} else { ## method is one of "ML", "P-REML" etc...
y <- G$y; w <- G$w; n <- G$n;offset <- G$offset
G$y <- qrx$f
G$w <- G$y*0+1
G$X <- qrx$R
G$n <- length(G$y)
G$offset <- G$y*0
G$dev.extra <- rss.extra
G$pearson.extra <- rss.extra
G$n.true <- nobs+nobs.extra
object <- gam(G=G,method=method,gamma=gamma,scale=scale,control=gam.control(nthreads=npt))
y -> G$y; w -> G$w; n -> G$n;offset -> G$offset
object$deviance <- object$family <- object$null.deviance <- object$fitted.values <- NULL
}
if (method=="GCV.Cp") {
object <- list()
object$coefficients <- fit$b
object$edf <- post$edf
object$edf1 <- post$edf1
object$full.sp <- fit$sp.full
object$gcv.ubre <- fit$score
object$hat <- post$hat
object$mgcv.conv <- fit$gcv.info
object$optimizer="magic"
object$rank <- fit$gcv.info$rank
object$Ve <- post$Ve
object$Vp <- post$Vb
object$sig2 <- object$scale <- fit$scale
object$sp <- fit$sp
names(object$sp) <- names(G$sp)
class(object)<-c("gam")
}
coef <- object$coefficients
if (any(!is.finite(coef))) {
conv <- FALSE
warning(gettextf("non-finite coefficients at iteration %d",
iter))
break
}
} ## end fitting iteration
if (method=="fREML") { ## do expensive cov matrix cal only at end
res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=TRUE,scale=scale,L=G$L,nt=npt)
object$edf <- res$edf
object$edf1 <- res$edf1
object$edf2 <- res$edf2
object$hat <- res$hat
object$Vp <- res$Vp
object$Ve <- res$Ve
object$Vc <- res$Vc
object$V.sp <- res$V.sp
}
if (efam) { ## deal with any post processing
if (!is.null(family$postproc)) {
object$family <- family
posr <- family$postproc(family=family,y=G$y,prior.weights=G$w,
fitted=linkinv(eta),linear.predictors=eta,offset=G$offset,
intercept=G$intercept)
if (!is.null(posr$family)) object$family$family <- posr$family
if (!is.null(posr$deviance)) object$deviance <- posr$deviance
if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance
}
if (is.null(object$null.deviance)) object$null.deviance <- sum(family$dev.resids(G$y,weighted.mean(G$y,G$w),G$w,theta))
}
if (!conv)
warning("algorithm did not converge")
eps <- 10 * .Machine$double.eps
if (family$family == "binomial") {
if (any(mu > 1 - eps) || any(mu < eps))
warning("fitted probabilities numerically 0 or 1 occurred")
}
if (family$family == "poisson") {
if (any(mu < eps))
warning("fitted rates numerically 0 occurred")
}
object$R <- qrx$R
object$iter <- iter
object$wt <- wt
object$y <- G$y
object$prior.weights <- G$w
rm(G);if (gc.level>0) gc()
object
} ## end bgam.fit
ar.qr_up <- function(arg) {
## function to perform QR updating with AR residuals, on one execution thread
if (arg$rho!=0) { ## AR1 error model
ld <- 1/sqrt(1 - arg$rho^2) ## leading diagonal of root inverse correlation
sd <- -arg$rho * ld ## sub diagonal
}
yX.last <- NULL
qrx <- list(R=NULL,f=array(0,0),y.norm2=0) ## initial empty qr object
for (i in 1:arg$n.block) {
ind <- arg$start[i]:arg$end[i]
if (arg$rho!=0) { ## have to find AR1 transform...
N <- arg$end[i]-arg$start[i]+1
## note first row implied by this transform
## is always dropped, unless really at beginning of data.
row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)])
weight <- c(1,rep(c(sd,ld),N-1))
stop <- c(1,1:(N-1)*2+1)
if (!is.null(arg$mf$"(AR.start)")) { ## need to correct the start of new AR sections...
ii <- which(arg$mf$"(AR.start)"[ind]==TRUE)
if (length(ii)>0) {
if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
weight[ii*2-2] <- 0 ## zero sub diagonal
weight[ii*2-1] <- 1 ## set leading diagonal to 1
}
}
}
w <- sqrt(arg$G$w[ind])
X <- w*predict(arg$G,newdata=arg$mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
y <- w*(arg$mf[ind,arg$response] - arg$offset[ind]) ## w*(arg$G$model[[arg$response]] - arg$offset[ind])
if (arg$rho!=0) {
## Apply transform...
if (arg$last&&arg$end[i]==arg$nobs) yX.last <-
c(y[nrow(X)],X[nrow(X),]) ## store final row, in case of update
if (arg$first&&i==1) {
X <- rwMatrix(stop,row,weight,X)
y <- rwMatrix(stop,row,weight,y)
} else {
X <- rwMatrix(stop,row,weight,X)[-1,]
y <- rwMatrix(stop,row,weight,y)[-1]
}
} ## dealt with AR1
qrx <- qr_update(X,y,qrx$R,qrx$f,qrx$y.norm2,use.chol=arg$use.chol)
rm(X);if (arg$gc.level>1) {gc()} ## X can be large: remove and reclaim
} ## all blocks dealt with
qrx$yX.last <- yX.last
if (arg$gc.level>1) {rm(arg,w,y,ind);gc()}
qrx
} ## ar.qr_up
pabapr <- function(arg) {
## function for parallel calling of predict.gam
## QUERY: ... handling?
predict.gam(arg$object,newdata=arg$newdata,type=arg$type,se.fit=arg$se.fit,terms=arg$terms,
block.size=arg$block.size,newdata.guaranteed=arg$newdata.guaranteed,
na.action=arg$na.action)
}
predict.bam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,exclude=NULL,
block.size=50000,newdata.guaranteed=FALSE,na.action=na.pass,
cluster=NULL,discrete=TRUE,n.threads=1,gc.level=0,...) {
## function for prediction from a bam object, possibly in parallel
#if (is.function(na.action)) na.action <- deparse(substitute(na.action)) ## otherwise predict.gam can't detect type
if (discrete && !is.null(object$dinfo)) {
return(predict.bamd(object,newdata,type,se.fit,terms,exclude,
block.size,newdata.guaranteed,na.action,n.threads,gc.level=gc.level,...))
}
## remove some un-needed stuff from object
object$Sl <- object$qrx <- object$R <- object$F <- object$Ve <-
object$Vc <- object$G <- object$residuals <- object$fitted.values <-
object$linear.predictors <- NULL
if (gc.level>0) gc()
if (!is.null(cluster)&&inherits(cluster,"cluster")) {
## require(parallel)
n.threads <- length(cluster)
} else n.threads <- 1
if (missing(newdata)) n <- nrow(object$model) else {
n <- if (is.matrix(newdata[[1]])) nrow(newdata[[1]]) else length(newdata[[1]])
}
if (n < 100*n.threads) n.threads <- 1 ## not worth the overheads
if (n.threads==1) { ## single threaded call
if (missing(newdata)) return(
predict.gam(object,newdata=object$model,type=type,se.fit=se.fit,terms=terms,exclude=exclude,
block.size=block.size,newdata.guaranteed=newdata.guaranteed,
na.action=na.action,...)
) else return(
predict.gam(object,newdata=newdata,type=type,se.fit=se.fit,terms=terms,exclude=exclude,
block.size=block.size,newdata.guaranteed=newdata.guaranteed,
na.action=na.action,...))
} else { ## parallel call...
nt <- rep(floor(n/n.threads),n.threads)
nt[1] <- n - sum(nt[-1])
arg <- list()
n1 <- 0
for (i in 1:n.threads) {
n0 <- n1+1;n1 <- n1+nt[i]
ind <- n0:n1 ## this thread's data block from mf
arg[[i]] <- list(object=object,type=type,se.fit=se.fit,terms=terms,exclude=exclude,
block.size=block.size,newdata.guaranteed=newdata.guaranteed,
na.action=na.action)
arg[[i]]$object$model <- object$model[1:2,] ## save space
if (missing(newdata)) {
arg[[i]]$newdata <- object$model[ind,]
} else {
arg[[i]]$newdata <- newdata[ind,]
}
} ## finished setting up arguments
## newdata and object no longer needed - all info in thread lists...
if (!missing(newdata)) rm(newdata)
rm(object)
if (gc.level>0) gc()
res <- parallel::parLapply(cluster,arg,pabapr) ## perform parallel prediction
if (gc.level>0) gc()
## and splice results back together...
if (type=="lpmatrix") {
X <- res[[1]]
for (i in 2:length(res)) X <- rbind(X,res[[i]])
return(X)
} else if (se.fit==TRUE) {
rt <- list(fit = res[[1]]$fit,se.fit = res[[1]]$se.fit)
if (type=="terms") {
for (i in 2:length(res)) {
rt$fit <- rbind(rt$fit,res[[i]]$fit)
rt$se.fit <- rbind(rt$se.fit,res[[i]]$se.fit)
}
} else {
for (i in 2:length(res)) {
rt$fit <- c(rt$fit,res[[i]]$fit)
rt$se.fit <- c(rt$se.fit,res[[i]]$se.fit)
}
}
return(rt)
} else { ## no se's returned
rt <- res[[1]]
if (type=="terms") {
for (i in 2:length(res)) rt <- rbind(rt,res[[i]])
} else {
for (i in 2:length(res)) rt <- c(rt,res[[i]])
}
return(rt)
}
}
} ## end predict.bam
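## Typical parallel use sketch (predict.bam.demo is illustrative only, not
## part of mgcv; b is assumed to be a fitted bam object and nd a suitable
## newdata frame): prediction is split over a socket cluster and the pieces
## spliced back together as above...
predict.bam.demo <- function(b,nd,n.cores=2) {
  cl <- parallel::makeCluster(n.cores)
  on.exit(parallel::stopCluster(cl))
  predict(b,newdata=nd,cluster=cl)
} ## predict.bam.demo (illustrative only)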
bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,
cl=NULL,gc.level=0,use.chol=FALSE,in.out=NULL,npt=1) {
## function that does big additive model fit in strictly additive case
## first perform the QR decomposition, blockwise....
n <- nrow(mf)
if (rho!=0) { ## AR1 error model
ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
sd <- -rho*ld ## sub diagonal
}
if (n>chunk.size) { ## then use QR accumulation approach
if (!is.null(cl)&&inherits(cl,"cluster")) {
n.threads <- length(cl)
while(n/n.threads < ncol(G$X)) n.threads <- n.threads - 1
if (n.threads < length(cl)) {
warning("Too many cluster nodes to use all efficiently")
}
} else n.threads <- 1
G$coefficients <- rep(0,ncol(G$X))
class(G) <- "gam"
if (n.threads>1) { ## set up thread argument lists
## number of obs per thread
nt <- rep(ceiling(n/n.threads),n.threads)
nt[n.threads] <- n - sum(nt[-n.threads])
arg <- list()
n1 <- 0
for (i in 1:n.threads) {
n0 <- n1+1;n1 <- n1+nt[i]
if (i>1&&rho!=0) { ## need to start from end of last block if rho!=0
n0 <- n0-1;nt[i] <- nt[i]+1
}
ind <- n0:n1 ## this thread's data block from mf
n.block <- nt[i]%/%chunk.size ## number of full sized blocks
stub <- nt[i]%%chunk.size ## size of end block
if (n.block>0) {
## each full sized block has chunk.size rows...
start <- (0:(n.block-1))*chunk.size+1
end <- start + chunk.size - 1
if (stub>0) {
start[n.block+1] <- end[n.block]+1
end[n.block+1] <- nt[i]
n.block <- n.block+1
}
if (rho!=0) { ## then blocks must overlap
ns <- length(start)
if (ns>1) start[2:ns] <- start[2:ns]-1
}
} else {
n.block <- 1
start <- 1
end <- nt[i]
}
arg[[i]] <- list(nobs= nt[i],start=start,end=end,n.block=n.block,
rho=rho,mf = mf[ind,],gc.level=gc.level,
offset = G$offset[ind],G = G,response=gp$response,
first=FALSE,last=FALSE,use.chol=use.chol)
if (i==1) arg[[1]]$first <- TRUE
if (i==n.threads) arg[[i]]$last <- TRUE
arg[[i]]$G$w <- G$w[ind];arg[[i]]$G$model <- NULL
}
} else { ## single thread, requires single indices
n.block <- n%/%chunk.size ## number of full sized blocks
stub <- n%%chunk.size ## size of end block
if (stub>0) n.block <- n.block + 1
start <- 0:(n.block-1)*chunk.size ## block starts
end <- start + chunk.size; ## block ends
end[n.block] <- n
if (rho==0) start <- start + 1 ## if rho!=0 blocks start one row early, overlapping so the AR1 transform spans block boundaries
start[1] <- 1
}
if (n.threads==1) { ## use original single thread method...
qrx <- list(R=NULL,f=array(0,0),y.norm2=0) ## initial empty qr object
for (i in 1:n.block) {
ind <- start[i]:end[i]
if (rho!=0) {
N <- end[i]-start[i]+1
row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)])
weight <- c(1,rep(c(sd,ld),N-1))
stop <- c(1,1:(N-1)*2+1)
if (!is.null(mf$"(AR.start)")) { ## need to correct the start of new AR sections...
ii <- which(mf$"(AR.start)"[ind]==TRUE)
if (length(ii)>0) {
if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
weight[ii*2-2] <- 0 ## zero sub diagonal
weight[ii*2-1] <- 1 ## set leading diagonal to 1
}
}
}
w <- sqrt(G$w[ind])
X <- w*predict(G,newdata=mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
y <- w*(mf[ind,gp$response]-G$offset[ind]) ## w*(G$model[[gp$response]] - G$offset[ind])
if (rho!=0) {
## Apply transform...
if (end[i]==n) yX.last <- c(y[nrow(X)],X[nrow(X),]) ## store final row, in case of update
if (i==1) {
X <- rwMatrix(stop,row,weight,X)
y <- rwMatrix(stop,row,weight,y)
} else {
X <- rwMatrix(stop,row,weight,X)[-1,]
y <- rwMatrix(stop,row,weight,y)[-1]
}
}
qrx <- qr_update(X,y,qrx$R,qrx$f,qrx$y.norm2,use.chol=use.chol,nt=npt)
rm(X)
if (gc.level>1) {gc()} ## X can be large: remove and reclaim
} ## end of single thread block loop
if (use.chol) { ## post proc to get R and f...
y.norm2 <- qrx$y.norm2
qrx <- chol2qr(qrx$R,qrx$f,nt=npt)
qrx$y.norm2 <- y.norm2
}
} else { ## use parallel accumulation
res <- parallel::parLapply(cl,arg,ar.qr_up)
## now consolidate the results from the parallel threads...
R <- res[[1]]$R;f <- res[[1]]$f; ## dev <- res[[1]]$dev
y.norm2 <- res[[1]]$y.norm2
for (i in 2:n.threads) {
if (use.chol) {
R <- R + res[[i]]$R; f <- f + res[[i]]$f
} else {
R <- rbind(R,res[[i]]$R); f <- c(f,res[[i]]$f)
}
y.norm2 <- y.norm2 + res[[i]]$y.norm2
}
if (use.chol) {
qrx <- chol2qr(R,f,nt=npt)
qrx$y.norm2 <- y.norm2
} else { ## proper QR
## use parallel QR if npt>1...
qrx <- if (npt>1) pqr2(R,npt) else qr(R,tol=0,LAPACK=TRUE)
f <- qr.qty(qrx,f)[1:ncol(R)]
rp <- qrx$pivot;rp[rp] <- 1:ncol(R) # reverse pivot
qrx <- list(R=qr.R(qrx)[,rp],f=f,y.norm2=y.norm2)
}
yX.last <- res[[n.threads]]$yX.last
}
G$n <- n
} else { ## n <= chunk.size
if (rho==0) qrx <- qr_update(sqrt(G$w)*G$X,sqrt(G$w)*(G$y-G$offset),use.chol=use.chol,nt=npt) else {
row <- c(1,rep(1:n,rep(2,n))[-c(1,2*n)])
weight <- c(1,rep(c(sd,ld),n-1))
stop <- c(1,1:(n-1)*2+1)
if (!is.null(mf$"(AR.start)")) { ## need to correct the start of new AR sections...
ii <- which(mf$"(AR.start)"==TRUE)
if (length(ii)>0) {
if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
weight[ii*2-2] <- 0 ## zero sub diagonal
weight[ii*2-1] <- 1 ## set leading diagonal to 1
}
}
yX.last <- c(G$y[n],G$X[n,]) ## store final row, in case of update
X <- rwMatrix(stop,row,weight,sqrt(G$w)*G$X)
y <- rwMatrix(stop,row,weight,sqrt(G$w)*G$y)
qrx <- qr_update(X,y,use.chol=use.chol,nt=npt)
rm(X); if (gc.level>1) gc() ## X can be large: remove and reclaim
}
if (use.chol) { ## post proc to get R and f...
y.norm2 <- qrx$y.norm2
qrx <- chol2qr(qrx$R,qrx$f,nt=npt)
qrx$y.norm2 <- y.norm2
}
}
rss.extra <- qrx$y.norm2 - sum(qrx$f^2)
if (method=="GCV.Cp") {
fit <- magic(qrx$f,qrx$R,G$sp,G$S,G$off,L=G$L,lsp0=G$lsp0,rank=G$rank,
H=G$H,C=matrix(0,0,ncol(qrx$R)), ##C=G$C,
gamma=gamma,scale=scale,gcv=(scale<=0),
extra.rss=rss.extra,n.score=n)
post <- magic.post.proc(qrx$R,fit,qrx$f*0+1)
} else if (method=="fREML"){ ## use fast REML code
Sl <- Sl.setup(G) ## setup block diagonal penalty object
um <- Sl.Xprep(Sl,qrx$R,nt=npt)
lambda.0 <- if (is.null(in.out)) initial.sp(qrx$R,G$S,G$off) else in.out$sp
lsp0 <- log(lambda.0) ## initial s.p.
log.phi <- if (scale<=0) { if (is.null(in.out)) log(var(as.numeric(G$y))*.05) else log(in.out$scale) } else log(scale)
fit <- fast.REML.fit(um$Sl,um$X,qrx$f,rho=lsp0,L=G$L,rho.0=G$lsp0,
log.phi=log.phi,phi.fixed=scale>0,rss.extra=rss.extra,
nobs =n,Mp=um$Mp,nt=npt,gamma=gamma)
res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=TRUE,scale=scale,L=G$L,nt=npt)
object <- list(coefficients=res$beta,edf=res$edf,edf1=res$edf1,edf2=res$edf2,##F=res$F,
db.drho=fit$d1b,
gcv.ubre=fit$reml,hat=res$hat,mgcv.conv=list(iter=fit$iter,
message=fit$conv),rank=ncol(um$X),
Ve=res$Ve,Vp=res$Vp,Vc=res$Vc,V.sp=res$V.sp,
scale.estimated = scale<=0,outer.info=fit$outer.info,
optimizer=c("perf","newton"))
if (scale<=0) { ## get sp's and scale estimate
nsp <- length(fit$rho)
object$sig2 <- object$scale <- exp(fit$rho[nsp])
object$sp <- exp(fit$rho[-nsp])
nsp <- length(fit$rho.full)
object$full.sp <- exp(fit$rho.full[-nsp])
} else { ## get sp's
object$sig2 <- object$scale <- scale
object$sp <- exp(fit$rho)
object$full.sp <- exp(fit$rho.full)
}
if (rho!=0) { ## correct RE/ML score for AR1 transform
df <- if (is.null(mf$"(AR.start)")) 1 else sum(mf$"(AR.start)")
object$gcv.ubre <- object$gcv.ubre - (n-df)*log(ld)
}
G$X <- qrx$R;G$dev.extra <- rss.extra
G$pearson.extra <- rss.extra;G$n.true <- n
object$Sl <- Sl ## to allow for efficient update
class(object)<-c("gam")
} else { ## method is "ML", "P-REML" or similar
y <- G$y; w <- G$w; n <- G$n;offset <- G$offset
G$y <- qrx$f
G$w <- G$y*0+1
G$X <- qrx$R
G$n <- length(G$y)
G$offset <- G$y*0
G$dev.extra <- rss.extra
G$pearson.extra <- rss.extra
G$n.true <- n
object <- gam(G=G,method=method,gamma=gamma,scale=scale,control=gam.control(nthreads=npt))
object$null.deviance <- object$fitted.values <- NULL
y -> G$y; w -> G$w; n -> G$n;offset -> G$offset
if (rho!=0) { ## correct RE/ML score for AR1 transform
df <- if (is.null(mf$"(AR.start)")) 1 else sum(mf$"(AR.start)")
object$gcv.ubre <- object$gcv.ubre - (n-df)*log(ld)
}
}
if (method=="GCV.Cp") {
object <- list()
object$coefficients <- fit$b
object$edf <- post$edf
object$edf1 <- post$edf1
object$full.sp <- fit$sp.full
object$gcv.ubre <- fit$score
object$hat <- post$hat
object$mgcv.conv <- fit$gcv.info
object$optimizer="magic"
object$rank <- fit$gcv.info$rank
object$Ve <- post$Ve
object$Vp <- post$Vb
object$sig2 <- object$scale <- fit$scale
object$sp <- fit$sp
class(object)<-c("gam")
} else {
}
G$smooth <- G$X <- NULL
object$prior.weights <- G$w
object$AR1.rho <- rho
if (rho!=0) { ## need to store last model matrix row, to allow update
object$yX.last <- yX.last
}
object$R <- qrx$R
object$gamma <- gamma;object$G <- G;object$qrx <- qrx ## to allow updating of the model
object$y <- mf[[gp$response]]
object$iter <- 1
object
} # end of bam.fit
predict.bamd <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,exclude=NULL,
block.size=50000,newdata.guaranteed=FALSE,na.action=na.pass,
n.threads=1,gc.level=0,...) {
## function for prediction from a bam object, by discrete methods
## remove some un-needed stuff from object
object$Sl <- object$qrx <- object$R <- object$F <- object$Ve <-
object$Vc <- object$G <- object$residuals <- object$fitted.values <-
object$linear.predictors <- NULL
if (gc.level>0) gc()
if (missing(newdata)) newdata <- object$model
convert2mf <- is.null(attr(newdata,"terms"))
if (type=="iterms") {
type <- "terms"
warning("iterms reset to terms")
}
lpi <- attr(object$formula,"lpi") ## lpi[[i]] indexes coefs for ith linear predictor
nlp <- if (is.null(lpi)) 1 else length(lpi) ## number of linear predictors
lpid <- if (nlp>1) object$dinfo$lpid else NULL ## index of discrete terms involved in each linear predictor
## newdata has to be processed first to avoid, e.g. dropping different subsets of data
## for parametric and smooth components....
newterms <- attr(newdata,"terms") ## non NULL for model frame
newd <- predict.gam(object,newdata=newdata,type="newdata",se.fit=se.fit,terms=terms,exclude=exclude,
block.size=block.size,newdata.guaranteed=newdata.guaranteed,
na.action=na.action,...)
if (nrow(newd)>0) {
newdata <- newd;rm(newd)
} else { ## no non NA data, might as well call predict.gam
return(predict.gam(object,newdata=newdata,type=type,se.fit=se.fit,terms=terms,exclude=exclude,
block.size=block.size,newdata.guaranteed=newdata.guaranteed,
na.action=na.action,...))
}
## Next line needed to avoid treating newdata as a model frame when it was not supplied as one.
## Otherwise names of e.g. the offset are messed up (as they also would be if newdata had been
## supplied as a model frame, or set to object$model with terms set to NULL)
if (is.null(newterms)) attr(newdata,"terms") <- NULL
na.act <- attr(newdata,"na.action") ## save the NA action for later
yname <- attr(attr(object$terms,"dataClasses"),"names")[attr(object$terms,"response")]
response <- newdata[[yname]]
## Parametric terms have to be dealt with safely, but without forming all terms
## or a full model matrix. Strategy here is to use predict.gam, having removed
## key smooth related components from model object, so that it appears to be
## a parametric model...
offset <- if (nlp>1) as.list(rep(0,nlp)) else 0
pp <- if (se.fit) list(fit=rep(0,0),se.fit=rep(0,0)) else rep(0,0) ## note: needed in terms branch below
## now discretize covariates...
if (convert2mf) newdata <- model.frame(object$dinfo$gp$fake.formula[-2],newdata)
dk <- discrete.mf(object$dinfo$gp,mf=newdata,names.pmf=object$dinfo$pmf.names,full=TRUE)
Xd <- list() ### list of discrete model matrices...
n <- if (is.matrix(newdata[[1]])) nrow(newdata[[1]]) else length(newdata[[1]])
kb <- k <- 1;kd <- dk$k
ks <- matrix(0,0,2) ## NOTE: slightly more efficient not to repeatedly extend
for (j in 1:nlp) { ## loop over parametric components of linear predictors
## get discretized marginal matrices for the parametric model (if contrast warnings see predict.gam fix)
ptens <- if (nlp==1&&!is.list(object$pterms)) terms2tensor(object$pterms,dk$mf,object$contrasts) else
terms2tensor(object$pterms[[j]],dk$mf,object$contrasts)
offi <- if (nlp==1&&!is.list(object$pterms)) attr(object$pterms,"offset") else attr(object$pterms[[j]],"offset")
if (!is.null(offi)) {
offname <- if (nlp==1&&!is.list(object$pterms)) names(attr(object$pterms,"dataClasses"))[offi] else
names(attr(object$pterms[[j]],"dataClasses"))[offi]
if (j==1&&nlp>1) offset <- list()
if (nlp>1) offset[[j]] <- newdata[[offname]] else offset <- newdata[[offname]]
}
if (!is.null(ptens)) {
np <-length(ptens$X);
for (i in 1:np) {
ks <- rbind(ks,dk$ks[ptens$xname[i],])
Xd[[k]] <- ptens$X[[i]][1:dk$nr[ptens$xname[i]],,drop=FALSE]
k <- k + 1
}
}
kb <- kb + length(ptens$ts)
} ## parametric lp loop
ts <- object$dinfo$ts
dt <- object$dinfo$dt
## remove padding from the discretized data...
class(dk$mf) <- "list"
for (i in 1:length(dk$mf)) dk$mf[[i]] <- if (is.matrix(dk$mf[[i]])) dk$mf[[i]][1:dk$nr[i],] else dk$mf[[i]][1:dk$nr[i]]
if (length(object$smooth)) for (i in 1:length(object$smooth)) { ## work through the smooth list
## potentially each smoother model matrix can be made up of a sequence
## of row-tensor products, need to loop over such sub blocks...
nsub <- if (!is.null(object$smooth[[i]]$ts)) length(object$smooth[[i]]$ts) else 1
## first deal with any by variable (as first marginal of tensor)...
for (sb in 1:nsub) { ## loop over sub-blocks
if (object$smooth[[i]]$by!="NA") {
termk <- object$smooth[[i]]$by
by.var <- dk$mf[[object$smooth[[i]]$by]][1:dk$nr[termk]]
if (is.factor(by.var)) {
## create dummy by variable...
by.var <- as.numeric(by.var==object$smooth[[i]]$by.level)
}
Xd[[k]] <- matrix(by.var,dk$nr[termk],1)
ks <- rbind(ks,dk$ks[termk,])
k <- k + 1
by.present <- 1
} else by.present <- 0
## ... by done
if (inherits(object$smooth[[i]],"tensor.smooth")) {
nmar <- if (is.null(object$smooth[[i]]$dt)) length(object$smooth[[i]]$margin) else object$smooth[[i]]$dt[sb]
XP <- object$smooth[[i]]$XP
jind <- if (sb>1) object$smooth[[i]]$ts[sb] + 1:object$smooth[[i]]$dt[sb] - 1 else 1:nmar
for (j in jind) {
object$smooth[[i]]$margin[[j]]$by<- "NA" ## should be no by's here (any by dealt with above)
termk <- object$smooth[[i]]$margin[[j]]$term[1]
Xd[[k]] <-if (termk=="(Intercept)") matrix(1,dk$nr[termk],1) else PredictMat(object$smooth[[i]]$margin[[j]],dk$mf,n=dk$nr[termk])
ks <- rbind(ks,dk$ks[termk,])
if (!is.null(XP)&&(j<=length(XP))&&!is.null(XP[[j]])) Xd[[k]] <- Xd[[k]]%*%XP[[j]]
k <- k + 1
}
} else { ## not a tensor smooth
object$smooth[[i]]$by <- "NA" ## have to ensure by not applied here (it's dealt with as a tensor marginal)!
termk <- object$smooth[[i]]$term[1]
ks <- rbind(ks,dk$ks[termk,])
Xd[[k]] <- PredictMat(object$smooth[[i]],dk$mf,n=dk$nr[termk])
k <- k + 1
}
kb <- kb + 1
} ## sub block loop
}
attr(Xd,"lpip") <- object$dinfo$lpip ## list of coef indices for each term
## end of discrete set up
se <- se.fit
if (type=="terms") {
term.lab <- names(object$dinfo$ts) ## names of all terms
termi <- rep(TRUE,length(term.lab)) ## do we want term included?
termi[term.lab=="(Intercept)"] <- FALSE ## not if it's intercept
if (!is.null(terms)) termi <- termi & term.lab %in% terms ## and only if it's in terms
if (!is.null(exclude)) termi <- termi & !(term.lab %in% exclude) ## and not in exclude
uterms <- unique(term.lab[termi])
nterms <- length(uterms) ## sum(termi)
fit <- matrix(0,nrow(kd),nterms)
if (se) se.fit <- matrix(0,nrow(kd),nterms)
for (k in 1:nterms) {
lt <- which(term.lab%in%uterms[k]) ## discrete terms involved in this smooth (can be more than one)
if (any(termi[lt])) { ## termi is constant over duplicates of a label
fit[,k] <- Xbd(Xd,object$coefficients,kd,ks,
ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,lt=lt)
if (se) se.fit[,k] <- diagXVXd(Xd,object$Vp,kd,ks,
ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,nthreads=n.threads,lt=lt,rt=lt)^.5
}
}
colnames(fit) <- uterms
if (se) {
colnames(se.fit) <- uterms
fit <- list(fit=fit,se.fit=se.fit)
}
} else if (type=="lpmatrix") {
lt <- 1:length(ts);names(lt) <- names(ts)
if (!is.null(terms)) lt <- lt[names(lt)%in%terms]
if (!is.null(exclude)) lt <- lt[!names(lt)%in%exclude]
fit <- Xbd(Xd,diag(length(object$coefficients)),kd,ks,ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,lt=lt)
if (nlp>1) attr(fit,"lpi") <- lpi
} else { ## link or response
if (is.null(object$family$predict)||type=="link") {
lt <- 1:length(ts);names(lt) <- names(ts)
if (nlp>1) {
fit <- matrix(0,nrow(kd),nlp)
if (se) se.fit <- matrix(0,nrow(kd),nlp)
for (i in 1:nlp) {
lt <- 1:length(ts);names(lt) <- names(ts)
lt <- lt[lpid[[i]]]
if (!is.null(terms)) lt <- lt[names(lt)%in%terms]
if (!is.null(exclude)) lt <- lt[!names(lt)%in%exclude]
fit[,i] <- Xbd(Xd,object$coefficients,kd,ks,ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,lt=lt) + offset[[i]]
if (se) se.fit[,i] <- diagXVXd(Xd,object$Vp,kd,ks,ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,
lt=lt,rt=lt,nthreads=n.threads)^.5
}
} else {
lt <- 1:length(ts);names(lt) <- names(ts)
if (!is.null(terms)) lt <- lt[names(lt)%in%terms]
if (!is.null(exclude)) lt <- lt[!names(lt)%in%exclude]
fit <- Xbd(Xd,object$coefficients,kd,ks,ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,lt=lt) + offset
if (se) se.fit <- diagXVXd(Xd,object$Vp,kd,ks,ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,lt=lt,rt=lt,nthreads=n.threads)^.5
}
if (type=="response") {
linkinv <- object$family$linkinv
dmu.deta <- object$family$mu.eta
} else linkinv <- dmu.deta <- NULL
if (se==TRUE) {
if (type=="response") {
if (nlp>1) for (i in 1:nlp) {
se.fit[,i] <- se.fit[,i] * abs(object$family$linfo[[i]]$mu.eta(fit[,i]))
fit[,i] <- object$family$linfo[[i]]$linkinv(fit[,i])
} else {
se.fit <- se.fit * abs(object$family$mu.eta(fit))
fit <- object$family$linkinv(fit)
}
}
fit <- list(fit=fit,se.fit=se.fit)
} else if (type=="response") fit <- object$family$linkinv(fit)
} else { ## family has its own response prediction code
X <- list(Xd=Xd,kd=kd,ks=ks,ts=ts,dt=dt,v=object$dinfo$v,qc=object$dinfo$qc,drop=object$dinfo$drop,lpid=lpid)
if (nlp>1) attr(X,"lpi") <- lpi
## NOTE: not set up for families needing response for prediction (e.g. cox.ph)
fampred <- object$family$predict ## just eases debugging
offs <- if (length(offset)==1) rep(offset,nrow(kd)) else offset ## avoid subsetting failure
ffv <- fampred(object$family,se=se,y=response,X=X,beta=object$coefficients,
off=offs,Vb=object$Vp) ## NOTE: offsets not handled
fit <- ffv[[1]]
if (se) fit <- list(fit=fit,se.fit =ffv[[2]])
}
}
rn <- rownames(newdata)
if (type=="lpmatrix") {
colnames(fit) <- names(object$coefficients)
rownames(fit) <- rn
attr(fit,"model.offset") <- offset
fit <- napredict(na.act,fit)
} else {
if (se) {
if (is.null(nrow(fit$fit))) {
names(fit$fit) <- rn
names(fit$se.fit) <- rn
fit$fit <- napredict(na.act,fit$fit)
fit$se.fit <- napredict(na.act,fit$se.fit)
} else {
rownames(fit$fit) <- rn
rownames(fit$se.fit) <- rn
fit$fit <- napredict(na.act,fit$fit)
fit$se.fit <- napredict(na.act,fit$se.fit)
}
} else {
if (is.null(nrow(fit))) names(fit) <- rn else
rownames(fit) <- rn
fit <- napredict(na.act,fit)
}
}
fit
} ## end predict.bamd
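## Illustrative usage sketch (not run; assumes mgcv is attached and 'dat' holds
## the fitting data). For fits produced by bam(...,discrete=TRUE) prediction is
## routed to the discrete method above via predict.bam...
## b <- bam(y ~ s(x0) + s(x1), data = dat, method = "fREML", discrete = TRUE)
## pr <- predict(b, newdata = dat, type = "response", se.fit = TRUE)
## str(pr) ## list with 'fit' and 'se.fit' components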
tero <- function(sm) {
## re-orders a te smooth spec so that the largest marginal is last.
maxd <- 0
ns <- length(sm$margin)
for (i in 1:ns) if (sm$margin[[i]]$bs.dim>=maxd) {
maxi <- i;maxd <- sm$margin[[i]]$bs.dim
}
if (maxi<ns) { ## re-ordering required
ind <- 1:ns;ind[maxi] <- ns;ind[ns] <- maxi
sm$margin <- sm$margin[ind]
sm$fix <- sm$fix[ind]
if (!is.null(sm$mc)) sm$mc <- sm$mc[ind]
sm$term <- rep("",0)
for (i in 1:ns) sm$term <- c(sm$term,sm$margin[[i]]$term)
sm$label <- paste0(substr(sm$label,1,3),paste0(sm$term,collapse=","),")",collapse="")
}
sm
} ## tero
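## Illustrative sketch (not run): a te() spec whose largest marginal comes first
## has its margins swapped by tero(), e.g. for te(x,z,k=c(20,5)) the k=20 'x'
## margin is moved last and the label rebuilt as "te(z,x)"...
## sm <- interpret.gam(y ~ te(x, z, k = c(20, 5)))$smooth.spec[[1]]
## tero(sm)$term ## "z" "x" -- largest basis dimension now last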
AR.resid <- function(rsd,rho=0,AR.start=NULL) {
## standardised residuals for AR1 model
if (rho==0) return(rsd)
ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
sd <- -rho*ld ## sub diagonal
N <- length(rsd)
## see rwMatrix() for how following are used...
ar.row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)]) ## index of rows to reweight
ar.weight <- c(1,rep(c(sd,ld),N-1)) ## row weights
ar.stop <- c(1,1:(N-1)*2+1) ## (stop[i-1]+1):stop[i] are the rows to reweight to get ith row
if (!is.null(AR.start)) { ## need to correct the start of new AR sections...
ii <- which(AR.start==TRUE)
if (length(ii)>0) {
if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
ar.weight[ii*2-2] <- 0 ## zero sub diagonal
ar.weight[ii*2-1] <- 1 ## set leading diagonal to 1
}
}
rwMatrix(ar.stop,ar.row,ar.weight,rsd)
} ## AR.resid
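## A minimal dense check of AR.resid (an illustrative helper, not part of the
## package API; assumes length(rsd)>1 and omits AR.start sectioning for clarity):
## the stop/row/weight triplet above encodes the banded root inverse correlation
## of an AR1 process, so row 1 is unchanged and row i>1 becomes
## (rsd[i]-rho*rsd[i-1])/sqrt(1-rho^2)...
AR.resid.dense <- function(rsd,rho=0) {
if (rho==0) return(rsd)
N <- length(rsd)
ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
sd <- -rho*ld ## sub diagonal
C <- diag(c(1,rep(ld,N-1)))
C[cbind(2:N,1:(N-1))] <- sd
drop(C%*%rsd) ## should match AR.resid(rsd,rho)
} ## AR.resid.dense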
tens2matrix <- function(X,ts,dt) {
## converts result of terms2tensor to a full model matrix. Should
## match direct model.matrix call on the original terms object.
## Purely for checking purposes (insufficiently efficient for
## general use)
Xt <- matrix(0,nrow(X[[1]]),0)
for (i in 1:length(ts)) {
ind <- ts[i]:(ts[i]+dt[i]-1)
Xt <- if (dt[i]==1) cbind(Xt,X[[ind]]) else cbind(Xt,tensor.prod.model.matrix(X[ind]))
}
Xt
} ## tens2matrix
terms2tensor <- function(terms,data=NULL,contrasts.arg=NULL,drop.intercept=FALSE,identify=TRUE,sparse=FALSE) {
## takes a terms object or formula and converts it into a sequence of marginal model matrices, X
## and associated indices ts and dt, using the data in 'data'. drops the intercept if needed.
## If 'identify' then identifiability constraints/contrasts imposed, otherwise not.
## The ith block of the model matrix is the row tensor product
## of the dt[i] elements of X starting at the ts[i].
## i.e. tensor.prod.model.matrix(X[ts[i]:(ts[i]+dt[i]-1)])
## xname[i] is the name of the variable in data used to create the marginal matrix X[[i]], with
## `(Intercept)' returned for the intercept term
## If data==NULL a dummy run is performed, and X and p are not returned
if (!inherits(terms,"formula")) stop("requires a terms or formula object as first argument")
if (!inherits(terms,"terms")) terms <- terms(terms)
fac <- attr(terms,"factors")
intercept <- attr(terms,"intercept")==1 ## is there an intercept?
nt <- if (is.matrix(fac)) ncol(fac) else 0
nt <- nt + intercept - drop.intercept ## number of terms.
if (nt==0) return(NULL)
p <- ts <- dt <- rep(1,nt)
X <- list() ## marginal model matrix list
form <- list() ## marginal formulae list
k <- 1
xname <- rep("",nt)
dummy <- is.null(data)
if (!dummy) n <- if (is.matrix(data[[1]])) nrow(data[[1]]) else length(data[[1]])
if (intercept && !drop.intercept) {
ts[k] <- 1;dt[k] <- 1
if (!dummy) X[[k]] <- matrix(1,n,1)
xname[k] <- "(Intercept)"
form[[k]] <- ~1
environment(form[[k]]) <- NULL
k <- k + 1
term.labels <- c("(Intercept)",attr(terms,"term.labels"))
} else term.labels <- attr(terms,"term.labels")
varn <- rownames(fac)
ord <- attr(terms,"order")
no.int <- !intercept ## indicator of whether missing intercept still unhandled
if (drop.intercept) intercept <- 0;
if (nt-intercept>0) for (i in 1:(nt-intercept)) { ## work through the terms
ts[i+intercept] <- k;dt[i+intercept] <- ord[i]
if (ord[i]==1) { ## special case due to odd intercept handling
vn <- varn[as.logical(fac[,i])]
if (no.int||!identify) {
fm <- as.formula(paste("~",vn,"-1"))
## unfortunately a warning has been introduced in model.matrix that contradicts the documentation and will complain about any
## element of contrasts.arg that does not match a variable name in the formula. Rather than introduce redundant work-around
## code it seems better to just suppress the warning...
if (!dummy) X[[k]] <- if (sparse) sparse.model.matrix(fm,data,contrasts.arg) else suppressWarnings(model.matrix(fm,data,contrasts.arg))
no.int <- FALSE
} else {
fm <- as.formula(paste("~",vn))
if (!dummy) X[[k]] <-
if (sparse) sparse.model.matrix(fm,data,contrasts.arg)[,-1,drop=FALSE] else suppressWarnings(model.matrix(fm,data,contrasts.arg)[,-1,drop=FALSE])
}
xname[k] <- if (!dummy && vn %in% names(data)) vn else all.vars(fm)
form[[k]] <- fm; environment(form[[k]]) <- NULL
if (!dummy) p[i+intercept] = ncol(X[[k]])
k <- k + 1
} else { ## interaction term
m <- which(fac[,i]!=0) ## marginal terms involved
for (j in ord[i]:1) { ## reverse ordering is to conform with model.matrix(terms,data) called directly
vn <- varn[m[j]]
if (fac[m[j],i]==2||!identify) { ## no contrast
fm <- as.formula(paste("~",vn,"-1"))
if (!dummy) X[[k]] <- if (sparse) sparse.model.matrix(fm,data,contrasts.arg) else suppressWarnings(model.matrix(fm,data,contrasts.arg))
} else { ## with contrast
fm <- as.formula(paste("~",vn))
if (!dummy) X[[k]] <-
if (sparse) sparse.model.matrix(fm,data,contrasts.arg)[,-1,drop=FALSE] else suppressWarnings(model.matrix(fm,data,contrasts.arg)[,-1,drop=FALSE])
}
xname[k] <- if (!dummy && vn %in% names(data)) vn else all.vars(fm)
form[[k]] <- fm; environment(form[[k]]) <- NULL
if (!dummy) p[i+intercept] <- p[i+intercept] * ncol(X[[k]])
k <- k + 1
} ## j marginal loop end
} ## finished interaction term
} ## i term loop end
list(X=X, ## list of marginal model matrices
ts=ts, ## ts[i] is starting marginal matrix for ith term
dt=dt, ## dt[i] is number of marginal matrices for ith term
xname=xname, ## xname[k] is name of covariate associated with X[[k]]
form=form, ## form[[k]] is formula used to produce X[[k]]
term.labels=term.labels, ## term.labels[i] is label of ith term
p=p) ## p[i] is total coef count for ith term
} # terms2tensor
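## Illustrative check (not run): on a small design the marginal representation
## returned by terms2tensor should reproduce a direct model.matrix call, once
## reassembled by tens2matrix above...
## dat <- data.frame(x = runif(10), f = gl(2, 5))
## tt <- terms2tensor(~ x * f, data = dat)
## range(tens2matrix(tt$X, tt$ts, tt$dt) - model.matrix(~ x * f, dat)) ## ~ 0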
bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action=na.omit,
offset=NULL,method="fREML",control=list(),select=FALSE,scale=0,gamma=1,knots=NULL,sp=NULL,
min.sp=NULL,paraPen=NULL,chunk.size=10000,rho=0,AR.start=NULL,discrete=FALSE,
cluster=NULL,nthreads=1,gc.level=0,use.chol=FALSE,samfrac=1,coef=NULL,
drop.unused.levels=TRUE,G=NULL,fit=TRUE,drop.intercept=NULL,in.out=NULL,...)
## Routine to fit an additive model to a large dataset. The model is stated in the formula,
## which is then interpreted to figure out which bits relate to smooth terms and which to
## parametric terms.
## This is a modification of `gam' designed to build the QR decomposition of the model matrix
## up in chunks, to keep memory costs down.
## If 'cluster' is a cluster from the 'parallel' package then the QR decomposition is built
## up in parallel on it. 'nthreads' is the number of threads to use for non-cluster
## computation (e.g. combining results from cluster nodes). If NA then it is set to
## max(1,length(cluster)).
{ control <- do.call("gam.control",control)
if (control$trace) t3 <- t2 <- t1 <- t0 <- proc.time()
if (length(nthreads)==1) nthreads <- rep(nthreads,2)
if (is.null(G)) { ## need to set up model!
if (is.character(family))
family <- eval(parse(text = family))
if (is.function(family))
family <- family()
if (is.null(family$family))
stop("family not recognized")
if (family$family=="gaussian"&&family$link=="identity") am <- TRUE else am <- FALSE
if (scale==0) { if (family$family %in% c("poisson","binomial")) scale <- 1 else scale <- -1}
if (!method%in%c("fREML","GACV.Cp","GCV.Cp","REML",
"ML","P-REML","P-ML")) stop("un-supported smoothness selection method")
if (is.logical(discrete)) {
discretize <- discrete
discrete <- NULL ## use default discretization, if any
} else {
discretize <- if (is.numeric(discrete)) TRUE else FALSE
}
if (discretize) {
if (method!="fREML") {
discretize <- FALSE
warning("discretization only available with fREML")
} else {
if (!is.null(cluster)) warning("discrete method does not use parallel cluster - use nthreads instead")
if (all(is.finite(nthreads)) && any(nthreads>1) && !mgcv.omp()) warning("openMP not available: single threaded computation only")
}
}
if (inherits(family,"extended.family")) {
family <- fix.family.link(family); efam <- TRUE
} else efam <- FALSE
if (method%in%c("fREML")&&!is.null(min.sp)) {
min.sp <- NULL
warning("min.sp not supported with fast REML computation, and ignored.")
}
gp <- interpret.gam(formula) # interpret the formula
if (discretize && length(gp$smooth.spec)==0) {
ok <- TRUE
## check it's not a list formula
if (!is.null(gp$nlp)) for (i in 1:gp$nlp) if (length(gp[[i]]$smooth.spec)>0) ok <- FALSE
if (ok) {
warning("no smooths, ignoring `discrete=TRUE'")
discretize <- FALSE
}
}
if (discretize) {
## re-order the tensor terms for maximum efficiency, and
## signal that "re"/"fs" terms should be constructed with marginals
## also for efficiency
if (is.null(gp$nlp)) for (i in 1:length(gp$smooth.spec)) {
if (inherits(gp$smooth.spec[[i]],"tensor.smooth.spec")) gp$smooth.spec[[i]] <- tero(gp$smooth.spec[[i]])
#if (inherits(gp$smooth.spec[[i]],c("re.smooth.spec","fs.smooth.spec"))&&gp$smooth.spec[[i]]$dim>1)
if (!is.null(gp$smooth.spec[[i]]$tensor.possible)&&gp$smooth.spec[[i]]$dim>1){
class(gp$smooth.spec[[i]]) <- c(class(gp$smooth.spec[[i]]),"tensor.smooth.spec")
if (is.null(gp$smooth.spec[[i]]$margin)) {
gp$smooth.spec[[i]]$margin <- list()
## only ok for 'fs' with univariate metric variable (caught in 'fs' constructor)...
for (j in 1:gp$smooth.spec[[i]]$dim) gp$smooth.spec[[i]]$margin[[j]] <- list(term=gp$smooth.spec[[i]]$term[j])
}
}
} else for (j in 1:length(formula)) if (length(gp[[j]]$smooth.spec)>0) for (i in 1:length(gp[[j]]$smooth.spec)) {
if (inherits(gp[[j]]$smooth.spec[[i]],"tensor.smooth.spec")) gp[[j]]$smooth.spec[[i]] <- tero(gp[[j]]$smooth.spec[[i]])
#if (inherits(gp[[j]]$smooth.spec[[i]],c("re.smooth.spec","fs.smooth.spec"))&&gp[[j]]$smooth.spec[[i]]$dim>1)
if (!is.null(gp[[j]]$smooth.spec[[i]]$tensor.possible)&&gp[[j]]$smooth.spec[[i]]$dim>1) {
class(gp[[j]]$smooth.spec[[i]]) <- c(class(gp[[j]]$smooth.spec[[i]]),"tensor.smooth.spec")
if (is.null(gp[[j]]$smooth.spec[[i]]$margin)) {
gp[[j]]$smooth.spec[[i]]$margin <- list()
## only ok for 'fs' with univariate metric variable (caught in 'fs' constructor)...
for (k in 1:gp[[j]]$smooth.spec[[i]]$dim) gp[[j]]$smooth.spec[[i]]$margin[[k]] <- list(term=gp[[j]]$smooth.spec[[i]]$term[k])
}
}
}
} ## if (discretize)
cl <- match.call() # call needed in gam object for update to work
mf <- match.call(expand.dots=FALSE)
mf$formula <- gp$fake.formula
mf$method <- mf$family<-mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$min.sp <- mf$gc.level <-
mf$gamma <- mf$paraPen<- mf$chunk.size <- mf$rho <- mf$cluster <- mf$discrete <-
mf$use.chol <- mf$samfrac <- mf$nthreads <- mf$G <- mf$fit <- mf$select <- mf$drop.intercept <-
mf$coef <- mf$in.out <- mf$... <-NULL
mf$drop.unused.levels <- drop.unused.levels
mf[[1]] <- quote(stats::model.frame) ## as.name("model.frame")
if (is.list(formula)) { ## then there are several linear predictors
environment(formula) <- environment(formula[[1]]) ## e.g. termplots needs this
pterms <- list()
tlab <- rep("",0)
pmf.names <- rep("",0)
for (i in 1:length(formula)) {
pmf <- mf
pmf$formula <- gp[[i]]$pf
pmf <- eval(pmf, parent.frame())
pmf.names <- c(pmf.names,names(pmf))
pterms[[i]] <- attr(pmf,"terms")
tlabi <- attr(pterms[[i]],"term.labels")
if (i>1&&length(tlabi)>0) tlabi <- paste(tlabi,i-1,sep=".")
tlab <- c(tlab,tlabi)
}
pmf.names <- unique(pmf.names)
attr(pterms,"term.labels") <- tlab ## labels for all parametric terms, distinguished by predictor
nlp <- gp$nlp
lpid <- list() ## list of terms for each lp
lpid[[nlp]] <- rep(0,0)
} else { ## single linear predictor case
nlp <- 1
pmf <- mf
pmf$formula <- gp$pf
pmf <- eval(pmf, parent.frame()) # pmf contains all data for parametric part
pterms <- attr(pmf,"terms") ## pmf only used for this and discretization, if selected.
pmf.names <- names(pmf)
}
if (gc.level>0) gc()
mf <- eval(mf, parent.frame()) # the model frame now contains all the data
if (nrow(mf)<2) stop("Not enough (non-NA) data to do anything meaningful")
terms <- attr(mf,"terms")
if (gc.level>0) gc()
if (rho!=0&&!is.null(mf$"(AR.start)")) if (!is.logical(mf$"(AR.start)")) stop("AR.start must be logical")
## summarize the *raw* input variables
## note can't use get_all_vars here -- buggy with matrices
vars <- all_vars1(gp$fake.formula[-2]) ## drop response here
inp <- parse(text = paste("list(", paste(vars, collapse = ","),")"))
## allow a bit of extra flexibility in what `data' is allowed to be (as model.frame actually does)
if (!is.list(data)&&!is.data.frame(data)) data <- as.data.frame(data)
dl <- eval(inp, data, parent.frame())
if (!control$keepData) { rm(data);if (gc.level>0) gc()} ## save space
names(dl) <- vars ## list of all variables needed
var.summary <- variable.summary(gp$pf,dl,nrow(mf)) ## summarize the input data
rm(dl); if (gc.level>0) gc() ## save space
## should we force the intercept to be dropped, meaning that the constant is removed
## from the span of the parametric effects?
if (is.null(family$drop.intercept)) { ## family does not provide information
lengthf <- if (is.list(formula)) length(formula) else 1
if (is.null(drop.intercept)) drop.intercept <- rep(FALSE,lengthf) else {
drop.intercept <- rep(drop.intercept,length=lengthf) ## force drop.intercept to correct length
if (sum(drop.intercept)) family$drop.intercept <- drop.intercept ## ensure prediction works
}
} else drop.intercept <- as.logical(family$drop.intercept) ## family overrides argument
## need mini.mf for basis setup, then accumulate full X, y, w and offset
if (discretize) {
## discretize the data, creating list mf0 with discrete values
## and indices giving the discretized value for each element of model frame.
## 'discrete' can be null, or contain a discretization size, or
## a discretization size per smooth term.
dk <- discrete.mf(gp,mf,pmf.names,m=discrete)
mf0 <- dk$mf ## padded discretized model frame
sparse.cons <- 0 ## default constraints required for tensor terms
} else {
mf0 <- mini.mf(mf,chunk.size)
sparse.cons <- -1
}
rm(pmf); ## no further use
## allow bam to set up general families, even if it can not (yet) process them...
if (inherits(family,"general.family")&&!is.null(family$presetup)) eval(family$presetup)
gsname <- if (is.list(formula)) "gam.setup.list" else "gam.setup"
if (control$trace) t1 <- proc.time()
reset <- TRUE
while (reset) {
G <- do.call(gsname,list(formula=gp,pterms=pterms,
data=mf0,knots=knots,sp=sp,min.sp=min.sp,
H=NULL,absorb.cons=TRUE,sparse.cons=sparse.cons,select=select,
idLinksBases=!discretize,scale.penalty=control$scalePenalty,
paraPen=paraPen,apply.by=!discretize,drop.intercept=drop.intercept,modCon=2))
if (!discretize&&ncol(G$X)>=chunk.size) { ## no point having chunk.size < p
chunk.size <- 4*ncol(G$X)
warning(gettextf("chunk.size < number of coefficients. Reset to %d",chunk.size))
if (chunk.size>=nrow(mf)) { ## no sense splitting up computation
mf0 <- mf ## just use full dataset
} else reset <- FALSE
} else reset <- FALSE
}
if (control$trace) t2 <- proc.time()
if (discretize) {
ks <- matrix(0,0,2) ## NOTE: slightly more efficient not to repeatedly extend
if (nlp>1) lpi <- attr(G$X,"lpi")
v <- G$Xd <- list()
kb <- k <- 1 ## kb indexes blocks, k indexes marginal matrices
G$kd <- dk$k
qc <- dt <- ts <- rep(0,length(G$smooth))
## have to extract full parametric model matrix from pterms and mf
npt <- if (nlp==1) 1 else length(G$pterms)
lpip <- list() ## record coef indices for each discretized term
for (j in 1:npt) { ## loop over parametric terms in each formula
paratens <- TRUE
## get the parametric model split into tensor components...
ptens <- if (nlp==1&&!is.list(G$pterms)) terms2tensor(G$pterms,mf0,drop.intercept=drop.intercept[j]) else
terms2tensor(G$pterms[[j]],mf0,drop.intercept=drop.intercept[j])
if (j>1) ptens$term.labels <- paste(ptens$term.labels,".",j-1,sep="")
## now locate the index vectors for each parametric marginal discrete matrix ptens$X
if (!is.null(ptens)) {
np <-length(ptens$X);n <- nrow(mf)
qc <- c(rep(0,length(ptens$ts)),qc) ## extend (empty) constraint indicator
coef.ind <-1
kk <- 1
for (i in 1:length(ptens$ts)) {
jj <- 1
ts[kb] = k;names(ts)[kb] <- ptens$term.labels[i]
dt[kb] = ptens$dt[i]
for (ii in 1:dt[kb]) {
ks <- rbind(ks,dk$ks[ptens$xname[kk],])
G$Xd[[k]] <- ptens$X[[kk]][1:dk$nr[ptens$xname[kk]],,drop=FALSE];
kk <- kk + 1
jj <- jj * ncol(G$Xd[[k]]) ## number of coeffs for this term
k <- k + 1 ## update matrix counter
}
lpip[[kb]] <- 1:jj - 1 + if (nlp==1) coef.ind else attr(G$nsdf,"pstart")[j] - 1 + coef.ind
coef.ind <- coef.ind + jj
if (nlp>1) for (ii in 1:length(lpi)) if (any(lpip[[kb]]%in%lpi[[ii]])) lpid[[ii]] <- c(lpid[[ii]],kb)
kb <- kb + 1 ## update block counter
}
} ## is.null(ptens)
} ## loop over parametric terms in each formula
## k is marginal counter, kb is block counter
## G$kd[,ks[j,1]:ks[j,2]] (dk$k) gives index columns for term j, thereby allowing
## summation over matrix covariates....
#G$ks <- cbind(dk$k.start[-length(dk$k.start)],dk$k.start[-1])
drop <- rep(0,0) ## index of te related columns to drop
if (length(G$smooth)>0) for (i in 1:length(G$smooth)) { ## loop over smooths
## potentially each smoother model matrix can be made up of a sequence
## of row-tensor products, need to loop over such sub blocks...
nsub <- if (!is.null(G$smooth[[i]]$ts)) length(G$smooth[[i]]$ts) else 1
lp0 <- G$smooth[[i]]$first.para -1 ## offset for start of coeffs for this sub block
for (sb in 1:nsub) { ## loop over sub-blocks
np <- 1 ## compute number of sub-block coeffs
ts[kb] <- k;names(ts)[kb] <- G$smooth[[i]]$label
## first deal with any by variable (as first marginal of tensor)...
if (G$smooth[[i]]$by!="NA") {
dt[kb] <- 1
termk <- G$smooth[[i]]$by
by.var <- dk$mf[[termk]][1:dk$nr[termk]]
if (is.factor(by.var)) {
## create dummy by variable...
by.var <- as.numeric(by.var==G$smooth[[i]]$by.level)
}
G$Xd[[k]] <- matrix(by.var,dk$nr[termk],1)
np <- ncol(G$Xd[[k]])
ks <- rbind(ks,dk$ks[termk,])
k <- k + 1
by.present <- 1
} else by.present <- dt[kb] <- 0
## ... by done
if (inherits(G$smooth[[i]],"tensor.smooth")) {
nmar <- if (is.null(G$smooth[[i]]$dt)) length(G$smooth[[i]]$margin) else G$smooth[[i]]$dt[sb]
dt[kb] <- dt[kb] + nmar
jind <- if (sb>1) G$smooth[[i]]$ts[sb] + 1:G$smooth[[i]]$dt[sb] - 1 else 1:nmar
for (j in jind) {
termk <- G$smooth[[i]]$margin[[j]]$term[1]
G$Xd[[k]] <- G$smooth[[i]]$margin[[j]]$X[1:dk$nr[termk],,drop=FALSE]
np <- np * ncol(G$Xd[[k]])
ks <- rbind(ks,dk$ks[termk,])
k <- k + 1
}
## deal with any side constraints on tensor terms
if (sb==1) { ## only once per smooth!
di <- attr(G$smooth[[i]],"del.index")
if (!is.null(di)&&length(di)>0) {
di <- di + G$smooth[[i]]$first.para + length(drop) - 1
drop <- c(drop,di)
}
## deal with tensor smooth constraint
qrc <- attr(G$smooth[[i]],"qrc")
## compute v such that Q = I-vv' and Q[,-1] is constraint null space basis
if (inherits(qrc,"qr")) {
v[[kb]] <- qrc$qr/sqrt(qrc$qraux);v[[kb]][1] <- sqrt(qrc$qraux)
qc[kb] <- 1 ## indicate a constraint
} else if (length(qrc)>1) { ## Kronecker product of set to zero constraints
## on entry qrc is [unused.index, dim1, dim2,..., total number of constraints]
v[[kb]] <- c(length(qrc)-2,qrc[-1]) ## number of sum-to-zero contrasts, their dimensions, number of constraints
qc[kb] <- -1
} else {
v[[kb]] <- rep(0,0) ##
if (!inherits(qrc,"character")||qrc!="no constraints") warning("unknown tensor constraint type")
}
} else { ## sb==1 once per smooth stuff
qc <- c(qc,0) ## extend
v[[kb]] <- rep(0,0)
}
} else { ## not a tensor smooth
v[[kb]] <- rep(0,0)
dt[kb] <- dt[kb] + 1
termk <- G$smooth[[i]]$term[1]
G$Xd[[k]] <- G$X[1:dk$nr[termk],G$smooth[[i]]$first.para:G$smooth[[i]]$last.para,drop=FALSE]
np <- np * ncol(G$Xd[[k]])
ks <- rbind(ks,dk$ks[termk,])
k <- k + 1
}
#jj <- G$smooth[[i]]$first.para:G$smooth[[i]]$last.para;
if (sb==1&&qc[kb]) {
ncon <- if (qc[kb]<0) v[[kb]][length(v[[kb]])] else 1
jj <- 1:(np-ncon) + lp0; lp0 <- lp0 + np - ncon
## Hard to think of an application requiring constraint when nsub>1, hence not
## worked out yet. Add warning to make sure this is flagged if attempt made
## to do this in future....
if (nsub>1) warning("constraints for sub blocked tensor products un-tested")
} else {
jj <- 1:np + lp0; lp0 <- lp0 + np
}
lpip[[kb]] <- jj
if (nlp>1) { ## record which lp each discrete term belongs to (can be more than one)
for (j in 1:nlp) if (any(jj %in% lpi[[j]])) lpid[[j]] <- c(lpid[[j]],kb)
}
kb <- kb + 1
} ## sub block loop
} ## looping over smooths
## put lpid indices into coefficient index order...
if (nlp>1) {
for (j in 1:nlp) lpid[[j]] <- lpid[[j]][order(unlist(lapply(lpip[lpid[[j]]],max)))]
G$lpid <- lpid
}
if (length(drop>0)) G$drop <- drop ## index of terms to drop as a result of side cons on tensor terms
attr(G$Xd,"lpip") <- lpip ## index of coefs by term
## ... Xd is the list of discretized model matrices, or marginal model matrices
## kd contains indexing vectors as its columns, so the ith model matrix or margin is Xd[[i]][kd[,i],]
## ts[i] is the starting matrix in Xd for the ith model matrix, while dt[i] is the number
## of elements of Xd that make it up (1 for a singleton, more for a tensor).
## v is list of Householder vectors encoding constraints and qc the constraint indicator.
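## For example (an illustrative reading, consistent with the trimming of kd
## below): with ts = c(1,2) and dt = c(1,2), term 1's full model matrix is
## Xd[[1]][kd[,ks[1,1]],], while term 2 is the row tensor product of Xd[[2]]
## and Xd[[3]], each expanded using its index columns ks[j,1]:(ks[j,2]-1) of kd
## (more than one column only for matrix covariates).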
G$v <- v;G$ts <- ts;G$dt <- dt;G$qc <- qc
G$ks <- ks
jj <- max(G$ks)-1
if (ncol(G$kd) > jj) G$kd <- G$kd[,1:jj]
## bundle up discretization information needed for discrete prediction...
G$dinfo <- list(gp=gp, v = G$v, ts = G$ts, dt = G$dt, qc = G$qc, drop = G$drop, pmf.names=pmf.names,lpip=lpip)
if (nlp>1) G$dinfo$lpid <- lpid
if (paratens) G$dinfo$para.discrete <- TRUE
} ## if (discretize)
if (control$trace) t3 <- proc.time()
## no advantage to "fREML" with no free smooths...
if (((!is.null(G$L)&&ncol(G$L) < 1)||(length(G$sp)==0))&&method=="fREML") method <- "REML"
G$var.summary <- var.summary
G$family <- family
G$terms<-terms;
G$pred.formula <- gp$pred.formula
n <- nrow(mf)
if (is.null(mf$"(weights)")) G$w<-rep(1,n)
else G$w<-mf$"(weights)"
G$y <- mf[[gp$response]]
## now get offset, dealing with possibility of multiple predictors (see gam.setup)
## the point is that G$offset relates to the compressed or discretized model frame,
## so we need to correct it to the full data version...
if (discretize) {
if (is.list(pterms)) { ## multiple predictors
for (i in 1:length(pterms)) {
offi <- attr(pterms[[i]],"offset")
if (is.null(offi)) G$offset[[i]] <- rep(0,n) else {
G$offset[[i]] <- mf[[names(attr(pterms[[i]],"dataClasses"))[offi]]]
if (is.null(G$offset[[i]])) G$offset[[i]] <- rep(0,n)
}
}
} else { ## single predictor, handle as non-discrete
G$offset <- model.offset(mf)
if (is.null(G$offset)) G$offset <- rep(0,n)
}
} else { ## non-discrete
G$offset <- model.offset(mf)
if (is.null(G$offset)) G$offset <- rep(0,n)
}
## if (!discretize && ncol(G$X)>nrow(mf)) stop("Model has more coefficients than data")
if (ncol(G$X) > chunk.size && !discretize) { ## no sense having chunk.size < p
chunk.size <- 4*ncol(G$X)
warning(gettextf("chunk.size < number of coefficients. Reset to %d",chunk.size))
}
G$cl <- cl
G$am <- am
G$min.edf<-G$nsdf #-dim(G$C)[1]
if (G$m) for (i in 1:G$m) G$min.edf<-G$min.edf+G$smooth[[i]]$null.space.dim
G$discretize <- discretize
G$formula<-formula
## environment(G$formula)<-environment(formula)
environment(G$pterms) <- environment(G$terms) <- environment(G$pred.formula) <-
environment(G$formula) <- .BaseNamespaceEnv
} else { ## G supplied
if (scale<=0) scale <- G$scale
efam <- G$efam
mf <- G$mf; G$mf <- NULL
gp <- G$gp; G$gp <- NULL
na.action <- G$na.action; G$na.action <- NULL
if (!is.null(sp)&&any(sp>=0)) { ## request to modify smoothing parameters
if (is.null(G$L)) G$L <- diag(length(G$sp))
if (length(sp)!=ncol(G$L)) stop('length of sp must be number of free smoothing parameters in original model')
ind <- sp>=0 ## which smoothing parameters are now fixed
spind <- log(sp[ind]);
spind[!is.finite(spind)] <- -30 ## set any zero parameters to effective zero
G$lsp0 <- G$lsp0 + drop(G$L[,ind,drop=FALSE] %*% spind) ## add fix to lsp0
G$L <- G$L[,!ind,drop=FALSE] ## drop the now fixed cols of G$L
G$sp <- rep(-1,ncol(G$L))
}
} ## end of G setup
if (!fit) {
G$efam <- efam
G$scale <- scale
G$mf <- mf;G$na.action <- na.action;G$gp <- gp
class(G) <- "bam.prefit"
return(G)
}
if (inherits(G$family,"general.family")) stop("general families not supported by bam")
## number of threads to use for non-cluster node computation
if (!is.finite(nthreads[1])||nthreads[1]<1) nthreads[1] <- max(1,length(cluster))
G$conv.tol<-control$mgcv.tol # tolerance for mgcv
G$max.half<-control$mgcv.half # max number of step halvings in bfgs optimization
## now build up proper model matrix, and deal with y, w, and offset...
if (control$trace) cat("Setup complete. Calling fit\n")
colnamesX <- colnames(G$X)
if (G$am&&!G$discretize) {
if (nrow(mf)>chunk.size) G$X <- matrix(0,0,ncol(G$X)); if (gc.level>1) gc()
object <- bam.fit(G,mf,chunk.size,gp,scale,gamma,method,rho=rho,cl=cluster,
gc.level=gc.level,use.chol=use.chol,in.out=in.out,npt=nthreads[1])
} else if (G$discretize) {
object <- bgam.fitd(G, mf, gp ,scale ,nobs.extra=0,rho=rho,coef=coef,
control = control,npt=nthreads,gc.level=gc.level,gamma=gamma,in.out=in.out,...)
} else {
G$X <- matrix(0,0,ncol(G$X)); if (gc.level>1) gc()
if (rho!=0) warning("AR1 parameter rho unused with generalized model")
if (samfrac<1 && samfrac>0) { ## sub-sample first to get close to right answer...
ind <- sample(1:nrow(mf),ceiling(nrow(mf)*samfrac))
if (length(ind)<2*ncol(G$X)) warning("samfrac too small - ignored") else {
Gw <- G$w;Goffset <- G$offset
G$w <- G$w[ind];G$offset <- G$offset[ind]
control1 <- control
control1$epsilon <- 1e-2
object <- bgam.fit(G, mf[ind,], chunk.size, gp ,scale ,gamma,method=method,nobs.extra=0,
control = control1,cl=cluster,npt=nthreads[1],gc.level=gc.level,coef=coef,
use.chol=use.chol,samfrac=1,in.out=in.out,...)
G$w <- Gw;G$offset <- Goffset
coef <- object$coefficients
}
}
## fit full dataset
object <- bgam.fit(G, mf, chunk.size, gp ,scale ,gamma,method=method,coef=coef,
control = control,cl=cluster,npt=nthreads[1],gc.level=gc.level,
use.chol=use.chol,in.out=in.out,...)
}
if (gc.level>0) gc()
if (control$trace) t4 <- proc.time()
if (control$trace) cat("Fit complete. Finishing gam object.\n")
if (scale < 0) { object$scale.estimated <- TRUE;object$scale <- object$scale.est} else {
object$scale.estimated <- FALSE; object$scale <- scale
}
object$assign <- G$assign # applies only to pterms
object$boundary <- FALSE # always FALSE for this case
object$call<-G$cl # needed for update() to work
object$cmX <- G$cmX ## column means of model matrix --- useful for CIs
object$contrasts <- G$contrasts
object$control <- control
object$converged <- TRUE ## no iteration
object$data <- NA ## not saving it in this case
object$df.null <- nrow(mf)
object$df.residual <- object$df.null - sum(object$edf)
if (is.null(object$family)) object$family <- family
object$formula <- G$formula
if (method=="GCV.Cp") {
if (scale<=0) object$method <- "GCV" else object$method <- "UBRE"
} else {
object$method <- method
}
object$min.edf<-G$min.edf
object$model <- mf;rm(mf);if (gc.level>0) gc()
object$na.action <- attr(object$model,"na.action") # how to deal with NA's
object$nsdf <- G$nsdf
if (G$nsdf>0) names(object$coefficients)[1:G$nsdf] <- colnamesX[1:G$nsdf]
object$offset <- G$offset
##object$prior.weights <- G$w
object$pterms <- G$pterms
object$pred.formula <- G$pred.formula
object$smooth <- G$smooth
object$terms <- G$terms
object$var.summary <- G$var.summary
if (is.null(object$wt)) object$weights <- object$prior.weights else
object$weights <- object$wt
object$xlevels <- G$xlevels
#object$y <- object$model[[gp$response]]
object$NA.action <- na.action ## version to use in bam.update
names(object$sp) <- names(G$sp)
if (!is.null(object$full.sp)) names(object$full.sp) <- names(G$lsp0)
names(object$coefficients) <- G$term.names
names(object$edf) <- G$term.names
## note that predict.gam assumes it is ok not to split the model frame when no
## new data are supplied, so newdata must be supplied explicitly here
class(object) <- c("bam","gam","glm","lm")
if (!G$discretize) { object$linear.predictors <-
as.numeric(predict.bam(object,newdata=object$model,block.size=chunk.size,cluster=cluster))
} else { ## store discretization specific information to help with discrete prediction
#object$dinfo <- list(gp=gp, v = G$v, ts = G$ts, dt = G$dt, qc = G$qc, drop = G$drop, pmf.names=pmf.names,lpip=lpip)
#if (paratens) object$dinfo$para.discrete <- TRUE
object$dinfo <- G$dinfo
}
rm(G);if (gc.level>0) gc()
if (is.null(object$fitted.values)) object$fitted.values <- family$linkinv(object$linear.predictors)
object$residuals <- if (is.null(family$residuals)) sqrt(family$dev.resids(object$y,object$fitted.values,object$prior.weights)) *
sign(object$y-object$fitted.values) else residuals(object)
if (rho!=0) object$std.rsd <- AR.resid(object$residuals,rho,object$model$"(AR.start)")
if (!efam || is.null(object$deviance)) object$deviance <- sum(object$residuals^2)
## 'dev' is used in family$aic to estimate scale. That's standard and fine for Gaussian data, but
## can lead to badly biased estimates for e.g. low count data with the Tweedie (see Fletcher Biometrika paper)
## So set dev to give object $sig2 estimate when divided by sum(prior.weights)...
dev <- if (family$family!="gaussian"&&!is.null(object$sig2)) object$sig2*sum(object$prior.weights) else object$deviance ## used to give scale in family$aic
if (rho!=0&&family$family=="gaussian") dev <- sum(object$std.rsd^2)
object$aic <- if (efam) family$aic(object$y,object$fitted.values,family$getTheta(),object$prior.weights,dev) else
family$aic(object$y,1,object$fitted.values,object$prior.weights,dev)
object$aic <- object$aic -
2 * (length(object$y) - sum(object$model[["(AR.start)"]]))*log(1/sqrt(1-rho^2)) + ## correction for AR
2*sum(object$edf)
if (!is.null(object$edf2)&&sum(object$edf2)>sum(object$edf1)) object$edf2 <- object$edf1
if (is.null(object$null.deviance)) object$null.deviance <- sum(family$dev.resids(object$y,weighted.mean(object$y,object$prior.weights),object$prior.weights))
if (!is.null(object$full.sp)) {
if (length(object$full.sp)==length(object$sp)&&
all.equal(object$sp,object$full.sp)==TRUE) object$full.sp <- NULL
}
environment(object$formula) <- environment(object$pred.formula) <-
environment(object$terms) <- environment(object$pterms) <-
environment(attr(object$model,"terms")) <- .GlobalEnv
if (control$trace) {
t5 <- proc.time()
t5 <- rbind(t1-t0,t2-t1,t3-t2,t4-t3,t5-t4)[,1:3]
row.names(t5) <- c("initial","gam.setup","pre-fit","fit","finalise")
print(t5)
}
names(object$gcv.ubre) <- method
object
} ## end of bam
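## Illustrative usage sketch (not run; gamSim just provides example data):
## library(mgcv)
## dat <- gamSim(1, n = 50000)
## b <- bam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat,
##          method = "fREML", discrete = TRUE, nthreads = 2)
## summary(b)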
bam.update <- function(b,data,chunk.size=10000) {
## update the strictly additive gaussian model `b' in the light of new data in `data'
## Need to update modelframe (b$model)
if (is.null(b$qrx)) {
stop("Model can not be updated")
}
gp<-interpret.gam(b$formula) # interpret the formula
## next 2 lines problematic if there are missings in the response, so now constructed from mf below...
## X <- predict(b,newdata=data,type="lpmatrix",na.action=b$NA.action) ## extra part of model matrix
## rownames(X) <- NULL
cnames <- names(b$coefficients)
AR.start <- NULL ## keep R checks happy
## now get the new data in model frame form...
getw <- "(weights)"%in%names(b$model)
getARs <- "(AR.start)"%in%names(b$model)
if (getw&&getARs) {
mf <- model.frame(gp$fake.formula,data,weights=weights,AR.start=AR.start,
xlev=b$xlev,na.action=b$NA.action)
w <- mf[["(weights)"]]
} else if (getw) {
mf <- model.frame(gp$fake.formula,data,weights=weights,xlev=b$xlev,na.action=b$NA.action)
w <- mf[["(weights)"]]
} else if (getARs) {
mf <- model.frame(gp$fake.formula,data,AR.start=AR.start,xlev=b$xlev,na.action=b$NA.action)
w <- rep(1,nrow(mf))
} else {
mf <- model.frame(gp$fake.formula,data,xlev=b$xlev,na.action=b$NA.action)
w <- rep(1,nrow(mf))
}
X <- predict(b,newdata=mf,type="lpmatrix",na.action=b$NA.action) ## extra part of model matrix
rownames(X) <- NULL
b$model <- rbind(b$model,mf) ## complete model frame --- old + new
## get response and offset...
off.col <- attr(attr(b$model,"terms"),"offset")
if (is.null(off.col)) offset <- rep(0,nrow(mf)) else offset <- mf[,off.col]
y <- mf[,attr(attr(b$model,"terms"),"response")] - offset
## update G
b$G$y <- c(b$G$y,y)
b$G$offset <- c(b$G$offset,offset)
b$G$w <- c(b$G$w,w)
b$G$n <- nrow(b$model)
n <- b$G$n;
## update the qr decomposition...
w <- sqrt(w)
if (b$AR1.rho!=0) { ## original model had AR1 error structure...
rho <- b$AR1.rho
ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
sd <- -rho*ld ## sub diagonal
## append the final row of weighted X and y from original fit, first
wy <- c(b$yX.last[1],w*y)
wX <- rbind(b$yX.last[-1],w*X)
m <- nrow(wX)
b$yX.last <- c(wy[m],wX[m,])
row <- c(1,rep(1:m,rep(2,m))[-c(1,2*m)])
weight <- c(1,rep(c(sd,ld),m-1))
stop <- c(1,1:(m-1)*2+1)
if (!is.null(mf$"(AR.start)")) { ## need to correct the start of new AR sections...
ii <- which(mf$"(AR.start)"==TRUE)
if (length(ii)>0) {
if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
weight[ii*2-2] <- 0 ## zero sub diagonal
weight[ii*2-1] <- 1 ## set leading diagonal to 1
}
}
## re-weight to independence....
wX <- rwMatrix(stop,row,weight,wX)[-1,]
wy <- rwMatrix(stop,row,weight,wy)[-1]
## update
b$qrx <- qr_update(wX,wy,b$qrx$R,b$qrx$f,b$qrx$y.norm2)
} else {
b$qrx <- qr_update(w*X,w*y,b$qrx$R,b$qrx$f,b$qrx$y.norm2)
}
## now do the refit...
rss.extra <- b$qrx$y.norm2 - sum(b$qrx$f^2)
if (b$method=="GCV"||b$method=="UBRE") method <- "GCV.Cp" else method <- b$method
if (method=="GCV.Cp") {
if (b$method=="GCV") scale <- -1 else scale = b$sig2
fit <- magic(b$qrx$f,b$qrx$R,b$sp,b$G$S,b$G$off,L=b$G$L,lsp0=b$G$lsp0,rank=b$G$rank,
H=b$G$H,C= matrix(0,0,ncol(b$qrx$R)),##C=b$G$C,
gamma=b$gamma,scale=scale,gcv=(scale<=0),
extra.rss=rss.extra,n.score=n)
post <- magic.post.proc(b$qrx$R,fit,b$qrx$f*0+1)
b$y <- b$G$y;b$offset <- b$G$offset; b$G$w -> b$weights -> b$prior.weights;
} else if (method=="fREML") { ## fast REML
um <- Sl.Xprep(b$Sl,b$qrx$R)
lsp0 <- log(b$sp) ## initial s.p.
log.phi <- log(b$sig2) ## initial or fixed scale
fit <- fast.REML.fit(um$Sl,um$X,b$qrx$f,rho=lsp0,L=b$G$L,rho.0=b$G$lsp0,
log.phi=log.phi,phi.fixed = !b$scale.estimated,rss.extra=rss.extra,
nobs =n,Mp=um$Mp,nt=1,gamma=b$gamma)
if (b$scale.estimated) scale <- -1 else scale=b$sig2
res <- Sl.postproc(b$Sl,fit,um$undrop,b$qrx$R,cov=TRUE,scale=scale,L=b$G$L)
object <- list(coefficients=res$beta,edf=res$edf,edf1=res$edf1,edf2=res$edf2,##F=res$F,
gcv.ubre=fit$reml,hat=res$hat,outer.info=list(iter=fit$iter,
message=fit$conv),optimizer="fast-REML",rank=ncol(um$X),
Ve=res$Ve,Vp=res$Vp,Vc=res$Vc,db.drho=fit$d1b,scale.estimated = scale<=0)
if (scale<=0) { ## get sp's and scale estimate
nsp <- length(fit$rho)
object$sig2 <- object$scale <- exp(fit$rho[nsp])
object$sp <- exp(fit$rho[-nsp])
nsp <- length(fit$rho.full)
object$full.sp <- exp(fit$rho.full[-nsp])
} else { ## get sp's
object$sig2 <- object$scale <- scale
object$sp <- exp(fit$rho)
object$full.sp <- exp(fit$rho.full)
}
if (b$AR1.rho!=0) { ## correct RE/ML score for AR1 transform
df <- if (getARs) sum(b$model$"(AR.start)") else 1
object$gcv.ubre <- object$gcv.ubre - (n-df)*log(ld)
}
b$G$X <- b$qrx$R;b$G$dev.extra <- rss.extra
b$G$pearson.extra <- rss.extra;b$G$n.true <- n
b$y <- b$G$y;b$offset <- b$G$offset; b$G$w -> b$weights -> b$prior.weights;
} else { ## method is "REML" or "ML"
y <- b$G$y; w <- b$G$w;offset <- b$G$offset
b$G$y <- b$qrx$f
b$G$w <- b$G$y*0+1
b$G$X <- b$qrx$R
b$G$n <- length(b$G$y)
b$G$offset <- b$G$y*0
b$G$dev.extra <- rss.extra
b$G$pearson.extra <- rss.extra
b$G$n.true <- n
if (b$scale.estimated) scale <- -1 else scale = b$sig2
in.out <- list(sp=b$sp,scale=b$reml.scale)
object <- gam(G=b$G,method=method,gamma=b$gamma,scale=scale,in.out=in.out)
if (b$AR1.rho!=0) { ## correct RE/ML score for AR1 transform
df <- if (getARs) sum(b$model$"(AR.start)") else 1
object$gcv.ubre <- object$gcv.ubre - (n-df)*log(ld)
}
offset -> b$G$offset -> b$offset
w -> b$G$w -> b$weights -> b$prior.weights; n -> b$G$n
y -> b$G$y -> b$y;
}
if (method=="GCV.Cp") {
b$coefficients <- fit$b
b$edf <- post$edf
b$edf1 <- post$edf1
##b$F <- post$F
b$full.sp <- fit$sp.full
b$gcv.ubre <- fit$score
b$hat <- post$hat
b$mgcv.conv <- fit$gcv.info
b$optimizer="magic"
b$rank <- fit$gcv.info$rank
b$Ve <- post$Ve
b$Vp <- post$Vb
b$sig2 <- b$scale <- fit$scale
b$sp <- fit$sp
} else { ## REML or ML
b$coefficients <- object$coefficients
b$edf <- object$edf
b$edf1 <- object$edf1
##b$F <- object$F
b$full.sp <- object$full.sp
b$gcv.ubre <- object$gcv.ubre
b$hat <- object$hat
b$outer.info <- object$outer.info
b$rank <- object$rank
b$Ve <- object$Ve
b$Vp <- object$Vp
b$sig2 <- b$scale <- object$sig2
b$sp <- object$sp
if (b$AR1.rho!=0) { ## correct RE/ML score for AR1 transform
b$gcv.ubre <- b$gcv.ubre - (n-1)*log(ld)
}
}
b$R <- b$qrx$R
b$G$X <- NULL
b$linear.predictors <- as.numeric(predict.gam(b,newdata=b$model,block.size=chunk.size))
b$fitted.values <- b$linear.predictors ## strictly additive only!
b$residuals <- sqrt(b$family$dev.resids(b$y,b$fitted.values,b$prior.weights)) *
sign(b$y-b$fitted.values)
b$deviance <- sum(b$residuals^2)
b$aic <- b$family$aic(b$y,1,b$fitted.values,b$prior.weights,b$deviance) +
2 * sum(b$edf)
if (b$AR1.rho!=0) { ## correct aic for AR1 transform
df <- if (getARs) sum(b$model$"(AR.start)") else 1
b$aic <- b$aic + 2*(n-df)*log(ld)
}
b$null.deviance <- sum(b$family$dev.resids(b$y,mean(b$y),b$prior.weights))
names(b$coefficients) <- names(b$edf) <- cnames
b
} ## end of bam.update
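## Illustrative usage sketch (not run; 'dat1' and 'dat2' are data frames holding
## the same variables). Updating applies only to strictly additive gaussian fits
## for which the QR accumulation (b$qrx) was stored...
## b <- bam(y ~ s(x), data = dat1) ## initial fit
## b <- bam.update(b, dat2) ## b now reflects rbind(dat1, dat2)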
#### ISSUES:
## ? negative binomial support --- docs say it's there...
## offset unused in bam/bgam.fit, also gp only needed for "response",
## so could efficiently be replaced