Nothing
## (c) Natalya Pya (2012-2023). Released under GPL2.
## efsudr.scam based on (c) Simon N Wood (efsudr(mgcv))
#########################################################
# Function to return gcv/ubre ... ##
#########################################################
gcv.ubre <- function(rho,G,env,control)
{ ## function to get GCV.UBRE value for optim()...
if (length(rho)!= length(G$off)) stop (paste("length of rho and n.terms has to be the same"))
sp <- exp(rho)
b <- scam.fit(G=G, sp=sp, env=env,control=scam.control())
if (G$scale.known) # value of Mallow's Cp/UBRE/AIC ....
{ n <- nrow(G$X)
gcv.ubre <- b$dev/n - G$sig2 +2*G$gamma*b$trA*G$sig2/n
} else # value of GCV ...
gcv.ubre <- b$gcv
return(gcv.ubre)
}
#########################################################
## function to get the gradient of the gcv/ubre..... ##
#########################################################
gcv.ubre.derivative <- function(rho,G, env, control)
{ ## function to return derivative of GCV or UBRE for optim...
gcv.ubre_grad(rho, G, env, control=control)$gcv.ubre.rho
}
#############################################################################
## for nlm() function to get the gcv/ubre and gradient of the gcv/ubre.....##
#############################################################################
dgcv.ubre.nlm <- function(rho,G, env, control)
{ ## GCV UBRE objective function for nlm
gg <- gcv.ubre_grad(rho, G, env, control=control)
attr(gg$gcv.ubre,"gradient") <- gg$gcv.ubre.rho
gg$gcv.ubre
}
#######################################################
#### estimate.scam().... ##
#######################################################
estimate.scam <- function(G,optimizer,optim.method,rho, env, control)
## check.analytical, del, devtol.fit, steptol.fit)
## function to select smoothing parameter by minimizing GCV/UBRE criterion...
{ ## set 'newton' method for coeff estimation if not specified
## (it can happen if 'optimizer' was supplied with one element, specifying
## the sp optimization method to use as, e.g., 'optimizer="efs")
if (is.na(optimizer[2]))
optimizer[2] <- "newton"
if (!(optimizer[2] %in% c("newton", "bfgs")) )
stop("unknown optimization method to use for the model coefficient estimation")
if (optimizer[2]== "bfgs")
if (optimizer[1] !="efs") {
warning("`bfgs` method for the coefficient estimation works only together with `efs` method; `efs' was used")
optimizer[1] <- "efs"
}
if (!(optimizer[1] %in% c("bfgs", "nlm", "optim","nlm.fd","efs")) )
stop("unknown optimization method to use to optimize the smoothing parameter estimation criterion")
if (length(rho)==0) { ## no sp estimation to do -- run a fit instead
optimizer[1] <- "no.sps" ## will cause scam.fit/scam.fit1 to be called, below
}
if (optimizer[1] == "bfgs") {## minimize GCV/UBRE by BFGS...
b <- bfgs_gcv.ubre(gcv.ubre_grad,rho=rho, G=G,env=env, control=control) ## check.analytical=check.analytical, del=del,devtol.fit=devtol.fit, steptol.fit=steptol.fit)
sp <- exp(b$rho)
object <- b$object
object$gcv.ubre <- b$gcv.ubre
object$dgcv.ubre <- b$dgcv.ubre
object$termcode <- b$termcode
object$check.grad <- b$check.grad
object$dgcv.ubre.check <- b$dgcv.ubre.check
object$conv.bfgs <- b$conv.bfgs
object$iterations <- b$iterations
object$score.hist <- b$score.hist
}
else if (optimizer[1]=="optim"){ ## gr=gcv.ubre.derivative
if (!(optim.method[1] %in% c("Nelder-Mead", "BFGS", "CG", "L-BFGS-B", "SANN")))
{ warning("unknown optim() method, `L-BFGS-B' were used")
optim.method[1] <- "L-BFGS-B"
}
if (is.na(optim.method[2]))
{ warning("the second parameter of optim.method argument is not supplied,
finite-difference approximation of the gradient were used")
grr <- NULL
} else if (!(optim.method[2] %in% c("fd","grad")))
{ warning("only `fd' and `grad' options are possible, finite-difference
approximation of the gradient were used")
grr <- NULL
} else if (optim.method[2] == "grad")
grr <- gcv.ubre.derivative
else
grr <- NULL
b <- optim(par=rho,fn=gcv.ubre, gr=grr, method=optim.method[1],G=G, control=
list(factr=control$optim$factr,lmm=min(5,length(rho))), env=env)
sp <- exp(b$par)
gcv.ubre <- b$value
dgcv.ubre <- NULL
iterations <- b$counts
termcode <- b$convergence
if (termcode == 0)
conv <- "Successful completion"
else if (termcode == 1)
conv <- "The iteration limit `maxit' had been reached"
else if (termcode == 10)
conv <- "Degeneracy of the Nelder-Mead simplex"
else if (termcode == 51)
conv <- "A warning from the `L-BFGS-B' method; see help for `optim' for further details"
else if (termcode == 52)
conv <- "An error from the `L-BFGS-B' method; see help for `optim' for further details"
}
else if (optimizer[1]=="nlm.fd") {## nlm() with finite difference derivatives...
b <- nlm(f=gcv.ubre, p=rho,typsize=rho, stepmax = control$nlm$stepmax, ndigit = control$nlm$ndigit,
gradtol = control$nlm$gradtol, steptol = control$nlm$steptol,
iterlim = control$nlm$iterlim, G=G, env=env, control=control)
}
else if (optimizer[1]=="nlm"){ ## nlm() with analytical derivatives...
b <- nlm(f=dgcv.ubre.nlm, p=rho,typsize=rho, stepmax = control$nlm$stepmax, ndigit = control$nlm$ndigit, gradtol = control$nlm$gradtol, steptol = control$nlm$steptol, iterlim = control$nlm$iterlim, G=G,env=env, control=control)
}
else if (optimizer[1]=="efs"){ ## Extended Fellner-Schall method
## if bfgs method is used for model coeff. estimation, the inner fit function is scam.fit1(),
## if newton method then the fit function is scam.fit()...
fit.fn <- if (optimizer[2]== "bfgs") scam.fit1 else scam.fit
b <- efsudr.scam2(fit.fn=fit.fn,G=G,lsp=rho,env=env, control=control)
sp <- b$sp
object <- b
object$gcv.ubre <- b$gcv.ubre
object$outer.info <- b$outer.info
object$inner.info <- b$inner.info
## if (optimizer[2]== "bfgs") { ## bfgs method for the model coeff. estimation
## b <- efsudr.scam2(G=G,lsp=rho,env=env, control=control)
## sp <- b$sp
## object <- b
## # object$iterations <- b$niter
## # object$conv <- b$outer.info$conv
## # object$score.hist <- b$outer.info$score.hist
## object$gcv.ubre <- b$gcv.ubre
## object$outer.info <- b$outer.info
## object$inner.info <- b$inner.info
## } else { ## newton method for the model coeff. estimation
## b <- efsudr.scam(G=G,lsp=rho,env=env, control=control)
## ## b <- efsudr.scam1(G=G,lsp=rho,env=env, control=control)
## sp <- b$sp
## object <- b
## object$gcv.ubre <- b$gcv.ubre
## object$outer.info <- b$outer.info
## object$inner.info <- b$inner.info
## }
}
## get some convergence info from the optimization method used...
if (optimizer[1]== "nlm.fd" || optimizer[1]== "nlm"){
sp <- exp(b$estimate)
gcv.ubre <- b$minimum
dgcv.ubre <- b$gradient
iterations <- b$iterations
termcode <- b$code
if (termcode == 1)
conv <- "Relative gradient is close to zero, current iterate is probably solution"
else if (termcode == 2)
conv <- "Successive iterates within tolerance, current iterate is probably solution"
else if (termcode == 3)
conv <- "Last global step failed to locate a point lower than `estimate'. Either
`estimate' is an approximate local minimum of the function or
`steptol' is too small"
else if (termcode == 4)
conv <- "Iteration limit exceeded"
else if (termcode == 5)
conv <- "Maximum step size `stepmax' exceeded five consecutive
times. Either the function is unbounded below, becomes asymptotic
to a finite value from above in some direction or stepmax is too small"
}
## fit the model using the optimal sp from "optim" or "nlm"...
if (optimizer[1]== "nlm.fd" || optimizer[1]== "nlm" || optimizer[1]== "optim"){
object <- scam.fit(G=G, sp=sp,env=env,control=control)
object$conv <- object$conv
object$gcv.ubre <- gcv.ubre
object$dgcv.ubre <- dgcv.ubre
object$outer.info <- list(termcode=termcode,conv=conv,iterations=iterations)
}
else if (optimizer[1]=="no.sps"){
# object <- scam.fit(G=G, sp=exp(rho),env=env,control=control)
sp <- G$sp
object <- if (optimizer[2] == "newton")
scam.fit(G=G, sp=sp,env=env, control=control)
else scam.fit1(G=G, sp=sp,env=env, control=control) ## BFGS optimization
# object$optimizer[1] <- "NA"
}
if (optimizer[1]=="optim"){
object$optim.method <- rep(NA,2)
object$optim.method[1] <- optim.method[1]
if (!is.null(grr))
object$optim.method[2] <- "grad"
}
object$sp <- sp
object$q.f <- G$q.f
object$p.ident <- G$p.ident
object$S <- G$S
object$optimizer <- optimizer
if (optimizer[1]=="no.sps") object$optimizer[1] <- "NA"
object
} ## estimate.scam
efsudr.scam2 <- function(fit.fn,G,lsp,env, control)
## Extended Fellner-Schall method for regular families as in efsudr(mgcv), however,
## rather than minimizing REML (as in gam(mgcv)), the function minimizes GCV/UBRE criterion;
## also there is an alternative BFGS method in place of the full Newton.
## The dependence of H on lambda is neglected.
## 'fit.fn' - the routine to estimate the model coefficients:
## fit.fn = scam.fit1 if a quasi-Newton/BFGS method is used for the coefficient estimation,
## fit.fn = scam.fit in case of the full Newton (PIRLS).
## This function is the same as efsudr.scam1 with the only difference that scam.fit() is either
## scam.fit1() or scam.fit()...
{ ## deriv.dev.edf <- function(G,fit,sp){ ## the expected approx Hessian is not correct here, not +ve def!!
## ## function to calculate derivatives of deviance and tr(A) w.r.t. log(sp)
## ## Hessian is replaced by a BFGS approximation from the scam.fit1,
## ## the dependence of H on sp/lambda is neglected
## b <- fit
## nsp <- length(G$S)
## H <- b$hess ## expected approximate Hessian
## Vb <- b$inv.B ## inverse of the approximate Hessian
## trA.rho<- rep(0,nsp)
## beta.rho <- matrix(0,ncol(G$X),nsp) ## matrix of the parameters derivatives wrt log(sp)
## for (j in 1:nsp){
## VbS <- Vb%*%b$S[[j]]
## trA.rho[j] <- -sp[j]*sum((VbS%*%Vb)*t(H)) ## -lambda*tr(Vb*Sj*Vb*H)
## beta.rho[,j] <- -sp[j]*(VbS%*%b$beta)
## }
## y.mu <- drop(b$y)-b$mu
## c <- -2*y.mu/(b$Var*b$dlink.mu)
## D.beta <- t(b$X1)%*%c ## derivative of the deviance w.r.t. beta
## D.rho <- t(D.beta)%*%beta.rho ## derivative of the deviance w.r.t. log(sp)
## list(trA.rho=trA.rho,D.rho=D.rho)
## } ## deriv.dev.edf()
deriv.dev.edf <- function(G,fit,sp){
## function to calculate derivatives of deviance and tr(A) w.r.t. log(sp),
## using expected Hessian and neglecting its dependence on sp
b <- fit
nsp <- length(G$S)
H <- crossprod(b$wX1) ## expected Hessian = CtXtWXC
Vb <- tcrossprod(b$P) ## P%*%t(P)*sig2 Bayesian posterior covariance matrix for the parameters
## but without ? scale parameter here (inverse of the Hessian)
trA.rho<- rep(0,nsp)
beta.rho <- matrix(0,ncol(G$X),nsp) ## matrix of the parameters derivatives wrt log(sp)
## VbS <- matrix(0,dim(Vb)[1],dim(Vb)[2])
for (j in 1:nsp){
VbS <- Vb%*%b$S[[j]]
trA.rho[j] <- -sp[j]*sum((VbS%*%Vb)*t(H)) ## -lambda*tr(Vb*Sj*Vb*H)
beta.rho[,j] <- -sp[j]*(VbS%*%b$beta)
}
y.mu <- drop(b$y)-b$mu
c <- -2*y.mu/(b$Var*b$dlink.mu)
D.beta <- t(b$X1)%*%c ## derivative of the deviance w.r.t. beta
D.rho <- t(D.beta)%*%beta.rho ## derivative of the deviance w.r.t. log(sp)
list(trA.rho=trA.rho,D.rho=D.rho)
} ## deriv.dev.edf()
crit.gcv <- function(fit,G,n){
## function to get the GCV criterion value
## gcv=dev*nobs/(nobs-gamma*trA)^2
fit$gcv
}
crit.ubre <- function(fit,G,n){
## function to get the UBRE criterion value
## ubre = dev/n - sig2 +2*gamma*trA*sig2/n
## cr <- fit$deviance +2*G$gamma*fit$trA*G$sig2
cr <- fit$deviance/n +2*G$gamma*fit$trA*G$sig2/n - G$sig2
cr
}
if (G$scale.known) ## use UBRE...
crit <- crit.ubre
else crit <- crit.gcv ## use GCV
nsp <- length(G$S) # number of smoothing parameters
spind <- 1:nsp ## index of smoothing params in lsp
lsp[spind] <- lsp[spind] + 2.5
mult <- 1
fit <- fit.fn(G=G,sp=exp(lsp), env=env,control=control)
q <- ncol(G$X)
n <- nrow(G$X)
score.hist <- rep(0,200)
D.rho <- tau.rho <- rep(0,nsp) ## initialize derivatives of deviance and edf (tau/trA) wrt log(sp)
for (iter in 1:200) { ## loop for GCV/UBRE minimization...
## calculation of the derivatives of deviance and edf wrt rho...
der <- deriv.dev.edf(G=G, fit=fit,sp=exp(lsp))
D.rho <- der$D.rho
tau.rho <- der$trA.rho
if (G$scale.known)
a <- pmax(0,-2*G$gamma*G$sig2*tau.rho)
else a <- pmax(0,-2*G$gamma*fit$deviance*tau.rho/(n-fit$trA))
r <- a/pmax(0,D.rho)
# r[a==0 & D.rho==0] <- 1
r[a==0 | D.rho==0] <- 1
r[!is.finite(r)] <- 1e6
lsp1 <- lsp
lsp1 <- pmin(lsp + log(r)*mult,control$efs.lspmax)
max.step <- max(abs(lsp1-lsp))
old.gcv <- crit(fit, G,n) ## fit$gcv
fit <- fit.fn(G=G,sp=exp(lsp1), env=env,control=control)
## some step length control...
fit.gcv <- crit(fit, G,n) ## undated gcv/ubre value
if (fit.gcv<=old.gcv) { ## improvement
if (max.step<.05) { ## consider step extension (near optimum)
lsp2 <- lsp
lsp2 <- pmin(lsp + log(r)*mult*2,control$efs.lspmax) ## try extending step...
fit2 <- fit.fn(G=G,sp=exp(lsp2), env=env,control=control)
fit2.gcv <- crit(fit2,G,n)
if (fit2.gcv < fit.gcv) { ## improvement - accept extension
fit <- fit2;lsp <- lsp2
fit.gcv <- fit2.gcv
mult <- mult * 2
} else { ## accept old step
lsp <- lsp1
}
} else lsp <- lsp1
} else { ## no improvement
# while (fit$gcv > old.gcv&&mult>1) { ## don't contract below 1 as update doesn't have to improve GCV
# mult <- mult/2 ## contract step
# lsp1 <- lsp
# lsp1 <- pmin(lsp + log(r)*mult,control$efs.lspmax)
# fit <- fit.fn(G=G,sp=exp(lsp1), gamma=gamma, env=env,control=control)
# }
gcv.thresh <- 10*(.1 +abs(old.gcv))*.Machine$double.eps^.5
ii <- 1
maxHalf.fit <- 15
while (is.na(fit.gcv) || (fit.gcv-old.gcv) > gcv.thresh) { # 'step reduction' approach
if (ii > maxHalf.fit)
break ## stop ("step reduction failed")
ii <- ii+1
mult <- mult/2 ## contract step
lsp1 <- lsp
lsp1 <- pmin(lsp + log(r)*mult,control$efs.lspmax)
fit <- fit.fn(G=G,sp=exp(lsp1), env=env,control=control)
fit.gcv <- crit(fit,G,n)
}
lsp <- lsp1
if (mult<1) mult <- 1
}
score.hist[iter] <- fit.gcv
## break if EFS step small and GCV change negligible over last 3 steps.
if (iter>3 && max.step<.05 && max(abs(diff(score.hist[(iter-3):iter])))<control$efs.tol) break
## or break if deviance not changing...
if (iter==1) old.dev <- fit$deviance else {
if (abs(old.dev-fit$deviance) < 100*control$devtol.fit*abs(fit$deviance)) break
old.dev <- fit$deviance
}
} # end of 'for' loop for GCV/UBRE minimization
fit$sp <- exp(lsp)
fit$niter <- iter
fit$outer.info <- list(iter = iter,score.hist=score.hist[1:iter])
fit$outer.info$conv <- if (iter==200) "iteration limit reached" else "full convergence"
fit$gcv.ubre <- fit.gcv
fit$inner.info <- list(iter=fit$iter,conv=fit$conv,pdev.hist=fit$pdev.hist) ## convergence info from BFGS for pen. deviance minimization
fit
} ## efsudr.scam2
##########################################################################
## Extended Fellner-Schall method for regular families as in efsudr(mgcv) (c) Simon N Wood
## with PIRLS performed by scam.fit. Rather than minimizing REML (as in gam(mgcv)),
## the function minimizes GCV/UBRE criterion...
##########################################################################
efsudr.scam <- function(G,lsp,env, control){##maxit=200, devtol.fit=1e-7, steptol.fit=1e-7,
# gamma=1, start=NULL, etastart=NULL, mustart=NULL, env=env){
## Extended Fellner-Schall method for regular families,
## with full-Newton/PIRLS method used for the coefficient estimation, performed by scam.fit().
## In this implementation I use the third order derivatives of the log likelihood when calculating
## the derivatives of the tau/edf. So I did not neglect the dependence of H on lambda
## as it is in Wood and Fasiolo (2017)
deriv.dev.edf <- function(G,fit,sp){
## function to calculate derivatives of deviance and tr(A) w.r.t. log(sp)
b <- fit
nsp <- length(G$S)
q <- ncol(G$X)
n <- nrow(G$X)
y.mu <- drop(b$y)-b$mu
c <- -2*y.mu/(b$Var*b$dlink.mu)
D.beta <- t(b$X1)%*%c # derivative of the deviance w.r.t. beta
D.rho <- rep(0,nsp) # define derivative of the deviance wrt rho
D.rho <- t(D.beta)%*%b$dbeta.rho
## calculating the derivative of tr(A) w.r.t. log(sp)
d2link.dlink <- b$d2link.mu/b$dlink.mu
a1 <- as.numeric(y.mu*d2link.dlink) # a constant for updating the derivative of z
a2 <- as.numeric(b$w^2*(b$dvar.mu*b$dlink.mu+2*b$Var*b$d2link.mu)) # a constant for updating the derivative of w
eta.rho <- matrix(0,n,nsp) # define derivatives of the linear predictor
N_rho <- matrix(0,q,nsp) # define diagonal elements of N_j
w.rho <- matrix(0,n,nsp) # define derivatives of the diagonal elements of W
alpha.rho <- matrix(0,n,nsp) # define derivatives of alpha
T_rho <- matrix(0,n,nsp) # define diagonal elements of T_j
## a constant for the derivative of the alpha
dvar.var <- b$dvar.mu/b$Var
alpha1 <- as.numeric(-(dvar.var+d2link.dlink)/b$dlink.mu -
y.mu*(dvar.var^2 + d2link.dlink^2 - b$d2var.mu/b$Var -
b$d3link.mu/b$dlink.mu)/b$dlink.mu)
eta.rho <- b$X1%*%b$dbeta.rho
N_rho[b$iv,] <- b$dbeta.rho[b$iv,]
alpha.rho <- alpha1*eta.rho
w.rho <- -a2*b$alpha*eta.rho + b$w1*alpha.rho
T_rho <- w.rho / b$abs.w
z2 <- b$dlink.mu*y.mu # term for the derivative of E
w1.rho <- matrix(0,n,nsp) # define derivatives of the diagonal elements of W1
T1_rho <- matrix(0,n,nsp) # define diagonal elements of T1_j
Q_rho <- matrix(0,q,nsp) # derivative of E diagonal wrt log(sp[j])
w1.rho <- -a2*eta.rho
T1_rho <- w1.rho/b$w1
term <- T1_rho*z2 + a1*eta.rho- eta.rho
Q_rho <- N_rho*drop(b$C2diag*crossprod(b$X,b$w1*z2)) +
b$C1diag*crossprod(b$X,b$w1*term)
## efficient version of derivative of trA...
KtIL <- t((b$L*b$I.plus)*b$K)
KtILK <- KtIL%*%b$K
KKtILK <- b$K%*%KtILK
trA.rho<-rep(0,nsp)
for (j in 1:nsp){
trA.rho[j] <- - 2*sum(KtILK*(b$KtIQ1R%*%(N_rho[,j]*b$P))) -
sum((T_rho[,j]*KKtILK)*b$K) -
sp[j]*sum((t(b$P)%*%b$S[[j]]%*%b$P)*t(KtILK) )+
sum( (t(b$P)%*%(c(Q_rho[,j])*b$P))*t(KtILK) ) +
2*sum( b$KtILQ1R*t(N_rho[,j]*b$P) ) +
sum(KtIL*t(T1_rho[,j]*b$K))
}
list(trA.rho=trA.rho,D.rho=D.rho)
} ## deriv.dev.edf()
crit.gcv <- function(fit,G,n){
## function to get the GCV criterion value
## gcv=dev*nobs/(nobs-gamma*trA)^2
fit$gcv
}
crit.ubre <- function(fit,G,n){
## function to get the UBRE criterion value
## ubre = dev/n - sig2 +2*gamma*trA*sig2/n
## cr <- fit$deviance +2*G$gamma*fit$trA*G$sig2
cr <- fit$deviance/n +2*G$gamma*fit$trA*G$sig2/n - G$sig2
cr
}
if (G$scale.known) ## use UBRE...
crit <- crit.ubre
else crit <- crit.gcv ## use GCV
nsp <- length(G$S) # number of smoothing parameters
spind <- 1:nsp ## index of smoothing params in lsp
lsp[spind] <- lsp[spind] + 2.5
mult <- 1
fit <- scam.fit(G=G,sp=exp(lsp), env=env,control=control)
q <- ncol(G$X)
n <- nrow(G$X)
score.hist <- rep(0,200)
D.rho <- tau.rho <- rep(0,nsp) ## initialize derivatives of deviance and edf (tau/trA) wrt log(sp)
for (iter in 1:200) { ## loop for GCV/UBRE minimization...
## calculation of the derivatives of deviance and edf wrt rho...
der <- deriv.dev.edf(G=G, fit=fit,sp=exp(lsp))
D.rho <- der$D.rho
tau.rho <- der$trA.rho
if (G$scale.known)
a <- pmax(0,-2*G$gamma*G$sig2*tau.rho)
else a <- pmax(0,-2*G$gamma*fit$deviance*tau.rho/(n-fit$trA))
r <- a/pmax(0,D.rho)
# r[a==0 & D.rho==0] <- 1
r[a==0 | D.rho==0] <- 1
r[!is.finite(r)] <- 1e6
lsp1 <- lsp
lsp1 <- pmin(lsp + log(r)*mult,control$efs.lspmax)
max.step <- max(abs(lsp1-lsp))
old.gcv <- crit(fit, G,n) ## fit$gcv
fit <- scam.fit(G=G,sp=exp(lsp1), env=env,control=control)
## some step length control...
fit.gcv <- crit(fit, G,n) ## undated gcv/ubre value
if (fit.gcv<=old.gcv) { ## improvement
if (max.step<.05) { ## consider step extension (near optimum)
lsp2 <- lsp
lsp2 <- pmin(lsp + log(r)*mult*2,control$efs.lspmax) ## try extending step...
fit2 <- scam.fit(G=G,sp=exp(lsp2), env=env,control=control)
fit2.gcv <- crit(fit2,G,n)
if (fit2.gcv < fit.gcv) { ## improvement - accept extension
fit <- fit2;lsp <- lsp2
fit.gcv <- fit2.gcv
mult <- mult * 2
} else { ## accept old step
lsp <- lsp1
}
} else lsp <- lsp1
} else { ## no improvement
# while (fit$gcv > old.gcv&&mult>1) { ## don't contract below 1 as update doesn't have to improve GCV
# mult <- mult/2 ## contract step
# lsp1 <- lsp
# lsp1 <- pmin(lsp + log(r)*mult,control$efs.lspmax)
# fit <- scam.fit(G=G,sp=exp(lsp1), gamma=gamma, env=env,control=control)
# }
gcv.thresh <- 10*(.1 +abs(old.gcv))*.Machine$double.eps^.5
ii <- 1
maxHalf.fit <- 15
while (is.na(fit.gcv) || (fit.gcv-old.gcv) > gcv.thresh) { # 'step reduction' approach
if (ii > maxHalf.fit)
break ## stop ("step reduction failed")
ii <- ii+1
mult <- mult/2 ## contract step
lsp1 <- lsp
lsp1 <- pmin(lsp + log(r)*mult,control$efs.lspmax)
fit <- scam.fit(G=G,sp=exp(lsp1), env=env,control=control)
fit.gcv <- crit(fit,G,n)
}
lsp <- lsp1
if (mult<1) mult <- 1
}
score.hist[iter] <- fit.gcv
## break if EFS step small and GCV change negligible over last 3 steps.
if (iter>3 && max.step<.05 && max(abs(diff(score.hist[(iter-3):iter])))<control$efs.tol) break
## or break if deviance not changing...
if (iter==1) old.dev <- fit$deviance else {
if (abs(old.dev-fit$deviance) < 100*control$devtol.fit*abs(fit$deviance)) break
old.dev <- fit$deviance
}
} # end of 'for' loop for GCV/UBRE minimization
fit$sp <- exp(lsp)
fit$niter <- iter
fit$outer.info <- list(iter = iter,score.hist=score.hist[1:iter])
fit$outer.info$conv <- if (iter==200) "iteration limit reached" else "full convergence"
fit$gcv.ubre <- fit.gcv
fit$inner.info <- list(iter=fit$iter,conv=fit$conv,pdev.hist=fit$pdev.hist) ## convergence info from the Newton optimization
fit
} ## efsudr.scam
efsudr.scam1 <- function(G,lsp,env, control)
## Extended Fellner-Schall method for regular families,
## with PIRLS/full-Newton method used for the coefficient estimation, performed by scam.fit().
## Here only I use the first and the second order derivatives of the log likelihood when calculating
## the derivatives of the tau/edf. The dependence of H on lambda is neglected,
## as it is in Wood and Fasiolo (2017)
{ deriv.dev.edf <- function(G,fit,sp){
## function to calculate derivatives of deviance and tr(A) w.r.t. log(sp),
## using expected Hessian and neglecting its dependence on sp
b <- fit
nsp <- length(G$S)
H <- crossprod(b$wX1) ## expected Hessian = CtXtWXC
Vb <- tcrossprod(b$P) ## P%*%t(P)*sig2 Bayesian posterior covariance matrix for the parameters
## but without ? scale parameter here (inverse of the Hessian)
trA.rho<- rep(0,nsp)
beta.rho <- matrix(0,ncol(G$X),nsp) ## matrix of the parameters derivatives wrt log(sp)
## VbS <- matrix(0,dim(Vb)[1],dim(Vb)[2])
for (j in 1:nsp){
VbS <- Vb%*%b$S[[j]]
trA.rho[j] <- -sp[j]*sum((VbS%*%Vb)*t(H)) ## -lambda*tr(Vb*Sj*Vb*H)
beta.rho[,j] <- -sp[j]*(VbS%*%b$beta)
}
y.mu <- drop(b$y)-b$mu
c <- -2*y.mu/(b$Var*b$dlink.mu)
D.beta <- t(b$X1)%*%c ## derivative of the deviance w.r.t. beta
D.rho <- t(D.beta)%*%beta.rho ## derivative of the deviance w.r.t. log(sp)
list(trA.rho=trA.rho,D.rho=D.rho)
} ## deriv.dev.edf()
crit.gcv <- function(fit,G,n){
## function to get the GCV criterion value
## gcv=dev*nobs/(nobs-gamma*trA)^2
fit$gcv
}
crit.ubre <- function(fit,G,n){
## function to get the UBRE criterion value
## ubre = dev/n - sig2 +2*gamma*trA*sig2/n
## cr <- fit$deviance +2*G$gamma*fit$trA*G$sig2
cr <- fit$deviance/n +2*G$gamma*fit$trA*G$sig2/n - G$sig2
cr
}
if (G$scale.known) ## use UBRE...
crit <- crit.ubre
else crit <- crit.gcv ## use GCV
nsp <- length(G$S) # number of smoothing parameters
spind <- 1:nsp ## index of smoothing params in lsp
lsp[spind] <- lsp[spind] + 2.5
mult <- 1
fit <- scam.fit(G=G,sp=exp(lsp), env=env,control=control)
q <- ncol(G$X)
n <- nrow(G$X)
score.hist <- rep(0,200)
D.rho <- tau.rho <- rep(0,nsp) ## initialize derivatives of deviance and edf (tau/trA) wrt log(sp)
for (iter in 1:200) { ## loop for GCV/UBRE minimization...
## calculation of the derivatives of deviance and edf wrt rho...
der <- deriv.dev.edf(G=G, fit=fit,sp=exp(lsp))
D.rho <- der$D.rho
tau.rho <- der$trA.rho
if (G$scale.known)
a <- pmax(0,-2*G$gamma*G$sig2*tau.rho)
else a <- pmax(0,-2*G$gamma*fit$deviance*tau.rho/(n-fit$trA))
r <- a/pmax(0,D.rho)
# r[a==0 & D.rho==0] <- 1
r[a==0 | D.rho==0] <- 1
r[!is.finite(r)] <- 1e6
lsp1 <- lsp
lsp1 <- pmin(lsp + log(r)*mult,control$efs.lspmax)
max.step <- max(abs(lsp1-lsp))
old.gcv <- crit(fit, G,n) ## fit$gcv
fit <- scam.fit(G=G,sp=exp(lsp1), env=env,control=control)
## some step length control...
fit.gcv <- crit(fit, G,n) ## undated gcv/ubre value
if (fit.gcv<=old.gcv) { ## improvement
if (max.step<.05) { ## consider step extension (near optimum)
lsp2 <- lsp
lsp2 <- pmin(lsp + log(r)*mult*2,control$efs.lspmax) ## try extending step...
fit2 <- scam.fit(G=G,sp=exp(lsp2), env=env,control=control)
fit2.gcv <- crit(fit2,G,n)
if (fit2.gcv < fit.gcv) { ## improvement - accept extension
fit <- fit2;lsp <- lsp2
fit.gcv <- fit2.gcv
mult <- mult * 2
} else { ## accept old step
lsp <- lsp1
}
} else lsp <- lsp1
} else { ## no improvement
# while (fit$gcv > old.gcv&&mult>1) { ## don't contract below 1 as update doesn't have to improve GCV
# mult <- mult/2 ## contract step
# lsp1 <- lsp
# lsp1 <- pmin(lsp + log(r)*mult,control$efs.lspmax)
# fit <- scam.fit(G=G,sp=exp(lsp1), gamma=gamma, env=env,control=control)
# }
gcv.thresh <- 10*(.1 +abs(old.gcv))*.Machine$double.eps^.5
ii <- 1
maxHalf.fit <- 15
while (is.na(fit.gcv) || (fit.gcv-old.gcv) > gcv.thresh) { # 'step reduction' approach
if (ii > maxHalf.fit)
break ## stop ("step reduction failed")
ii <- ii+1
mult <- mult/2 ## contract step
lsp1 <- lsp
lsp1 <- pmin(lsp + log(r)*mult,control$efs.lspmax)
fit <- scam.fit(G=G,sp=exp(lsp1), env=env,control=control)
fit.gcv <- crit(fit,G,n)
}
lsp <- lsp1
if (mult<1) mult <- 1
}
score.hist[iter] <- fit.gcv
## break if EFS step small and GCV change negligible over last 3 steps.
if (iter>3 && max.step<.05 && max(abs(diff(score.hist[(iter-3):iter])))<control$efs.tol) break
## or break if deviance not changing...
if (iter==1) old.dev <- fit$deviance else {
if (abs(old.dev-fit$deviance) < 100*control$devtol.fit*abs(fit$deviance)) break
old.dev <- fit$deviance
}
} # end of 'for' loop for GCV/UBRE minimization
fit$sp <- exp(lsp)
fit$niter <- iter
fit$outer.info <- list(iter = iter,score.hist=score.hist[1:iter])
fit$outer.info$conv <- if (iter==200) "iteration limit reached" else "full convergence"
fit$gcv.ubre <- fit.gcv
fit$inner.info <- list(iter=fit$iter,conv=fit$conv,pdev.hist=fit$pdev.hist) ## convergence info from the Newton optimization
fit
} ## efsudr.scam1
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.