Nothing
## (c) Natalya Pya (2012-2024). Provided under GPL 2.
## routines for fitting scam()...
## based on setup routines (c) Simon N Wood
#############################################################
## the wrapper overall function to fit scam... ##
#############################################################
scam <- function(formula, family=gaussian(), data=list(), gamma=1, sp=NULL,
weights=NULL, offset=NULL, optimizer=c("bfgs","newton"), optim.method=c("Nelder-Mead","fd"),
scale=0, knots=NULL, not.exp=FALSE, start=NULL, etastart=NULL, mustart=NULL, control=list(),
AR1.rho=0, AR.start=NULL,drop.unused.levels=TRUE) ##,devtol.fit=1e-8, steptol.fit=1e-8, check.analytical=FALSE, del=1e-4)
## Function to fit a SCAM to some data, withe the model stated in the formula
## Basic steps:
## 1) using 'interpret.gam()' of mgcv, formula is split up into parametric and
## non-parametric parts, and a fake formula constructed to be used to pick up
## data for model frame. pterms "terms" object(s) created for parametric
## components, model frame created along with terms object.
## 2. 'gam.setup()' of mgcv called to do most of basis construction and other
## elements of model setup.
## 3. 'estimate.scam()' is called to estimate the model if smoothing parameter should be
## selected, otherwise 'scam.fit' is called. Numerical optimization method to use
## to optimize the smoothing parameter estimation criterion is specified in 'optimizer'
## input argument as one of "bfgs" (currenlt default), "optim", "nlm", "nlm.fd", "efs"
## 4. 'scam.fit.post()' does post-fitting steps: compute null deviance and covariance
## matrices after the scam fit.
## 5. Finished 'scam' object assembled.
## 'scale': scale parameter of the exponential distribution as in gam(mgcv)
## 'optimizer': An array specifying the numerical optimization methods
## to optimize the smoothing parameter estimation criterion (specified in the first
## element of 'optimizer') and to use to estimate the model coefficients
## (specified in the second element of 'optimizer').
## For the model coeff estimation, there are two alternatives:
## 'newton' (default) and 'bfgs' methods.
## For the sp selection the available methods are "bfgs" (default), "optim", "nlm",
## "nlm.fd", "efs". Note that 'bfgs' method for the coefficient estimation works only with 'efs'.
## 'optim.method': if optimizer[1]=="optim", then the first argument of optim.method specifies
## the method, and the second can be either "fd" for finite-difference approximation
## of the gradient or "grad" - to use analytical gradient of gcv/ubre
## 'not.exp': if TRUE then notExp() function will be used in place of exp in positivity
## ensuring beta parameters re-parameterization
{ control <- do.call("scam.control",control)
## Setting from mgcv(gam).......
## create model frame.....
gp <- interpret.gam(formula) # interpret the formula
cl <- match.call() # call needed in gam object for update to work
mf <- match.call(expand.dots=FALSE)
mf$formula <- gp$fake.formula
mf$family <- mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$min.sp<-mf$H<-mf$select <- mf$drop.intercept <-
mf$gamma<-mf$method<-mf$fit<-mf$paraPen<-mf$G<-mf$optimizer <- mf$optim.method <- mf$not.exp <- mf$in.out <- mf$AR1.rho <- mf$AR.start <- mf$start <- mf$etastart <- mf$mustart <-mf$devtol.fit <- mf$steptol.fit <- mf$del <- mf$...<-NULL
mf$drop.unused.levels <- drop.unused.levels
mf[[1]] <- quote(stats::model.frame) ## as.name("model.frame")
pmf <- mf
mf <- eval(mf, parent.frame()) # the model frame now contains all the data
if (nrow(mf)<2) stop("Not enough (non-NA) data to do anything meaningful")
terms <- attr(mf,"terms")
## summarize the *raw* input variables
## note can't use get_all_vars here -- buggy with matrices
vars <- allvars1(gp$fake.formula[-2]) ## drop response here
inp <- parse(text = paste("list(", paste(vars, collapse = ","),")"))
## allow a bit of extra flexibility in what `data' is allowed to be (as model.frame actually does)
if (!is.list(data)&&!is.data.frame(data)) data <- as.data.frame(data)
dl <- eval(inp, data, parent.frame())
names(dl) <- vars ## list of all variables needed
var.summary <- variable.summary(gp$pf,dl,nrow(mf)) ## summarize the input data
rm(dl) ## save space
## pterms are terms objects for the parametric model components used in
## model setup - don't try obtaining by evaluating pf in mf - doesn't
## work in general (e.g. with offset)...
if (is.list(formula)) { ## then there are several linear predictors
environment(formula) <- environment(formula[[1]]) ## e.g. termplots needs this
pterms <- list()
tlab <- rep("",0)
for (i in 1:length(formula)) {
pmf$formula <- gp[[i]]$pf
pterms[[i]] <- attr(eval(pmf, parent.frame()),"terms")
tlabi <- attr(pterms[[i]],"term.labels")
if (i>1&&length(tlabi)>0) tlabi <- paste(tlabi,i-1,sep=".")
tlab <- c(tlab,tlabi)
}
attr(pterms,"term.labels") <- tlab ## labels for all parametric terms, distinguished by predictor
} else { ## single linear predictor case
pmf$formula <- gp$pf
pmf <- eval(pmf, parent.frame()) # pmf contains all data for parametric part
pterms <- attr(pmf,"terms") ## pmf only used for this
}
if (is.character(family)) family <- eval(parse(text=family))
if (is.function(family)) family <- family()
if (is.null(family$family)) stop("family not recognized")
if (family$family[1]=="gaussian" && family$link=="identity") am <- TRUE
else am <- FALSE
## if (AR1.rho!=0&&!is.null(mf$"(AR.start)")) if (!is.logical(mf$"(AR.start)")) stop("AR.start must be logical")
## scam_1.2-15...
if (AR1.rho!=0&&!is.null(AR.start)) if (!is.logical(AR.start)) stop("AR.start must be logical")
if (AR1.rho!=0 && !am) stop("residual autocorrelation, AR1, is currently available only for the Gaussian identity link model.")
if (!control$keepData) rm(data) ## save space
## check whether family requires intercept to be dropped...
# drop.intercept <- if (is.null(family$drop.intercept) || !family$drop.intercept) FALSE else TRUE
# drop.intercept <- as.logical(family$drop.intercept)
### if (is.null(family$drop.intercept)) { ## family does not provide information
### lengthf <- if (is.list(formula)) length(formula) else 1
### if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, lengthf) else {
### drop.intercept <- rep(drop.intercept,length=lengthf) ## force drop.intercept to correct length
### if (sum(drop.intercept)) family$drop.intercept <- drop.intercept ## ensure prediction works
### }
### } else drop.intercept <- as.logical(family$drop.intercept) ## family overrides argument
### if (inherits(family,"general.family")&&!is.null(family$presetup)) eval(family$presetup)
### gsname <- if (is.list(formula)) "gam.setup.list" else "gam.setup"
### G <- do.call(gsname,list(formula=gp,pterms=pterms,
### data=mf,knots=knots,sp=sp, min.sp=min.sp,
### H=H,absorb.cons=TRUE,sparse.cons=0,select=select,
### idLinksBases=control$idLinksBases,scale.penalty=control$scalePenalty,
### paraPen=paraPen,drop.intercept=drop.intercept))
G <- do.call("gam.setup",list(formula=gp,pterms=pterms,
data=mf,knots=knots,sp=sp, absorb.cons=TRUE,sparse.cons=0))
G$var.summary <- var.summary
G$family <- family
if ((is.list(formula)&&(is.null(family$nlp)||family$nlp!=gp$nlp))||
(!is.list(formula)&&!is.null(family$npl)&&(family$npl>1))) stop("incorrect number of linear predictors for family")
G$terms<-terms;
G$mf<-mf;G$cl<-cl;
G$am <- am
G$AR1.rho <- AR1.rho; G$AR.start <- AR.start
if (is.null(G$offset)) G$offset<-rep(0,G$n)
G$min.edf <- G$nsdf ## -dim(G$C)[1]
if (G$m) for (i in 1:G$m) G$min.edf<-G$min.edf+G$smooth[[i]]$null.space.dim
G$formula <- formula
G$pred.formula <- gp$pred.formula
environment(G$formula)<-environment(formula)
if (ncol(G$X)>nrow(G$X)) stop("Model has more coefficients than data")
### G <- gam(formula, family,data=data, knots=knots,fit=FALSE)
### n.terms <- length(G$smooth) ## number of smooth terms in the model
### n <- nrow(G$X)
## intercept <- G$intercept ## TRUE or FALSE
## now need to set 'offset' as the above G wouldn't take in 'offset' that is outside of formula..
### gp <- interpret.gam(formula) # interpret the formula
### cl <- match.call() # call needed in gam object for update to work
### mf <- match.call(expand.dots=FALSE)
### mf$formula <- gp$fake.formula
### mf$family <- mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$min.sp<-mf$H<-mf$select <-
### mf$gamma<-mf$method<-mf$fit<-mf$paraPen<-mf$G<-mf$optimizer <- mf$optim.method <- mf$not.exp <- mf$in.out <-
### mf$devtol.fit <- mf$steptol.fit <- mf$del <- mf$...<-NULL
### mf[[1]] <- quote(stats::model.frame) ## as.name("model.frame")
### pmf <- mf
### mf <- eval(mf, parent.frame()) # the model frame now contains all the data
### if (nrow(mf)<2) stop("Not enough (non-NA) data to do anything meaningful")
# terms <- attr(mf,"terms")
n.terms <- length(G$smooth) ## number of smooth terms in the model
n <- nrow(G$X)
intercept <- G$intercept
G$offset <- as.vector(model.offset(mf))
if (is.null(G$offset))
G$offset <- rep.int(0, n)
## done offset
### if (is.null(weights))
### weights <- rep.int(1,n)
weights <- G$w
fam.name <- G$family[1]
if (scale == 0)
{ if (fam.name == "binomial" || fam.name == "poisson")
sig2 <- 1
else sig2 <- -1
}
else { sig2 <- scale }
if (sig2 > 0) scale.known <- TRUE else scale.known <- FALSE
## get penalty matrices and
## vector of identifications for exponentiated model coefficients...
Q <- penalty_pident(G)
## checking sp...
if (!is.null(sp)) {
neg <- FALSE
if (length(sp)!= length(G$off)) {
warning("Supplied smoothing parameter vector is too short - ignored.")
sp <- NULL
} else if (sum(is.na(sp))) {
warning("NA's in supplied smoothing parameter vector - ignoring.")
sp <- NULL
} else {
good <- sp < 0
if (sum(good) > 0) { ## cheking negative values..
warning("Supplied smoothing parameter vector has negative values - ignored.")
neg <- TRUE
}
}
if (neg) sp <- NULL
}
## Create new environments with `start' initially empty (if not supplied)...
# ee <- new.env() ## parent = .GlobalEnv
env <- new.env()
## assign("start",rep(0,0),envir=env)
assign("dbeta.start",rep(0,0),envir=env)
assign("sp.last",rep(0,0),envir=env)
nvars <- NCOL(G$X) # number of model coefficients
if (!is.null(start)) { ## scam_1.2-12 ...
if (length(start) != nvars)
stop(gettextf("Length of start should equal %d and correspond to initial coefs.",nvars))
else assign("start",start,envir=env)
} else{
assign("start",rep(0,0),envir=env)
}
q.f <- rep(0,n.terms)
if (n.terms >0) for (i in 1:n.terms)
q.f[i] <- ncol(G$smooth[[i]]$S[[1]]) + 1
G$S <- Q$S
G$q.f <- q.f
G$q0 <- G$off[1]-1 ## number of the parameters of the strictly parametric model
G$p.ident <- Q$p.ident ## vector of 0's & 1's for the model parameters identification:
G$n.terms <- n.terms ## number of the smooth terms in the SCAM
# G$intercept <- intercept
G$weights <- weights
G$sig2 <- sig2
G$scale.known <- scale.known
G$not.exp <- not.exp
G$gamma <- gamma
### if (!control$keepData) rm(data) ## save space
object <- list()
# if (is.null(sp)) {
# ## get initial estimates of the smoothing parameter...
# ## start <- etastart <- mustart <- NULL
# y <- G$y; family <- G$family
# nobs <- NROW(y)
# eval(family$initialize)
# G$y <- y ## needed to set factor response values as numeric
# def.sp <- initial.sp.scam(G,optimizer=optimizer, Q,q.f=q.f,n.terms=n.terms,family=family,
# intercept=intercept,offset=G$offset, env=env,
# weights=weights, control=control)
# rho <- log(def.sp+1e-4) ## get initial log(sp) ...
# if (!is.null(start))
# assign("start",start,envir=env) ## scam_1.2-12
# ## minimize GCV/UBRE by optimizer....
# ptm <- proc.time()
# re <- estimate.scam(G=G,optimizer=optimizer,optim.method=optim.method,
# rho=rho, env=env,control=control)
# CPU.time <- proc.time()-ptm
# best <- re
# object$gcv.ubre <- re$gcv.ubre
# object$dgcv.ubre <- re$dgcv.ubre
# best$p.ident <- Q$p.ident
# best$S <- Q$S
# object$optimizer <- re$optimizer
# object$edf1 <- re$edf1
# object$termcode <- re$termcode
# if (optimizer[1] == "bfgs")
# { object$check.grad <- re$check.grad
# object$dgcv.ubre.check <- re$dgcv.ubre.check
# }
# } else { ## no GCV minimization if sp is given...
# object$optimizer <- optimizer
# if (is.na(optimizer[2]))
# optimizer[2] <- object$optimizer[2] <- "newton"
# best <- if (optimizer[2] == "newton")
# scam.fit(G=G, sp=sp,env=env, control=control) ## gamma=gamma
# else scam.fit1(G=G, sp=sp,env=env, control=control) ## BFGS optimization #
# object$optimizer[1] <- "NA"
# }
if (is.null(sp)) { ## scam_1.2-14 ...
## get initial estimates of the smoothing parameter...
## start <- etastart <- mustart <- NULL
y <- G$y; family <- G$family
nobs <- NROW(y)
eval(family$initialize)
G$y <- y ## needed to set factor response values as numeric
def.sp <- initial.sp.scam(G,optimizer=optimizer, Q,q.f=q.f,n.terms=n.terms,family=family,
intercept=intercept,offset=G$offset, env=env,
weights=weights, control=control)
rho <- log(def.sp+1e-4) ## get initial log(sp) ...
if (!is.null(start))
assign("start",start,envir=env) ## scam_1.2-12
} else { ## no sp estimation to do
rho <- rep(0,0)
G$sp <- sp
}
ptm <- proc.time()
re <- estimate.scam(G=G,optimizer=optimizer,optim.method=optim.method,
rho=rho, env=env,control=control)
CPU.time <- proc.time()-ptm
best <- re
object$gcv.ubre <- re$gcv.ubre
object$dgcv.ubre <- re$dgcv.ubre
best$p.ident <- Q$p.ident
best$S <- Q$S
object$optimizer <- re$optimizer
object$edf1 <- re$edf1
object$termcode <- re$termcode
if (optimizer[1] == "bfgs")
{ object$check.grad <- re$check.grad
object$dgcv.ubre.check <- re$dgcv.ubre.check
}
## post-fitting values...
best$n.smooth <- object$n.smooth <- n.terms
best$formula <- object$formula <- formula
best$family <- object$family <- G$family
best$smooth <- object$smooth <- G$smooth
best$model <- object$model <- G$mf
object$R <- best$R
if (is.null(object$R)){
if (object$optimizer[2] == "newton"){
rr <- scam.fit(G=G, sp=best$sp, env=env, control=control) ## gamma=gamma
object$R <- rr$R ## not sure if it's needed?
}
}
object$sp <- best$sp
names(object$sp) <- names(G$sp)
if (sum(is.na(names(object$sp)))!=0){ ## create names for sp if NA's from G
if (n.terms >0) for (i in 1:n.terms) names(object$sp)[i] <- object$smooth[[i]]$label
}
object$conv <- best$conv # whether or not the inner full Newton(or bfgs) method converged
if (object$optimizer[2] == "newton")
post <- scam.fit.post(G=G, object=best,control=control)
else post <- scam.fit.post1(G=G, object=best) ## for inner bfgs method
object$edf <- best$edf # post$edf
object$edf1 <- post$edf1
object$trA <- best$trA # post$trA
names(object$edf) <- G$term.names
names(object$edf1) <- G$term.names
object$aic <- post$aic # best$aic
object$null.deviance <- post$null.dev
object$deviance <- post$deviance
object$residuals <- post$residuals
object$df.residual <- nrow(G$X) - sum(post$edf)
object$rank <- post$rank
object$var.summary <- G$var.summary
object$cmX <- G$cmX ## column means of model matrix --- useful for CIs
object$model<-G$mf # store the model frame
object$full.sp <- G$full.sp ## incorrect !!!
if (!is.null(object$full.sp)) names(object$full.sp) <- names(G$full.sp)
object$na.action <- attr(G$mf,"na.action") # how to deal with NA's
object$df.null <- post$df.null
object$Ve <- post$Ve
object$Vp <- post$Vb
object$Ve.t <- post$Ve.t
object$Vp.t <- post$Vb.t
object$sig2 <- post$sig2
object$coefficients <- best$beta
object$coefficients.t <- best$beta.t
object$beta <- best$beta
object$beta.t <- best$beta.t
object$pterms <- G$pterms
object$terms <- G$terms
object$assign <- G$assign
object$contrasts <- G$contrasts
object$xlevels <- G$xlevels
object$nsdf <- G$nsdf
object$y <- G$y
# object$data <- G$mf
if (control$keepData) object$data <- data
object$control <- control
object$offset <- G$offset
object$not.exp <- G$not.exp
# object$scale.known <- scale.known # to be passed in the summary function
object$scale.estimated <- !scale.known # to be passed in the summary function
object$prior.weights <-weights # prior weights on observations
object$weights <- best$w # final weights used in full Newton iteration
object$fitted.values <- post$mu #best$mu
object$linear.predictors <- post$eta #best$eta
# cl<-match.call() # call needed in gam object for update to work
object$call <- cl
object$p.ident <- Q$p.ident
object$intercept <- G$intercept
object$min.edf <- G$min.edf ## Minimum possible degrees of freedom for whole model
object$gamma <- gamma
object$iter <- best$iter # number of iterations of the Full Newton
object$pdev.hist <- best$pdev.hist
if (is.null(sp))
object$CPU.time <- CPU.time
else
object$CPU.time <- NULL
object$AR1.rho <- AR1.rho
## scam_1.2-15...
object$AR.start <- AR.start
object$std.rsd <- post$std.rsd
## get the optimizer info (smoothing parameter selection).....
if (is.null(sp))
{ if (optimizer[1] == "bfgs")
{ ## get the bfgs info in case of sp selection...
object$bfgs.info <- list()
object$bfgs.info$conv <- re$conv.bfgs
object$bfgs.info$iter <- re$iterations
object$bfgs.info$grad <- re$dgcv.ubre
object$bfgs.info$score.hist <- re$score.hist
}
else if (optimizer[1] == "nlm.fd" || optimizer[1] == "nlm")
{ object$nlm.info <- list()
object$nlm.info$conv <- re$outer.info$conv
object$nlm.info$iter <- re$outer.info$iterations
object$nlm.info$grad <- re$dgcv.ubre
}
else if (optimizer[1]=="optim")
{ object$optim.info <- list()
object$optim.info$conv <- re$outer.info$conv
object$optim.info$iter <- re$outer.info$iterations
object$optim.method <- re$optim.method
}
else if (optimizer[1]=="efs")
{ object$efs.info <- list()
object$efs.info$conv <- re$outer.info$conv
object$efs.info$iter <- re$outer.info$iter
object$efs.info$score.hist <- re$outer.info$score.hist
## convergence info of the bfgs for pen.deviance minimization wrt model coeff-s...
if (is.null(object$conv)) object$conv <- re$inner.info$conv
if (is.null(object$iter)) object$iter <- re$inner.info$iter
if (is.null(object$pdev.hist)) object$pdev.hist <- re$inner.info$pdev.hist
}
}
if (scale.known)
object$method <- "UBRE"
else object$method <- "GCV"
if (G$nsdf > 0)
term.names <- colnames(G$X)[1:G$nsdf]
else term.names <- array("", 0)
if (n.terms >0)
for (i in 1:n.terms)
{ k <- 1
for (j in G$smooth[[i]]$first.para:G$smooth[[i]]$last.para)
{ term.names[j] <- paste(G$smooth[[i]]$label, ".",
as.character(k), sep = "")
k <- k + 1
}
}
names(object$coefficients) <- term.names
names(object$coefficients.t) <- term.names
ynames <- if (is.matrix(G$y))
rownames(G$y)
else names(G$y)
names(object$residuals) <- ynames
class(object) <- c("scam","glm","lm")
rm(G)
object
} ## end scam
##############################################################
## control function for scam (similar to gam.control(mgcv)) ##
##############################################################
scam.control <- function (maxit = 200, maxHalf=30, devtol.fit=1e-7, steptol.fit=1e-7,
keepData=FALSE,efs.lspmax=15,efs.tol=.1, nlm=list(),optim=list(),bfgs=list(), trace =FALSE, print.warn=FALSE, b.notexp=1, threshold.notexp=20)
# Control structure for a scam.
# devtol.fit is the tolerance to use in the scam.fit call within each IRLS.
# check.analytical - logical whether the analytical gradient of GCV/UBRE should be checked for bfgs method
# del - increment for finite differences when checking analytical gradients for bfgs method
# maxHalf is the number of step halvings to employ in bfgs_gcv.ubre search, before giving up on a search direction.
# trace turns on or off some de-bugging information.
# print.warn =FALSE - when set to 'FALSE' turns off printing warning messages for step halving under non-finite exponentiated coefficients, non-finite deviance and/or if mu or eta are out of bounds.
# b.notexp, threshold.notexp are parameters of notExp() softPlus function
{ if (!is.numeric(devtol.fit) || devtol.fit <= 0)
stop("value of devtol.fit must be > 0")
if (!is.numeric(steptol.fit) || devtol.fit <= 0)
stop("value of steptol.fit must be > 0")
if (!is.numeric(maxit) || maxit <= 0)
stop("maximum number of iterations must be > 0")
if (!is.numeric(maxHalf) || maxHalf <= 0)
stop("maximum number of step halving must be > 0")
if (!is.logical(trace)) stop("trace must be logical")
if (!is.logical(print.warn)) stop("print.warn must be logical")
# work through nlm defaults
if (is.null(nlm$ndigit)||nlm$ndigit<2) nlm$ndigit <- max(2,ceiling(-log10(1e-7)))
nlm$ndigit <- round(nlm$ndigit)
ndigit <- floor(-log10(.Machine$double.eps))
if (nlm$ndigit>ndigit) nlm$ndigit <- ndigit
if (is.null(nlm$gradtol)) nlm$gradtol <- 1e-6
nlm$gradtol <- abs(nlm$gradtol)
## note that nlm will stop after hitting stepmax 5 consecutive times
## hence should not be set too small ...
if (is.null(nlm$stepmax)||nlm$stepmax==0) nlm$stepmax <- 2
nlm$stepmax <- abs(nlm$stepmax)
if (is.null(nlm$steptol)) nlm$steptol <- 1e-4
nlm$steptol <- abs(nlm$steptol)
if (is.null(nlm$iterlim)) nlm$iterlim <- 200
nlm$iterlim <- abs(nlm$iterlim)
## Should be reset for a while anytime derivative code altered...
if (is.null(nlm$check.analyticals)) nlm$check.analyticals <- FALSE
nlm$check.analyticals <- as.logical(nlm$check.analyticals)
# and bfgs defaults
if (is.null(bfgs$check.analytical)) bfgs$check.analytical <- FALSE
if (is.null(bfgs$del)) bfgs$del <- 1e-4
if (is.null(bfgs$steptol.bfgs)) bfgs$steptol.bfgs <- 1e-7
if (is.null(bfgs$gradtol.bfgs)) bfgs$gradtol.bfgs <- 1e-06
if (is.null(bfgs$maxNstep)) bfgs$maxNstep <- 5 ## the maximum allowable step length
if (is.null(bfgs$maxHalf)) bfgs$maxHalf <- maxHalf ## the maximum number of step halving in "backtracking"
# and optim defaults
if (is.null(optim$factr)) optim$factr <- 1e7
optim$factr <- abs(optim$factr)
if (efs.tol<=0) efs.tol <- .1
# notExp (sofPlus) function defaults...
if (!is.numeric(b.notexp) || b.notexp <= 0)
stop("value of b.notexp must be > 0")
if (!is.numeric(threshold.notexp) || threshold.notexp <= 0)
stop("value of threshold.notexp must be > 0")
list(maxit=maxit, devtol.fit=devtol.fit, steptol.fit=steptol.fit, keepData=as.logical(keepData[1]), nlm=nlm, optim=optim,bfgs=bfgs,efs.lspmax=efs.lspmax,efs.tol=efs.tol,trace = trace, print.warn=print.warn,b.notexp=b.notexp, threshold.notexp=threshold.notexp)
} ## end scam.control
#################################################################
## function to get initial estimates of smoothing parameters...##
#################################################################
initial.sp.scam <- function(G,optimizer,Q,q.f,n.terms,family,intercept,offset, env= env,
weights, control=control)
{ ## function to get initial estimates of smoothing parameters
control$devtol.fit <- 1e-4
control$steptol.fit <- 1e-4
## set 'newton' method for coeff estimation if not specified
## (it can happen if 'optimizer' was supplied with one element, specifying
## the sp optimization method to use as, e.g., 'optimizer="efs")
if (is.na(optimizer[2]))
optimizer[2] <- "newton"
## step 1: set sp=rep(0.5,p) and estimate hessian...
if (optimizer[2] == "bfgs") {
b <- scam.fit1(G=G,sp=rep(0.05,length(G$off)), env=env, control=control)
H <- crossprod(b$wX1)
} else {
b <- scam.fit(G=G,sp=rep(0.05,length(G$off)), env=env, control=control)
H <- crossprod(b$wX1) - b$E
}
## step 2:...
n.p <- length(Q$S) ## number of penalty matrices
def.sp <- array(0,n.p) ## initialize the initial sp values
j <- 1
if (n.terms >0) for (i in 1:n.terms){
for (kk in 1:length(G$smooth[[i]]$S)){
start <- G$off[j]
finish <- start + ncol(G$smooth[[i]]$S[[kk]])-1
# matrix norm of the Hessian elements penalized by S[[kk]]...
Hi.norm <- sum(H[start:finish,start:finish]*H[start:finish,start:finish])
Si.norm <- sum(G$smooth[[i]]$S[[kk]]*G$smooth[[i]]$S[[kk]])
def.sp[j] <- (Hi.norm/Si.norm)^0.5
j <- j+1
}
}
## Create again new environments with `start' initially empty...
env <- new.env()
assign("start",rep(0,0),envir=env)
assign("dbeta.start",rep(0,0),envir=env)
assign("sp.last",rep(0,0),envir=env)
def.sp
}
#########################################################
## function to get list of penalty matrices and ##
## vector of parameter identifications ..... ##
#########################################################
penalty_pident <- function(object)
{ ## function to get the list of penalties and vector of model parameters
## identifications from the gam() setting...
n.terms <- length(object$smooth) # number of terms in the model
q <- ncol(object$X) # total number of parameters
cons.terms <- rep(0,n.terms) # define whether each term is constrained or not
if (n.terms>0)
for (i in 1:n.terms){
if (!is.null(object$smooth[[i]]$p.ident))
cons.terms[i] <- 1
}
p.ident <- rep(FALSE,q) # initialize vector of parameter identifications
# with `TRUE' - for a parameter to be exponentiated, `FALSE' - otherwise
off.terms <- rep(0,n.terms) # starting points for each term
off <- object$off
if (n.terms ==length(off))
off.terms <- off
else
{ off.terms[1] <- off[1]
k <- 1
l <- 1
while (l<length(off))
{ if (off[l]!=off[l+1])
{ off.terms[k+1] <- off[l+1]
k <- k+1; l <- l+1
}
else l <- l+1
}
}
if (n.terms>0)
for (i in 1:n.terms){
if (cons.terms[i]==1){
# if (inherits(object$smooth[[i]], c("mipc.smooth")))
# p.ident[off.terms[i]:(off.terms[i]+ncol(object$smooth[[i]]$S[[1]])-1)] <-
# object$smooth[[i]]$p.ident[2:length(object$smooth[[i]]$p.ident)]
# else
ind <- attr(object$smooth[[i]],"del.index") ## to remove unidentifiable model coefficients, scam_1.2-15
if (!is.null(ind))
object$smooth[[i]]$p.ident <- object$smooth[[i]]$p.ident[-ind]
p.ident[off.terms[i]:(off.terms[i]+ncol(object$smooth[[i]]$S[[1]])-1)] <- object$smooth[[i]]$p.ident
}
}
## getting the list of penalty matrices in terms of the full model vector of coefficients...
S <- list()
j <- 1
if (n.terms>0)
for(i in 1:n.terms){
for (kk in 1:length(object$smooth[[i]]$S)){
S[[j]] <- matrix(0,q,q) # initialize penalty matrix
S[[j]][off.terms[i]:(off.terms[i]+ncol(object$smooth[[i]]$S[[kk]])-1),
off.terms[i]:(off.terms[i]+ncol(object$smooth[[i]]$S[[kk]])-1)] <- object$smooth[[i]]$S[[kk]]
j <- j+1
}
}
object$S <- S
object$p.ident <- p.ident
object
} ## end penalty_pident
#############################################################
## Function to fit SCAM based on Full Newton method ##
#############################################################
scam.fit <- function(G,sp, etastart=NULL, mustart=NULL, env=env,
null.coef=rep(0,ncol(G$X)), control=scam.control()) ## gamma=1
## maxit=200, devtol.fit=1e-8, steptol.fit=1e-8, trace=FALSE, print.warn=FALSE
## G - list of items from gam(...,fit=FALSE) needed to fit a scam
## sp- vector of smoothing parameters
## not.exp - if TRUE then notExp() function will be used in place of exp in positivity ensuring beta parameters re-parameterization
## null.coef - coefficients for a null model, in order to be able to check for immediate divergence. null.coef give some sort of upper bound on deviance. This allows immediate divergence problems to be controlled (from mgcv). here some of coeff need to be exponentiated.
## control - control list; it includes:
##* maxit - a positive scalar which gives the maximum number of iterations for Newton's method
##* devtol.fit - a scalar giving the tolerance at which the relative penalized deviance is considered to be close enougth to 0 to terminate the algorithm
##* steptol.fit - a scalar giving the tolerance at which the scaled distance between two successive iterates is considered close enough to zero to terminate the algorithm
##* trace turns on or off some de-bugging information.
##* print.warn =FALSE - when set to 'FALSE' turns off printing warning messages for step halving under non-finite exponentiated coefficients, non-finite deviance and/or if mu or eta are out of bounds.
{ y <- G$y; X <- G$X; S <- G$S; not.exp <- G$not.exp; gamma <- G$gamma
AR1.rho <- G$AR1.rho
attr(X,"dimnames") <- NULL
q0 <- G$q0; q.f <- G$q.f
p.ident <- G$p.ident; n.terms <- G$n.terms
family <- G$family; intercept <- G$intercept; offset <- G$offset;
weights <- G$weights;
n <- nobs <- NROW(y)
q <- ncol(X)
dg <- fix.family.link(family)
dv <- fix.family.var(family)
nvars <- NCOL(X)
EMPTY <- nvars == 0
variance <- family$variance
linkinv <- family$linkinv
if (!is.function(variance) || !is.function(linkinv))
stop("'family' argument seems not to be a valid family object")
dev.resids <- family$dev.resids
aic <- family$aic
mu.eta <- family$mu.eta
mu.eta <- family$mu.eta
if (not.exp){
b.notexp <- control$b.notexp
th.notexp <- control$threshold.notexp
}
if (AR1.rho!=0) {
ld <- 1/sqrt(1-AR1.rho^2) ## leading diagonal of root inverse correlation
sd <- -AR1.rho*ld ## sub diagonal
row <- c(1,rep(1:nobs,rep(2,nobs))[-c(1,2*nobs)])
weight.r <- c(1,rep(c(sd,ld),nobs-1))
end <- c(1,1:(nobs-1)*2+1)
if (!is.null(G$AR.start)) { ## need to correct the start of new AR sections...
ii <- which(G$AR.start ==TRUE)
if (length(ii)>0) {
if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
weight.r[ii*2-2] <- 0 ## zero sub diagonal
weight.r[ii*2-1] <- 1 ## set leading diagonal to 1
}
}
## apply transform...
X <- rwMatrix(end,row,weight.r,X)
y <- rwMatrix(end,row,weight.r,y)
}
if (!is.function(variance) || !is.function(linkinv))
stop("illegal `family' argument")
valideta <- family$valideta
if (is.null(valideta))
valideta <- function(eta) TRUE
validmu <- family$validmu
if (is.null(validmu))
validmu <- function(mu) TRUE
if (is.null(mustart)) {
eval(family$initialize)
}
else {
mukeep <- mustart
eval(family$initialize)
mustart <- mukeep
}
## Added code...
if (family$family=="gaussian"&&family$link=="identity") strictly.additive <- TRUE else
strictly.additive <- FALSE
## end of added code
if (EMPTY) {
eta <- rep.int(0, nobs) + offset
if (!valideta(eta))
stop("Invalid linear predictor values in empty model")
mu <- linkinv(eta)
if (AR1.rho!=0) {
mu <- rwMatrix(end,row,weight.r,mu)
}
if (!validmu(mu))
stop("Invalid fitted means in empty model")
dev <- sum(dev.resids(y, mu, weights))
w <- (weights * mu.eta(eta)^2)/variance(mu) ## incorrect for Newton
residuals <- (y - mu)/mu.eta(eta)
good <- rep(TRUE, length(residuals))
boundary <- conv <- TRUE
coef <- numeric(0)
iter <- 0
V <- variance(mu)
alpha <- dev
trA <- 0
GCV <- nobs * alpha/(nobs - gamma * trA)^2
UBRE <- alpha/nobs + 2 * gamma* trA*scale/n - scale
scale.est <- alpha/(nobs - trA)
aic.model <- aic(y, n, mu, weights, dev) + 2 * trA
} ### end if (EMPTY)
else {
eta <- if (!is.null(etastart))
etastart
else family$linkfun(mustart)
mu <- as.numeric(linkinv(eta))
# if (!(validmu(mu) && valideta(eta)))
# stop("Can't find valid starting values: please specify some")
S.t <- matrix(0,q,q) # define the total sum of the penalties times sp
n.pen <- length(S) # number of penalties
if (length(sp)!=n.pen) stop (paste("length of sp has to be equal to", n.pen))
if (n.pen>0) for (j in 1:n.pen) S.t <- S.t + sp[j]*S[[j]]
# get sqrt of total penalty matrix...
er <- eigen(S.t,symmetric=TRUE); er$values[er$values<0] <- 0
rS <- crossprod(sqrt(sqrt(er$values))*t(er$vectors))
# ii <- p.ident==1 ## set to TRUE/FALSE
## count <- sum(p.ident)
## iv <- array(0, dim=c(count,1)) # define an index vector for the coeff-s to be exponentiated
iv <- (1:q)[p.ident] # define an index vector for the coeff-s to be exponentiated
##############################################
## Initialization of parameters start here...
beta0 <- get("start",envir=env)
dbeta0 <- get("dbeta.start",envir=env)
sp.old <- get("sp.last",envir=env)
if (length(beta0)==0) {
# list argument to pcls for initializing model coefficients
M <- list(X=X,p=rep(0.1,q),C=matrix(0,0,0),sp=sp,y=eta-offset,w=y*0+1)
M$Ain <- matrix(0,q,q); diag(M$Ain) <- rep(1,q);
M$bin <- rep(-1e+12,q); M$bin[iv] <- 1e-12
M$off <- rep(0,n.pen); M$S <- list()
if (n.pen>0) for (j in 1:n.pen) {M$S[[j]] <- matrix(0,q,q); M$S[[j]] <- S[[j]]}
beta.t <- pcls(M) # initialize model coefficients (re-parameterized beta)
beta <- beta.t # initialize beta
beta[iv] <- log(beta.t[iv]) # values of beta of the constrained terms
}
else {
beta <- beta0
beta.t <- beta ## current beta tilde
beta.t[iv] <- if (!not.exp) exp(beta[iv]) else notExp(beta[iv],b.notexp,th.notexp) # values of re-para beta of the constrained term
}
## Initialization of parameters finishes here
#################################################
eta <- as.numeric(X%*%beta.t + as.numeric(offset)) # define initial linear predictor
mu <- linkinv(eta) # define initial fitted model
## dev <- sum(dev.resids(y,mu,weights)) # define initial norm/deviance
## pdev <- dev + sum((rS%*%beta)^2) # define initial penalized deviance
## old.pdev <- pdev # initialize convergence control for penalized deviance
##################################################################
## added code here made on May 6, 2020 for scam version 1-2-6,
## following gam.fit3(mgcv_1.8-31))....
###################################################################
## shrink towards null.coef if immediately invalid
## betaold <- null.coef
## etaold <- null.eta <- as.numeric(X%*%null.coef + as.numeric(offset))
## old.pdev <- sum(dev.resids(y, linkinv(null.eta), weights)) + sum((rS%*%null.coef)^2)
## null.eta <- as.numeric(X%*%null.coef + as.numeric(offset))
ii <- 0
while (!(validmu(mu) && valideta(eta))) { ## shrink towards null.coef if immediately invalid
ii <- ii + 1
if (ii>20) stop("Can't find valid starting values: please specify some")
beta <- beta * .9 + null.coef * .1
beta.t <- beta ## current beta tilde
beta.t[iv] <- if (!not.exp) exp(beta[iv]) else notExp(beta[iv],b.notexp,th.notexp)
eta <- as.numeric(X%*%beta.t + offset)
mu <- linkinv(eta)
}
betaold <- null.coef <- beta
betaold.t <- beta
betaold.t[iv] <- if (!not.exp) exp(betaold[iv]) else notExp(betaold[iv],b.notexp,th.notexp)
etaold <- as.numeric(X%*%betaold.t + offset)
old.pdev <- sum(dev.resids(y,linkinv(etaold),weights)) + sum((rS%*%betaold)^2)
##################################################################
pdev.plot <- 0 # define initial pen dev for plotting it
E <- matrix(0,q,q) # define diagonal matrix E- second term of the Hessian
Cdiag <- rep(1,q); C1diag <- rep(0,q)
## if (!not.exp) {
## Cdiag[iv] <- C1diag[iv] <- beta.t[iv]
## } else {
## Cdiag[iv] <- DnotExp(beta[iv]); C1diag[iv] <- D2notExp(beta[iv])
## }
## tX1 <- Cdiag*t(X)
## g.deriv <- 1/mu.eta(eta) # diag(G)
## w1 <- weights/(variance(mu)*g.deriv^2) # diag(W1)
## Dp.g <- - drop(tX1%*%(w1*g.deriv*(y-mu))) + S.t%*%beta
# the gradient vector of the penalized deviance
## Dp.gnorm <- max(abs(Dp.g)) # set convergence on the max value of the Dp.g
## betaold <- beta
boundary <- conv <- FALSE
old.warn <- getOption("warn")
if (!control$print.warn) curr.warn <- -1 else curr.warn <- old.warn
###################################################
## MAIN ITERATIONS START HERE...
#cat("\nscam.fit iter start")
for (iter in 1:control$maxit) {
#cat(".")
good <- weights > 0
var.val <- variance(mu)
varmu <- var.val[good]
if (any(is.na(varmu)))
stop("NAs in V(mu)")
if (any(varmu == 0))
stop("0s in V(mu)")
mu.eta.val <- mu.eta(eta)
if (any(is.na(mu.eta.val[good])))
stop("NAs in d(mu)/d(eta)")
good <- (weights > 0) & (mu.eta.val != 0)
if (all(!good)) {
conv <- FALSE
warning("No observations informative at iteration ",
iter)
break
}
if (!not.exp) {
Cdiag[iv] <- C1diag[iv] <- beta.t[iv]
} else {
Cdiag[iv] <- DnotExp(beta[iv],b.notexp,th.notexp); C1diag[iv] <- D2notExp(beta[iv],b.notexp,th.notexp)
}
tX1 <- Cdiag*t(X)
g.deriv <- 1/mu.eta(eta) # diag(G)
w1 <- weights/(variance(mu)*g.deriv^2) # diag(W1) - Fisher weights
y.mu <- y - mu
alpha <- 1+ y.mu*(dv$dvar(mu)/variance(mu)+dg$d2link(mu)/g.deriv) # alpha elements of W
w <- w1*alpha # diag(W) - Newton weights
diag(E) <- drop((C1diag*t(X))%*%(w1*g.deriv*y.mu))
abs.w <- abs(w) # absolute values of the diag(W)
I.minus <- rep(0,nobs) # define diagonal elements of the matrix I^{-}
z1 <- g.deriv*y.mu/alpha # the first term of the pseudodata
iin <- w < 0; I.minus[iin] <- 1;z1[iin] <- -z1[iin]
wX11 <- rbind(sqrt(abs.w)[1:nobs]*t(tX1),rS)
illcond <- FALSE
Q <- qr(wX11,LAPACK=TRUE)
R <- qr.R(Q)
rp <- 1:ncol(R)
rp[Q$pivot] <- rp ## reverse pivot X=Q%*%R[,rp]
if (Rrank(R)==ncol(R)) { ## no need to truncate, can just use QR
R.inv <- backsolve(R,diag(ncol(R)))[rp,] ## inverse of unpivoted R
tR.inv <- t(R.inv)
} else { ## need SVD step
R <- R[,rp] ## unpivoted R
svd.r <- svd(R)
d.inv <- rep(0,q) # initial vector of inverse singular values
good1 <- svd.r$d >= max(svd.r$d)*.Machine$double.eps^.5
d.inv[good1] <- 1/svd.r$d[good1]
if (sum(!good1)>0) illcond <- TRUE
R <- svd.r$d*t(svd.r$v)
Q <- qr.qy(Q,rbind(svd.r$u,matrix(0,nobs,q))) ## this is inefficient don't need to extract Q really
tR.inv <- d.inv*t(svd.r$v) # inverse of transpose of R
R.inv <- t(tR.inv)
}
QtQRER <- tR.inv%*%(diag(E)*R.inv)
if (sum(I.minus)>0) {
if (is.qr(Q)) {
# QtQRER <- QtQRER + 2*tcrossprod(qr.qty(Q,diag(nrow(wX11))[,(1:nobs)[as.logical(I.minus)]]))
QtQRER <- QtQRER + 2*crossprod(I.minus*qr.Q(Q)[1:nobs,])
} else {
QtQRER <- QtQRER + 2*crossprod(I.minus*Q[1:nobs,])
}
}
ei <- eigen(QtQRER,symmetric=TRUE) # eigen-decomposition of QtQRER
d <- ei$values # vector of eigenvalues
ok1 <- sum(d>1) > 0 # checking positive semi-definiteness
if (ok1 == TRUE) {# Fisher step in case of not +ve semi-definiteness of penalized loglikelihood
# set alpha =1
eta.t <- drop(t(beta)%*%tX1) # eta tilde for pseudodata
wX11 <- rbind(sqrt(w1)[1:nobs]*t(tX1),rS)
z<-g.deriv*y.mu+eta.t # pseudodata
wz<-w1^.5*z # weighted pseudodata
wz.aug<-c(wz,rep(0,nrow(rS))) # augmented pseudodata
Q <- qr(wX11,LAPACK=TRUE)
R <- qr.R(Q)
rp <- 1:ncol(R)
rp[Q$pivot] <- rp ## reverse pivot X=Q%*%R[,rp]
if (Rrank(R)==ncol(R)) { ## no need to truncate, can just use QR
beta <- backsolve(R,qr.qty(Q,wz.aug)[1:q])[rp]
} else { ## need SVD step
R <- R[,rp] ## unpivoted R
s1 <- svd(R)
d.inv1 <- rep(0,q)
good1 <- s1$d >= max(s1$d)*.Machine$double.eps^.5
d.inv1[good1] <- 1/s1$d[good1]
beta <- s1$v%*%((d.inv1*t(s1$u))%*%qr.qty(Q,wz.aug)[1:q])
}
} ### end of if (ok1) - Fisher step
else { ## full Newton step
Id.inv.r<-1/(1-d)^.5 # vector of inverse values of (1-sv)^.5
iin <- (1-d) < .Machine$double.eps
Id.inv.r[iin] <- 0
eidrop <- t(Id.inv.r*t(ei$vectors))
wz1<-sqrt(abs.w)*z1 # the first term of the weighted pseudodata
if (is.qr(Q)) {
beta <- R.inv%*%(eidrop%*%(t(eidrop)%*%qr.qty(Q,c(wz1,rep(0,nrow(rS))))[1:nrow(eidrop)]))
} else {
beta <- R.inv%*%(eidrop%*%(t(eidrop)%*%(t(Q[1:nobs,])%*%wz1)[1:nrow(eidrop)]))
}
beta <- betaold +
drop(beta - R.inv%*%(eidrop%*%(t(eidrop)%*%(tR.inv%*%(S.t%*%betaold)))))
} ### end of if (!ok1) - Newton step
## delta <- beta-c(betaold) # trial step
## step <- 1 # initial trial step length
## beta <- c(betaold)+step*delta # current parameter estimates
beta.t <- beta # current reparameterized beta
beta.t[iv] <- if (!not.exp) exp(beta[iv]) else notExp(beta[iv],b.notexp,th.notexp) # values of re-para beta of the shape constrained term
## eta <- drop(as.numeric(X%*%beta.t + offset)) # linear predictor
## mu <- linkinv(eta) # fitted values
## dev <- sum(dev.resids(y,mu,weights)) # deviance of the working model
## pdev <- dev + sum((rS%*%beta)^2) # deviance + penalty of the working model
##################################################################
## added code here made on May 6, 2020 for scam version 1-2-6,
## following gam.fit3(mgcv_1.8-31))....
###################################################################
if (any(!is.finite(beta))) {
conv <- FALSE
warning(gettextf("Non-finite coefficients at iteration %d",
iter))
break
}
options(warn = curr.warn) ## turn of/on printing warning messages under non-finite deviance resulted in non-finite exponentiated coefficients...
if (any(!is.finite(beta.t))) {
conv <- FALSE
warning(gettextf("Non-finite exponentiated coefficients at iteration %d",
iter))
# break ## takes out of the main iteration loop 'iter in 1:maxit'
}
eta <- drop(as.numeric(X%*%beta.t + offset)) # linear predictor
mu <- linkinv(eta) # fitted values
dev <- sum(dev.resids(y,mu,weights)) # deviance of the working model
penalty <- sum((rS%*%beta)^2)
if (control$trace)
message(gettextf("Deviance = %s Iterations - %d", dev, iter, domain = "R-scam"))
boundary <- FALSE
## step halve under non-finite deviance...
if (!is.finite(dev)) {
if (is.null(betaold)) {
if (is.null(null.coef))
stop("no valid set of coefficients has been found:please supply starting values",
call. = FALSE)
## Try to find feasible coefficients from the null.coef and null.eta
betaold <- null.coef
}
warning("Step size truncated due to divergence",
call. = FALSE)
ii <- 0
## step <- 1
while (!is.finite(dev)) {
if (ii > control$maxit)
stop("inner loop 1; can't correct step size")
ii <- ii + 1
## beta <- (beta + betaold)/2
## eta <- (eta + etaold)/2
## mu <- linkinv(eta)
## step <- step/2 ## decrease step length
## beta <- c(betaold)+step*delta ## update current parameter estimates
beta <- (beta + c(betaold))/2
beta.t <- beta ## update current re-para beta
beta.t[iv] <- if (!not.exp) exp(beta[iv]) else notExp(beta[iv],b.notexp,th.notexp)
eta <- as.numeric(X%*%beta.t + offset) ## linear predictor
mu <- linkinv(eta) ## fitted values
dev <- sum(dev.resids(y, mu, weights))
}
boundary <- TRUE
penalty <- sum((rS%*%beta)^2) ## t(beta)%*%S.t%*%beta # reset penalty too
if (control$trace)
cat("Step halved: new deviance =", dev, "\n")
} ## end of infinite deviance correction
## now step halve if mu or eta are out of bounds...
if (!(valideta(eta) && validmu(mu))) {
warning("Step size truncated: out of bounds",
call. = FALSE)
ii <- 0
## step <- 1
while (!(valideta(eta) && validmu(mu))) {
if (ii > control$maxit)
stop("inner loop 2; can't correct step size")
ii <- ii + 1
## beta <- (beta + betaold)/2
## eta <- (eta + etaold)/2
## mu <- linkinv(eta)
## step <- step/2 ## decrease step length
## beta <- c(betaold)+step*delta ## update current parameter estimates
beta <- (beta + c(betaold))/2 ## update current parameter estimates
beta.t <- beta ## update current re-para beta
beta.t[iv] <- if (!not.exp) exp(beta[iv]) else notExp(beta[iv],b.notexp,th.notexp)
eta <- as.numeric(X%*%beta.t + offset) ## linear predictor
mu <- linkinv(eta) ## fitted values
}
boundary <- TRUE
penalty <- sum((rS%*%beta)^2) ## t(beta)%*%S.t%*%beta
dev <- sum(dev.resids(y, mu, weights))
if (control$trace)
cat("Step halved: new deviance =", dev, "\n")
} ## end of invalid mu/eta handling
options(warn = old.warn) ## return to the default option for printing warning messages (which is '0')
## now check for divergence of penalized deviance....
pdev <- dev + penalty ## the penalized deviance
if (control$trace)
message(gettextf("penalized deviance = %s", pdev, domain = "R-scam"))
######################################################
## end of added coding
######################################################
div.thresh <- 10*(.1 +abs(old.pdev))*.Machine$double.eps^.5
## `step reduction' approach starts here...
## ii <- 1
## while ( (pdev-old.pdev) > div.thresh) { # 'step reduction' approach
## if (ii > maxHalf.fit)
## break ## stop ("step reduction failed")
## ii <- ii+1
## step <- step/2 # decrease step length
## beta <- c(old.beta)+step*delta # update current parameter estimates
## beta.t <- beta # update current re-para beta
## beta.t[iv] <- if (!not.exp) exp(beta[iv]) else notExp(beta[iv])
## eta <- as.numeric(X%*%beta.t + offset) # linear predictor
## mu <- linkinv(eta) # fitted values
## dev <- sum(dev.resids(y,mu,weights)) # update deviance of the working model
## pdev <- dev+sum((rS%*%beta)^2) # update pen deviance of the working model
## }
## `step reduction' finishes here
## ... threshold for judging divergence --- too tight and near
## perfect convergence can cause a failure here
if (pdev-old.pdev>div.thresh) { ## solution diverging
ii <- 0 ## step halving counter
## step <- 1
## if (iter==1) { ## immediate divergence, need to shrink towards zero
## betaold <- null.coef
## }
while (pdev -old.pdev > div.thresh){ ## step halve until pdev <= old.pdev
if (ii > 100)
stop("inner loop 3; can't correct step size")
ii <- ii + 1
## beta <- (beta + betaold)/2
## eta <- (eta + etaold)/2
## mu <- linkinv(eta)
## step <- step/2 ## decrease step length
## beta <- c(betaold)+step*delta ## update current parameter estimates
beta <- (beta + c(betaold))/2
beta.t <- beta ## update current re-para beta
beta.t[iv] <- if (!not.exp) exp(beta[iv]) else notExp(beta[iv],b.notexp,th.notexp)
eta <- as.numeric(X%*%beta.t + offset) ## linear predictor
mu <- linkinv(eta) ## fitted values
dev <- sum(dev.resids(y, mu, weights)) ## update deviance of the working model
pdev <- dev + sum((rS%*%beta)^2) ## update the penalized deviance
if (control$trace)
message(gettextf("Step halved: new penalized deviance = %g", pdev, "\n"))
}
} ## end of pdev divergence
Dp.g <- -drop(tX1%*%(w1*g.deriv*(y-mu)))+S.t%*%beta # the gradient vector of the penalized deviance
Dp.gnorm <- max(abs(Dp.g))
pdev.plot[iter] <- pdev # store penilized deviance of the working model for plotting
## if (strictly.additive) { conv <- TRUE; break;}
## checking convergence adequately...
if (abs(pdev - old.pdev)/(.1 + abs(pdev)) < control$devtol.fit) {
if (Dp.gnorm > control$devtol.fit*max(abs(beta+c(betaold)))/2) {
## if (max(abs(beta - c(betaold))) > control$steptol.fit*max(abs(beta + c(betaold)))/2) {
old.pdev <- pdev ## not converged quite enough
betaold <- beta
}
else { ## converged
conv <- TRUE
beta <- beta
break
}
}
else { ## not converged
old.pdev <- pdev
betaold <- beta
}
} ## main iteration procedure is completed here.
#######################################################
## at this stage the model has been fully estimated
## now define matrices at their converged values from the full Newton method...
# beta.t <- beta ## estimates of re-para beta
# beta.t[iv] <- if (!not.exp) exp(beta[iv]) else notExp(beta[iv])
# eta <- as.numeric(X%*%beta.t + offset) ## linear predictor
# mu <- linkinv(eta) ## fitted values
# dev <- sum(dev.resids(y,mu,weights))
if (!not.exp) { Cdiag[iv] <- C1diag[iv] <- beta.t[iv]
} else {
Cdiag[iv] <- DnotExp(beta[iv],b.notexp,th.notexp); C1diag[iv] <- D2notExp(beta[iv],b.notexp,th.notexp)
}
X1 <- t(Cdiag*t(X))
g.deriv <- 1/ mu.eta(eta) # diag(G)
w1 <- weights/(variance(mu)*g.deriv^2) # diag(W1)
alpha <- 1+(y-mu)*(dv$dvar(mu)/variance(mu)+dg$d2link(mu)/g.deriv) # alpha elements of W
w <- w1*alpha # diag(W)
diag(E) <- drop((C1diag*t(X))%*%(w1*g.deriv*(y-mu))) # diagonal elements of E
abs.w <- abs(w) # absolute values of the diag(W)
I.minus <- rep(0,nobs) # define diagonal elements of the matrix I^{-}
I.minus[w<0] <- 1
wX1 <- sqrt(abs.w)[1:nobs]*X1 ## wX1 actually used later
wX11 <- rbind(wX1,rS) # augmented model matrix
## Faster version only does SVD when needed (and then only on QR factor)
illcond <- FALSE
Q <- qr(wX11,LAPACK=TRUE)
R <- qr.R(Q)
rp <- 1:ncol(R)
rp[Q$pivot] <- rp ## reverse pivot X=Q%*%R[,rp]
R.out <- R[,rp] ## unpivoted R, needed for summary function
rank <- Rrank(R)
if (rank==ncol(R)) { ## no need to truncate, can just use QR
R.inv <- backsolve(R,diag(ncol(R)))[rp,] ## inverse of unpivoted R
tR.inv <- t(R.inv)
} else { ## need SVD step
R <- R[,rp] ## unpivoted R
svd.r <- svd(R)
d.inv <- rep(0,q) # initial vector of inverse singular values
good1 <- svd.r$d >= max(svd.r$d)*.Machine$double.eps^.5
d.inv[good1] <- 1/svd.r$d[good1]
if (sum(!good1)>0) illcond <- TRUE
R <- svd.r$d*t(svd.r$v)
Q <- qr.qy(Q,rbind(svd.r$u,matrix(0,nobs,q))) ## this is inefficient don't need to extract Q really
tR.inv <- d.inv*t(svd.r$v) # inverse of transpose of R
R.inv <- t(tR.inv)
}
QtQRER <- tR.inv%*%(diag(E)*R.inv)
if (sum(I.minus)>0) {
if (is.qr(Q)) {
QtQRER <- QtQRER + 2*crossprod(I.minus*qr.Q(Q)[1:nobs,])
} else {
QtQRER <- QtQRER + 2*crossprod(I.minus*Q[1:nobs,])
}
}
ei <- eigen(QtQRER,symmetric=TRUE) # eigen-decomposition of QtQRER
d <- ei$values # vector of eigenvalues
ok1 <- sum(d>1)>0
if (ok1) { ## Fisher step in case of not positive semi-definiteness of penalized loglikelihood
## set alpha =1 ...
wX1<-sqrt(w1)[1:nobs]*X1
wX11<-rbind(wX1,rS)
Q <- qr(wX11,LAPACK=TRUE)
R <- qr.R(Q)
rp <- 1:ncol(R)
rp[Q$pivot] <- rp ## reverse pivot X=Q%*%R[,rp]
R.out <- R[,rp] ## unpivoted R, needed for summary function
rank <- Rrank(R)
if (rank==ncol(R)) { ## no need to truncate, can just use QR
P <- backsolve(R,diag(ncol(R)))[rp,]
K <- qr.Q(Q)[1:nobs,]
} else { ## need SVD step
R <- R[,rp] ## unpivoted R
s1 <- svd(R)
d.inv1 <- rep(0,q)
good1 <- s1$d >= max(s1$d)*.Machine$double.eps^.5
d.inv1[good1] <- 1/s1$d[good1]
P <- t(d.inv1*t(s1$v))
K <- qr.qy(Q,rbind(s1$u,matrix(0,nobs,q)))[1:nobs,]
}
} ## end of if (ok1)
else { ## full Newton step
Id.inv.r<-1/(1-d)^.5 # vector of inverse values of (1-sv)^1/2
ii <- (1-d) < .Machine$double.eps
Id.inv.r[ii] <- 0
eidrop <- t(Id.inv.r*t(ei$vectors))
P <- R.inv%*%eidrop ## ei$vectors%*%diag(Id.inv.r) # define matrix P
if (is.qr(Q)) {
K <- qr.qy(Q,rbind(eidrop,matrix(0,nobs,q)))[1:nobs,]
} else {
K <- Q[1:nobs,]%*%eidrop ## (ei$vectors%*%diag(Id.inv.r)) # define matrix K
}
} ## end of if (!ok1)
# end of calculation of the matrices at their converged values...
Dp.g <- -t(X1)%*%(w1*g.deriv*(y-mu))+S.t%*%beta # the gradient vector of the penalized deviance
Dp.gnorm<-max(abs(Dp.g))
# calculating tr(A)...
I.plus <- rep(1,nobs) # define diagonal elements of the matrix I^{+}
I.plus[w<0] <- -1
L <- c(1/alpha) # define diagonal elements of L=diag(1/alpha)
## NOTE PKt is O(np^2) and not needed --- can compute trA as side effect of gradiant
KtILQ1R <- crossprod(L*I.plus*K,wX1) ## t(L*I.plus*K)%*%wX1
edf <- rowSums(P*t(KtILQ1R))
trA <- sum(edf)
## below is needed for later calculations of derivative of trA
wXC1 <- sqrt(abs.w)[1:nobs]*t(C1diag*t(X))
KtILQ1R <- if (!not.exp) KtILQ1R else crossprod(L*I.plus*K,wXC1) ##t(L*I.plus*K)%*%wXC1
KtIQ1R <- if (!not.exp) crossprod(I.plus*K,wX1)
else crossprod(I.plus*K,wXC1)
C2diag <- rep(0,q)
C2diag[iv] <- if (!not.exp) C1diag[iv] else D3notExp(beta[iv],b.notexp,th.notexp)
XC2 <- t(C2diag*t(X))
XC1 <- t(C1diag*t(X))
############################
## some return values....
dlink.mu <- 1/mu.eta(eta); Var<- variance(mu)
link <- family$linkfun(mu); d2link.mu <- dg$d2link(mu)
dvar.mu <- dv$dvar(mu); d2var.mu <- dv$d2var(mu)
d3link.mu <- dg$d3link(mu)
z <- g.deriv*(y-mu)+X1%*%beta
#############################
#############################
scale.est <- dev/(nobs-trA) # scale estimate
## re-transforming 'mu' back on original scale and hence residuals, in case of correlated errors...
residuals <- rep.int(NA, nobs)
residuals <- (y-mu)*g.deriv
# if (AR1.rho==0) residuals <- (y-mu)*g.deriv
# else {
# eta <- as.numeric(G$X%*%beta.t + offset) # linear predictor on original scale
# mu <- linkinv(eta) # fitted values on original scale
# g.deriv <- 1/ mu.eta(eta) # diag(G)
# residuals <- (G$y-mu)*g.deriv
# }
################################
## calculation of the derivatives of beta by the Implicit Function Theorem starts here
dbeta.rho <- matrix(0,q,n.pen) # define matrix of the parameters derivatives
if (n.pen>0) for (j in 1:n.pen) {
dbeta.rho[,j] <- - sp[j]*P%*%(t(P)%*%(S[[j]]%*%beta)) # derivative of beta wrt rho[j]
}
# end of calculating the parameters derivatives
aic.model <- aic(y, n, mu, weights, dev) + 2 * sum(edf)
## scam_1.2-15...
if (AR1.rho!=0) { ## correct aic for AR1 transform
std.rsd <- AR.resid(residuals,AR1.rho,G$AR.start) ##standardised residuals for AR1 model (approximately uncorrelated under correct model)
dev.st <- sum(std.rsd^2)
df <- if (is.null(G$AR.start)) 1 else sum(G$AR.start)
aic.model <- aic(y, n, mu, weights, dev.st) +2*sum(edf) -2*(n-df)*log(ld)
}
assign("start",beta,envir=env)
assign("dbeta.start",dbeta.rho,envir=env)
assign("sp.last",sp,envir=env)
} ### end if (!EMPTY)
list(L=L,C1diag=C1diag,E=E,iter=iter, old.beta=betaold, gcv=dev*nobs/(nobs-gamma *trA)^2, sp=sp,
mu=mu,X=G$X,y=drop(G$y), X1=X1,beta=beta,beta.t=beta.t,iv=iv,S=S,S.t=S.t,rS=rS,
P=P,K=K, C2diag=C2diag, KtILQ1R= KtILQ1R, KtIQ1R=KtIQ1R, ## XC1=XC1, XC2=XC2,
dlink.mu=dlink.mu,Var=Var, abs.w=drop(abs.w),
link=link,w=as.numeric(w),w1=drop(w1),d2link.mu=d2link.mu,wX1=wX1,I.plus=I.plus,
dvar.mu=dvar.mu,d2var.mu=d2var.mu,deviance=dev,scale.est=scale.est,
ok1=ok1,alpha=as.numeric(alpha),d3link.mu=d3link.mu,eta=eta,iter=iter,
Dp.gnorm=Dp.gnorm, Dp.g=Dp.g,d=d, conv=conv, illcond=illcond,R=R.out, edf=edf,trA=trA,
residuals=residuals,z=z,dbeta.rho=dbeta.rho, aic=aic.model,rank=rank,pdev.hist=pdev.plot) ##step=step, AR1.rho=AR1.rho,AR.start=G$AR.start)
} ## end of scam.fit
#######################################################################
## function to get null deviance and covariance matrices after fit ##
#######################################################################
scam.fit.post<- function(G, object,control) ##,sig2,offset,intercept, weights,scale.known, not.exp)
{ ## Function to compute null deviance and covariance matrices after a scam fit.
## covariance matrix should use expected Hessian, so re-computation of factors
## is required.
## object - object from estimate.scam()
y <- G$y; X <- G$X;
sig2 <- G$sig2; offset <- G$offset; intercept <- G$intercept;
weights <- G$weights; scale.known <- G$scale.known; not.exp <- G$not.exp
n <- nobs <- NROW(y) # number of observations
q <- ncol(X)
if (not.exp) {
b.notexp <- control$b.notexp
th.notexp <- control$threshold.notexp
}
linkinv <- object$family$linkinv
dev.resids <- object$family$dev.resids
dg <- fix.family.link(object$family)
dv <- fix.family.var(object$family)
## scam_1.2-15 (removed AR1 transform for y and X as output must be on original untransformed scale)...
eta <- as.numeric(X%*%object$beta.t + offset) # linear predictor
mu <- linkinv(eta) # fitted values
dev <- sum(dev.resids(y,mu,weights)) # deviance of the final model
wtdmu <- if (intercept) sum(weights * y)/sum(weights)
else linkinv(offset)
null.dev <- sum(dev.resids(y, wtdmu, weights))
n.ok <- nobs - sum(weights == 0)
nulldf <- n.ok - as.integer(intercept)
## define matrices at their converged values from the full Newton method...
Cdiag <- rep(1,q); C1diag <- rep(0,q)
iv <- object$iv
if (!not.exp) {
Cdiag[iv] <- C1diag[iv] <- object$beta.t[iv]
} else {
Cdiag[iv] <- DnotExp(object$beta[iv],b.notexp,th.notexp); C1diag[iv] <- D2notExp(object$beta[iv],b.notexp,th.notexp)
}
X1 <- t(Cdiag*t(X))
g.deriv <- 1/object$family$mu.eta(eta) # diag(G)
w1 <- weights/(object$family$variance(mu)*g.deriv^2) # diag(W1)
alpha <- 1+(y-mu)*(dv$dvar(mu)/object$family$variance(mu)+dg$d2link(mu)/g.deriv) # alpha elements of W
w <- w1*alpha # diag(W)
E <- matrix(0,q,q)
diag(E) <- drop((C1diag*t(X))%*%(w1*g.deriv*(y-mu))) # diagonal elements of E
abs.w <- abs(w) # absolute values of the diag(W)
I.minus <- rep(0,nobs) # define diagonal elements of the matrix I^{-}
I.minus[w<0] <- 1
wX1 <- sqrt(abs.w)[1:nobs]*X1
wX11 <- rbind(wX1,object$rS) # augmented model matrix
## Faster version only does SVD when needed (and then only on QR factor)
illcond <- FALSE
Q <- qr(wX11,LAPACK=TRUE)
R <- qr.R(Q)
rp <- 1:ncol(R)
rp[Q$pivot] <- rp ## reverse pivot X=Q%*%R[,rp]
R.out <- R[,rp] ## unpivoted R, needed for summary function
rank <- Rrank(R)
if (rank==ncol(R)) { ## no need to truncate, can just use QR
R.inv <- backsolve(R,diag(ncol(R)))[rp,] ## inverse of unpivoted R
tR.inv <- t(R.inv)
} else { ## need SVD step
R <- R[,rp] ## unpivoted R
svd.r <- svd(R)
d.inv <- rep(0,q) # initial vector of inverse singular values
good <- svd.r$d >= max(svd.r$d)*.Machine$double.eps^.5
d.inv[good] <- 1/svd.r$d[good]
if (sum(!good)>0) illcond <- TRUE
R <- svd.r$d*t(svd.r$v)
Q <- qr.qy(Q,rbind(svd.r$u,matrix(0,nobs,q))) ## this is inefficient don't need to extract Q really
tR.inv <- d.inv*t(svd.r$v) # inverse of transpose of R
R.inv <- t(tR.inv)
}
QtQRER <- tR.inv%*%(diag(E)*R.inv)
if (sum(I.minus)>0) {
if (is.qr(Q)) {
QtQRER <- QtQRER + 2*crossprod(I.minus*qr.Q(Q)[1:nobs,])
} else {
QtQRER <- QtQRER + 2*crossprod(I.minus*Q[1:nobs,])
}
}
ei <- eigen(QtQRER,symmetric=TRUE) # eigen-decomposition of QtQRER
d <- ei$values # vector of eigenvalues
ok1 <- sum(d>1)>0
if (ok1) { ## Fisher step in case of not positive semi-definiteness of penalized loglikelihood
## set alpha =1 ...
wX1<-sqrt(w1)[1:nobs]*X1
wX11<-rbind(wX1,object$rS)
Q <- qr(wX11,LAPACK=TRUE)
R <- qr.R(Q)
rp <- 1:ncol(R)
rp[Q$pivot] <- rp ## reverse pivot X=Q%*%R[,rp]
R.out <- R[,rp] ## unpivoted R, needed for summary function
rank <- Rrank(R)
if (rank==ncol(R)) { ## no need to truncate, can just use QR
P <- backsolve(R,diag(ncol(R)))[rp,]
K <- qr.Q(Q)[1:nobs,]
} else { ## need SVD step
R <- R[,rp] ## unpivoted R
s1 <- svd(R)
d.inv1 <- rep(0,q)
good1 <- s1$d >= max(s1$d)*.Machine$double.eps^.5
d.inv1[good1] <- 1/s1$d[good1]
P <- t(d.inv1*t(s1$v))
K <- qr.qy(Q,rbind(s1$u,matrix(0,nobs,q)))[1:nobs,]
}
} ## end of if (ok1)
else { ## full Newton step
Id.inv.r<-1/(1-d)^.5 # vector of inverse values of (1-sv)^1/2
ii <- (1-d) < .Machine$double.eps
Id.inv.r[ii] <- 0
eidrop <- t(Id.inv.r*t(ei$vectors))
P <- R.inv%*%eidrop ## ei$vectors%*%diag(Id.inv.r) # define matrix P
if (is.qr(Q)) {
K <- qr.qy(Q,rbind(eidrop,matrix(0,nobs,q)))[1:nobs,]
} else {
K <- Q[1:nobs,]%*%eidrop ## (ei$vectors%*%diag(Id.inv.r)) # define matrix K
}
} ## end of if (!ok1)
# end of calculation of the matrices at their converged values...
## calculating edf and trA...
I.plus <- rep(1,nobs) # define diagonal elements of the matrix I^{+}
I.plus[w<0] <- -1
L <- c(1/alpha)
KtILQ1R <- crossprod(L*I.plus*K,wX1) ## t(object$L*object$I.plus*K)%*%object$wX1
F <- P%*%(KtILQ1R)
edf <- diag(F) ## effective degrees of freedom
edf1 <- 2*edf - rowSums(t(F)*F) ## alternative
trA <- sum(edf)
## calculating the approximate covariance matrices
## (dealing with the expected Hessian of the log likelihood) ...
## get the inverse of the expected Hessian...
if (!scale.known) sig2 <- dev/(nobs-trA) # scale estimate
Vb <- tcrossprod(P) * sig2
## P%*%t(P)*sig2 # Bayesian posterior covariance matrix for the parameters
Ve <- crossprod(K%*%t(P)) *sig2
#PKt%*%t(PKt)*sig2 # covariance matrix of the parameter estimators
## Delta method to get covariance matrix for the reparametrized parameters...
df.p <- rep(1,q)
df.p[object$iv] <- object$beta.t[object$iv]
Vb.t <- t(df.p*t(df.p*Vb))
Ve.t <- t(df.p*t(df.p*Ve))
## eta <- as.numeric(G$X%*%object$beta.t + offset)
## mu <- linkinv(eta) # fitted values
residuals <- rep.int(NA, nobs)
g.deriv <- 1/object$family$mu.eta(eta) # diag(G)
residuals <- (G$y-mu)*g.deriv # the working residuals for the fitted model
aic.model <- object$family$aic(y, n, mu, weights, dev) + 2 * sum(edf)
## scam_1.2-15...
std.rsd <- NULL
if (G$AR1.rho!=0){
std.rsd <- AR.resid(residuals,G$AR1.rho,G$AR.start) ##standardised residuals for AR1 model (approximately uncorrelated under correct model)
dev.st <- sum(std.rsd^2)
## dev <- sum(residuals^2) ## ss of the working residuals of the fitted model
df <- if (is.null(G$AR.start)) 1 else sum(G$AR.start)
aic.model <- object$family$aic(y, n, mu, weights, dev.st) + 2 * sum(edf) -
2*(n-df)*log(1/sqrt(1-G$AR1.rho^2))
}
list (null.dev=null.dev, df.null=nulldf,Vb=Vb,Vb.t=Vb.t,Ve=Ve,Ve.t=Ve.t,rank=rank, sig2=sig2,edf=edf,
edf1=edf1,trA=trA, deviance=dev,residuals=residuals, aic=aic.model, mu=mu, eta=eta,
std.rsd=std.rsd) ## scam_1.2-15
} ## end of scam.fit.post
### the following three functions are for use in place of exp(beta)
### notExp(), from scam version 1.2-17, is a softplus function suggested by Jens Lichter, and as implemented in PyTorch, to ensure positivity when re-parameterizing scop-spline coefficients.
## 'for numerical stability the implementation reverts to the linear function when x*b > threshold
### DnotExp() calculates the first derivative
### D2notExp() gets the second derivative
notExp <- function(x,b=1,threshold=20){
## Softplus function, as implemented in PyTorch,
## https://pytorch.org/docs/stable/generated/torch.nn.Softplus.html
## 'for numerical stability the implementation reverts to the linear function when
## x*b > threshold
f <- x
ind <- x*b < threshold
f[ind] <- 1/b*log(1+exp(b*x[ind]))
f
}
DnotExp <- function(x,b=1,threshold=20) {
## first derivative of notExp()...
f <- 1
ind <- x*b < threshold
f[ind] <- exp(b*x[ind])/(1+exp(b*x[ind]))
f
}
D2notExp <- function(x,b=1,threshold=20) {
## second derivative of notExp()...
f <- 0
ind <- x*b < threshold
f[ind] <- b*exp(b*x[ind])/(1+exp(b*x[ind]))^2
f
}
D3notExp <- function(x,b=1,threshold=20) {
## third derivative of notExp()...
f <- 0
ind <- x*b < threshold
f[ind] <- b^2*exp(b*x[ind])*(1-exp(2*b*x[ind]))/(1+exp(b*x[ind]))^4
f
}
### the following three functions are for use in place of exp(beta)
### notExp0(), used up till scam version 1.2-16, is similar to that in R package mgcv() of Simon N Wood
### in positivity ensuring beta parameters re-parameterization.... they have `better'
### over/underflow characteristics, but is still continuous to second
### derivative.
### DnotExp0() calculates the first derivative
### D2notExp0() gets the second derivative
notExp0 <- function(x){
f <- x
ind <- x > 1
f[ind] <- exp(1)*(x[ind]^2+1)/2
ind <- (x <= 1)&(x > -1)
f[ind] <- exp(x[ind])
ind <- (x <= -1)
f[ind] <- exp(1)*(x[ind]^2+1)/2; f[ind]<-1/f[ind]
f
}
DnotExp0 <- function(x) {
## first derivative of notExp()...
f <- x
ind <- x > 1
f[ind] <- exp(1)*x[ind]
ind <- (x <= 1)&(x > -1)
f[ind] <- exp(x[ind])
ind <- (x <= -1)
f[ind] <- -4*x[ind]/exp(1)/(x[ind]^2+1)^2
f
}
D2notExp0 <- function(x) {
## second derivative of notExp()...
f <- x
ind <- x > 1
f[ind] <- exp(1)
ind <- (x <= 1)&(x > -1)
f[ind] <- exp(x[ind])
ind <- (x <= -1)
f[ind] <- (12*x[ind]^2-4)/exp(1)/(x[ind]^2+1)^3
f
}
D3notExp0 <- function(x) {
## third derivative of notExp()...
f <- x
ind <- x > 1
f[ind] <- 0
ind <- (x <= 1)&(x > -1)
f[ind] <- exp(x[ind])
ind <- (x <= -1)
f[ind] <- 48*x[ind]*(1-x[ind]^2)/exp(1)/(x[ind]^2+1)^4
f
}
## checking derivatives...
#eps <- 1e-7
#x<-seq(-5,5,length.out=100)
#d1 <- (notExp(x+eps)-notExp(x))/eps
#range((DnotExp(x)-d1)/d1)
#d2 <- (DnotExp(x+eps)-DnotExp(x))/eps
#range((D2notExp(x)-d2)/d2)
#d3 <- (D2notExp(x+eps)-D2notExp(x))/eps
#range((D3notExp(x)-d3)/d3)
logLik.scam <- function (object,...)
{ # based on logLik.gam and logLik.glm
sc.p <- as.numeric(object$scale.estimated)
p <- sum(object$edf) + sc.p
val <- p - object$aic/2
#if (fam %in% c("gaussian", "Gamma", "inverse.gaussian","Tweedie"))
# p <- p + 1
np <- length(object$coefficients) + sc.p
if (p > np) p <- np
attr(val, "df") <- p
class(val) <- "logLik"
val
} ## logLik.scam
formula.scam <- function(x, ...)
# clone of formula.gam...
# formula.lm and formula.glm reconstruct the formula from x$terms, this is
# problematic because of the way mgcv handles s() and te() terms
{ x$formula
}
###############################################################################################
## below are functions from mgcv package, copied as they are not exported by 'namespace:mgcv'
## Copyright (c) Simon N. Wood 2008-2019 simon.wood@r-project.org
## gam.setup() has extra SCAM lines to update smooth$Zc matrix using 'del.index' to deal with identifiability: when two or more smooths have the same covariate... (c) natalya pya (2024)
gam.setup <- function(formula,pterms,
data=stop("No data supplied to gam.setup"),knots=NULL,sp=NULL,
min.sp=NULL,H=NULL,absorb.cons=TRUE,sparse.cons=0,select=FALSE,idLinksBases=TRUE,
scale.penalty=TRUE,paraPen=NULL,gamm.call=FALSE,drop.intercept=FALSE,
diagonal.penalty=FALSE,apply.by=TRUE,list.call=FALSE,modCon=0)
## set up the model matrix, penalty matrices and auxilliary information about the smoothing bases
## needed for a gam fit.
## elements of returned object:
## * m - number of smooths
## * min.sp - minimum smoothing parameters
## * H supplied H matrix
## * pearson.extra, dev.extra, n.true --- entries to hold these quantities
## * pterms - terms object for parametric terms
## * intercept TRUE if intercept present
## * offset - the model offset
## * nsdf - number of strictly parameteric coefs
## * contrasts
## * xlevels - records levels of factors
## * assign - indexes which parametric model matrix columns map to which term in pterms
## * smooth - list of smooths
## * S - penalties (non-zero block only)
## * off - first coef penalized by each element of S
## * cmX - col mean of X
## * P - maps parameters in fit constraint parameterization to those in prediction parameterization
## * X - model matrix
## * sp
## * rank
## * n.paraPen
## * L
## * lsp0
## * y - response
## * C - constraint matrix - only if absorb.cons==FALSE
## * n - dim(y)
## * w - weights
## * term.names
## * nP
{ # split the formula if the object being passed is a formula, otherwise it's already split
if (inherits(formula,"split.gam.formula")) split <- formula else
if (inherits(formula,"formula")) split <- interpret.gam(formula)
else stop("First argument is no sort of formula!")
if (length(split$smooth.spec)==0) {
if (split$pfok==0) stop("You've got no model....")
m <- 0
} else m <- length(split$smooth.spec) # number of smooth terms
G <- list(m=m,min.sp=min.sp,H=H,pearson.extra=0,
dev.extra=0,n.true=-1,pterms=pterms) ## dev.extra gets added to deviance if REML/ML used in gam.fit3
if (is.null(attr(data,"terms"))) # then data is not a model frame
mf <- model.frame(split$pf,data,drop.unused.levels=FALSE) # must be false or can end up with wrong prediction matrix!
else mf <- data # data is already a model frame
G$intercept <- attr(attr(mf,"terms"),"intercept")>0
## get any model offset. Complicated by possibility of offsets in multiple formulae...
if (list.call) {
offi <- attr(pterms,"offset")
if (!is.null(offi)) {
G$offset <- mf[[names(attr(pterms,"dataClasses"))[offi]]]
}
} else G$offset <- model.offset(mf) # get any model offset including from offset argument
if (!is.null(G$offset)) G$offset <- as.numeric(G$offset)
# construct strictly parametric model matrix....
if (drop.intercept) attr(pterms,"intercept") <- 1 ## ensure there is an intercept to drop
X <- model.matrix(pterms,mf)
if (drop.intercept) { ## some extended families require intercept to be dropped
xat <- attributes(X);ind <- xat$assign>0 ## index of non intercept columns
X <- X[,ind,drop=FALSE] ## some extended families need to drop intercept
xat$assign <- xat$assign[ind];xat$dimnames[[2]]<-xat$dimnames[[2]][ind];
xat$dim[2] <- xat$dim[2]-1;attributes(X) <- xat
G$intercept <- FALSE
}
rownames(X) <- NULL ## save memory
G$nsdf <- ncol(X)
G$contrasts <- attr(X,"contrasts")
G$xlevels <- .getXlevels(pterms,mf)
G$assign <- attr(X,"assign") # used to tell which coeffs relate to which pterms
## now deal with any user supplied penalties on the parametric part of the model...
PP <- parametricPenalty(pterms,G$assign,paraPen,sp)
if (!is.null(PP)) { ## strip out supplied sps already used
ind <- 1:length(PP$sp)
if (!is.null(sp)) sp <- sp[-ind]
if (!is.null(min.sp)) {
PP$min.sp <- min.sp[ind]
min.sp <- min.sp[-ind]
}
}
# next work through smooth terms (if any) extending model matrix.....
G$smooth <- list()
G$S <- list()
if (gamm.call) { ## flag that this is a call from gamm --- some smoothers need to know!
if (m>0) for (i in 1:m) attr(split$smooth.spec[[i]],"gamm") <- TRUE
}
if (m>0 && idLinksBases) { ## search smooth.spec[[]] for terms linked by common id's
id.list <- list() ## id information list
for (i in 1:m) if (!is.null(split$smooth.spec[[i]]$id)) {
id <- as.character(split$smooth.spec[[i]]$id)
if (length(id.list)&&id%in%names(id.list)) { ## it's an existing id
ni <- length(id.list[[id]]$sm.i) ## number of terms so far with this id
id.list[[id]]$sm.i[ni+1] <- i ## adding smooth.spec index to this id's list
## clone smooth.spec from base smooth spec....
base.i <- id.list[[id]]$sm.i[1]
split$smooth.spec[[i]] <- clone.smooth.spec(split$smooth.spec[[base.i]],
split$smooth.spec[[i]])
## add data for this term to the data list for basis setup...
temp.term <- split$smooth.spec[[i]]$term
## note cbind deliberate in next line, as construction will handle matrix argument
## correctly...
for (j in 1:length(temp.term)) id.list[[id]]$data[[j]] <- cbind(id.list[[id]]$data[[j]],
get.var(temp.term[j],data,vecMat=FALSE))
} else { ## new id
id.list[[id]] <- list(sm.i=i) ## start the array of indices of smooths with this id
id.list[[id]]$data <- list()
## need to collect together all data for which this basis will be used,
## for basis setup...
term <- split$smooth.spec[[i]]$term
for (j in 1:length(term)) id.list[[id]]$data[[j]] <- get.var(term[j],data,vecMat=FALSE)
} ## new id finished
}
} ## id.list complete
G$off<-array(0,0)
first.para<-G$nsdf+1
sm <- list()
newm <- 0
if (m>0) for (i in 1:m) {
# idea here is that terms are set up in accordance with information given in split$smooth.spec
# appropriate basis constructor is called depending on the class of the smooth
# constructor returns penalty matrices model matrix and basis specific information
id <- split$smooth.spec[[i]]$id
if (is.null(id)||!idLinksBases) { ## regular evaluation
sml <- smoothCon(split$smooth.spec[[i]],data,knots,absorb.cons,scale.penalty=scale.penalty,
null.space.penalty=select,sparse.cons=sparse.cons,
diagonal.penalty=diagonal.penalty,apply.by=apply.by,modCon=modCon)
} else { ## it's a smooth with an id, so basis setup data differs from model matrix data
names(id.list[[id]]$data) <- split$smooth.spec[[i]]$term ## give basis data suitable names
sml <- smoothCon(split$smooth.spec[[i]],id.list[[id]]$data,knots,
absorb.cons,n=nrow(data),dataX=data,scale.penalty=scale.penalty,
null.space.penalty=select,sparse.cons=sparse.cons,
diagonal.penalty=diagonal.penalty,apply.by=apply.by,modCon=modCon)
}
for (j in 1:length(sml)) {
newm <- newm + 1
sm[[newm]] <- sml[[j]]
}
}
G$m <- m <- newm ## number of actual smooths
## at this stage, it is neccessary to impose any side conditions required
## for identifiability
if (m>0) {
sm <- gam.side(sm,X,tol=.Machine$double.eps^.5)
if (!apply.by) for (i in 1:length(sm)) { ## restore any by-free model matrices
if (!is.null(sm[[i]]$X0)) { ## there is a by-free matrix to restore
ind <- attr(sm[[i]],"del.index") ## columns, if any to delete
sm[[i]]$X <- if (is.null(ind)) sm[[i]]$X0 else sm[[i]]$X0[,-ind,drop=FALSE]
}
}
}
## The matrix, L, mapping the underlying log smoothing parameters to the
## log of the smoothing parameter multiplying the S[[i]] must be
## worked out...
idx <- list() ## idx[[id]]$c contains index of first col in L relating to id
L <- matrix(0,0,0)
lsp.names <- sp.names <- rep("",0) ## need a list of names to identify sps in global sp array
if (m>0) for (i in 1:m) {
id <- sm[[i]]$id
## get the L matrix for this smooth...
length.S <- length(sm[[i]]$S)
if (is.null(sm[[i]]$L)) Li <- diag(length.S) else Li <- sm[[i]]$L
if (length.S > 0) { ## there are smoothing parameters to name
if (length.S == 1) lspn <- sm[[i]]$label else {
Sname <- names(sm[[i]]$S)
lspn <- if (is.null(Sname)) paste(sm[[i]]$label,1:length.S,sep="") else
paste(sm[[i]]$label,Sname,sep="") ## names for all sp's
}
spn <- lspn[1:ncol(Li)] ## names for actual working sps
}
## extend the global L matrix...
if (is.null(id)||is.null(idx[[id]])) { ## new `id'
if (!is.null(id)) { ## create record in `idx'
idx[[id]]$c <- ncol(L)+1 ## starting column in L for this `id'
idx[[id]]$nc <- ncol(Li) ## number of columns relating to this `id'
}
L <- rbind(cbind(L,matrix(0,nrow(L),ncol(Li))),
cbind(matrix(0,nrow(Li),ncol(L)),Li))
if (length.S > 0) { ## there are smoothing parameters to name
sp.names <- c(sp.names,spn) ## extend the sp name vector
lsp.names <- c(lsp.names,lspn) ## extend full.sp name vector
}
} else { ## it's a repeat id => shares existing sp's
L0 <- matrix(0,nrow(Li),ncol(L))
if (ncol(Li)>idx[[id]]$nc) {
stop("Later terms sharing an `id' can not have more smoothing parameters than the first such term")
}
L0[,idx[[id]]$c:(idx[[id]]$c+ncol(Li)-1)] <- Li
L <- rbind(L,L0)
if (length.S > 0) { ## there are smoothing parameters to name
lsp.names <- c(lsp.names,lspn) ## extend full.sp name vector
}
}
}
## create the model matrix...
Xp <- NULL ## model matrix under prediction constraints, if given
if (m>0) for (i in 1:m) {
n.para<-ncol(sm[[i]]$X)
# define which elements in the parameter vector this smooth relates to....
sm[[i]]$first.para<-first.para
first.para<-first.para+n.para
sm[[i]]$last.para<-first.para-1
## termwise offset handling ...
Xoff <- attr(sm[[i]]$X,"offset")
if (!is.null(Xoff)) {
if (is.null(G$offset)) G$offset <- Xoff
else G$offset <- G$offset + Xoff
}
## model matrix accumulation ...
## alternative version under alternative constraint first (prediction only)
if (is.null(sm[[i]]$Xp)) {
if (!is.null(Xp)) Xp <- cbind2(Xp,sm[[i]]$X)
} else {
if (is.null(Xp)) Xp <- X
Xp <- cbind2(Xp,sm[[i]]$Xp);sm[[i]]$Xp <- NULL
}
## now version to use for fitting ...
X <- cbind2(X,sm[[i]]$X);sm[[i]]$X<-NULL
G$smooth[[i]] <- sm[[i]]
}
if (is.null(Xp)) {
G$cmX <- colMeans(X) ## useful for componentwise CI construction
} else {
G$cmX <- colMeans(Xp)
## transform from fit params to prediction params...
## G$P <- qr.coef(qr(Xp),X) ## old code assumes always full rank!!
qrx <- qr(Xp,LAPACK=TRUE)
R <- qr.R(qrx)
p <- ncol(R)
rank <- Rrank(R) ## rank of Xp/R
QtX <- qr.qty(qrx,X)[1:rank,]
if (rank<p) { ## rank deficient
R <- R[1:rank,]
qrr <- qr(t(R),tol=0)
R <- qr.R(qrr)
G$P <- forwardsolve(t(R),QtX)
} else {
G$P <- backsolve(R,QtX)
}
if (rank<p) {
G$P <- qr.qy(qrr,rbind(G$P,matrix(0,p-rank,p)))
}
G$P[qrx$pivot,] <- G$P
}
## cmX relates to computation of CIs incorportating uncertainty about the mean
## It may make more sense to incorporate all uncertainty about the mean,
## rather than just the uncertainty in the fixed effects mean. This means
## incorporating the mean of random effects and unconstrained smooths. Hence
## comment out the following.
#if (G$nsdf>0) G$cmX[-(1:G$nsdf)] <- 0 ## zero the smooth parts here
#else G$cmX <- G$cmX * 0
G$X <- X;rm(X)
n.p <- ncol(G$X)
# deal with penalties
## min.sp must be length nrow(L) to make sense
## sp must be length ncol(L) --- need to partition
## L into columns relating to free log smoothing parameters,
## and columns, L0, corresponding to values supplied in sp.
## lsp0 = L0%*%log(sp[sp>=0]) [need to fudge sp==0 case by
## setting log(0) to log(effective zero) computed case-by-case]
## following deals with supplied and estimated smoothing parameters...
## first process the `sp' array supplied to `gam'...
if (!is.null(sp)) { # then user has supplied fixed smoothing parameters
ok <- TRUE
if (length(sp) < ncol(L)) {
warning("Supplied smoothing parameter vector is too short - ignored.")
ok <- FALSE
}
if (sum(is.na(sp))) {
warning("NA's in supplied smoothing parameter vector - ignoring.")
ok <- FALSE
}
} else ok <- FALSE
G$sp <- if (ok) sp[1:ncol(L)] else rep(-1,ncol(L))
names(G$sp) <- sp.names
## now work through the smooths searching for any `sp' elements
## supplied in `s' or `te' terms.... This relies on `idx' created
## above...
k <- 1 ## current location in `sp' array
if (m>0) for (i in 1:m) {
id <- sm[[i]]$id
if (is.null(sm[[i]]$L)) Li <- diag(length(sm[[i]]$S)) else Li <- sm[[i]]$L
if (is.null(id)) { ## it's a smooth without an id
spi <- sm[[i]]$sp
if (!is.null(spi)) { ## sp supplied in `s' or `te'
if (length(spi)!=ncol(Li)) stop("incorrect number of smoothing parameters supplied for a smooth term")
G$sp[k:(k+ncol(Li)-1)] <- spi
}
k <- k + ncol(Li)
} else { ## smooth has an id
spi <- sm[[i]]$sp
if (is.null(idx[[id]]$sp.done)) { ## not already dealt with these sp's
if (!is.null(spi)) { ## sp supplied in `s' or `te'
if (length(spi)!=ncol(Li)) stop("incorrect number of smoothing parameters supplied for a smooth term")
G$sp[idx[[id]]$c:(idx[[id]]$c+idx[[id]]$nc-1)] <- spi
}
idx[[id]]$sp.done <- TRUE ## only makes sense to use supplied `sp' from defining term
k <- k + idx[[id]]$nc
}
}
} ## finished processing `sp' vectors supplied in `s' or `te' terms
## copy initial sp's back into smooth objects, so there is a record of
## fixed and free...
k <- 1
if (length(idx)) for (i in 1:length(idx)) idx[[i]]$sp.done <- FALSE
if (m>0) for (i in 1:m) { ## work through all smooths
id <- sm[[i]]$id
if (!is.null(id)) { ## smooth with id
if (idx[[id]]$nc>0) { ## only copy if there are sp's
G$smooth[[i]]$sp <- G$sp[idx[[id]]$c:(idx[[id]]$c+idx[[id]]$nc-1)]
}
if (!idx[[id]]$sp.done) { ## only update k on first encounter with this smooth
idx[[id]]$sp.done <- TRUE
k <- k + idx[[id]]$nc
}
} else { ## no id, just work through sp
if (is.null(sm[[i]]$L)) nc <- length(sm[[i]]$S) else nc <- ncol(sm[[i]]$L)
if (nc>0) G$smooth[[i]]$sp <- G$sp[k:(k+nc-1)]
k <- k + nc
}
} ## now all elements of G$smooth have a record of initial sp.
if (!is.null(min.sp)) { # then minimum s.p.'s supplied
if (length(min.sp)<nrow(L)) stop("length of min.sp is wrong.")
min.sp <- min.sp[1:nrow(L)]
if (sum(is.na(min.sp))) stop("NA's in min.sp.")
if (sum(min.sp<0)) stop("elements of min.sp must be non negative.")
}
k.sp <- 0 # count through sp and S
G$rank <- array(0,0)
if (m>0) for (i in 1:m) {
sm<-G$smooth[[i]]
if (length(sm$S)>0)
for (j in 1:length(sm$S)) { # work through penalty matrices
k.sp <- k.sp+1
G$off[k.sp] <- sm$first.para
G$S[[k.sp]] <- sm$S[[j]]
G$rank[k.sp]<-sm$rank[j]
if (!is.null(min.sp)) {
if (is.null(H)) H<-matrix(0,n.p,n.p)
H[sm$first.para:sm$last.para,sm$first.para:sm$last.para] <-
H[sm$first.para:sm$last.para,sm$first.para:sm$last.para]+min.sp[k.sp]*sm$S[[j]]
}
}
}
## need to modify L, lsp.names, G$S, G$sp, G$rank and G$off to include any penalties
## on parametric stuff, at this point....
if (!is.null(PP)) { ## deal with penalties on parametric terms
L <- rbind(cbind(L,matrix(0,nrow(L),ncol(PP$L))),
cbind(matrix(0,nrow(PP$L),ncol(L)),PP$L))
G$off <- c(PP$off,G$off)
G$S <- c(PP$S,G$S)
G$rank <- c(PP$rank,G$rank)
G$sp <- c(PP$sp,G$sp)
lsp.names <- c(PP$full.sp.names,lsp.names)
G$n.paraPen <- length(PP$off)
if (!is.null(PP$min.sp)) { ## deal with minimum sps
if (is.null(H)) H <- matrix(0,n.p,n.p)
for (i in 1:length(PP$S)) {
ind <- PP$off[i]:(PP$off[i]+ncol(PP$S[[i]])-1)
H[ind,ind] <- H[ind,ind] + PP$min.sp[i] * PP$S[[i]]
}
} ## min.sp stuff finished
} else G$n.paraPen <- 0
## Now remove columns of L and rows of sp relating to fixed
## smoothing parameters, and use removed elements to create lsp0
fix.ind <- G$sp>=0
if (sum(fix.ind)) {
lsp0 <- G$sp[fix.ind]
ind <- lsp0==0 ## find the zero s.p.s
ef0 <- indi <- (1:length(ind))[ind]
if (length(indi)>0) for (i in 1:length(indi)) {
## find "effective zero" to replace each zero s.p. with
ii <- G$off[i]:(G$off[i]+ncol(G$S[[i]])-1)
ef0[i] <- norm(G$X[,ii],type="F")^2/norm(G$S[[i]],type="F")*.Machine$double.eps*.1
}
lsp0[!ind] <- log(lsp0[!ind])
lsp0[ind] <- log(ef0) ##log(.Machine$double.xmin)*1000 ## zero fudge
lsp0 <- as.numeric(L[,fix.ind,drop=FALSE]%*%lsp0)
L <- L[,!fix.ind,drop=FALSE]
G$sp <- G$sp[!fix.ind]
} else {lsp0 <- rep(0,nrow(L))}
G$H <- H
if (ncol(L)==nrow(L)&&!sum(L!=diag(ncol(L)))) L <- NULL ## it's just the identity
G$L <- L;G$lsp0 <- lsp0
names(G$lsp0) <- lsp.names ## names of all smoothing parameters (not just underlying)
if (absorb.cons==FALSE) { ## need to accumulate constraints
G$C <- matrix(0,0,n.p)
if (m>0) {
for (i in 1:m) {
if (is.null(G$smooth[[i]]$C)) n.con<-0
else n.con<- nrow(G$smooth[[i]]$C)
C <- matrix(0,n.con,n.p)
C[,G$smooth[[i]]$first.para:G$smooth[[i]]$last.para]<-G$smooth[[i]]$C
G$C <- rbind(G$C,C)
G$smooth[[i]]$C <- NULL
}
rm(C)
}
} ## absorb.cons == FALSE
## SCAM stuff added here, scam_1.2-15...
if (m>0) for (i in 1:m) {
if (!is.null(G$smooth[[i]]$Zc)) {
ind <- attr(G$smooth[[i]],"del.index")
G$smooth[[i]]$Zc <- if (!is.null(ind)) G$smooth[[i]]$Zc[-ind,-ind,drop=FALSE] ##need to check if removing "del.index" rows is correct
else G$smooth[[i]]$Zc
}
}
## end of scam stuff
G$y <- data[[split$response]]
##data[[deparse(split$full.formula[[2]],backtick=TRUE)]]
G$n <- nrow(data)
if (is.null(data$"(weights)")) G$w <- rep(1,G$n)
else G$w <- data$"(weights)"
## Create names for model coefficients...
if (G$nsdf > 0) term.names <- colnames(G$X)[1:G$nsdf] else term.names<-array("",0)
n.smooth <- length(G$smooth)
if (n.smooth)
## create coef names, if smooth has any coefs, and create a global indicator of non-linear parameters
## g.index, if needed
for (i in 1:n.smooth) {
k <- 1
jj <- G$smooth[[i]]$first.para:G$smooth[[i]]$last.para
if (G$smooth[[i]]$df > 0) for (j in jj) {
term.names[j] <- paste(G$smooth[[i]]$label,".",as.character(k),sep="")
k <- k+1
}
if (!is.null(G$smooth[[i]]$g.index)) {
if (is.null(G$g.index)) G$g.index <- rep(FALSE,n.p)
G$g.index[jj] <- G$smooth[[i]]$g.index
}
}
G$term.names <- term.names
## Deal with non-linear parameterizations...
G$pP <- PP ## return paraPen object, if present
G
} ## gam.setup
clone.smooth.spec <- function(specb,spec) {
## produces a version of base smooth.spec, `specb', but with
## the variables relating to `spec'. Used by `gam.setup' in handling
## of linked smooths.
## check dimensions same...
if (specb$dim!=spec$dim) stop("`id' linked smooths must have same number of arguments")
## Now start cloning...
if (inherits(specb,c("tensor.smooth.spec","t2.smooth.spec"))) { ##`te' or `t2' generated base smooth.spec
specb$term <- spec$term
specb$label <- spec$label
specb$by <- spec$by
k <- 1
for (i in 1:length(specb$margin)) {
if (is.null(spec$margin)) { ## sloppy user -- have to construct margin info...
for (j in 1:length(specb$margin[[i]]$term)) {
specb$margin[[i]]$term[j] <- spec$term[k]
k <- k + 1
}
specb$margin[[i]]$label <- ""
} else { ## second term was at least `te'/`t2', so margin cloning is easy
specb$margin[[i]]$term <- spec$margin[[i]]$term
specb$margin[[i]]$label <- spec$margin[[i]]$label
specb$margin[[i]]$xt <- spec$margin[[i]]$xt
}
}
} else { ## `s' generated case
specb$term <- spec$term
specb$label <- spec$label
specb$by <- spec$by
specb$xt <- spec$xt ## don't generally know what's in here => don't clone
}
specb ## return clone
} ## clone.smooth.spec
allvars1 <- function(form) {
## version of all.vars that doesn't split up terms like x$y into x and y
vars <- all.vars(form)
vn <- all.names(form)
vn <- vn[vn%in%c(vars,"$","[[")] ## actual variable related names
if ("[["%in%vn) stop("can't handle [[ in formula")
ii <- which(vn%in%"$") ## index of '$'
if (length(ii)) { ## assemble variable names
vn1 <- if (ii[1]>1) vn[1:(ii[1]-1)]
go <- TRUE
k <- 1
while (go) {
n <- 2;
while(k<length(ii) && ii[k]==ii[k+1]-1) { k <- k + 1;n <- n + 1 }
vn1 <- c(vn1,paste(vn[ii[k]+1:n],collapse="$"))
if (k==length(ii)) {
go <- FALSE
ind <- if (ii[k]+n<length(vn)) (ii[k]+n+1):length(vn) else rep(0,0)
} else {
k <- k + 1
ind <- if (ii[k-1]+n<ii[k]-1) (ii[k-1]+n+1):(ii[k]-1) else rep(0,0)
}
vn1 <- c(vn1,vn[ind])
}
} else vn1 <- vn
vn1
} ## allvars1
variable.summary <- function(pf,dl,n) {
## routine to summarize all the variables in dl, which is a list
## containing raw input variables to a model (i.e. no functions applied)
## pf is a formula containing the strictly parametric part of the
## model for the variables in dl. A list is returned, with names given by
## the variables. For variables in the parametric part, then the list elements
## may be:
## * a 1 column matrix with elements set to the column medians, if variable
## is a matrix.
## * a 3 figure summary (min,median,max) for a numeric variable.
## * a factor variable, with the most commonly occuring factor (all levels)
## --- classes are as original data type, but anything not numeric, factor or matrix
## is coerced to numeric.
## For non-parametric variables, any matrices are coerced to numeric, otherwise as
## parametric.
## medians in the above are always observed values (to deal with variables coerced to
## factors in the model formulae in a nice way).
## variables with less than `n' entries are discarded
v.n <- length(dl)
## if (v.n) for (i in 1:v.n) if (length(dl[[i]])<n) dl[[i]] <- NULL
v.name <- v.name1 <- names(dl)
if (v.n) ## need to strip out names of any variables that are too short.
{ k <- 0 ## counter for retained variables
for (i in 1:v.n) if (length(dl[[i]])>=n) {
k <- k+1
v.name[k] <- v.name1[i] ## save names of variables of correct length
}
if (k>0) v.name <- v.name[1:k] else v.name <- rep("",k)
}
## v.name <- names(dl) ## the variable names
p.name <- all.vars(pf[-2]) ## variables in parametric part (not response)
vs <- list()
v.n <- length(v.name)
if (v.n>0) for (i in 1:v.n) {
if (v.name[i]%in%p.name) para <- TRUE else para <- FALSE ## is variable in the parametric part?
if (para&&is.matrix(dl[[v.name[i]]])&&ncol(dl[[v.name[i]]])>1) { ## parametric matrix --- a special case
x <- matrix(apply(dl[[v.name[i]]],2,quantile,probs=0.5,type=3,na.rm=TRUE),1,ncol(dl[[v.name[i]]])) ## nearest to median entries
} else { ## anything else
x <- dl[[v.name[i]]]
if (is.character(x)) x <- as.factor(x)
if (is.factor(x)) {
x <- x[!is.na(x)]
lx <- levels(x)
freq <- tabulate(x)
ii <- min((1:length(lx))[freq==max(freq)])
x <- factor(lx[ii],levels=lx)
} else {
x <- as.numeric(x)
x <- c(min(x,na.rm=TRUE),as.numeric(quantile(x,probs=.5,type=3,na.rm=TRUE)) ,max(x,na.rm=TRUE)) ## 3 figure summary
}
}
vs[[v.name[i]]] <- x
}
vs
} ## end variable.summary
parametricPenalty <- function(pterms,assign,paraPen,sp0) {
## routine to process any penalties on the parametric part of the model.
## paraPen is a list whose items have names corresponding to the
## term.labels in pterms. Each list item may have named elements
## L, rank and sp. All other elements should be penalty coefficient matrices.
S <- list() ## penalty matrix list
off <- rep(0,0) ## offset array
rank <- rep(0,0) ## rank array
sp <- rep(0,0) ## smoothing param array
full.sp.names <- rep("",0) ## names for sp's multiplying penalties (not underlying)
L <- matrix(0,0,0)
k <- 0
tind <- unique(assign) ## unique term indices
n.t <- length(tind)
if (n.t>0) for (j in 1:n.t) if (tind[j]>0) {
term.label <- attr(pterms[tind[j]],"term.label")
P <- paraPen[[term.label]] ## get any penalty information for this term
if (!is.null(P)) { ## then there is information
ind <- (1:length(assign))[assign==tind[j]] ## index of coefs involved here
Li <- P$L;P$L <- NULL
spi <- P$sp;P$sp <- NULL
ranki <- P$rank;P$rank <- NULL
## remaining terms should be penalty matrices...
np <- length(P)
if (!is.null(ranki)&&length(ranki)!=np) stop("`rank' has wrong length in `paraPen'")
if (np) for (i in 1:np) { ## unpack penalty matrices, offsets and ranks
k <- k + 1
S[[k]] <- P[[i]]
off[k] <- min(ind) ## index of first coef penalized by this term
if ( ncol(P[[i]])!=nrow(P[[i]])||nrow(P[[i]])!=length(ind)) stop(" a parametric penalty has wrong dimension")
if (is.null(ranki)) {
ev <- eigen(S[[k]],symmetric=TRUE,only.values=TRUE)$values
rank[k] <- sum(ev>max(ev)*.Machine$double.eps*10) ## estimate rank
} else rank[k] <- ranki[i]
}
## now deal with L matrices
if (np) { ## only do this stuff if there are any penalties!
if (is.null(Li)) Li <- diag(np)
if (nrow(Li)!=np) stop("L has wrong dimension in `paraPen'")
L <- rbind(cbind(L,matrix(0,nrow(L),ncol(Li))),
cbind(matrix(0,nrow(Li),ncol(L)),Li))
ind <- (length(sp)+1):(length(sp)+ncol(Li))
ind2 <- (length(sp)+1):(length(sp)+nrow(Li)) ## used to produce names for full sp array
if (is.null(spi)) {
sp[ind] <- -1 ## auto-initialize
} else {
if (length(spi)!=ncol(Li)) stop("`sp' dimension wrong in `paraPen'")
sp[ind] <- spi
}
## add smoothing parameter names....
if (length(ind)>1) names(sp)[ind] <- paste(term.label,ind-ind[1]+1,sep="")
else names(sp)[ind] <- term.label
if (length(ind2)>1) full.sp.names[ind2] <- paste(term.label,ind2-ind2[1]+1,sep="")
else full.sp.names[ind2] <- term.label
}
} ## end !is.null(P)
} ## looped through all terms
if (k==0) return(NULL)
if (!is.null(sp0)) {
if (length(sp0)<length(sp)) stop("`sp' too short")
sp0 <- sp0[1:length(sp)]
sp[sp<0] <- sp0[sp<0]
}
## S is list of penalty matrices, off[i] is index of first coefficient penalized by each S[[i]]
## sp is array of underlying smoothing parameter (-ve to estimate), L is matrix mapping log
## underlying smoothing parameters to log smoothing parameters, rank[i] is the rank of S[[i]].
list(S=S,off=off,sp=sp,L=L,rank=rank,full.sp.names=full.sp.names)
} ## parametricPenalty
## copied from bam(mgcv)..
## (c) Simon N. Wood 2009-2019
rwMatrix <- function(stop,row,weight,X,trans=FALSE) {
## Routine to recombine the rows of a matrix X according to info in
## stop, row and weight. Consider the ith row of the output matrix
## ind <- 1:stop[i] if i==1 and ind <- (stop[i-1]+1):stop[i]
## otherwise. The ith output row is then X[row[ind],]*weight[ind]
if (is.matrix(X)) { n <- nrow(X);p<-ncol(X);ok <- TRUE} else { n<- length(X);p<-1;ok<-FALSE}
stop <- stop - 1;row <- row - 1 ## R indices -> C indices
oo <-.C(C_rwMatrix,as.integer(stop),as.integer(row),as.double(weight),X=as.double(X),
as.integer(n),as.integer(p),trans=as.integer(trans),work=as.double(rep(0,n*p)))
if (ok) return(matrix(oo$X,n,p)) else
return(oo$X)
} ## rwMatrix
AR.resid <- function(rsd,rho=0,AR.start=NULL) {
## standardised residuals for AR1 model
if (rho==0) return(rsd)
ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
sd <- -rho*ld ## sub diagonal
N <- length(rsd)
## see rwMatrix() for how following are used...
ar.row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)]) ## index of rows to reweight
ar.weight <- c(1,rep(c(sd,ld),N-1)) ## row weights
ar.stop <- c(1,1:(N-1)*2+1) ## (stop[i-1]+1):stop[i] are the rows to reweight to get ith row
if (!is.null(AR.start)) { ## need to correct the start of new AR sections...
ii <- which(AR.start==TRUE)
if (length(ii)>0) {
if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
ar.weight[ii*2-2] <- 0 ## zero sub diagonal
ar.weight[ii*2-1] <- 1 ## set leading diagonal to 1
}
}
rwMatrix(ar.stop,ar.row,ar.weight,rsd)
} ## AR.resid
###############################################################
## loading functions, copied from mgcv() package of Simon Wood
#################################################################
print.scam.version <- function()
{ library(help=scam)$info[[1]] -> version
version <- version[pmatch("Version",version)]
um <- strsplit(version," ")[[1]]
version <- um[nchar(um)>0][2]
hello <- paste("This is scam ",version,".",sep="")
packageStartupMessage(hello)
}
.onAttach <- function(...) {
print.scam.version()
}
##.onUnload <- function(libpath) library.dynam.unload("scam", libpath)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.