Nothing
## shape constrained additive modelling code
## (c) Simon N. Wood 2025
ucon <- function(A,b) {
## finds unique constraints for a constraint set A x = b or Ax >= b.
n <- length(b)
A <- cbind(A,b)
p <- ncol(A)
up <- rep(TRUE,n) ## not processed yet
## scale constraint so that first non-zero element is 1 - this will mean that
## constraints identical to a multiplicative constant are correctly trweated as duplicates.
for (i in 1:p) {
ii <- A[,i]!=0 & up
A[ii,] <- A[ii,]/abs(A[ii,i])
up[ii] <- FALSE
if (sum(up)==0) break
}
A <- unique(A)
list(A=A[,1:(p-1),drop=FALSE],b = A[,p])
} ## ucon
scasm <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action,offset=NULL,
control=list(),scale=0,select=FALSE,knots=NULL,sp=NULL,gamma=1,fit=TRUE,
G=NULL,drop.unused.levels=TRUE,drop.intercept=NULL,bs=0,...) {
## Shape Constrained Additive Smooth Model function. Modified from 'gam'. Smoothing parameter selection by EFS,
## constrained coefficient estimation by quadratic programming using 'pcls'.
control <- do.call("gam.control",control)
if (is.null(G)) {
## create model frame.....
gp <- interpret.gam(formula) # interpret the formula
cl <- match.call() # call needed in gam object for update to work
mf <- match.call(expand.dots=FALSE)
mf$formula <- gp$fake.formula
mf$family <- mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$select <- mf$drop.intercept <-
mf$gamma<-mf$fit<-mf$G <- mf$bs <- mf$... <-NULL
mf$drop.unused.levels <- drop.unused.levels
mf[[1]] <- quote(stats::model.frame) ## as.name("model.frame")
pmf <- mf
mf <- eval(mf, parent.frame()) # the model frame now contains all the data
if (nrow(mf)<2) stop("Not enough (non-NA) data to do anything meaningful")
terms <- attr(mf,"terms")
## summarize the *raw* input variables
## note can't use get_all_vars here -- buggy with matrices
vars <- all_vars1(gp$fake.formula[-2]) ## drop response here
inp <- parse(text = paste("list(", paste(vars, collapse = ","),")"))
## allow a bit of extra flexibility in what `data' is allowed to be (as model.frame actually does)
if (!is.list(data)&&!is.data.frame(data)) data <- as.data.frame(data)
dl <- eval(inp, data, parent.frame())
names(dl) <- vars ## list of all variables needed
var.summary <- variable.summary(gp$pf,dl,nrow(mf)) ## summarize the input data
rm(dl) ## save space
## pterms are terms objects for the parametric model components used in
## model setup - don't try obtaining by evaluating pf in mf - doesn't
## work in general (e.g. with offset)...
if (is.list(formula)) { ## then there are several linear predictors
environment(formula) <- environment(formula[[1]]) ## e.g. termplots needs this
pterms <- list()
tlab <- rep("",0)
for (i in 1:length(formula)) {
pmf$formula <- gp[[i]]$pf
pterms[[i]] <- attr(eval(pmf, parent.frame()),"terms")
tlabi <- attr(pterms[[i]],"term.labels")
if (i>1&&length(tlabi)>0) tlabi <- paste(tlabi,i-1,sep=".")
tlab <- c(tlab,tlabi)
}
attr(pterms,"term.labels") <- tlab ## labels for all parametric terms, distinguished by predictor
} else { ## single linear predictor case
pmf$formula <- gp$pf
pmf <- eval(pmf, parent.frame()) # pmf contains all data for parametric part
pterms <- attr(pmf,"terms") ## pmf only used for this
}
if (is.character(family)) family <- eval(parse(text=family))
if (is.function(family)) family <- family()
if (is.null(family$family)) stop("family not recognized")
if (family$family[1]=="gaussian" && family$link=="identity") am <- TRUE
else am <- FALSE
if (!control$keepData) rm(data) ## save space
## check whether family requires intercept to be dropped...
if (is.null(family$drop.intercept)) { ## family does not provide information
lengthf <- if (is.list(formula)) length(formula) else 1
if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, lengthf) else {
drop.intercept <- rep(drop.intercept,length=lengthf) ## force drop.intercept to correct length
if (sum(drop.intercept)) family$drop.intercept <- drop.intercept ## ensure prediction works
}
} else drop.intercept <- as.logical(family$drop.intercept) ## family overrides argument
if (inherits(family,"general.family")&&!is.null(family$presetup)) eval(family$presetup)
gsname <- if (is.list(formula)) gam.setup.list else gam.setup
G <- do.call(gsname,list(formula=gp,pterms=pterms,
data=mf,knots=knots,sp=sp,min.sp=NULL,
H=NULL,absorb.cons=FALSE,sparse.cons=0,select=select,
idLinksBases=control$idLinksBases,scale.penalty=control$scalePenalty,
paraPen=NULL,drop.intercept=drop.intercept))
m <- length(G$smooth); p <- ncol(G$X)
## Set up constraints and feasible initial params. pcls wants initial to not exactly
## satisfy inequality constraints...
Ain <- matrix(0,0,p); bi <- b <- rep(0,0)
b0 <- numeric(p)
for (i in 1:m) {
ii <- G$smooth[[i]]$first.para: G$smooth[[i]]$last.para
if (!is.null(G$C)) {
k <- which(rowSums(G$C[,ii,drop=FALSE]!=0)!=0)
if (length(k)&&any(G$C[k,-ii]!=0)) stop("fixed constraints span multiple smooths")
C0 <- if (length(k)) G$C[k,ii,drop=FALSE] else matrix(0,0,length(ii))
d0 <- if (length(k) && !is.null(G$d)) G$d[k] else numeric(0)
} else {
C0 <- matrix(0,0,length(ii))
d0 <- numeric(0)
}
if (!is.null(G$smooth[[i]]$Ain)) {
con <- ucon(G$smooth[[i]]$Ain,G$smooth[[i]]$bin) ## strip any duplicate constraints (this matters!)
G$smooth[[i]]$Ain <- con$A; G$smooth[[i]]$bin <- con$b
A0 <- matrix(0,nrow(G$smooth[[i]]$Ain),p)
A0[,ii] <- G$smooth[[i]]$Ain
Ain <- rbind(Ain,A0)
b <- c(b,G$smooth[[i]]$bin)
}
if (!all(d0==0)||!is.null(G$smooth[[i]]$Ain)) {
b0[ii] <- feasible(G$smooth[[i]]$Ain,G$smooth[[i]]$bin + .Machine$double.eps^.25,C0,d0)
if (any(G$smooth[[i]]$Ain %*% b0[ii] <= G$smooth[[i]]$bin)) stop("initial point fails inequality constraints")
if (nrow(C0)&&any(abs(C0 %*% b0[ii] - d0)>.Machine$double.eps^.85*max(abs(b0[ii]))*norm(C0,"M"))) stop(
"initial point fails equality constraints")
}
}
G$Ain <- Ain; G$bin <- b; # G$C <- C;
G$beta0 <- b0
## ...so beta0 satisfies Ain beta0 > bin and C beta0 = d (here zero)
## point is that starting from beta_0, pcls can keep constraints satisfied.
G$var.summary <- var.summary
G$family <- fix.family(family)
if ((is.list(formula)&&(is.null(family$nlp)||family$nlp!=gp$nlp))||
(!is.list(formula)&&!is.null(family$npl)&&(family$npl>1)))
stop("incorrect number of linear predictors for family")
G$terms<-terms;
G$mf<-mf;G$cl<-cl;
G$am <- am
if (is.null(G$offset)) G$offset<-rep(0,G$n)
G$min.edf <- G$nsdf ## -dim(G$C)[1]
if (G$m) for (i in 1:G$m) G$min.edf<-G$min.edf+G$smooth[[i]]$null.space.dim
G$formula <- formula
G$pred.formula <- gp$pred.formula
environment(G$formula)<-environment(formula)
} else { ## G not null
if (!is.null(sp)&&any(sp>=0)) { ## request to modify smoothing parameters
if (is.null(G$L)) G$L <- diag(length(G$sp))
if (length(sp)!=ncol(G$L)) stop('length of sp must be number of free smoothing parameters in original model')
ind <- sp>=0 ## which smoothing parameters are now fixed
spind <- log(sp[ind]);
spind[!is.finite(spind)] <- -30 ## set any zero parameters to effective zero
G$lsp0 <- G$lsp0 + drop(G$L[,ind,drop=FALSE] %*% spind) ## add fix to lsp0
G$L <- G$L[,!ind,drop=FALSE] ## drop the cols of G
G$sp <- rep(-1,ncol(G$L))
}
}
if (!fit) {
class(G) <- "gam.prefit"
return(G)
}
if (scale>0) G$family$scale <- scale
object <- scasm.fit(G,control,gamma=gamma,bs=bs)
object$Ain <- G$Ain; object$bin <- G$bin; object$beta0 <- G$beta0
if (!is.null(G$L)) {
object$full.sp <- as.numeric(exp(G$L%*%log(object$sp)+G$lsp0))
names(object$full.sp) <- names(G$lsp0)
}
names(object$sp) <- names(G$sp)
object$paraPen <- G$pP
object$formula <- G$formula
## store any lpi attribute of G$X for use in predict.gam...
if (is.list(object$formula)) attr(object$formula,"lpi") <- attr(G$X,"lpi")
object$var.summary <- G$var.summary
object$cmX <- G$cmX ## column means of model matrix --- useful for CIs
object$model<-G$mf # store the model frame
object$na.action <- attr(G$mf,"na.action") # how to deal with NA's
object$control <- control
object$terms <- G$terms
object$pred.formula <- G$pred.formula
attr(object$pred.formula,"full") <- reformulate(all.vars(object$terms))
object$pterms <- G$pterms
object$assign <- G$assign # applies only to pterms
object$contrasts <- G$contrasts
object$xlevels <- G$xlevels
object$offset <- G$offset
if (!is.null(G$Xcentre)) object$Xcentre <- G$Xcentre
if (control$keepData) object$data <- data
object$df.residual <- nrow(G$X) - sum(object$edf)
object$min.edf <- G$min.edf
object$optimizer <- "EFS"
names(object$coefficients) <- G$term.names
object$call <- G$cl # needed for update() to work
class(object) <- c("gam","glm","lm")
if (is.null(object$deviance)) object$deviance <- sum(residuals(object,"deviance")^2)
object$gcv.ubre <- -object$laml
names(object$gcv.ubre) <- "REML"
object$method <- "REML"
object$smooth <- G$smooth
object$nsdf <- G$nsdf
object$sig2 <- object$scale
!object$scale.known -> object$scale.estimated
object$y <- G$y;object$prior.weights <- G$w
## The following lines avoid potentially very large objects in hidden environments being stored
## with fitted gam objects. The downside is that functions like 'termplot' that rely on searching in
## the environment of the formula can fail...
environment(object$formula) <- environment(object$pred.formula) <-
environment(object$terms) <- environment(object$pterms) <- .GlobalEnv
if (!is.null(object$model)) environment(attr(object$model,"terms")) <- .GlobalEnv
if (!is.null(attr(object$pred.formula,"full"))) environment(attr(object$pred.formula,"full")) <- .GlobalEnv
object
} ## scasm
smooth.construct.sc.smooth.spec <- function(object,data,knots) {
## basic shape constrained smooth based on bs basis, using the
## relationship between difference constraints on B-spline basis
## coeffs and constraints on derivative of the corresponding spline.
## Options are c+, c-, m+, m-, + for convex, concave, increasing,
## decreasing and positive. All coherent combinations are possible,
## and Ain %*% b >= bin imposes the constraints.
## For c+ with +, Ain will have more rows than columns: all other
## combinations there are at most as many rows as columns.
xt <- object$xt
sm <- smooth.construct.bs.smooth.spec(object,data,knots)
if (is.null(xt)) return(sm)
p <- ncol(sm$X)
if (is.list(xt)) {
if (is.character(xt[1])) con <- xt[[1]] else con <- NULL
} else con <- xt
Ain <- matrix(0,0,p)
Ip <- diag(1,nrow = p)
## get start and end, i.e. first and last interior knots...
xint <- c(sm$knots[sm$m[1]+1],sm$knots[length(sm$knots)-sm$m[1]])
dp <- data.frame(xint[1]); names(dp) <- sm$term ## used for start/end constraints
if ("c+" %in% con) { ## increasing first derivative
Ain <- rbind(Ain,diff(Ip,diff=2))
if ("m+" %in% con) { ## must be monotonic increasing at start
sm$deriv <- 1
Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
if ("+" %in% con) { ## ... and positive at start
sm$deriv <- NULL
Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
}
} else if ("m-" %in% con) { ## must be monotonic decreasing at end
sm$deriv <- 1; dp[[1]] <- xint[2]
Ain <- rbind(Ain,-Predict.matrix.Bspline.smooth(sm,dp))
if ("+" %in% con) { ## ... and positive at end
sm$deriv <- NULL
Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
}
} else if ("+" %in% con) { ## +ve but not monotonic
Ain <- rbind(Ain,Ip) ## all needed as minimum location not known in advance :-(
}
} else if ("c-" %in% con) { ## decreasing first derivative
Ain <- rbind(Ain,-diff(Ip,diff=2))
if ("m+" %in% con) { ## must be monotonic increasing at end
sm$deriv <- 1; dp[[1]] <- xint[2]
Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
if ("+" %in% con) { ## ... and positive at start
sm$deriv <- NULL; dp[[1]] <- xint[1]
Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
}
} else if ("m-" %in% con) { ## must be monotonic decreasing at start
sm$deriv <- 1
Ain <- rbind(Ain,-Predict.matrix.Bspline.smooth(sm,dp))
if ("+" %in% con) { ## ... and positive at end
sm$deriv <- NULL; dp[[1]] <- xint[2]
Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
}
} else if ("+" %in% con) { ## +ve but not nonotonic
dp <- data.frame(xint); names(dp) <- sm$term ## +ve both ends
Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
}
} else if ("m+" %in% con) { ## not convex/cave but increasing
Ain <- rbind(Ain,diff(Ip))
if ("+" %in% con) { ## positive at start
Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
}
} else if ("m-" %in% con) { ## not convex/cave but decreasing
Ain <- rbind(Ain,-diff(Ip))
if ("+" %in% con) { ## positive at end
dp[[1]] <- xint[2]
Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
}
} else if ("+" %in% con) Ain <- Ip ## simply positive
if ("+" %in% con) sm$C <- matrix(0,0,p) ## +ve constraint makes no sense unless centering not needed
sm$Ain <- Ain; sm$bin <- rep(0,nrow(Ain))
sm$bin0 <- rep(0.01,nrow(Ain)) ## threshold for initializing to ensure > not =
sm$deriv <- NULL ## make sure turned off
sm
} ## smooth.construct.sc.smooth.spec
smooth.construct.scad.smooth.spec <- function(object,data,knots) {
## basic shape constrained smooth based on ad basis...
xt <- object$xt
object$xt <- list(bs="ps")
sm <- smooth.construct.ad.smooth.spec(object,data,knots)
if (is.null(xt)) return(sm)
if (sm$dim>1) stop("scad is for 1D smoothing only")
p <- ncol(sm$X)
if (is.list(xt)) {
if (is.character(xt[1])) con <- xt[[1]] else con <- NULL
} else con <- xt
Ain <- matrix(0,0,p)
Ip <- diag(1,nrow = p)
if ("+" %in% con) {
Ain <- Ip
sm$C <- matrix(0,0,p) ## never makes any sense to use positive constraint unless centring not needed
}
if ("m+" %in% con) Ain <- rbind(Ain,diff(Ip)) else
if ("m-" %in% con) Ain <- rbind(Ain,-diff(Ip))
if ("c+" %in% con) Ain <- rbind(Ain,diff(Ip,diff=2)) else
if ("c-" %in% con) Ain <- rbind(Ain,-diff(Ip,diff=2))
sm$Ain <- Ain; sm$bin <- rep(0,nrow(Ain))
sm$bin0 <- rep(0.01,nrow(Ain)) ## threshold for initializing to ensure > not =
sm
} ## smooth.construct.scad.smooth.spec
bSbfun <- function(beta,S,off) {
bSb <- numeric(length(S))
for (j in 1:length(S)) {
ii <- 1:ncol(S[[j]]) + off[j]-1
bSb[j] <- beta[ii] %*% S[[j]] %*% beta[ii]
}
bSb
} ## bSbfun
scasm.pirls <- function(G,lsp,control,start=NULL,etastart=NULL,mustart=NULL,
nstep=control$maxit+10,eps=1e-4,bs=0) {
## penalized IRLS for constrained GAM fit. Weighted least squares is performed by
## pcls. Note that 'start' is never used as the pcls initial parameter value...
## G$beta0 is designed for that purpose.
warn <- list()
## initialize as glm.fit
weights <- G$w
offset <- G$offset
n <- nobs <- NROW(G$y) ## really n set in initialize
nvars <- ncol(G$X)
family <- G$family
variance <- family$variance
linkinv <- family$linkinv
if (!is.function(variance) || !is.function(linkinv))
stop("'family' argument seems not to be a valid family object",
call. = FALSE)
dev.resids <- family$dev.resids
aic <- family$aic
mu.eta <- family$mu.eta
valideta <- family$valideta %||% function(eta) TRUE
validmu <- family$validmu %||% function(mu) TRUE
y <- G$y
if (is.null(mustart)) {
eval(family$initialize)
} else {
mukeep <- mustart
eval(family$initialize)
mustart <- mukeep
}
pdevold <- coefold <- NULL
eta <- etastart %||% {
if (!is.null(start))
if (length(start) != nvars)
stop(gettextf("length of 'start' should equal %d and correspond to initial coefs",
nvars),domain = NA)
else {
coefold <- start
offset + as.vector(if (NCOL(G$X) == 1L) G$X * start else G$X %*% start)
}
else G$family$linkfun(mustart)
}
mu <- linkinv(eta)
etaold <- eta
if (!(validmu(mu) && valideta(eta)))
stop("cannot find valid starting values: please specify some", call. = FALSE)
boundary <- conv <- FALSE
gausid <- family$family == "gaussian" && family$link == "id"
if (is.null(coefold)) {
coefold <- G$beta0 ## might as well use feasible beta as starting point
etaold <- drop(G$X %*% coefold)+offset
}
pdevold <- sum(dev.resids(y,linkinv(etaold),weights))
bSb <- bSbfun(coefold,G$S,G$off)
pdevold <- pdevold + sum(exp(lsp)*bSb)
if (is.null(start)||is.null(attr(start,"active"))) start <- G$beta0
for (iter in 1L:control$maxit) {
good <- weights > 0
varmu <- variance(mu)[good]
if (anyNA(varmu)) stop("NAs in V(mu)")
if (any(varmu == 0)) stop("0s in V(mu)")
mu.eta.val <- mu.eta(eta)
if (any(is.na(mu.eta.val[good]))) stop("NAs in d(mu)/d(eta)")
good <- (weights > 0) & (mu.eta.val != 0)
if (all(!good)) {
conv <- FALSE
warning(gettextf("no observations informative at iteration %d",
iter), domain = NA)
break
}
z <- (eta - offset)[good] + (y - mu)[good]/mu.eta.val[good]
w <- weights[good] * mu.eta.val[good]^2/variance(mu)[good]
start <- pcls(list(y=z,w=w,X=G$X[good,],C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),
p=G$beta0,Ain=G$Ain,bin = G$bin,active=0*attr(start,"active")))
bSb <- bSbfun(start,G$S,G$off)
eta <- drop(G$X %*% start)
mu <- linkinv(eta <- eta + offset)
dev <- sum(dev.resids(y, mu, weights))
pdev <- dev + sum(exp(lsp)*bSb)
if (control$trace) cat("pdev = ", pdev, " Iterations - ", iter,"\n", sep = "")
boundary <- FALSE
if (!is.finite(pdev)) {
if (is.null(coefold))
stop("no valid set of coefficients has been found: please supply starting values",call. = FALSE)
warn[length(warn)+1] <- "step size truncated due to divergence"
ii <- 1
while (!is.finite(pdev)) {
if (ii > control$maxit) stop("inner loop 1; cannot correct step size", call. = FALSE)
ii <- ii + 1
start <- (start + coefold)/2
eta <- drop(G$X %*% start)
mu <- linkinv(eta <- eta + offset)
dev <- sum(dev.resids(y, mu, weights))
bSb <- bSbfun(start,G$S,G$off)
pdev <- dev + sum(exp(lsp)*bSb)
}
boundary <- TRUE
if (control$trace) cat("Step halved: new deviance = ", dev, "\n", sep = "")
} ## finite pdev control
if (!(valideta(eta) && validmu(mu))) {
if (is.null(coefold))
stop("no valid set of coefficients has been found: please supply starting values",
call. = FALSE)
warn[length(warn)+1] <- "step size truncated: out of bounds"
ii <- 1
while (!(valideta(eta) && validmu(mu))) {
if (ii > control$maxit)
stop("inner loop 2; cannot correct step size", call. = FALSE)
ii <- ii + 1
start <- (start + coefold)/2
eta <- drop(G$X %*% start)
mu <- linkinv(eta <- eta + offset)
}
boundary <- TRUE
dev <- sum(dev.resids(y, mu, weights))
bSb <- bSbfun(start,G$S,G$off)
pdev <- dev + sum(exp(lsp)*bSb)
if (control$trace) cat("Step halved: new pdev = ", pdev, "\n", sep = "")
} ## valid mu/eta control
f <- function(x) {
p <- coefold + x*(start-coefold)
etax <- etaold + x*(eta-etaold)
mu <- linkinv(etax)
dev <- sum(dev.resids(y, mu, weights))
bSb <- bSbfun(p,G$S,G$off)
pdev <- dev + sum(exp(lsp)*bSb)
}
if (pdev-pdevold > abs(pdev)*control$epsilon) { ## golden section search for best
alpha <- tau <- 2/(1 + sqrt(5))
a0 <- 0; a1 <- 1; a2 <- tau^2
ft <- f(alpha); ft2 <- f(a2)
while (alpha-a2>1e-5)
if (ft2<ft) {
a1 <- alpha; alpha <- a2; ft <- ft2
a2 <- a0 + (a1-a0)*tau^2
ft2 <- f(a2)
} else {
a0 <- a2; ft2 <- ft; a2 <- alpha
alpha <- a0 + (a1-a0)*tau
ft <- f(alpha)
}
pdev <- ft
start <- coefold + alpha*(start-coefold)
if (!is.null(G$Ain) && nrow(G$Ain)>0) {
active <- attr(start,"active")
attr(start,"active") <- active[abs(G$Ain[active,,drop=FALSE]%*%start-G$bin[active])<norm(G$Ain[active,,drop=FALSE],"M")*.Machine$double.eps^.9]
}
eta <- etaold + alpha*(eta-etaold)
mu <- linkinv(eta)
}
if (iter == nstep || abs(pdev - pdevold)/(0.1 + abs(pdev)) < eps || gausid) {
conv <- TRUE
coef <- start
break
}
if (FALSE) {
ii <- 1
while (pdev-pdevold > abs(pdev)*control$epsilon) { ## step halve
if (ii > 20) break; ii <- ii + 1
start <- (start + coefold)/2
eta <- (eta + etaold)/2
mu <- linkinv(eta <- eta + offset)
dev <- sum(dev.resids(y, mu, weights))
bSb <- bSbfun(start,G$S,G$off)
pdev <- dev + sum(exp(lsp)*bSb)
} ## divergence control
}
pdevold <- pdev
coef <- coefold <- start
etaold <- eta
} ## main iteration
if (!conv) warn[length(warn)+1] <- "scasm.pirls not converged"
bsr <- matrix(0,length(start),bs)
ng <- length(w)
if (bs>0) for (k in 1:bs) { ## non-parametric bootstrap
bsr[,k] <- pcls(list(y=z,w=w*tabulate(sample(ng,replace=TRUE),ng),X=G$X[good,],C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),
p=start,Ain=G$Ain,bin = G$bin,active=attr(start,"active")))
}
## weights are iterative and w prior below (w can be changed by initialization!)
list(coef=coef,bSb=bSb,niter=iter,pearson = sum(w*(z-eta)^2),weights=w, H = crossprod(G$X,w*G$X),
fitted.values=mu,linear.predictors=eta,residuals=z-eta,pdev=pdev,warn=warn,n=n,y=y,w=weights,bs=bsr)
} ## scasm.pirls
gH2ls <- function(b,g,R,eta=NULL,WX=NULL) {
## Produces a least squares problem ||z-Rb||^2 with gradient g and Hessian H at b.
## H = 2 P'R'RP, where P is a pivot matrix and R a pivoted Colesky factor. If H is
## not full rank then the least squares problem includes an extra ridge penalty on the
## unidentifiable coefficient subspace.
p <- ncol(R); r <- attr(R,"rank")
if (r<p ) { ii <- (r+1):p; R[ii,ii] <- diag(1,nrow=p-r) } ## deal with rank deficiency
pi <- attr(R,"pivot"); ip <- order(pi) ## pivot and inverse pivot sequences
if (is.null(b)) {
z <- forwardsolve(R,(drop(eta %*% WX)-g)[pi]/2,upper.tri=TRUE,transpose=TRUE) ## R^{-T}P(Hb-g)/2
} else {
z <- R %*% b[pi]-forwardsolve(R,g[pi]/2,upper.tri=TRUE,transpose=TRUE)
}
list(z=z,R=R[,ip]) ## R = RP here
} ## gH2ls
scasm.newton <- function(G,lsp,control,start=NULL,scale.known=TRUE,bs=0) {
## Newton sequential QP fitting of shape constrained models for extended and general families
warn <- list()
y <- G$y; nobs <- length(G$w); family <- G$family; weights <- G$w
mustart <- etastart <- NULL
if (inherits(G$family,"general.family")) { ## general family initialization
## get unconstrained initial coefs, from which Hessian and grad can be computed
start0 <- start
x <- G$X; offset <- G$offset; E <- G$Eb
eval(G$family$initialize)
if (!is.null(start0)) start <- start0
efam <- FALSE; llf <- family$ll
WX <- eta <- NULL; scale1 <- 1
ucstart <- 2 ## number of iterations before inequality constraints used
} else { ## extended family initialization
## get initial mu and hence initial Hessian and grad
dev.resids <- G$family$dev.resids;
eval(G$family$initialize)
if (!is.null(start)) mustart <- G$family$linkinv(as.numeric(G$X %*% start + G$offset))
mu <- mustart; eta <- G$family$linkfun(mu); efam <- TRUE
scale1 <- if (is.null(G$family$scale)) 1 else G$family$scale
theta <- G$family$getTheta()
ucstart <- 1
}
if (is.null(start)) {
feasible <- FALSE; coef <- NULL
} else {
coef <- as.numeric(start)
feasible <- ((is.null(G$Ain)||!nrow(G$Ain)||min(G$Ain%*%coef - G$bin) >= -.Machine$double.eps^.5) &&
(is.null(G$C)||!nrow(G$C)||all.equal(G$C%*%coef,G$C%*%G$beta0)==TRUE))
}
if (!feasible) { ## start not feasible so use G$beta0 as initial feasible point
if (efam) {
mu <- G$family$linkinv(as.numeric(G$X %*% G$beta0 + G$offset))
dev <- sum(dev.resids(y, mu, G$w,theta))
} else { ## general family
ll <- llf(y,G$X,G$beta0,G$w,G$family,offset=G$offset,deriv=0)
dev <- -2*ll$l
}
bSb <- bSbfun(G$beta0,G$S,G$off); penalty <- sum(exp(lsp)*bSb)
pdev.feasible <- dev + penalty ## penalized deviance according to G$beta0
if (!is.finite(pdev.feasible)) pdev.feasible <- Inf
pdev0 <- pdev.feasible;
} else ucstart <- 0 ## no unconstrained initialization if start already feasible
w <- rep(1,ncol(G$X)); pdev <- pdev0 <- Inf;conv <- FALSE
dth0 <- theta0 <- NULL; thmult <- 1
tol <- .Machine$double.eps^.75
for (iter in 1L:control$maxit) { ## Main loop
if (iter>1) { ## convert to equivalent least squares problem and solve...
suppressWarnings(R <- chol(H/2,pivot=TRUE)) ## PHP'/2 = R'R
ls <- gH2ls(coef,g,R,eta=eta,WX=WX)
if (iter<=ucstart) { ## run without inequality constraints to initialize
Ain <- matrix(0,0,0);bin <- rep(0,0);feasible <- TRUE
} else { ## inequality constraints imposed
if (iter==ucstart+1&&is.finite(pdev.feasible)) { ## note never triggered if ucstart<=0
coef <- G$beta0
pdev0 <- pdev.feasible
feasible <- TRUE
}
Ain <- G$Ain;bin <- G$bin
}
start <- pcls(list(y=ls$z,w=w,X=ls$R,C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),
p=G$beta0,Ain=Ain,bin=bin))
if (efam) mu <- G$family$linkinv(as.numeric(G$X %*% start + G$offset))
} ## if (iter>1)
not.ok <- TRUE
while (not.ok) { ## step control
if (efam) {
dev <- sum(dev.resids(y, mu, G$w,theta))
} else { ## general family
ll <- llf(y,G$X,start,G$w,G$family,offset=G$offset,deriv=1)
dev <- -2*ll$l
}
if (is.null(start)) {
if (is.finite(dev)) not.ok <- FALSE else stop("non-finite initial deviance")
} else {
bSb <- bSbfun(start,G$S,G$off); penalty <- sum(exp(lsp)*bSb)
pdev <- dev + penalty
if (!is.null(coef) && (!is.finite(pdev) || (iter>1 && feasible && pdev - pdev0 > tol * abs(pdev)))) {
start <- (start + coef)/2
if (efam) mu <- G$family$linkinv(as.numeric(G$X %*% start + G$offset))
if (all.equal(as.numeric(start),as.numeric(coef),tol=.Machine$double.eps^.75*mean(abs(coef))) == TRUE) {
warning("step failure")
start <- coef
pdev <- pdev0
not.ok <- FALSE
}
} else not.ok <- FALSE
}
} ## not.ok step control loop
if (iter > ucstart+1 && feasible && abs(pdev-pdev0)<control$epsilon*abs(pdev)) { conv <- TRUE;break}
pdev0 <- pdev
if (!is.null(start) && efam && (G$family$n.theta>0||scale1<0)) {
## deal with any scale and theta parameter estimation
theta <- estimate.theta(theta,G$family,y,mu,scale=scale1,wt=G$w,tol=1e-6)
if (!is.null(theta0)) { ## theta zig-zag control
dth <- theta-theta0 ## proposed change in theta
if (is.null(dth0)) dth0 <- dth else {
if (any(dth0*dth<0&abs(dth)>abs(dth0)*.5)) thmult <- thmult/2 else {
thmult <- min(1,thmult*1.5)
}
}
theta <- theta0 + dth*thmult
dth0 <- dth
}
theta0 <- theta
if (!is.null(G$family$scale) && !scale.known) {
scale <- exp(theta[family$n.theta+1])
theta <- theta[-(family$n.theta+1)]
}
if (scale1<0 && iter>4) { ## freeze scale update
scale1 <- scale
theta0 <- theta0[-(family$n.theta+1)]
dth0 <- dth0[-(family$n.theta+1)]
}
G$family$putTheta(theta)
## need to recompute pdev0, with new theta...
pdev0 <- sum(dev.resids(y, mu, G$w,theta)) + penalty
} ## theta estimation
coef <- start ## current coef acceptable
if ((!feasible||iter<=ucstart) && iter > 1) { ## check whether coef vector feasible yet
feasible <- ((is.null(G$Ain)||!nrow(G$Ain)||min(G$Ain%*%coef - G$bin) >= -.Machine$double.eps^.5) &&
(is.null(G$C)||!nrow(G$C)||all.equal(G$C%*%coef,G$C%*%G$beta0)))
}
## get the gradient and Hessian of the deviance
if (efam) {
dd <- dDeta(y,mu,G$w,theta,G$family,0)
WX <- dd$Deta2*G$X
H <- crossprod(G$X,WX)
g <- drop(dd$Deta %*% G$X)
} else { ## general family
H <- -2*ll$lbb; g <- -2*ll$lb
}
} ## main loop
if (!scale.known) { ## Get scale MLE conditional on mu and theta
G$family$n.theta <- 0
theta0 <- estimate.theta(theta,G$family,y,mu,scale = -1,wt=G$w,tol=1e-6)
scale <- exp(theta0[family$n.theta+1]);
} else scale <- scale1
if (!conv) warn[length(warn)+1] <- "scasm.newton not converged"
if (efam) {
eta <- G$family$linkfun(mu); rsd <- dev.resids(y, mu, G$w,theta)
} else { ## general family
lpi <- attr(G$X,"lpi")
if (is.null(lpi)) { ## only one...
eta <- as.numeric(G$X %*% coef) + if (is.null(G$offset)) 0 else G$offset
mu <- G$family$linkinv(eta)
} else { ## multiple...
mu <- eta <- matrix(0,nrow(G$X),length(lpi))
if (!is.null(G$offset)) G$offset[[length(lpi)+1]] <- 0
for (j in 1:length(lpi)) {
eta[,j] <- as.numeric(G$X[,lpi[[j]],drop=FALSE] %*% coef[lpi[[j]]]) +
if (is.null(G$offset[[j]])) 0 else offset[[j]]
mu[,j] <- G$family$linfo[[j]]$linkinv(eta[,j])
}
}
rsd <- NULL
}
if (!feasible) stop("failed to find feasible starting values")
if (iter==control$maxit) warning("scasm.newton failed to converge")
aic.model <- if (inherits(G$family,"general.family")) dev else G$family$aic(y,mu,theta,G$w,scale*sum(G$w))
bsr <- matrix(0,length(start),bs)
if (bs>0) { ## always fixed Hessian version
if (efam) {
g <- matrix(0,ncol(G$X),bs)
for (k in 1:bs) { ## bootstrap gradient vectors
wb <- tabulate(sample(nobs,replace=TRUE),nobs) ## np bootstrap weights
g[,k] <- drop((dd$Deta*wb) %*% G$X) ## bootstrap gradient
}
} else g <- -2*llf(y,G$X,start,G$w,G$family,offset=G$offset,deriv=-bs)$lb
if (ncol(g)) for (k in 1:bs) { ## non-parametric bootstrap for coefs
ls <- gH2ls(coef,g[,k],R,eta=eta) ## convert to LS using original Hessian
bsr[,k] <- pcls(list(y=ls$z,w=w,X=ls$R,C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),##p=G$beta0,Ain=Ain,bin=bin))
p=coef,Ain=G$Ain,bin=G$bin,active=attr(coef,"active"))) ## seems to work just as well as above
} else bsr <- matrix(0,length(start),0) ## for ll that can't yet do this
}
if (FALSE&&bs>0) {
for (k in 1:bs) {
if (efam) {
wb <- tabulate(sample(nobs,replace=TRUE),nobs)
g <- drop((dd$Deta*wb) %*% G$X) ## bootstrap gradient
## NOTE: probably need to add Hessian BS
} else {
lf <- llf(y,G$X,start,G$w,G$family,offset=G$offset,deriv=-1)
g <- -2*lf$lb; suppressWarnings(R <- chol(-lf$lbb,pivot=TRUE)) ## PHP'/2 = R'R
}
ls <- gH2ls(coef,g,R,eta=eta) ## convert to LS using original Hessian
bsr[,k] <- pcls(list(y=ls$z,w=w,X=ls$R,C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),
p=coef,Ain=G$Ain,bin=G$bin,active=attr(coef,"active")))
}
}
list(coef=coef,bSb=bSb,niter=iter,pearson = NA ,weights=G$w, H = H/2,scale=scale,pdev=pdev,aic=aic.model,
fitted.values=mu,linear.predictors=eta,residuals=rsd,y=y,deviance= if (efam) dev else NULL,warn=warn,
bs=bsr)
} ## scasm.newton
check.psdef <- function(A,eps=.Machine$double.eps^.75) {
## Cholesky based test for positive semi-definite
suppressWarnings(R <- chol(A,pivot=TRUE))
r <- attr(R,"rank")
if (r == ncol(R)) return(TRUE) ## +def
ipiv <- order(attr(R,"pivot"))
ii <- (r+1):ncol(R)
R[ii,ii] <- 0
max(abs(crossprod(R[,ipiv])-A))<eps ## + semi def or not?
} ## check.psdef
getVb <- function(G,H,p,zob=list(),project = TRUE,HpSt=FALSE) {
## gets cov matrix from pcls fit object, to within the scale parameter
## if project==TRUE then if Z is constraint null space return
## Vb= Z(Z'HpZ)^{-1}Z', or Z'HpZ and Z'StZ if HPSt==TRUE.
## p is coefficient vector. zob is constraints as returned by
## getZ. G is pre-fit scasm list, including constraint spec.
c2inv <- function(Hp) {
## returns inverse of Hp if +ve def and pseudoinverse of nearest +ve def
## matrix otherwise...
suppressWarnings(R <- chol(Hp,pivot=TRUE))
r <- attr(R,"rank"); p <- ncol(R)
if (r<p) { ii <- (r+1):p; R[ii,ii] <- 0 }
r <- Rrank(R)
if (r<p) {
ev <- eigen(Hp,symmetric=TRUE)
ii <- ev$values > abs(ev$values[1])*.Machine$double.eps^.9
ev$values[ii] <- 1/sqrt(ev$values[ii])
ev$values[!ii] <- 0
return(crossprod(ev$values*t(ev$vectors)))
} else {
ipiv <- order(attr(R,"pivot"))
return(chol2inv(R)[ipiv,ipiv,drop=FALSE])
}
} ## c2inv
St <- H*0
m <- length(G$S); sedf <- pSp <- numeric(m)
if (m) {
for (i in 1:m) { ## get total penalty matrix and p'S_jp terms
ii <- 1:ncol(G$S[[i]]) + G$off[i] - 1
St[ii,ii] <- St[ii,ii] + G$S[[i]]*G$sp[i]
pSp[i] <- sum(p[ii]*drop(G$S[[i]]%*%p[ii]))
}
pSp <- pmax(pSp,.Machine$double.xmin)
}
if (project&&length(zob)) {
H <- ZtHZproj(H,zob)
St <- ZtHZproj(St,zob)
}
Hp <- H + St ## penalized Hessian
Hindef <- !check.psdef(H)
if (Hindef) { ## H not positive semi-definite
eh <- eigen(H,symmetric=TRUE)
eh$values[eh$values<0] <- 0
H <- eh$vectors %*% (eh$values*t(eh$vectors))
Vb1 <- H + St
}
if (project&&length(zob)) { ## there are constraints
if (HpSt) { ## then only penalty matrix and penalized Hessian needed
return(list(Hp=Hp,St=St))
}
Vb <- c2inv(Hp) ## (Z'HpZ)^{-1}
Vb <- ZHZtproj(Vb,zob) ## Z(Z'HpZ)^{-1}Z'
if (Hindef) {
Vb1 <- c2inv(Vb1)
Vb1 <- ZHZtproj(Vb1,zob)
} else Vb1 <- Vb
H <- ZHZtproj(H,zob) ## project H in same way
} else {
if (HpSt) return(list(Hp=Hp,St=St))
Vb <- c2inv(Hp)
if (Hindef) {
Vb1 <- c2inv(Vb1)
} else Vb1 <- Vb
}
edf <- rowSums(Vb1*H) ## vector of edf per coef.
if (m) for (i in 1:m) { ## sedf[i]*sp[i] is suppressed EDF per penalty
ii <- 1:ncol(G$S[[i]]) + G$off[i] - 1
sedf[i] <- sum(Vb1[ii,ii]*G$S[[i]])
}
fv <- G$X %*% p ## fitted values.
list(Vb=Vb,edf = edf,fv=fv,pSp=pSp,sedf=sedf,H=H,Hp=Hp,St=St)
} ## getVb
trSiSZ <- function(G,zob) {
## need tr(S^-Sj) terms, in this case projected into the null space of any constraints
## zob is result of getZ
sm <- ZSZproject(G,zob)
trS <- numeric(length(G$sp))
k <- sum(sapply(sm,function(x) length(x$ZSZ)))
if (k!=length(trS)) stop("sp and smooth length not matching")
k <- 1
for (i in 1:length(sm)) {
m <- length(sm[[i]]$ZSZ)
if (m==1) {
trS[k] <- suppressWarnings(Rrank(chol(sm[[i]]$ZSZ[[1]],pivot=TRUE)))/G$sp[k]
k <- k + 1
} else {
St <- sm[[i]]$ZSZ[[1]]*G$sp[k];
for (j in 2:m) { St <- St + sm[[i]]$ZSZ[[j]]*G$sp[k+j-1] }
R <- suppressWarnings(chol(St,pivot=TRUE)) ## otherwise chol warns not full rank!
r <- attr(R,"rank");p <- ncol(R)
if (r<p) { ii <- (r+1):p; R[ii,ii] <- 0}
r <- min(Rrank(R),sm[[i]]$df-sm[[i]]$null.space.dim);
piv <- attr(R,"pivot"); p <- ncol(St)
if (r<p) { ii <- (r+1):p; R[ii,ii] <- diag(1,length(ii)) }
for (j in 1:m) {
trS[k] <- sum(diag(forwardsolve(t(R),t(forwardsolve(t(R),sm[[i]]$ZSZ[[j]][piv,piv]))))[1:r])
k <- k + 1
}
}
}
trS
} ## trSiSZ
lpdgDet <- function(A,rank=0) {
## Get possibly generalized determinant of +def matrix A.
## If rank>0 then it supplies the rank of A.
## S <- crossprod(matrix(runif(20)-.5,4,5))
## lpdgDet(S);lpdgDet(S,4);sum(log(eigen(S)$values[1:4]))
R <- suppressWarnings(chol(A,pivot=TRUE))
r <- attr(R,"rank")
if (rank>0 && rank<r) r <- rank
if (r < ncol(R)) R <- suppressWarnings(chol(tcrossprod(R[1:r,]),pivot=TRUE))
return(list(ldet=2*sum(log(diag(R))),r=r))
} ## lpdgdet
ZSZproject <- function(G,zob) {
## produces a copy of G$smooth with additional elements Z'SZ
## which are the S matrices projected into the null space of
## any active constraints as stored in zob, produced by getZ
sm <- G$smooth
for (k in 1:length(sm)) sm[[k]]$ZSZ <- sm[[k]]$S ## default
if (length(zob)) for (j in 1:length(zob)) { ## work through Null space object
k <- zob[[j]]$i.smooth ## smooth this con applies to
zq <- zob[[j]]$q
for (i in 1:length(sm[[k]]$S)) sm[[k]]$ZSZ[[i]] <- ## Z'S_jZ
qr.qty(zob[[j]]$qra,t(qr.qty(zob[[j]]$qra,sm[[k]]$S[[i]])[-(1:zq),,drop=FALSE]))[-(1:zq),,drop=FALSE]
}
sm
} ## ZSZproject
ZtHZproj <- function(H,zob,left=TRUE) {
## Project H into null space of constraints, by forming Z'HZ.
## Here Z is in almost block diagonal form with jth block Z_j
## it's 'almost' because the Z are not square. So ijth block
## of result is Z_i'H_ijZ_j. The non-identity Z_i are stored
## in zob, produced by getZ.
## if left==FALSE, produces HZ
## Assumption is that zob is in ascending block order...
## work backwards so that ii still index correct rows/cols after
## previous dropping...
if (length(zob)) for (j in length(zob):1) {
ii <- zob[[j]]$ii ## what parameters are we looking at?
q <- zob[[j]]$q
H[,ii] <- t(qr.qty(zob[[j]]$qra,t(H[,ii])))
if (left) {
H[ii,] <- qr.qty(zob[[j]]$qra,H[ii,])
H <- H[-ii[1:q],-ii[1:q],drop=FALSE] ## now just drop the un-needed rows/cols from H
} else H <- H[,-ii[1:q],drop=FALSE]
}
H
} ## ZtHZproj
ZHZtproj <- function(H,zob) {
## counterpart of ZtHZ that forms ZHZ'. So ijth block of result
## is Z_iH_ijZ_j'. The non-identity Z_i are stored in zob,
## produced by getZ.
## Assumption is that zob is in ascending block order...
qtot <- sum(sapply(zob,function(x) x$q))
if (!qtot) return(H)
H1 <- matrix(0,nrow(H)+qtot,ncol(H))
# First pad out rows...
m <- length(zob)
## get unpenalized row/col indices...
iup <- sort(which(!1:nrow(H1) %in% as.integer(unlist(lapply(zob,function(x) x$ii)))))
if (length(iup)) {
next.iup <- iup[1] ## start of next unpenalized block to process
} else next.iup <- nrow(H1)
j0 <- 0 ## end of previous source (H) block
for (j in 1:m) {
if (zob[[j]]$ii[1]>next.iup) { ## next repara is after an unconstrained block
ii <- next.iup:(zob[[j]]$ii[1]-1) ## index of unconstrained in target
H1[ii,] <- H[j0+1:length(ii),] ## copy unchanged from H to correct rows of H1
j0 <- j0 + length(ii) ## update end of processed blocks in source, H
next.iup <- if (any(iup>max(ii))) min(iup[iup>max(ii)]) else nrow(H1)
}
## now copy required block itself...
ii <- zob[[j]]$ii[-(1:zob[[j]]$q)] ## target (H1) rows
jj <- 1:length(ii) + j0 ## source rows
j0 <- max(jj) ## new source block end
H1[ii,] <- H[jj,]
}
if (next.iup < nrow(H1)) { ## deal with any trailing unconstrained elements
ii <- iup[iup>=next.iup]
H1[ii,] <- H[j0+1:length(ii),]
}
## Now pad out columns (as above but on cols)...
H <- matrix(0,nrow(H1),nrow(H1))
if (length(iup)) {
next.iup <- iup[1]
} else next.iup <- nrow(H1)
j0 <- 0
for (j in 1:m) {
if (zob[[j]]$ii[1]>next.iup) {
ii <- next.iup:(zob[[j]]$ii[1]-1)
H[,ii] <- H1[,j0+1:length(ii)]
j0 <- j0 + length(ii)
next.iup <- if (any(iup>max(ii))) min(iup[iup>max(ii)]) else nrow(H1)
}
ii <- zob[[j]]$ii[-(1:zob[[j]]$q)] ## target (H) cols
jj <- 1:length(ii) + j0 ## source cols
j0 <- max(jj)
H[,ii] <- H1[,jj]
}
if (next.iup < nrow(H1)) {
ii <- iup[iup>=next.iup]
H[,ii] <- H1[,j0+1:length(ii)]
}
## H now contains original H, but with zero rows/cols inserted to
## facilitate multiplication by Z components...
for (j in 1:m) {
ii <- zob[[j]]$ii
H[,ii] <- t(qr.qy(zob[[j]]$qra,t(H[,ii])))
H[ii,] <- qr.qy(zob[[j]]$qra,H[ii,])
}
H
} ## ZHZtproj
Ztbproj <- function(b,zob) {
## Project b into null space of constraints, by forming Z'b.
## zob encodes Z as returned by getZ...
## Assumption is that zob is in ascending block order...
## work backwards so that ii still index correct rows/cols after
## previous dropping...
if (length(zob)) for (j in length(zob):1) {
ii <- zob[[j]]$ii ## what parameters are we looking at?
q <- zob[[j]]$q
b[ii] <- qr.qty(zob[[j]]$qra,b[ii])
b <- b[-ii[1:q]] ## now just drop the un-needed rows/cols from H
}
b
} ## Ztbproj
Zbproj <- function(b,zob) {
## Forms Zb where b is a vector, and zob is the structure returned by getZ defining Z.
## Assumption is that zob is in ascending block order...
qtot <- sum(sapply(zob,function(x) x$q))
if (!qtot) return(b)
b1 <- rep(0,length(b)+qtot)
# First pad out rows...
m <- length(zob)
## get unpenalized row/col indices...
iup <- sort(which(!1:length(b1) %in% as.integer(unlist(lapply(zob,function(x) x$ii)))))
next.iup <- if (length(iup)) iup[1] else length(b1) ## start of next unpenalized block to process
j0 <- 0 ## end of previous source (b) block
for (j in 1:m) {
if (zob[[j]]$ii[1]>next.iup) { ## next repara is after an unconstrained block
ii <- next.iup:(zob[[j]]$ii[1]-1) ## index of unconstrained in target
b1[ii] <- b[j0+1:length(ii)] ## copy unchanged from b to correct rows of b1
j0 <- j0 + length(ii) ## update end of processed blocks in source, b
next.iup <- if (any(iup>max(ii))) min(iup[iup>max(ii)]) else length(b1)
}
## now copy required block itself...
ii <- zob[[j]]$ii[-(1:zob[[j]]$q)] ## target (b1) rows
jj <- 1:length(ii) + j0 ## source rows
j0 <- max(jj) ## new source block end
b1[ii] <- b[jj]
}
if (next.iup < length(b1)) { ## deal with any trailing unconstrained elements
ii <- iup[iup>=next.iup]
b1[ii] <- b[j0+1:length(ii)]
}
for (j in 1:m) {
ii <- zob[[j]]$ii
#H[,ii] <- t(qr.qy(zob[[j]]$qra,t(H[,ii])))
b1[ii] <- qr.qy(zob[[j]]$qra,b1[ii])
}
b1
} ## Zbproj
getZ <- function(G,active=rep(0,0)) {
## Given an active set gets an object representing the constraint NULL space, Z.
## this is computed smooth by smooth, to ensure that Z'StZ is still block
## diagonal. See also ZHZtproj and ZtHZproj
zob <- list();zi <- 0;ns <- 0
sm <- G$smooth
nac <- length(active) ## number of active inequality constraints
Ain <- if (nac) G$Ain[active,,drop=FALSE] else NULL ## the active inequality constraints matrix
for (k in 1:length(sm)) { ## work through the smooths
ii <- sm[[k]]$first.para:sm[[k]]$last.para ## columns related to this smooth
if (!is.null(G$C)) { ## are there fixed constraints?
i <- rowSums(G$C[,ii,drop=FALSE]!=0)!=0 ## constraints relevant to this term
if (any(G$C[i,-ii]!=0)) stop("constraints overlap more than one term")
A <- G$C[i,ii,drop=FALSE] ## retain only smooth relevant part
} else A <- matrix(0,0,length(ii))
if (!is.null(Ain)) { ## are there active inequality constraints?
i <- rowSums(Ain[,ii,drop=FALSE]!=0)!=0 ## constraints relevant to this term
if (any(Ain[i,-ii]!=0)) stop("constraints overlap more than one term")
A <- rbind(A,Ain[i,ii]) ## append smooth relevant part to any fixed
}
if (nrow(A)) { ## any constraints for this smooth?
zi <- zi + 1
ms <- length(sm[[k]]$S)
zob[[zi]] <- list(qra=qr(t(A)), ## QR defining Z
q=nrow(A), ## number of constraints
ii=ii, ## coefs it applies to
i.smooth=k, ## smooth it applies to
i.S = if (ms) 1:ms + ns else numeric(0)) ## penalty matrices it applies to
ns <- ns + ms
}
}
zob
} ## getZ
Stot <- function(G,root=TRUE,sp=NULL) {
## Obtain the total penalty matrix and optionally its square root.
## If sp==NULL then a 'balanced' total penalty is produced, including
## a penalty on any fixed constraints.
m <- length(G$S); p <- ncol(G$X)
if (is.null(sp)) sp <- sapply(G$S,function(x) 1/norm(x))
if (is.null(G$C)||nrow(G$C)==0||is.null(sp)) S <- matrix(0,p,p) else {
S <- crossprod(G$C);
nors <- norm(S)
if (nors>0) S <- S/nors ## penalize departure from constraints
}
for (i in 1:m) {
ii <- G$off[i] + 1:ncol(G$S[[i]]) - 1
S[ii,ii] <- S[ii,ii] + sp[i]*G$S[[i]]
}
if (root) {
suppressWarnings(R <- chol(S,pivot=TRUE))
r <- attr(R,"rank");piv <- attr(R,"pivot")
R <- R[1:r,]; R[,piv] <- R
} else R <- NULL
list(S=S,R=R) ## R'R = S
} ## Stot
lsp.cov <- function(G,Vb,beta,n=1000) {
## Obtains approx cov matrix of log smoothing params by simulation.
## Based on fact that main uncertainty in EFS log sp estimation are
## the log(b'S_jb) terms...
m <- length(G$S); if (!m) return(NULL)
b <- rmvn(n,as.numeric(beta),Vb) ## n by p matrix of replicate coefs
bSb <- matrix(0,n,m)
for (i in 1:m) {
ii <- G$off[i] + 1:ncol(G$S[[i]]) - 1
bSb[,i] <- rowSums((b[,ii] %*% G$S[[i]]) * b[,ii])
}
cov(log(bSb))
} ## lsp.cov
imp.diff <- function(G,Hi,sp,beta) {
## implicit differentiation to get dbeta/drho_j
## Hi is the inverse Hessian (unscaled cov matrix)
m <- length(G$S); if (!m) return(NULL)
b1 <- matrix(0,length(beta),m)
for (i in 1:m) {
ii <- G$off[i] + 1:ncol(G$S[[i]]) - 1
b1[,i] <- Hi[,ii] %*% G$S[[i]] %*% beta[ii] * sp[i]
}
b1
} ## imp.diff
scasm.fit <- function(G,control=gam.control(),gamma=1,bs=0,...) {
## fits shape constrained additive model, alternating pirls and EFS,
## having first obtained initial smoothing parameters.
G$family <- fix.family.ls(fix.family.link(G$family))
if (!is.null(G$family$preinitialize)) {
if (inherits(G$family,"general.family")) {
Gmod <- G$family$preinitialize(G) ## modifies some elements of G
for (gnam in names(Gmod)) G[[gnam]] <- Gmod[[gnam]] ## copy these into G
} else {
## extended family - usually just initializes theta and possibly y
if (!is.null(attr(G$family$preinitialize,"needG"))) attr(G$family,"G") <- G ## more complicated
pini <- G$family$preinitialize(G$y,G$family)
attr(G$family,"G") <- NULL
if (!is.null(pini$family)) G$family <- pini$family
if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta)
if (!is.null(pini$y)) G$y <- pini$y
}
}
G$Eb <- Stot(G,TRUE)$R ## balanced root penalty to regularize initialization
lsp <- if (length(G$sp)>0) log(initial.spg(G$X,G$y,G$w,G$family,G$S,G$rank,G$off,
offset=G$offset,L=G$L,lsp0=G$lsp0,E=G$Eb,...)) else rep(0,0)
L <- if (is.null(G$L)) diag(1,length(lsp)) else G$L
if (any(L<0)) stop("can not handle negative weighting of log smoothing parameters")
efam <- inherits(G$family,"extended.family")
scale.known <- (G$family$family %in% c("poisson","binomial")) ||
(efam && is.null(G$family$scale)) ||
(!is.null(G$family$scale) && G$family$scale > 0)
start <- NULL; scale <- 1;G$sp <- exp(L%*%lsp+G$lsp0)
dlsp <- lsp*0;alpha <- dlsp+1
eps <- .01;
for (i in 1:200) { ## main loop
fit <- if (efam) scasm.newton(G,log(G$sp),start=start,control=control,scale.known=scale.known)
else scasm.pirls(G,log(G$sp),start=start,control=control,eps=eps)
active <- attr(fit$coef,"active") ## active inequality constraints
zob <- getZ(G,active)
v <- getVb(G,H=fit$H,p=fit$coef,zob=zob,project = TRUE)
if (efam) {
if (!scale.known) fit$scale <- fit$scale*nrow(G$X)/(nrow(G$X)-sum(v$edf))
G$family$scale <- fit$scale ## updated scale estimate
}
if (!scale.known) scale <- if (is.null(fit$scale)) fit$pearson/(nrow(G$X)-sum(v$edf)) else fit$scale
if (length(lsp)==0) break
trS <- trSiSZ(G,zob)
redf <- drop(t(L)%*%((trS-v$sedf)*G$sp)) ## residual EDF per sp
if (any(redf<=0)) {
if (min(redf) < -1e-4) warning(paste("min(redf)=",min(redf)))
redf[redf<1e-7] <- 1e-7
}
sedf <- drop(t(L)%*%(v$sedf*G$sp)) ## suppressed EDF per sp
ii <- redf > .05; lsp1 <- lsp
dlsp0 <- dlsp;
dlsp <- log(scale) + log(redf) - log(drop(t(L)%*%(v$pSp*G$sp)))
maxstep <- max(abs(dlsp))
if (i==1 && maxstep > 8) dlsp <- 8*dlsp/maxstep
if (i>2 && maxstep > 4) dlsp <- 4*dlsp/maxstep ## avoid overly long steps
## don't increase if already too close to total suppression,
## or decrease if already too close to no suppression
dlsp[(redf<.05 & dlsp>0)|(sedf<.05 & dlsp<0)] <- 0
if (i>1) {
jj <- dlsp*dlsp0 > 0 ## same sign
alpha[!jj] <- alpha[!jj]/2
if (i>2) {
ii <- jj & jj.old ## same sign 3 times in a row
alpha[ii] <- pmin(2,alpha[ii]*1.2)
}
jj.old <- jj
}
lsp1 <- dlsp*alpha + lsp
if (i==7) eps <- 1e-3 # pirls eps
conv <- (i>1 && max(abs(pSpold-v$pSp)/pmax(1e-4,v$pSp))<.01)||(max(abs(lsp1-lsp))<.01)
if (conv) { if (eps<.005) break else eps <- 1e-3}
lsp <- lsp1
G$sp <- exp(drop(L%*%lsp+G$lsp0))
start <- fit$coef
pSpold <- v$pSp
} ## main loop
if (length(fit$warn)) for (i in 1:length(fit$warn)) warning(fit$warn[i])
ii <- diag(v$Vb) < 0; diag(v$Vb)[ii] <- 0 ## avoid triggering plot problem
F = v$Vb %*% v$H;
b1 <- imp.diff(G,Hi=v$Vb,sp=G$sp,beta=fit$coef) ## dbeta/dlsp
if (!inherits(G$family,"extended.family")) { ## still need aic
fit$aic <- sum(G$family$dev.resids(fit$y,fit$fitted.values,G$w))/scale -
2*G$family$ls(fit$y,G$w,fit$n,scale)[1]
}
if (efam&&!scale.known) G$family$scale <- -1
object <- list(coefficients = fit$coef,sp=exp(lsp),Vp=v$Vb*scale,Ve=F%*%v$Vb*scale,
scale=scale,edf=v$edf,prior.weights=fit$weights,family=G$family,deviance=fit$deviance,
residuals=fit$residuals,scale.known=scale.known,F = F,R=t(mroot(v$H)),
outer.info=list(converged = i<200,niter=i),y=fit$y,aic=fit$aic+2*sum(v$edf),
linear.predictors=fit$linear.predictors,fitted.values=fit$fitted.values)
Vrho <- lsp.cov(G,object$Vp,fit$coef,n=1000) ## approx lsp cov matrix
object$Vc <- b1%*%Vrho%*%t(b1) + object$Vp
object$edf2 <- rowSums(object$Vc*v$H)/scale
object$edf1 <- 2*v$edf - rowSums(F*t(F))
names(object$edf) <- names(object$edf1) <- names(object$edf2) <- names(object$coefficients)
## extended family may need to manipulate fit object.
if (!is.null(G$family$postproc)) {
if (inherits(G$family,"general.family")) eval(G$family$postproc) else {
posr <- G$family$postproc(family=G$family,y=G$y,prior.weights=G$w,
fitted=fit$fitted.values,linear.predictors=fit$linear.predictors,offset=G$offset,
intercept=G$intercept)
if (!is.null(posr$family)) object$family$family <- posr$family
if (!is.null(posr$deviance)) object$deviance <- posr$deviance
if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance
}
}
if (is.null(object$deviance)) object$deviance <- sum(residuals.gam(object,"deviance")^2)
## now get LAML *excluding* active constraints (removing the 'as.numeric' in next line would re-instate active)
zob <- getZ(G)
z <- getVb(G,H=fit$H,p=fit$coef,zob=zob,project = TRUE,HpSt=FALSE)
ldetS <- lpdgDet(z$St); M <- ncol(z$Hp) - ldetS$r
laml <- (-fit$pdev/scale + ldetS$ldet -lpdgDet(z$Hp)$ldet)/2 +M*log(2*pi)/2
if (!inherits(G$family,"general.family")) laml <- laml +
if (inherits(G$family,"extended.family")) G$family$ls(fit$y,G$w,G$family$getTheta(),scale)$ls else
G$family$ls(fit$y,fit$w,fit$n,scale)[1]
object$laml <- laml
if (bs) {
object$bs <- if (inherits(G$family,"extended.family"))
scasm.newton(G,log(G$sp),start=start,control=control,scale.known=scale.known,bs=bs)$bs else
scasm.pirls(G,log(G$sp),start=start,control=control,eps=eps,bs=bs)$bs
attr(object$bs,"svar.only") <- TRUE ## signal that this covers sampling var, not smoothing bias.
}
object$Vp <- z$Vb*scale; object$Vc<- b1%*%Vrho%*%t(b1) + object$Vp ## no active cons
object$Ve = z$Vb%*%z$H%*%z$Vb*scale; attr(object$Vp,"zob") <- zob; object$St <- z$St
object
} ## scasm.fit
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.