R/scasm.r


## shape constrained additive modelling code 
## (c) Simon N. Wood 2025

ucon <- function(A,b) {
## finds unique constraints for a constraint set A x = b or Ax >= b.
  n <- length(b)
  A <- cbind(A,b)
  p <- ncol(A)
  up <- rep(TRUE,n) ## not processed yet
  ## scale each constraint so that its first non-zero element has magnitude 1 - this means that
  ## constraints identical up to a positive multiplicative constant are correctly treated as duplicates.
  for (i in 1:p) {
    ii <- A[,i]!=0 & up
    A[ii,] <- A[ii,]/abs(A[ii,i])
    up[ii] <- FALSE
    if (sum(up)==0) break
  }
  A <- unique(A)
  list(A=A[,1:(p-1),drop=FALSE],b = A[,p])
} ## ucon
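## A minimal illustration of 'ucon' (not run when this file is sourced): rows 1 and 2
## below differ only by a positive scaling, so only one of them is retained...
if (FALSE) {
  A <- rbind(c(1,0,2),c(2,0,4),c(0,1,0)); b <- c(0,0,1)
  ucon(A,b) ## two unique constraints remain
}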

scasm <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action,offset=NULL,
                  control=list(),scale=0,select=FALSE,knots=NULL,sp=NULL,gamma=1,fit=TRUE,
                  G=NULL,drop.unused.levels=TRUE,drop.intercept=NULL,bs=0,...) {
## Shape Constrained Additive Smooth Model function. Modified from 'gam'. Smoothing parameter selection by EFS,
## constrained coefficient estimation by quadratic programming using 'pcls'.

   control <- do.call("gam.control",control)

   if (is.null(G)) {
    ## create model frame..... 
    gp <- interpret.gam(formula) # interpret the formula 
    cl <- match.call() # call needed in gam object for update to work
    mf <- match.call(expand.dots=FALSE)
    mf$formula <- gp$fake.formula 
    mf$family <- mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$select <- mf$drop.intercept  <-
                 mf$gamma<-mf$fit<-mf$G <- mf$bs <- mf$... <-NULL
    mf$drop.unused.levels <- drop.unused.levels
    mf[[1]] <- quote(stats::model.frame) ## as.name("model.frame")
    pmf <- mf
    mf <- eval(mf, parent.frame()) # the model frame now contains all the data 
    if (nrow(mf)<2) stop("Not enough (non-NA) data to do anything meaningful")
    terms <- attr(mf,"terms")

    ## summarize the *raw* input variables
    ## note can't use get_all_vars here -- buggy with matrices
    vars <- all_vars1(gp$fake.formula[-2]) ## drop response here
    inp <- parse(text = paste("list(", paste(vars, collapse = ","),")"))

    ## allow a bit of extra flexibility in what `data' is allowed to be (as model.frame actually does)
    if (!is.list(data)&&!is.data.frame(data)) data <- as.data.frame(data) 

    dl <- eval(inp, data, parent.frame())
    names(dl) <- vars ## list of all variables needed
    var.summary <- variable.summary(gp$pf,dl,nrow(mf)) ## summarize the input data
    rm(dl) ## save space    

    ## pterms are terms objects for the parametric model components used in 
    ## model setup - don't try obtaining by evaluating pf in mf - doesn't
    ## work in general (e.g. with offset)...

    if (is.list(formula)) { ## then there are several linear predictors
      environment(formula) <- environment(formula[[1]]) ## e.g. termplots needs this
      pterms <- list()
      tlab <- rep("",0)
      for (i in 1:length(formula)) {
        pmf$formula <- gp[[i]]$pf 
        pterms[[i]] <- attr(eval(pmf, parent.frame()),"terms")
        tlabi <- attr(pterms[[i]],"term.labels")
        if (i>1&&length(tlabi)>0) tlabi <- paste(tlabi,i-1,sep=".")
        tlab <- c(tlab,tlabi)
      }
      attr(pterms,"term.labels") <- tlab ## labels for all parametric terms, distinguished by predictor
    } else { ## single linear predictor case
      pmf$formula <- gp$pf
      pmf <- eval(pmf, parent.frame()) # pmf contains all data for parametric part
      pterms <- attr(pmf,"terms") ## pmf only used for this
    }

    if (is.character(family)) family <- eval(parse(text=family))
    if (is.function(family)) family <- family()
    if (is.null(family$family)) stop("family not recognized")
  
    if (family$family[1]=="gaussian" && family$link=="identity") am <- TRUE
    else am <- FALSE
    
    if (!control$keepData) rm(data) ## save space

    ## check whether family requires intercept to be dropped...
    if (is.null(family$drop.intercept)) { ## family does not provide information
      lengthf <- if (is.list(formula)) length(formula) else 1
      if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, lengthf) else {
        drop.intercept <- rep(drop.intercept,length=lengthf) ## force drop.intercept to correct length
	if (sum(drop.intercept)) family$drop.intercept <- drop.intercept ## ensure prediction works
      }
    } else drop.intercept <- as.logical(family$drop.intercept) ## family overrides argument
    
    if (inherits(family,"general.family")&&!is.null(family$presetup)) eval(family$presetup)

    gsname <- if (is.list(formula)) gam.setup.list else gam.setup 

    G <- do.call(gsname,list(formula=gp,pterms=pterms,
                 data=mf,knots=knots,sp=sp,min.sp=NULL,
                 H=NULL,absorb.cons=FALSE,sparse.cons=0,select=select,
                 idLinksBases=control$idLinksBases,scale.penalty=control$scalePenalty,
                 paraPen=NULL,drop.intercept=drop.intercept))

    m <- length(G$smooth); p <- ncol(G$X)

    ## Set up constraints and a feasible initial parameter vector. pcls wants the initial
    ## point to satisfy the inequality constraints strictly, not with equality...
    Ain <- matrix(0,0,p); bi <- b <- rep(0,0)
    b0 <- numeric(p)
    for (i in 1:m) {
      ii <- G$smooth[[i]]$first.para: G$smooth[[i]]$last.para
      if (!is.null(G$C)) {
        k <- which(rowSums(G$C[,ii,drop=FALSE]!=0)!=0)
        if (length(k)&&any(G$C[k,-ii]!=0)) stop("fixed constraints span multiple smooths")
        C0 <- if (length(k)) G$C[k,ii,drop=FALSE] else matrix(0,0,length(ii))
        d0 <- if (length(k) && !is.null(G$d)) G$d[k] else numeric(0)
      } else {
        C0 <- matrix(0,0,length(ii))
	d0 <- numeric(0)
      }	
      if (!is.null(G$smooth[[i]]$Ain)) {
        con <- ucon(G$smooth[[i]]$Ain,G$smooth[[i]]$bin) ## strip any duplicate constraints (this matters!)
	G$smooth[[i]]$Ain <- con$A; G$smooth[[i]]$bin <- con$b 
        A0 <- matrix(0,nrow(G$smooth[[i]]$Ain),p)
        A0[,ii] <- G$smooth[[i]]$Ain
        Ain <- rbind(Ain,A0)
        b <- c(b,G$smooth[[i]]$bin)
      }
      if (!all(d0==0)||!is.null(G$smooth[[i]]$Ain)) {
        b0[ii] <- feasible(G$smooth[[i]]$Ain,G$smooth[[i]]$bin + .Machine$double.eps^.25,C0,d0)
	if (any(G$smooth[[i]]$Ain %*% b0[ii] <= G$smooth[[i]]$bin)) stop("initial point fails inequality constraints")
	if (nrow(C0)&&any(abs(C0 %*% b0[ii] - d0)>.Machine$double.eps^.85*max(abs(b0[ii]))*norm(C0,"M"))) stop(
	    "initial point fails equality constraints")
      }
    }
    G$Ain <- Ain; G$bin <- b; # G$C <- C;
    G$beta0 <- b0
    ## ...so beta0 satisfies Ain beta0 > bin and C beta0 = d (here zero)
    ## point is that starting from beta_0, pcls can keep constraints satisfied.
    
    G$var.summary <- var.summary
    G$family <- fix.family(family)
   
    if ((is.list(formula)&&(is.null(family$nlp)||family$nlp!=gp$nlp))||
        (!is.list(formula)&&!is.null(family$npl)&&(family$npl>1)))
	stop("incorrect number of linear predictors for family")
       
    G$terms<-terms;
    G$mf<-mf;G$cl<-cl;
    G$am <- am

    if (is.null(G$offset)) G$offset<-rep(0,G$n)
     
    G$min.edf <- G$nsdf ## -dim(G$C)[1]
    if (G$m) for (i in 1:G$m) G$min.edf<-G$min.edf+G$smooth[[i]]$null.space.dim

    G$formula <- formula
    G$pred.formula <- gp$pred.formula
    environment(G$formula)<-environment(formula)
  } else { ## G not null
    if (!is.null(sp)&&any(sp>=0)) { ## request to modify smoothing parameters
      if (is.null(G$L)) G$L <- diag(length(G$sp))
      if (length(sp)!=ncol(G$L)) stop('length of sp must be number of free smoothing parameters in original model')
      ind <- sp>=0 ## which smoothing parameters are now fixed
      spind <- log(sp[ind]); 
      spind[!is.finite(spind)] <- -30 ## set any zero parameters to effective zero
      G$lsp0 <- G$lsp0 + drop(G$L[,ind,drop=FALSE] %*% spind) ## add fix to lsp0
      G$L <- G$L[,!ind,drop=FALSE] ## drop the cols of G
      G$sp <- rep(-1,ncol(G$L))
    }
  }

  if (!fit) {
    class(G) <- "gam.prefit"
    return(G)
  }  

  if (scale>0) G$family$scale <- scale

  object <- scasm.fit(G,control,gamma=gamma,bs=bs)

  object$Ain <- G$Ain; object$bin <- G$bin; object$beta0 <- G$beta0
  if (!is.null(G$L)) { 
    object$full.sp <- as.numeric(exp(G$L%*%log(object$sp)+G$lsp0))
    names(object$full.sp) <- names(G$lsp0)
  }
  names(object$sp) <- names(G$sp)
  object$paraPen <- G$pP
  object$formula <- G$formula
  ## store any lpi attribute of G$X for use in predict.gam...
  if (is.list(object$formula)) attr(object$formula,"lpi") <- attr(G$X,"lpi")
  object$var.summary <- G$var.summary 
  object$cmX <- G$cmX ## column means of model matrix --- useful for CIs
  object$model<-G$mf # store the model frame
  object$na.action <- attr(G$mf,"na.action") # how to deal with NA's
  object$control <- control
  object$terms <- G$terms
  object$pred.formula <- G$pred.formula

  attr(object$pred.formula,"full") <- reformulate(all.vars(object$terms))
  
  object$pterms <- G$pterms
  object$assign <- G$assign # applies only to pterms
  object$contrasts <- G$contrasts
  object$xlevels <- G$xlevels
  object$offset <- G$offset
  if (!is.null(G$Xcentre)) object$Xcentre <- G$Xcentre
  if (control$keepData) object$data <- data
  object$df.residual <- nrow(G$X) - sum(object$edf)
  object$min.edf <- G$min.edf
  object$optimizer <- "EFS"
  names(object$coefficients) <- G$term.names
  object$call <- G$cl # needed for update() to work
  class(object) <- c("gam","glm","lm")
  if (is.null(object$deviance)) object$deviance <- sum(residuals(object,"deviance")^2)
  object$gcv.ubre <- -object$laml
  names(object$gcv.ubre) <- "REML"
  object$method <- "REML" 
  object$smooth <- G$smooth
  object$nsdf <- G$nsdf
  object$sig2 <- object$scale
  object$scale.estimated <- !object$scale.known
  object$y <- G$y;object$prior.weights <- G$w
  ## The following lines avoid potentially very large objects in hidden environments being stored
  ## with fitted gam objects. The downside is that functions like 'termplot' that rely on searching in
  ## the environment of the formula can fail...
  environment(object$formula) <- environment(object$pred.formula) <-
  environment(object$terms) <- environment(object$pterms) <- .GlobalEnv
  if (!is.null(object$model))  environment(attr(object$model,"terms"))  <- .GlobalEnv
  if (!is.null(attr(object$pred.formula,"full"))) environment(attr(object$pred.formula,"full")) <- .GlobalEnv
  object
} ## scasm
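## Minimal usage sketch (not run when sourced; assumes an mgcv build providing 'scasm'
## and the 'sc' basis defined below). Simulates an increasing truth and fits a monotone smooth...
if (FALSE) {
  set.seed(2)
  dat <- data.frame(x = runif(200))
  dat$y <- exp(2*dat$x)/3 + rnorm(200)*.2              ## increasing truth plus noise
  b <- scasm(y ~ s(x,bs="sc",k=15,xt="m+"), data=dat)  ## monotone increasing fit
  plot(b,pages=1)
}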


smooth.construct.sc.smooth.spec <- function(object,data,knots) {
## basic shape constrained smooth based on bs basis, using the
## relationship between difference constraints on B-spline basis
## coeffs and constraints on derivative of the corresponding spline. 
## Options are c+, c-, m+, m-, + for convex, concave, increasing,
## decreasing and positive. All coherent combinations are possible,
## and Ain %*% b >= bin imposes the constraints.
## For c+ with +, Ain will have more rows than columns: for all other
## combinations there are at most as many rows as columns.
  xt <- object$xt
  sm <- smooth.construct.bs.smooth.spec(object,data,knots)
  if (is.null(xt)) return(sm)
  p <- ncol(sm$X)
  if (is.list(xt)) { 
    if (is.character(xt[[1]]))  con <- xt[[1]] else con <- NULL
  } else con <- xt
  Ain <- matrix(0,0,p)
  Ip <- diag(1,nrow = p)
  ## get start and end, i.e. first and last interior knots...
  xint <- c(sm$knots[sm$m[1]+1],sm$knots[length(sm$knots)-sm$m[1]])
  dp <- data.frame(xint[1]); names(dp) <- sm$term ## used for start/end constraints
  if ("c+" %in% con) { ## increasing first derivative
    Ain <- rbind(Ain,diff(Ip,diff=2))
    if ("m+" %in% con) { ## must be monotonic increasing at start
      sm$deriv <- 1
      Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
      if ("+" %in% con) { ## ... and positive at start
        sm$deriv <- NULL
	Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
      }
    } else if ("m-" %in% con) { ## must be monotonic decreasing at end
      sm$deriv <- 1; dp[[1]] <- xint[2]
      Ain <- rbind(Ain,-Predict.matrix.Bspline.smooth(sm,dp))
      if ("+" %in% con) { ## ... and positive at end
        sm$deriv <- NULL
	Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
      }
    } else  if ("+" %in% con) { ## +ve but not monotonic
      Ain <- rbind(Ain,Ip) ## all needed as minimum location not known in advance :-(
    }
  } else if ("c-" %in% con) { ## decreasing first derivative
    Ain <- rbind(Ain,-diff(Ip,diff=2))
    if ("m+" %in% con) { ## must be monotonic increasing at end
      sm$deriv <- 1; dp[[1]] <- xint[2]
      Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
      if ("+" %in% con) { ## ... and positive at start
        sm$deriv <- NULL; dp[[1]] <- xint[1]
	Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
      }
    } else if ("m-" %in% con) { ## must be monotonic decreasing at start
      sm$deriv <- 1
      Ain <- rbind(Ain,-Predict.matrix.Bspline.smooth(sm,dp))
      if ("+" %in% con) { ## ... and positive at end
        sm$deriv <- NULL; dp[[1]] <- xint[2]
	Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
      }
    } else  if ("+" %in% con) { ## +ve but not nonotonic
      dp <- data.frame(xint); names(dp) <- sm$term ## +ve both ends
      Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
    }
  } else if ("m+" %in% con) { ## not convex/cave but increasing
    Ain <- rbind(Ain,diff(Ip))
    if ("+" %in% con) { ## positive at start
      Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
    }
  } else  if ("m-" %in% con) { ## not convex/cave but decreasing
    Ain <- rbind(Ain,-diff(Ip))
    if ("+" %in% con) { ## positive at end
      dp[[1]] <- xint[2]
      Ain <- rbind(Ain,Predict.matrix.Bspline.smooth(sm,dp))
    }
  } else if ("+" %in% con) Ain <- Ip ## simply positive
  
  if ("+" %in% con) sm$C <- matrix(0,0,p) ## +ve constraint makes no sense unless centering not needed
  
  sm$Ain <- Ain; sm$bin <- rep(0,nrow(Ain))
  sm$bin0 <- rep(0.01,nrow(Ain)) ## threshold for initializing to ensure > not =
  sm$deriv <- NULL ## make sure turned off
  sm
} ## smooth.construct.sc.smooth.spec
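## Illustrative check of the constraint matrix built above (not run when sourced);
## mgcv::smoothCon is used to call the constructor directly...
if (FALSE) {
  dat <- data.frame(x = runif(100))
  sm <- smoothCon(s(x,bs="sc",k=8,xt="m+"),data=dat)[[1]]
  dim(sm$Ain)       ## 7 x 8: first differences of the 8 coefficients
  all(sm$bin==0)    ## non-negative coefficient differences give an increasing spline
}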


smooth.construct.scad.smooth.spec <- function(object,data,knots) {
## basic shape constrained smooth based on ad basis...
  xt <- object$xt
  object$xt <- list(bs="ps")
  sm <- smooth.construct.ad.smooth.spec(object,data,knots)
  if (is.null(xt)) return(sm)
  if (sm$dim>1) stop("scad is for 1D smoothing only")
  p <- ncol(sm$X)
  if (is.list(xt)) { 
    if (is.character(xt[[1]]))  con <- xt[[1]] else con <- NULL
  } else con <- xt
  Ain <- matrix(0,0,p)
  Ip <- diag(1,nrow = p)
  if ("+" %in% con) {
    Ain <- Ip
    sm$C <- matrix(0,0,p) ## a positivity constraint only makes sense if the centring constraint is dropped
  }  
  if ("m+" %in% con) Ain <- rbind(Ain,diff(Ip)) else
        if ("m-" %in% con) Ain <- rbind(Ain,-diff(Ip))
  if ("c+" %in% con) Ain <- rbind(Ain,diff(Ip,diff=2)) else
        if ("c-" %in% con) Ain <- rbind(Ain,-diff(Ip,diff=2))	
  sm$Ain <- Ain; sm$bin <- rep(0,nrow(Ain))
  sm$bin0 <- rep(0.01,nrow(Ain)) ## threshold for initializing to ensure > not =
  sm
} ## smooth.construct.scad.smooth.spec



bSbfun <- function(beta,S,off) {
  bSb <- numeric(length(S))
  for (j in 1:length(S)) {
    ii <- 1:ncol(S[[j]]) + off[j]-1
    bSb[j] <- beta[ii] %*% S[[j]] %*% beta[ii]
  }
  bSb
} ## bSbfun
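## Tiny illustration (not run): two penalty blocks whose start columns are given by 'off'...
if (FALSE) {
  S <- list(diag(3),crossprod(diff(diag(4)))) ## penalties acting on coefs 1:3 and 4:7
  beta <- rnorm(7)
  bSbfun(beta,S,off=c(1,4))                   ## the two beta'S_j beta terms
}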

scasm.pirls <- function(G,lsp,control,start=NULL,etastart=NULL,mustart=NULL,
                        nstep=control$maxit+10,eps=1e-4,bs=0) {
## penalized IRLS for constrained GAM fit. Weighted least squares is performed by
## pcls. Note that 'start' is never used as the pcls initial parameter value...
## G$beta0 is designed for that purpose.

   warn <- list()
   ## initialize as glm.fit
   weights <- G$w
   offset <- G$offset
   n <- nobs <- NROW(G$y) ## really n set in initialize
   nvars <- ncol(G$X)
   family <- G$family
   variance <- family$variance
   linkinv <- family$linkinv
   if (!is.function(variance) || !is.function(linkinv)) 
        stop("'family' argument seems not to be a valid family object", 
            call. = FALSE)
   dev.resids <- family$dev.resids
   aic <- family$aic
   mu.eta <- family$mu.eta
   valideta <- family$valideta %||% function(eta) TRUE
   validmu <- family$validmu %||% function(mu) TRUE
   y <- G$y
   if (is.null(mustart)) {
     eval(family$initialize)
   } else {
     mukeep <- mustart
     eval(family$initialize)
     mustart <- mukeep
   }
   pdevold <- coefold <- NULL
   eta <- etastart %||% {
     if (!is.null(start)) 
        if (length(start) != nvars) 
           stop(gettextf("length of 'start' should equal %d and correspond to initial coefs", 
                nvars),domain = NA)
        else {
          coefold <- start
          offset + as.vector(if (NCOL(G$X) == 1L)  G$X * start else G$X %*% start)
        }
      else G$family$linkfun(mustart)
   }
   mu <- linkinv(eta)
   etaold <- eta
   if (!(validmu(mu) && valideta(eta))) 
       stop("cannot find valid starting values: please specify some", call. = FALSE)
   boundary <- conv <- FALSE
   gausid <- family$family == "gaussian" && family$link == "id"

   if (is.null(coefold)) {
     coefold <- G$beta0 ## might as well use feasible beta as starting point
     etaold <- drop(G$X %*% coefold)+offset
   }
   pdevold <- sum(dev.resids(y,linkinv(etaold),weights))
   bSb <- bSbfun(coefold,G$S,G$off)
   pdevold <- pdevold + sum(exp(lsp)*bSb)
   if (is.null(start)||is.null(attr(start,"active"))) start <- G$beta0
   for (iter in 1L:control$maxit) {
     good <- weights > 0
     varmu <- variance(mu)[good]
     if (anyNA(varmu)) stop("NAs in V(mu)")
     if (any(varmu == 0)) stop("0s in V(mu)")
     mu.eta.val <- mu.eta(eta)
     if (any(is.na(mu.eta.val[good]))) stop("NAs in d(mu)/d(eta)")
     good <- (weights > 0) & (mu.eta.val != 0)
     if (all(!good)) {
       conv <- FALSE
       warning(gettextf("no observations informative at iteration %d", 
                  iter), domain = NA)
       break
     }
     z <- (eta - offset)[good] + (y - mu)[good]/mu.eta.val[good]
     w <- weights[good] * mu.eta.val[good]^2/variance(mu)[good]
     start <- pcls(list(y=z,w=w,X=G$X[good,],C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),
                   p=G$beta0,Ain=G$Ain,bin = G$bin,active=0*attr(start,"active")))
     bSb <- bSbfun(start,G$S,G$off)
     eta <- drop(G$X %*% start)
     mu <- linkinv(eta <- eta + offset)
     dev <- sum(dev.resids(y, mu, weights))
     pdev <- dev + sum(exp(lsp)*bSb)
     if (control$trace) cat("pdev = ", pdev, " Iterations - ", iter,"\n", sep = "")
     boundary <- FALSE
     if (!is.finite(pdev)) {
       if (is.null(coefold)) 
           stop("no valid set of coefficients has been found: please supply starting values",call. = FALSE)
       warn[length(warn)+1] <- "step size truncated due to divergence"
       ii <- 1
       while (!is.finite(pdev)) {
         if (ii > control$maxit) stop("inner loop 1; cannot correct step size", call. = FALSE)
         ii <- ii + 1
         start <- (start + coefold)/2
         eta <- drop(G$X %*% start)
         mu <- linkinv(eta <- eta + offset)
         dev <- sum(dev.resids(y, mu, weights))
	 bSb <- bSbfun(start,G$S,G$off)
	 pdev <- dev + sum(exp(lsp)*bSb)
       }
       boundary <- TRUE
       if (control$trace) cat("Step halved: new deviance = ", dev, "\n", sep = "")
     } ## finite pdev control
     
     if (!(valideta(eta) && validmu(mu))) {
       if (is.null(coefold)) 
         stop("no valid set of coefficients has been found: please supply starting values", 
                    call. = FALSE)
       warn[length(warn)+1] <- "step size truncated: out of bounds"
       ii <- 1
       while (!(valideta(eta) && validmu(mu))) {
         if (ii > control$maxit) 
           stop("inner loop 2; cannot correct step size", call. = FALSE)
           ii <- ii + 1
           start <- (start + coefold)/2
           eta <- drop(G$X %*% start)
           mu <- linkinv(eta <- eta + offset)
       }
       boundary <- TRUE
       dev <- sum(dev.resids(y, mu, weights))
       bSb <- bSbfun(start,G$S,G$off)
       pdev <- dev + sum(exp(lsp)*bSb)
       if (control$trace) cat("Step halved: new pdev = ", pdev, "\n", sep = "")
    } ## valid mu/eta control
    
   
    f <- function(x) {
      p <- coefold + x*(start-coefold)
      etax <- etaold + x*(eta-etaold)
      mu <- linkinv(etax)
      dev <- sum(dev.resids(y, mu, weights))
      bSb <- bSbfun(p,G$S,G$off)
      pdev <- dev + sum(exp(lsp)*bSb)
    }

    if (pdev-pdevold > abs(pdev)*control$epsilon) { ## golden section search for best
      alpha <- tau <- 2/(1 + sqrt(5))
      a0 <- 0; a1 <- 1; a2 <- tau^2
      ft <- f(alpha); ft2 <- f(a2)

      while (alpha-a2>1e-5)
      if (ft2<ft) {
        a1 <- alpha; alpha <- a2; ft <- ft2
        a2 <- a0 + (a1-a0)*tau^2
        ft2 <- f(a2)
      } else {
        a0 <- a2; ft2 <- ft; a2 <- alpha
        alpha <- a0 + (a1-a0)*tau
        ft <- f(alpha)
      }
      pdev <- ft
      start <- coefold + alpha*(start-coefold)
      if (!is.null(G$Ain) && nrow(G$Ain)>0) {
        active <- attr(start,"active")
        attr(start,"active")  <- active[abs(G$Ain[active,,drop=FALSE]%*%start-G$bin[active])<norm(G$Ain[active,,drop=FALSE],"M")*.Machine$double.eps^.9]
      }
      eta <- etaold + alpha*(eta-etaold)
      mu <- linkinv(eta)
    }

    if (iter == nstep || abs(pdev - pdevold)/(0.1 + abs(pdev)) < eps || gausid) {
       conv <- TRUE
       coef <- start
       break
    }

    if (FALSE) {
    ii <- 1
    while (pdev-pdevold > abs(pdev)*control$epsilon) { ## step halve
      if (ii > 20) break; ii <- ii + 1
      start <- (start + coefold)/2
      eta <- (eta + etaold)/2
      mu <- linkinv(eta <- eta + offset)
      dev <- sum(dev.resids(y, mu, weights))
      bSb <- bSbfun(start,G$S,G$off)
      pdev <- dev + sum(exp(lsp)*bSb)
    } ## divergence control
    }
    
    pdevold <- pdev
    coef <- coefold <- start
    etaold <- eta
    
  } ## main iteration
  if (!conv) warn[length(warn)+1] <- "scasm.pirls not converged"
  bsr <- matrix(0,length(start),bs)
  ng <- length(w)
  if (bs>0) for (k in 1:bs) { ## non-parametric bootstrap   
    bsr[,k] <- pcls(list(y=z,w=w*tabulate(sample(ng,replace=TRUE),ng),X=G$X[good,],C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),
                   p=start,Ain=G$Ain,bin = G$bin,active=attr(start,"active")))
  }		   
  ## in the returned list 'weights' are the final iterative weights and 'w' the prior weights (which family initialization can change!)
  list(coef=coef,bSb=bSb,niter=iter,pearson = sum(w*(z-eta)^2),weights=w, H = crossprod(G$X,w*G$X),
       fitted.values=mu,linear.predictors=eta,residuals=z-eta,pdev=pdev,warn=warn,n=n,y=y,w=weights,bs=bsr) 
} ## scasm.pirls

gH2ls <- function(b,g,R,eta=NULL,WX=NULL) {
## Produces a least squares problem ||z-Rb||^2 with gradient g and Hessian H at b.
## H = 2 P'R'RP, where P is a pivot matrix and R a pivoted Cholesky factor. If H is
## not full rank then the least squares problem includes an extra ridge penalty on the
## unidentifiable coefficient subspace. 
  p <- ncol(R); r <- attr(R,"rank")
  if (r<p ) { ii <- (r+1):p; R[ii,ii] <- diag(1,nrow=p-r) } ## deal with rank deficiency
  pi <- attr(R,"pivot"); ip <- order(pi) ## pivot and inverse pivot sequences
  if (is.null(b)) {
    z <- forwardsolve(R,(drop(eta %*% WX)-g)[pi]/2,upper.tri=TRUE,transpose=TRUE) ## R^{-T}P(Hb-g)/2
  } else {
    z <- R %*% b[pi]-forwardsolve(R,g[pi]/2,upper.tri=TRUE,transpose=TRUE)
  }
  list(z=z,R=R[,ip]) ## R = RP here
} ## gH2ls
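## Illustrative check (not run): for a small full rank H the least squares problem
## returned by 'gH2ls' has Hessian H and gradient g at b, as claimed above...
if (FALSE) {
  set.seed(1)
  H <- crossprod(matrix(rnorm(20),5,4)); g <- rnorm(4); b <- rnorm(4)
  R <- suppressWarnings(chol(H/2,pivot=TRUE))   ## PHP'/2 = R'R
  ls <- gH2ls(b,g,R)
  range(2*crossprod(ls$R) - H)                  ## ~ 0: Hessian of ||z-Rb||^2 is H
  range(-2*t(ls$R)%*%(ls$z - ls$R%*%b) - g)     ## ~ 0: gradient at b is g
}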

scasm.newton <- function(G,lsp,control,start=NULL,scale.known=TRUE,bs=0) {
## Newton sequential QP fitting of shape constrained models for extended and general families
  warn <- list()
  y <- G$y; nobs <- length(G$w); family <- G$family; weights <- G$w
  mustart <- etastart <- NULL
  if (inherits(G$family,"general.family")) { ## general family initialization
    ## get unconstrained initial coefs, from which Hessian and grad can be computed
    start0 <- start
    x <- G$X; offset <- G$offset; E <- G$Eb
    eval(G$family$initialize)
    if (!is.null(start0)) start <- start0
    efam <- FALSE; llf <- family$ll
    WX <- eta <- NULL; scale1 <- 1
    ucstart <- 2 ## number of iterations before inequality constraints used
  } else { ## extended family initialization
    ## get initial mu and hence initial Hessian and grad
    dev.resids <- G$family$dev.resids;
    eval(G$family$initialize)
    if (!is.null(start)) mustart <- G$family$linkinv(as.numeric(G$X %*% start + G$offset))
    mu <- mustart; eta <- G$family$linkfun(mu); efam <- TRUE
    scale1 <- if (is.null(G$family$scale)) 1 else G$family$scale
    theta <- G$family$getTheta()
    ucstart <- 1
  }
  if (is.null(start)) {
    feasible <- FALSE; coef <- NULL
  } else {
    coef <- as.numeric(start)
    feasible <- ((is.null(G$Ain)||!nrow(G$Ain)||min(G$Ain%*%coef - G$bin) >= -.Machine$double.eps^.5) &&
                   (is.null(G$C)||!nrow(G$C)||all.equal(G$C%*%coef,G$C%*%G$beta0)==TRUE))	   
  }

  if (!feasible) { ## start not feasible so use G$beta0 as initial feasible point 
    if (efam) {
      mu <- G$family$linkinv(as.numeric(G$X %*% G$beta0 + G$offset))
      dev <- sum(dev.resids(y, mu, G$w,theta))
    } else { ## general family
      ll <- llf(y,G$X,G$beta0,G$w,G$family,offset=G$offset,deriv=0)
      dev <- -2*ll$l
    }
    bSb <- bSbfun(G$beta0,G$S,G$off); penalty <- sum(exp(lsp)*bSb)
    pdev.feasible <- dev + penalty ## penalized deviance according to G$beta0
    if (!is.finite(pdev.feasible))  pdev.feasible <- Inf
    pdev0 <- pdev.feasible; 
  } else ucstart <- 0 ## no unconstrained initialization if start already feasible

  w <- rep(1,ncol(G$X)); pdev <- pdev0 <- Inf;conv <- FALSE
  dth0 <- theta0 <- NULL; thmult <- 1
  tol <- .Machine$double.eps^.75

  for (iter in 1L:control$maxit) { ## Main loop
  
    if (iter>1) { ## convert to equivalent least squares problem and solve...
      suppressWarnings(R <- chol(H/2,pivot=TRUE)) ## PHP'/2 = R'R
      ls <- gH2ls(coef,g,R,eta=eta,WX=WX)
      if (iter<=ucstart) { ## run without inequality constraints to initialize
        Ain <- matrix(0,0,0);bin <- rep(0,0);feasible <- TRUE
      } else { ## inequality constraints imposed
        if (iter==ucstart+1&&is.finite(pdev.feasible)) { ## note never triggered if ucstart<=0
	  coef <- G$beta0
	  pdev0 <- pdev.feasible
	  feasible <- TRUE
	}  
        Ain <- G$Ain;bin <- G$bin
      }
      start <- pcls(list(y=ls$z,w=w,X=ls$R,C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),
                    p=G$beta0,Ain=Ain,bin=bin))
      if (efam) mu <- G$family$linkinv(as.numeric(G$X %*% start + G$offset))		    
    } ## if (iter>1)
    not.ok <- TRUE
    while (not.ok) { ## step control
      if (efam) {
        dev <- sum(dev.resids(y, mu, G$w,theta))
      } else { ## general family
        ll <- llf(y,G$X,start,G$w,G$family,offset=G$offset,deriv=1)
	dev <- -2*ll$l
      }
      if (is.null(start)) {
        if (is.finite(dev)) not.ok <- FALSE else stop("non-finite initial deviance")
      } else { 
        bSb <- bSbfun(start,G$S,G$off); penalty <- sum(exp(lsp)*bSb)
	pdev <- dev + penalty
	if (!is.null(coef) && (!is.finite(pdev) || (iter>1 && feasible && pdev - pdev0 > tol * abs(pdev)))) {
          start <- (start + coef)/2
	  if (efam) mu <- G$family$linkinv(as.numeric(G$X %*% start + G$offset))
	  if (all.equal(as.numeric(start),as.numeric(coef),tol=.Machine$double.eps^.75*mean(abs(coef))) == TRUE) {
            warning("step failure")
	    start <- coef
	    pdev <- pdev0
	    not.ok <- FALSE
          }
        } else not.ok <- FALSE
      }  
    } ## not.ok step control loop
    if (iter > ucstart+1 && feasible && abs(pdev-pdev0)<control$epsilon*abs(pdev)) { conv <- TRUE;break}
    pdev0 <- pdev
    if (!is.null(start) && efam && (G$family$n.theta>0||scale1<0)) {
      ## deal with any scale and theta parameter estimation
      theta <- estimate.theta(theta,G$family,y,mu,scale=scale1,wt=G$w,tol=1e-6)
      if (!is.null(theta0)) { ## theta zig-zag control
        dth <- theta-theta0 ## proposed change in theta
	if (is.null(dth0)) dth0 <- dth else {
          if (any(dth0*dth<0&abs(dth)>abs(dth0)*.5)) thmult <- thmult/2 else {
            thmult <- min(1,thmult*1.5)
          }
        }
	theta <- theta0 + dth*thmult
	dth0 <- dth
      }
      
      theta0 <- theta
      if (!is.null(G$family$scale) && !scale.known) {
	scale <- exp(theta[family$n.theta+1])
	theta <- theta[-(family$n.theta+1)]
      }
      if (scale1<0 && iter>4) { ## freeze scale update
        scale1 <- scale
	theta0 <- theta0[-(family$n.theta+1)]
	dth0 <- dth0[-(family$n.theta+1)]
      }
      G$family$putTheta(theta)
      ## need to recompute pdev0, with new theta...
      pdev0 <- sum(dev.resids(y, mu, G$w,theta)) + penalty
    } ## theta estimation
  
    coef <- start ## current coef acceptable
    
    if ((!feasible||iter<=ucstart) && iter > 1) { ## check whether coef vector feasible yet 
      feasible <- ((is.null(G$Ain)||!nrow(G$Ain)||min(G$Ain%*%coef - G$bin) >= -.Machine$double.eps^.5) &&
                   (is.null(G$C)||!nrow(G$C)||all.equal(G$C%*%coef,G$C%*%G$beta0)==TRUE))
    }
    ## get the gradient and Hessian of the deviance
    if (efam) {
      dd <- dDeta(y,mu,G$w,theta,G$family,0)
      WX <- dd$Deta2*G$X
      H <- crossprod(G$X,WX)
      g <- drop(dd$Deta %*% G$X)
    } else { ## general family
      H <- -2*ll$lbb; g <- -2*ll$lb
    }
   
  } ## main loop

  if (!scale.known) { ## Get scale MLE conditional on mu and theta
    G$family$n.theta <- 0
    theta0 <- estimate.theta(theta,G$family,y,mu,scale = -1,wt=G$w,tol=1e-6)
    scale <- exp(theta0[family$n.theta+1]);
  } else scale <- scale1

  if (!conv) warn[length(warn)+1] <- "scasm.newton not converged"  
  if (efam) {
    eta <- G$family$linkfun(mu); rsd <- dev.resids(y, mu, G$w,theta)
  } else { ## general family
    lpi <- attr(G$X,"lpi")
    if (is.null(lpi)) { ## only one...
      eta <- as.numeric(G$X %*% coef) + if (is.null(G$offset)) 0 else G$offset
      mu <- G$family$linkinv(eta) 
    } else { ## multiple...
      mu <- eta <- matrix(0,nrow(G$X),length(lpi))
      if (!is.null(G$offset)) G$offset[[length(lpi)+1]] <- 0
      for (j in 1:length(lpi)) {
        eta[,j] <- as.numeric(G$X[,lpi[[j]],drop=FALSE] %*% coef[lpi[[j]]]) +
        if (is.null(G$offset[[j]])) 0 else offset[[j]]
        mu[,j] <- G$family$linfo[[j]]$linkinv(eta[,j]) 
      }
    }
    rsd <- NULL
  }
  if (!feasible) stop("failed to find feasible starting values")
  if (iter==control$maxit) warning("scasm.newton failed to converge")
  aic.model <- if (inherits(G$family,"general.family")) dev else G$family$aic(y,mu,theta,G$w,scale*sum(G$w))

  bsr <- matrix(0,length(start),bs)
  if (bs>0) { ## always fixed Hessian version
    if (efam) {
      g <- matrix(0,ncol(G$X),bs)
      for (k in 1:bs) { ## bootstrap gradient vectors
         wb <- tabulate(sample(nobs,replace=TRUE),nobs) ## np bootstrap weights
         g[,k] <- drop((dd$Deta*wb) %*% G$X) ## bootstrap gradient
      }
    } else g <- -2*llf(y,G$X,start,G$w,G$family,offset=G$offset,deriv=-bs)$lb
    if (ncol(g)) for (k in 1:bs) { ## non-parametric bootstrap for coefs
      ls <- gH2ls(coef,g[,k],R,eta=eta) ## convert to LS using original Hessian
      bsr[,k] <- pcls(list(y=ls$z,w=w,X=ls$R,C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),##p=G$beta0,Ain=Ain,bin=bin)) 
                      p=coef,Ain=G$Ain,bin=G$bin,active=attr(coef,"active"))) ## seems to work just as well as above
     	    
    } else bsr <- matrix(0,length(start),0) ## for ll that can't yet do this		    
  }
  if (FALSE&&bs>0) {
    for (k in 1:bs) {
      if (efam) {
        wb <- tabulate(sample(nobs,replace=TRUE),nobs)
        g <- drop((dd$Deta*wb) %*% G$X) ## bootstrap gradient
	## NOTE: probably need to add Hessian BS
      } else {
        lf <- llf(y,G$X,start,G$w,G$family,offset=G$offset,deriv=-1)
	g <- -2*lf$lb; suppressWarnings(R <- chol(-lf$lbb,pivot=TRUE)) ## PHP'/2 = R'R
      }
      ls <- gH2ls(coef,g,R,eta=eta) ## convert to LS using original Hessian
      bsr[,k] <- pcls(list(y=ls$z,w=w,X=ls$R,C=G$C,S=G$S,off=G$off-1L,sp=exp(lsp),
                      p=coef,Ain=G$Ain,bin=G$bin,active=attr(coef,"active")))
    }		      
  }

  list(coef=coef,bSb=bSb,niter=iter,pearson = NA ,weights=G$w, H = H/2,scale=scale,pdev=pdev,aic=aic.model,
       fitted.values=mu,linear.predictors=eta,residuals=rsd,y=y,deviance= if (efam) dev else NULL,warn=warn,
       bs=bsr)
} ## scasm.newton

check.psdef <- function(A,eps=.Machine$double.eps^.75) {
## Cholesky based test for positive semi-definite
  suppressWarnings(R <- chol(A,pivot=TRUE))
  r <- attr(R,"rank")
  if (r == ncol(R)) return(TRUE) ## +def
  ipiv <- order(attr(R,"pivot"))
  ii <- (r+1):ncol(R)
  R[ii,ii] <- 0
  max(abs(crossprod(R[,ipiv])-A))<eps ## + semi def or not?
} ## check.psdef
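## Quick illustration (not run)...
if (FALSE) {
  check.psdef(crossprod(matrix(rnorm(12),4,3))) ## TRUE: rank 3, +ve semi-definite
  check.psdef(diag(c(1,1,-1)))                  ## FALSE: indefinite
}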

getVb <- function(G,H,p,zob=list(),project = TRUE,HpSt=FALSE) {
## gets cov matrix from pcls fit object, to within the scale parameter
## if project==TRUE and Z is the constraint null space basis, return
## Vb = Z(Z'HpZ)^{-1}Z', or Z'HpZ and Z'StZ if HpSt==TRUE.
## p is coefficient vector. zob is constraints as returned by
## getZ. G is pre-fit scasm list, including constraint spec.
  c2inv <- function(Hp) {
  ## returns inverse of Hp if +ve def and pseudoinverse of nearest +ve def
  ## matrix otherwise...
    suppressWarnings(R <- chol(Hp,pivot=TRUE))
    r <- attr(R,"rank"); p <- ncol(R)
    if (r<p) { ii <- (r+1):p; R[ii,ii] <- 0 }
    r <- Rrank(R)
    if (r<p) {
      ev <- eigen(Hp,symmetric=TRUE)
      ii <- ev$values > abs(ev$values[1])*.Machine$double.eps^.9
      ev$values[ii] <- 1/sqrt(ev$values[ii])
      ev$values[!ii] <- 0
      return(crossprod(ev$values*t(ev$vectors)))
    } else {
      ipiv <- order(attr(R,"pivot"))
      return(chol2inv(R)[ipiv,ipiv,drop=FALSE])
    }  
  } ## c2inv
  
  St <- H*0
  m <- length(G$S); sedf <- pSp <- numeric(m)
  if (m) {
    for (i in 1:m) { ## get total penalty matrix and p'S_jp terms
      ii <- 1:ncol(G$S[[i]]) + G$off[i] - 1
      St[ii,ii] <- St[ii,ii] + G$S[[i]]*G$sp[i]
      pSp[i] <- sum(p[ii]*drop(G$S[[i]]%*%p[ii]))
    }
    pSp <- pmax(pSp,.Machine$double.xmin)
  }  
  
  if (project&&length(zob)) {
    H <- ZtHZproj(H,zob)
    St <- ZtHZproj(St,zob)
  }
  
  Hp <- H + St ## penalized Hessian
  
  Hindef <- !check.psdef(H)
  if (Hindef) { ## H not positive semi-definite 
    eh <- eigen(H,symmetric=TRUE)
    eh$values[eh$values<0] <- 0
    H <- eh$vectors %*% (eh$values*t(eh$vectors))
    Vb1 <- H + St
  }

  if (project&&length(zob)) { ## there are constraints
   
    if (HpSt) { ## then only penalty matrix and penalized Hessian needed
      return(list(Hp=Hp,St=St))
    }
    Vb <- c2inv(Hp) ## (Z'HpZ)^{-1}
    Vb <- ZHZtproj(Vb,zob) ## Z(Z'HpZ)^{-1}Z'
    if (Hindef) {
      Vb1 <- c2inv(Vb1)
      Vb1 <- ZHZtproj(Vb1,zob)
    } else Vb1 <- Vb
    H <-  ZHZtproj(H,zob) ## project H in same way
  } else {
    if (HpSt) return(list(Hp=Hp,St=St))
    Vb <- c2inv(Hp)
    if (Hindef) {
      Vb1 <- c2inv(Vb1)
    } else Vb1 <- Vb
  }
 
  edf <- rowSums(Vb1*H) ## vector of edf per coef.
  if (m) for (i in 1:m) { ## sedf[i]*sp[i] is suppressed EDF per penalty
    ii <- 1:ncol(G$S[[i]]) + G$off[i] - 1
    sedf[i] <- sum(Vb1[ii,ii]*G$S[[i]])
  }
  fv <- G$X %*% p ## fitted values.
  list(Vb=Vb,edf = edf,fv=fv,pSp=pSp,sedf=sedf,H=H,Hp=Hp,St=St)
} ## getVb


trSiSZ <- function(G,zob) {
## need tr(S^-Sj) terms, in this case projected into the null space of any constraints
## zob is result of getZ
  sm <- ZSZproject(G,zob) 
  trS <- numeric(length(G$sp))
  k <- sum(sapply(sm,function(x) length(x$ZSZ)))
  if (k!=length(trS)) stop("sp and smooth length not matching")
  k <- 1
  for (i in 1:length(sm)) {
    m <- length(sm[[i]]$ZSZ)
    if (m==1) {
      trS[k] <- suppressWarnings(Rrank(chol(sm[[i]]$ZSZ[[1]],pivot=TRUE)))/G$sp[k]
      k <- k + 1
    } else {
      St <- sm[[i]]$ZSZ[[1]]*G$sp[k];
      for (j in 2:m) { St <- St + sm[[i]]$ZSZ[[j]]*G$sp[k+j-1] }
      R <- suppressWarnings(chol(St,pivot=TRUE)) ## otherwise chol warns not full rank!
      r <- attr(R,"rank");p <- ncol(R)
      if (r<p) { ii <- (r+1):p; R[ii,ii] <- 0}
      r <- min(Rrank(R),sm[[i]]$df-sm[[i]]$null.space.dim);
      piv <- attr(R,"pivot"); p <- ncol(St)
      if (r<p) { ii <- (r+1):p; R[ii,ii] <- diag(1,length(ii)) }
      for (j in 1:m) {
        trS[k] <- sum(diag(forwardsolve(t(R),t(forwardsolve(t(R),sm[[i]]$ZSZ[[j]][piv,piv]))))[1:r])
	k <- k + 1
      }
    }     
  }
  trS
} ## trSiSZ

lpdgDet <- function(A,rank=0) {
## Get possibly generalized determinant of +def matrix A.
## If rank>0 then it supplies the rank of A.
## S <- crossprod(matrix(runif(20)-.5,4,5))
## lpdgDet(S);lpdgDet(S,4);sum(log(eigen(S)$values[1:4]))
   R <- suppressWarnings(chol(A,pivot=TRUE))
   r <- attr(R,"rank")
   if (rank>0 && rank<r) r <- rank
   if (r < ncol(R)) R <- suppressWarnings(chol(tcrossprod(R[1:r,]),pivot=TRUE))
   return(list(ldet=2*sum(log(diag(R))),r=r))
} ## lpdgDet



ZSZproject <- function(G,zob) {
## produces a copy of G$smooth with additional elements Z'SZ
## which are the S matrices projected into the null space of
## any active constraints as stored in zob, produced by getZ
  sm <- G$smooth
  for (k in 1:length(sm)) sm[[k]]$ZSZ <- sm[[k]]$S ## default
  if (length(zob)) for (j in 1:length(zob)) { ## work through Null space object
    k <- zob[[j]]$i.smooth ## smooth this con applies to
    zq <- zob[[j]]$q
    for (i in 1:length(sm[[k]]$S)) sm[[k]]$ZSZ[[i]] <- ## Z'S_jZ
         qr.qty(zob[[j]]$qra,t(qr.qty(zob[[j]]$qra,sm[[k]]$S[[i]])[-(1:zq),,drop=FALSE]))[-(1:zq),,drop=FALSE] 
  }
  sm
} ## ZSZproject



ZtHZproj <- function(H,zob,left=TRUE) {
## Project H into null space of constraints, by forming Z'HZ.
## Here Z is in almost block diagonal form with jth block Z_j
## it's 'almost' because the Z are not square. So ijth block
## of result is Z_i'H_ijZ_j. The non-identity Z_i are stored
## in zob, produced by getZ.
## if left==FALSE, produces HZ
## Assumption is that zob is in ascending block order...
  ## work backwards so that ii still index correct rows/cols after
  ## previous dropping...
  if (length(zob)) for (j in length(zob):1) { 
    ii <- zob[[j]]$ii ## what parameters are we looking at?
    q <- zob[[j]]$q
    H[,ii] <- t(qr.qty(zob[[j]]$qra,t(H[,ii])))
    if (left) {
      H[ii,] <- qr.qty(zob[[j]]$qra,H[ii,])
      H <- H[-ii[1:q],-ii[1:q],drop=FALSE] ## now just drop the un-needed rows/cols from H
    } else H <- H[,-ii[1:q],drop=FALSE]   
  }
  H
} ## ZtHZproj

ZHZtproj <- function(H,zob) {
## counterpart of ZtHZproj that forms ZHZ'. So ijth block of result
## is Z_iH_ijZ_j'. The non-identity Z_i are stored in zob,
## produced by getZ.
## Assumption is that zob is in ascending block order...
  qtot <- sum(sapply(zob,function(x) x$q))
  if (!qtot) return(H)
  H1 <- matrix(0,nrow(H)+qtot,ncol(H))
  # First pad out rows... 
  m <- length(zob)
  ## get unpenalized row/col indices...
  iup <- sort(which(!1:nrow(H1) %in% as.integer(unlist(lapply(zob,function(x) x$ii)))))
   if (length(iup)) {
    next.iup <- iup[1] ## start of next unpenalized block to process
  } else next.iup <- nrow(H1)
  j0 <- 0 ## end of previous source (H) block
  for (j in 1:m) {
    if (zob[[j]]$ii[1]>next.iup) { ## next repara is after an unconstrained block
      ii <- next.iup:(zob[[j]]$ii[1]-1) ## index of unconstrained in target
      H1[ii,] <- H[j0+1:length(ii),] ## copy unchanged from H to correct rows of H1 
      j0 <- j0 + length(ii) ## update end of processed blocks in source, H
      next.iup <- if (any(iup>max(ii))) min(iup[iup>max(ii)]) else nrow(H1) 
    }
    ## now copy required block itself...
    ii <- zob[[j]]$ii[-(1:zob[[j]]$q)] ## target (H1) rows
    jj <- 1:length(ii) + j0 ## source rows
    j0 <- max(jj) ## new source block end
    H1[ii,] <- H[jj,] 
  }
  if (next.iup < nrow(H1)) { ## deal with any trailing unconstrained elements
    ii <- iup[iup>=next.iup]
    H1[ii,] <- H[j0+1:length(ii),]
  }

  ## Now pad out columns (as above but on cols)...
  H <- matrix(0,nrow(H1),nrow(H1))
  if (length(iup)) {
    next.iup <- iup[1]
  } else next.iup <- nrow(H1)
  j0 <- 0 

  for (j in 1:m) {
    if (zob[[j]]$ii[1]>next.iup) {
      ii <- next.iup:(zob[[j]]$ii[1]-1)
      H[,ii] <- H1[,j0+1:length(ii)]
      j0 <- j0 + length(ii)
      next.iup <- if (any(iup>max(ii))) min(iup[iup>max(ii)]) else nrow(H1)
    }
    ii <- zob[[j]]$ii[-(1:zob[[j]]$q)] ## target (H) cols
    jj <- 1:length(ii) + j0 ## source cols
    j0 <- max(jj)
    H[,ii] <- H1[,jj]
  }
  if (next.iup < nrow(H1)) {
    ii <- iup[iup>=next.iup]
    H[,ii] <- H1[,j0+1:length(ii)]
  }

  ## H now contains original H, but with zero rows/cols inserted to
  ## facilitate multiplication by Z components...
  for (j in 1:m) {
    ii <- zob[[j]]$ii
    H[,ii] <- t(qr.qy(zob[[j]]$qra,t(H[,ii])))
    H[ii,] <- qr.qy(zob[[j]]$qra,H[ii,])
  }
  H
} ## ZHZtproj

Ztbproj <- function(b,zob) {
## Project b into null space of constraints, by forming Z'b.
## zob encodes Z as returned by getZ...
## Assumption is that zob is in ascending block order...
  ## work backwards so that ii still index correct rows/cols after
  ## previous dropping...
  if (length(zob)) for (j in length(zob):1) { 
    ii <- zob[[j]]$ii ## what parameters are we looking at?
    q <- zob[[j]]$q
    b[ii] <- qr.qty(zob[[j]]$qra,b[ii])
    b <- b[-ii[1:q]] ## now just drop the un-needed elements from b
  }
  b
} ## Ztbproj

Zbproj <- function(b,zob) {
## Forms Zb where b is a vector, and zob is the structure returned by getZ defining Z.
## Assumption is that zob is in ascending block order...
  qtot <- sum(sapply(zob,function(x) x$q))
  if (!qtot) return(b)
  b1 <- rep(0,length(b)+qtot)
  # First pad out rows... 
  m <- length(zob)
  ## get unpenalized row/col indices...
  iup <- sort(which(!1:length(b1) %in% as.integer(unlist(lapply(zob,function(x) x$ii)))))
  next.iup <- if (length(iup)) iup[1] else length(b1) ## start of next unpenalized block to process
  j0 <- 0 ## end of previous source (b) block
  for (j in 1:m) {
    if (zob[[j]]$ii[1]>next.iup) { ## next repara is after an unconstrained block
      ii <- next.iup:(zob[[j]]$ii[1]-1) ## index of unconstrained in target
      b1[ii] <- b[j0+1:length(ii)] ## copy unchanged from b to correct rows of b1 
      j0 <- j0 + length(ii) ## update end of processed blocks in source, b
      next.iup <- if (any(iup>max(ii))) min(iup[iup>max(ii)]) else length(b1) 
    }
    ## now copy required block itself...
    ii <- zob[[j]]$ii[-(1:zob[[j]]$q)] ## target (b1) rows
    jj <- 1:length(ii) + j0 ## source rows
    j0 <- max(jj) ## new source block end
    b1[ii] <- b[jj] 
  }
  if (next.iup < length(b1)) { ## deal with any trailing unconstrained elements
    ii <- iup[iup>=next.iup]
    b1[ii] <- b[j0+1:length(ii)]
  }
  for (j in 1:m) {
    ii <- zob[[j]]$ii
    #H[,ii] <- t(qr.qy(zob[[j]]$qra,t(H[,ii])))
    b1[ii] <- qr.qy(zob[[j]]$qra,b1[ii])
  }
  b1
} ## Zbproj

getZ <- function(G,active=rep(0,0)) {
## Given an active set gets an object representing the constraint NULL space, Z.
## this is computed smooth by smooth, to ensure that Z'StZ is still block
## diagonal. See also ZHZtproj and ZtHZproj
  zob <- list();zi <- 0;ns <- 0
  sm <- G$smooth
  nac <- length(active) ## number of active inequality constraints
  Ain <- if (nac) G$Ain[active,,drop=FALSE] else NULL ## the active inequality constraints matrix  
  for (k in 1:length(sm)) { ## work through the smooths
    ii <- sm[[k]]$first.para:sm[[k]]$last.para ## columns related to this smooth
    if (!is.null(G$C)) { ## are there fixed constraints?
      i <- rowSums(G$C[,ii,drop=FALSE]!=0)!=0 ## constraints relevant to this term
      if (any(G$C[i,-ii]!=0)) stop("constraints overlap more than one term")
      A <- G$C[i,ii,drop=FALSE] ## retain only smooth relevant part
    } else A <- matrix(0,0,length(ii))
    if (!is.null(Ain)) { ## are there active inequality constraints?
      i <- rowSums(Ain[,ii,drop=FALSE]!=0)!=0 ## constraints relevant to this term
      if (any(Ain[i,-ii]!=0)) stop("constraints overlap more than one term")
      A <- rbind(A,Ain[i,ii]) ## append smooth relevant part to any fixed
    }
    if (nrow(A)) { ## any constraints for this smooth?
      zi <- zi + 1
      ms <- length(sm[[k]]$S)
      zob[[zi]] <- list(qra=qr(t(A)), ## QR defining Z
                        q=nrow(A), ## number of constraints
                        ii=ii, ## coefs it applies to
			i.smooth=k, ## smooth it applies to
			i.S = if (ms) 1:ms + ns else numeric(0)) ## penalty matrices it applies to
      ns <- ns + ms
    }
  }
  zob
} ## getZ
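## Illustrative check of the null space parameterization (not run); here 'zob' is built
## by hand for a single fake smooth, using the element layout documented above...
if (FALSE) {
  p <- 6; A <- matrix(rnorm(2*p),2,p)  ## two fake constraints on 6 coefficients
  zob <- list(list(qra=qr(t(A)),q=2,ii=1:p,i.smooth=1,i.S=1))
  bz <- rnorm(p-2)                     ## coefficients in the null space basis
  b <- Zbproj(bz,zob)                  ## b = Z bz
  range(A %*% b)                       ## ~ 0: constraints satisfied by construction
  range(Ztbproj(b,zob) - bz)           ## ~ 0: since Z'Z = I
}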

Stot <- function(G,root=TRUE,sp=NULL) {
## Obtain the total penalty matrix and optionally its square root.
## If sp==NULL then a 'balanced' total penalty is produced, including
## a penalty on any fixed constraints.
  m <- length(G$S); p <- ncol(G$X)
  if (is.null(sp)) sp <- sapply(G$S,function(x) 1/norm(x))
  if (is.null(G$C)||nrow(G$C)==0||is.null(sp)) S <- matrix(0,p,p) else {
    S <- crossprod(G$C);
    nors <- norm(S)
    if (nors>0) S <- S/nors ## penalize departure from constraints
  }  
  for (i in 1:m) {
    ii <- G$off[i] + 1:ncol(G$S[[i]]) - 1
    S[ii,ii] <- S[ii,ii] + sp[i]*G$S[[i]]
  }
  if (root) {
    suppressWarnings(R <- chol(S,pivot=TRUE))
    r <- attr(R,"rank");piv <- attr(R,"pivot")
    R <- R[1:r,]; R[,piv] <- R
  } else R <- NULL
  list(S=S,R=R) ## R'R = S
} ## Stot
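## Not run: sanity check that the returned root satisfies R'R = S (up to the numerical
## rank truncation), using a prefit object from scasm(...,fit=FALSE)...
if (FALSE) {
  dat <- data.frame(x = runif(100)); dat$y <- dat$x + rnorm(100)*.1
  G <- scasm(y ~ s(x,bs="sc",xt="m+"), data=dat, fit=FALSE)
  st <- Stot(G)
  range(crossprod(st$R) - st$S) ## ~ 0
}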

lsp.cov <- function(G,Vb,beta,n=1000) {
## Obtains approx cov matrix of log smoothing params by simulation.
## Based on the fact that the main uncertainty in EFS log sp estimation is in
## the log(b'S_jb) terms...
  m <- length(G$S); if (!m) return(NULL)
  b <- rmvn(n,as.numeric(beta),Vb) ## n by p matrix of replicate coefs
  bSb <- matrix(0,n,m)
  for (i in 1:m) {
    ii <- G$off[i] + 1:ncol(G$S[[i]]) - 1
    bSb[,i] <- rowSums((b[,ii] %*% G$S[[i]]) * b[,ii])
  }
  cov(log(bSb)) 
} ## lsp.cov

imp.diff <- function(G,Hi,sp,beta) {
## implicit differentiation to get dbeta/drho_j
## Hi is the inverse Hessian (unscaled cov matrix)
  m <- length(G$S); if (!m) return(NULL)
  b1 <- matrix(0,length(beta),m)
  for (i in 1:m) {
    ii <- G$off[i] + 1:ncol(G$S[[i]]) - 1
    b1[,i] <- Hi[,ii] %*% G$S[[i]] %*% beta[ii] * sp[i]
  }
  b1
} ## imp.diff

scasm.fit <- function(G,control=gam.control(),gamma=1,bs=0,...) {
## fits shape constrained additive model, alternating pirls and EFS,
## having first obtained initial smoothing parameters.

 G$family <- fix.family.ls(fix.family.link(G$family))
 
 if (!is.null(G$family$preinitialize)) {
    if (inherits(G$family,"general.family")) {
      Gmod <- G$family$preinitialize(G) ## modifies some elements of G
      for (gnam in names(Gmod)) G[[gnam]] <- Gmod[[gnam]] ## copy these into G  
    } else {
      ## extended family - usually just initializes theta and possibly y
      if (!is.null(attr(G$family$preinitialize,"needG"))) attr(G$family,"G") <- G ## more complicated
      pini <- G$family$preinitialize(G$y,G$family)
      attr(G$family,"G") <- NULL
      if (!is.null(pini$family)) G$family <- pini$family
      if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta)
      if (!is.null(pini$y)) G$y <- pini$y
    }
  }

  G$Eb <- Stot(G,TRUE)$R ## balanced root penalty to regularize initialization

  lsp <- if (length(G$sp)>0) log(initial.spg(G$X,G$y,G$w,G$family,G$S,G$rank,G$off,
                       offset=G$offset,L=G$L,lsp0=G$lsp0,E=G$Eb,...))  else rep(0,0)
		       
  L <- if (is.null(G$L)) diag(1,length(lsp)) else G$L
  if (any(L<0)) stop("can not handle negative weighting of log smoothing parameters")
  efam <- inherits(G$family,"extended.family")
  scale.known <- (G$family$family %in% c("poisson","binomial")) ||
                 (efam && is.null(G$family$scale)) ||
		 (!is.null(G$family$scale) && G$family$scale > 0) 		 
  start <- NULL; scale <- 1;G$sp <- exp(L%*%lsp+G$lsp0)
  dlsp <- lsp*0;alpha <- dlsp+1
  eps <- .01;
  for (i in 1:200) { ## main loop
    
    fit <- if (efam) scasm.newton(G,log(G$sp),start=start,control=control,scale.known=scale.known)
           else scasm.pirls(G,log(G$sp),start=start,control=control,eps=eps)
    active <- attr(fit$coef,"active") ## active inequality constraints
    zob <- getZ(G,active)
    v <- getVb(G,H=fit$H,p=fit$coef,zob=zob,project = TRUE)
    if (efam) {
      if (!scale.known) fit$scale <- fit$scale*nrow(G$X)/(nrow(G$X)-sum(v$edf))
      G$family$scale <- fit$scale ## updated scale estimate
    }  
    if (!scale.known) scale <- if (is.null(fit$scale)) fit$pearson/(nrow(G$X)-sum(v$edf)) else fit$scale
    if (length(lsp)==0) break
    trS <- trSiSZ(G,zob)
    redf <- drop(t(L)%*%((trS-v$sedf)*G$sp)) ## residual EDF per sp
    if (any(redf<=0)) {
      if (min(redf) < -1e-4) warning(paste("min(redf)=",min(redf)))
      redf[redf<1e-7] <- 1e-7
    }
    sedf <- drop(t(L)%*%(v$sedf*G$sp)) ## suppressed EDF per sp 
    ii <- redf > .05; lsp1 <- lsp
    dlsp0 <- dlsp;
    dlsp <- log(scale) + log(redf) - log(drop(t(L)%*%(v$pSp*G$sp)))
    maxstep <- max(abs(dlsp))
    if (i==1 && maxstep > 8)  dlsp <- 8*dlsp/maxstep
    if (i>2 && maxstep > 4) dlsp <- 4*dlsp/maxstep ## avoid overly long steps
    ## don't increase if already too close to total suppression,
    ## or decrease if already too close to no suppression
    dlsp[(redf<.05 & dlsp>0)|(sedf<.05 & dlsp<0)] <- 0
    if (i>1) {
      jj <- dlsp*dlsp0 > 0 ## same sign
      alpha[!jj] <- alpha[!jj]/2
      if (i>2) {
        ii <- jj & jj.old ## same sign 3 times in a row 
        alpha[ii] <- pmin(2,alpha[ii]*1.2)
      }
      jj.old <- jj
    }
    lsp1 <- dlsp*alpha + lsp
    if (i==7) eps <- 1e-3 # pirls eps
    conv <- (i>1 && max(abs(pSpold-v$pSp)/pmax(1e-4,v$pSp))<.01)||(max(abs(lsp1-lsp))<.01)
    if (conv) { if (eps<.005) break else eps <- 1e-3}
    lsp <- lsp1
    G$sp <- exp(drop(L%*%lsp+G$lsp0))
    start <- fit$coef
    pSpold <- v$pSp
  } ## main loop
  if (length(fit$warn)) for (i in 1:length(fit$warn)) warning(fit$warn[i])
  ii <- diag(v$Vb) < 0; diag(v$Vb)[ii] <- 0 ## avoid triggering plot problem
  F = v$Vb %*% v$H;
  b1 <- imp.diff(G,Hi=v$Vb,sp=G$sp,beta=fit$coef) ## dbeta/dlsp
  
  if (!inherits(G$family,"extended.family")) { ## still need aic
    fit$aic <- sum(G$family$dev.resids(fit$y,fit$fitted.values,G$w))/scale -
               2*G$family$ls(fit$y,G$w,fit$n,scale)[1]
  }
  if (efam&&!scale.known) G$family$scale <- -1
  object <- list(coefficients = fit$coef,sp=exp(lsp),Vp=v$Vb*scale,Ve=F%*%v$Vb*scale,
       scale=scale,edf=v$edf,prior.weights=fit$weights,family=G$family,deviance=fit$deviance,
       residuals=fit$residuals,scale.known=scale.known,F = F,R=t(mroot(v$H)),
       outer.info=list(converged = i<200,niter=i),y=fit$y,aic=fit$aic+2*sum(v$edf),
       linear.predictors=fit$linear.predictors,fitted.values=fit$fitted.values)

  Vrho <- lsp.cov(G,object$Vp,fit$coef,n=1000) ## approx lsp cov matrix 
  object$Vc <- b1%*%Vrho%*%t(b1) + object$Vp
  object$edf2 <- rowSums(object$Vc*v$H)/scale
  object$edf1 <- 2*v$edf - rowSums(F*t(F))
  names(object$edf) <- names(object$edf1) <- names(object$edf2) <- names(object$coefficients)
  ## extended family may need to manipulate fit object.
  if (!is.null(G$family$postproc)) {
    if (inherits(G$family,"general.family")) eval(G$family$postproc) else {
      posr <- G$family$postproc(family=G$family,y=G$y,prior.weights=G$w,
              fitted=fit$fitted.values,linear.predictors=fit$linear.predictors,offset=G$offset,
	      intercept=G$intercept)
      if (!is.null(posr$family)) object$family$family <- posr$family
      if (!is.null(posr$deviance)) object$deviance <- posr$deviance
      if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance	      
    }
  } 
  if (is.null(object$deviance)) object$deviance <- sum(residuals.gam(object,"deviance")^2)
  ## now get LAML *excluding* active constraints (supplying the active set to getZ below would re-instate them)
  zob <- getZ(G) 
  z <- getVb(G,H=fit$H,p=fit$coef,zob=zob,project = TRUE,HpSt=FALSE)
  ldetS <- lpdgDet(z$St); M <- ncol(z$Hp) - ldetS$r
  laml <- (-fit$pdev/scale + ldetS$ldet -lpdgDet(z$Hp)$ldet)/2 +M*log(2*pi)/2
  if (!inherits(G$family,"general.family")) laml <- laml +
     if (inherits(G$family,"extended.family")) G$family$ls(fit$y,G$w,G$family$getTheta(),scale)$ls else
                                               G$family$ls(fit$y,fit$w,fit$n,scale)[1]
  object$laml <- laml
  if (bs) {
    object$bs <- if (inherits(G$family,"extended.family"))
               scasm.newton(G,log(G$sp),start=start,control=control,scale.known=scale.known,bs=bs)$bs else
               scasm.pirls(G,log(G$sp),start=start,control=control,eps=eps,bs=bs)$bs
    attr(object$bs,"svar.only") <- TRUE ## signal that this covers sampling var, not smoothing bias.
  }	       
  object$Vp <- z$Vb*scale; object$Vc<- b1%*%Vrho%*%t(b1) + object$Vp ## no active cons
  object$Ve = z$Vb%*%z$H%*%z$Vb*scale; attr(object$Vp,"zob") <- zob; object$St <- z$St
  object 
} ## scasm.fit
