R/mgcv.r

##  R routines for the package mgcv (c) Simon Wood 2000-2016
##  With contributions from Henric Nilsson

Rrank <- function(R,tol=.Machine$double.eps^.9) {
## Finds the rank of the upper triangular matrix R, by estimating the condition
## number of the upper rank-by-rank block, and reducing rank until this is 
## acceptably low... assumes R is pivoted.
 m <- nrow(R)
 rank <- min(m,ncol(R))
 ok <- TRUE
  while (ok) {
    Rcond <- .C(C_R_cond,R=as.double(R),r=as.integer(m),c=as.integer(rank),
             work=as.double(rep(0,4*m)),Rcond=as.double(1))$Rcond
    if (Rcond*tol<1) ok <- FALSE else rank <- rank - 1  
  }
  rank
}
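## Quick sanity check (sketch, not run), assuming mgcv is loaded: for the pivoted
## R factor of a matrix of full column rank, Rrank should return ncol(R), e.g.
## set.seed(1); R <- qr.R(qr(matrix(rnorm(50),10,5),LAPACK=TRUE)); Rrank(R) ## 5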
 
slanczos <- function(A,k=10,kl=-1,tol=.Machine$double.eps^.5,nt=1) {
## Computes truncated eigen decomposition of symmetric A by 
## Lanczos iteration. If kl < 0 then k largest magnitude
## eigenvalues returned, otherwise k highest and kl lowest.
## Eigenvectors are always returned too.
## set.seed(1);n <- 1000;A <- matrix(runif(n*n),n,n);A <- A+t(A);er <- slanczos(A,10)
## um <- eigen(A,symmetric=TRUE);ind <- c(1:5,(n-5+1):n)
## range(er$values-um$values[ind]);range(abs(er$vectors)-abs(um$vectors[,ind]))
## It seems that when k (or k+kl) is beyond 10-15% of n
## then you might as well use eigen(A,symmetric=TRUE), but the
## extra cost is the expensive accumulation of eigenvectors.
## Should re-write whole thing using LAPACK routines for eigenvectors. 
   if (tol<=0||tol>.01) stop("silly tolerance supplied")
   k <- round(k);kl <- round(kl)
   if (k<0) stop("argument k must be positive.")
   m <- k + max(0,kl) 
   n <- nrow(A) 
   if (m<1) return(list(values=rep(0,0),vectors=matrix(0,n,0),iter=0)) 
   if (n != ncol(A)) stop("A not square")
   if (m>n) stop("Can not have more eigenvalues than nrow(A)")
   oo <- .C(C_Rlanczos,A=as.double(A),U=as.double(rep(0,n*m)),D=as.double(rep(0,m)),
            n=as.integer(n),m=as.integer(k),ml=as.integer(kl),tol=as.double(tol),nt=as.integer(nt))
   list(values = oo$D,vectors = matrix(oo$U,n,m),iter=oo$n) 
}

rig <- function(n,mean,scale) {
## inverse Gaussian deviates generated by algorithm 5.7 of
## Gentle, 2003. scale = 1/lambda. 
  if (length(n)>1) n <- length(n)
  x <- y <- rnorm(n)^2
  mys <- mean*scale*y
  mu <- 0*y + mean ## y is there to ensure mu is a vector
  mu2 <- mu^2;
  ind <- mys < .Machine$double.eps^-.5 ## cut off for tail computation
  x[ind] <- mu[ind]*(1 + 0.5*(mys[ind] - sqrt(mys[ind]*4+mys[ind]^2)))
  x[!ind] <- mu[!ind]/mys[!ind] ## tail term (derived from Taylor of sqrt(1+eps) etc)
  #my <- mean*y; sc <- 0*y + scale
  #ind <- my > 1 ## cancellation error can be severe, without splitting
  #x[!ind] <- mu[!ind]*(1 + 0.5*sc[!ind]*(my[!ind] - sqrt(4*my[!ind]/sc[!ind] + my[!ind]^2)))
  ## really the sqrt in the next term should be expanded beyond first order and then
  ## worked on - otherwise too many exact zeros?
  #x[ind] <- pmax(0,mu[ind]*(1+my[ind]*.5*sc[ind]*(1-sqrt(1+ 4/(sc[ind]*my[ind])))))
  ind <- runif(n) > mean/(mean+x)
  x[ind] <- mu2[ind]/x[ind]
  x ## E(x) = mean; var(x) = scale*mean^3
}
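## Quick moment check (sketch, not run): simulated mean and variance should be close
## to the documented E(x) = mean and var(x) = scale*mean^3, e.g.
## set.seed(2); x <- rig(100000, mean = 2, scale = 0.25)
## c(mean(x), var(x)) ## roughly c(2, 0.25*2^3) = c(2, 2)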

strip.offset <- function(x)
# sole purpose is to take a model frame and rename any "offset(a.name)"
# columns "a.name"
{ na <- names(x)
  for (i in 1:length(na)) {
    if (substr(na[i],1,7)=="offset(") 
      na[i] <- substr(na[i],8,nchar(na[i])-1)
  }
  names(x) <- na
  x
}
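## e.g. (sketch, not run) for a data frame dat containing y, x and z:
## names(strip.offset(model.frame(y ~ x + offset(z), dat))) ## "y" "x" "z"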


pcls <- function(M)
# Function to perform penalized constrained least squares.
# Problem to be solved is:
#
#  minimise      ||W^0.5 (y - Xp)||^2 + p'Bp
#  subject to    Ain p >= b  & C p = "constant"
#
# where B = \sum_{i=1}^m \theta_i S_i and W=diag(w)
# on entry this routine requires a list M, with the following elements:
# M$X - the design matrix for this problem.
# M$p - a feasible initial parameter vector - note that this should not be set up to
#       lie exactly on all the inequality constraints - which can easily happen if M$p=0!
# M$y - response variable
# M$w - weight vector: W= diag(M$w)
# M$Ain - matrix of inequality constraints
# M$bin - b above
# M$C  - fixed constraint matrix
# M$S  - List of (minimal) penalty matrices
# M$off - used for unpacking M$S
# M$sp - array of theta_i's 
# Ain, bin and p are not in the object needed to call mgcv....
#
{ nar<-c(length(M$y),length(M$p),dim(M$Ain)[1],dim(M$C)[1])
  ## sanity checking ...
  if (nrow(M$X)!=nar[1]) stop("nrow(M$X) != length(M$y)") 
  if (ncol(M$X)!=nar[2]) stop("ncol(M$X) != length(M$p)")
  if (length(M$w)!=nar[1]) stop("length(M$w) != length(M$y)")
  if (nar[3]!=length(M$bin)) stop("nrow(M$Ain) != length(M$bin)")
  if (nrow(M$Ain)>0) {
    if (ncol(M$Ain)!=nar[2]) stop("ncol(M$Ain) != length(M$p)") 
    res <- as.numeric(M$Ain%*%M$p) - as.numeric(M$bin)
    if (sum(res<0)>0) stop("initial parameters not feasible")
    res <- abs(res)
    if (sum(res<.Machine$double.eps^.5)>0) 
      warning("initial point very close to some inequality constraints")
    res <- mean(res)
    if (res<.Machine$double.eps^.5) 
      warning("initial parameters very close to inequality constraints")
  }
  
  if (nrow(M$C)>0) if (ncol(M$C)!=nar[2]) stop("ncol(M$C) != length(M$p)")  
  if (length(M$S)!=length(M$off)) stop("M$S and M$off have different lengths")
  if (length(M$S)!=length(M$sp)) stop("M$sp has different length to M$S and M$off")
  
  
  # pack the S array for mgcv call 
  m<-length(M$S)
  Sa<-array(0,0);df<-0
  if (m>0) for (i in 1:m)
  { Sa<-c(Sa,M$S[[i]])
    df[i]<-nrow(M$S[[i]])
    if (M$off[i]+df[i]-1>nar[2]) stop(gettextf("M$S[%d] is too large given M$off[%d]", i, i))
  }
  qra.exist <- FALSE
  if (ncol(M$X)>nrow(M$X)) {
    if (m>0) stop("Penalized model matrix must have no more columns than rows") 
    else { ## absorb M$C constraints
      qra <- qr(t(M$C))
      j <- nrow(M$C);k <- ncol(M$X)
      M$X <- t(qr.qty(qra,t(M$X))[(j+1):k,])
      M$Ain <- t(qr.qty(qra,t(M$Ain))[(j+1):k,])
      M$C <- matrix(0,0,0)
      M$p <- rep(0,ncol(M$X)) 
      nar[2] <- length(M$p)
      qra.exist <- TRUE
      if  (ncol(M$X)>nrow(M$X)) stop("Model matrix not full column rank")
    }
  }
  o<-.C(C_RPCLS,as.double(M$X),as.double(M$p),as.double(M$y),as.double(M$w),as.double(M$Ain),as.double(M$bin)
        ,as.double(M$C),as.double(Sa),as.integer(M$off),as.integer(df),as.double(M$sp),
        as.integer(length(M$off)),as.integer(nar))
  p <- array(o[[2]],length(M$p))
  if (qra.exist) p <- qr.qy(qra,c(rep(0,j),p))
  p
} ## pcls
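## Minimal call sketch (not run), just to show the structure of the list M; the
## inequality and equality constraints and penalties are empty here, so the result
## should essentially match ordinary least squares:
## set.seed(3); n <- 100; x <- runif(n); y <- 2 + x - x^2 + rnorm(n)*.1
## M <- list(X = cbind(1, x, x^2), p = rep(0, 3), y = y, w = rep(1, n),
##           Ain = matrix(0, 0, 3), bin = numeric(0), C = matrix(0, 0, 0),
##           S = list(), off = numeric(0), sp = numeric(0))
## pcls(M) ## compare with coef(lm(y ~ x + I(x^2)))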

all_vars1 <- function(form) {
## version of all.vars that doesn't split up terms like x$y into x and y
  vars <- all.vars(form)
  vn <- all.names(form)
  vn <- vn[vn%in%c(vars,"$","[[")] ## actual variable related names
  if ("[["%in%vn) stop("can't handle [[ in formula")
  ii <- which(vn%in%"$") ## index of '$'
  if (length(ii)) { ## assemble variable names
    vn1 <- if (ii[1]>1) vn[1:(ii[1]-1)]
    go <- TRUE
    k <- 1
    while (go) {
      n <- 2; 
      while(k<length(ii) && ii[k]==ii[k+1]-1) { k <- k + 1;n <- n + 1 }
      vn1 <- c(vn1,paste(vn[ii[k]+1:n],collapse="$"))
      if (k==length(ii)) {
        go <- FALSE
	ind <- if (ii[k]+n<length(vn)) (ii[k]+n+1):length(vn) else rep(0,0) 
      } else {
        k <- k +  1
	ind <- if (ii[k-1]+n<ii[k]-1) (ii[k-1]+n+1):(ii[k]-1) else rep(0,0)
      }
      vn1 <- c(vn1,vn[ind])
    }
  } else vn1 <- vn
  vn1
} ## all_vars1
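## e.g. all_vars1(y ~ s(x) + dat$z) returns c("y", "x", "dat$z"), whereas
## all.vars would split "dat$z" into "dat" and "z".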

interpret.gam0 <- function(gf,textra=NULL,extra.special=NULL)
# interprets a gam formula of the generic form:
#   y~x0+x1+x3*x4 + s(x5)+ s(x6,x7) ....
# and returns:
# 1. a model formula for the parametric part: pf (and pfok indicating whether it has terms)
# 2. a list of descriptors for the smooths: smooth.spec
# this function does the work, and is called by interpret.gam
# 'textra' is optional text to add to term labels
# 'extra.special' is label of extra smooth within formula.
{ p.env <- environment(gf) # environment of formula
  tf <- terms.formula(gf,specials=c("s","te","ti","t2",extra.special)) # specials attribute indicates which terms are smooth
 
  terms <- attr(tf,"term.labels") # labels of the model terms 
  nt <- length(terms) # how many terms?
  
  if (attr(tf,"response") > 0) {  # start the replacement formulae
    response <- as.character(attr(tf,"variables")[2])
  } else { 
    response <- NULL
  }
  sp <- attr(tf,"specials")$s     # array of indices of smooth terms 
  tp <- attr(tf,"specials")$te    # indices of tensor product terms
  tip <- attr(tf,"specials")$ti   # indices of tensor product pure interaction terms
  t2p <- attr(tf,"specials")$t2   # indices of type 2 tensor product terms
  zp <- if (is.null(extra.special)) NULL else attr(tf,"specials")[[extra.special]]
  off <- attr(tf,"offset") # location of offset in formula

  ## have to translate sp, tp, tip, t2p (zp) so that they relate to terms,
  ## rather than elements of the formula...
  vtab <- attr(tf,"factors") # cross tabulation of vars to terms
  if (length(sp)>0) for (i in 1:length(sp)) {
    ind <- (1:nt)[as.logical(vtab[sp[i],])]
    sp[i] <- ind # the term that smooth relates to
  }
  if (length(tp)>0) for (i in 1:length(tp)) {
    ind <- (1:nt)[as.logical(vtab[tp[i],])]
    tp[i] <- ind # the term that smooth relates to
  } 
  if (length(tip)>0) for (i in 1:length(tip)) {
    ind <- (1:nt)[as.logical(vtab[tip[i],])]
    tip[i] <- ind # the term that smooth relates to
  } 
  if (length(t2p)>0) for (i in 1:length(t2p)) {
    ind <- (1:nt)[as.logical(vtab[t2p[i],])]
    t2p[i] <- ind # the term that smooth relates to
  }
  if (length(zp)>0) for (i in 1:length(zp)) {
    ind <- (1:nt)[as.logical(vtab[zp[i],])]
    zp[i] <- ind # the term that smooth relates to
  } ## re-referencing is complete

  k <- kt <- kti <- kt2 <- ks <- kz <- kp <- 1 # counters for terms in the 2 formulae
  len.sp <- length(sp)
  len.tp <- length(tp)
  len.tip <- length(tip)
  len.t2p <- length(t2p)
  len.zp <- length(zp)
  ns <- len.sp + len.tp + len.tip + len.t2p + len.zp# number of smooths
  pav <- av <- rep("",0)
  smooth.spec <- list()
  #mgcvat <- "package:mgcv" %in% search() ## is mgcv in search path?
  mgcvns <- loadNamespace('mgcv')
  if (nt) for (i in 1:nt) { # work through all terms
    if (k <= ns&&((ks<=len.sp&&sp[ks]==i)||(kt<=len.tp&&tp[kt]==i)||(kz<=len.zp&&zp[kz]==i)||
                  (kti<=len.tip&&tip[kti]==i)||(kt2<=len.t2p&&t2p[kt2]==i))) { # it's a smooth
      ## have to evaluate in the environment of the formula or you can't find variables 
      ## supplied as smooth arguments, e.g. k <- 5;gam(y~s(x,k=k)), fails,
      ## but if you don't specify namespace of mgcv then stuff like 
      ## loadNamespace('mgcv'); k <- 10; mgcv::interpret.gam(y~s(x,k=k)) fails (can't find s)
      ## eval(parse(text=terms[i]),envir=p.env,enclos=loadNamespace('mgcv')) fails??
      ## following may supply namespace of mgcv explicitly if not on search path...
      ## If 's' etc are masked then we can fail even if mgcv on search path, hence paste
      ## of explicit mgcv reference into first attempt...
    
      st <- try(eval(parse(text=paste("mgcv::",terms[i],sep="")),envir=p.env),silent=TRUE)
      if (inherits(st,"try-error")) st <- 
            eval(parse(text=terms[i]),enclos=p.env,envir=mgcvns)
     
      if (!is.null(textra)) { ## modify the labels on smooths with textra
        pos <- regexpr("(",st$label,fixed=TRUE)[1]
        st$label <- paste(substr(st$label,start=1,stop=pos-1),textra,
                    substr(st$label,start=pos,stop=nchar(st$label)),sep="")
      }
      smooth.spec[[k]] <- smooth.info(st) ## smooth.info supplies any extra specification info for class
      if (ks<=len.sp&&sp[ks]==i) ks <- ks + 1 else # counts s() terms
      if (kt<=len.tp&&tp[kt]==i) kt <- kt + 1 else # counts te() terms
      if (kti<=len.tip&&tip[kti]==i) kti <- kti + 1 else # counts ti() terms
      if (kt2<=len.t2p&&t2p[kt2]==i) kt2 <- kt2 + 1 # counts t2() terms
      else kz <- kz + 1
      k <- k + 1      # counts smooth terms 
    } else {          # parametric
      av[kp] <- terms[i] ## element kp on rhs of parametric
      kp <- kp+1    # counts parametric terms
    }
  }    
  if (!is.null(off)) { ## deal with offset 
    av[kp] <- as.character(attr(tf,"variables")[1+off])
    kp <- kp+1          
  }

  pf <- paste(response,"~",paste(av,collapse=" + "))
  if (attr(tf,"intercept")==0) {
    pf <- paste(pf,"-1",sep="")
    if (kp>1) pfok <- 1 else pfok <- 0
  } else { 
    pfok <- 1;if (kp==1) { 
      pf <- paste(pf,"1"); 
    }
  }

  fake.formula <- pf

  if (length(smooth.spec)>0) 
  for (i in 1:length(smooth.spec)) {
    nt <- length(smooth.spec[[i]]$term)
    ff1 <- paste(smooth.spec[[i]]$term[1:nt],collapse="+")
    fake.formula <- paste(fake.formula,"+",ff1)
    if (smooth.spec[[i]]$by!="NA") {
      fake.formula <- paste(fake.formula,"+",smooth.spec[[i]]$by)
      av <- c(av,smooth.spec[[i]]$term,smooth.spec[[i]]$by)
    } else av <- c(av,smooth.spec[[i]]$term)
  }
  fake.formula <- as.formula(fake.formula,p.env)
  if (length(av)) {
    pred.formula <- as.formula(paste("~",paste(av,collapse="+")))
    pav <- all.vars(pred.formula) ## trick to strip out 'offset(x)' etc...
    pred.formula <- reformulate(pav,env=p.env) 
  } else  pred.formula <- ~1
  ret <- list(pf=as.formula(pf,p.env),pfok=pfok,smooth.spec=smooth.spec,
            fake.formula=fake.formula,response=response,fake.names=av,
            pred.names=pav,pred.formula=pred.formula)
  #environment(ret$fake.formula)  <- environment(ret$pred.formula) <- p.env	    
  class(ret) <- "split.gam.formula"
  ret
} ## interpret.gam0

interpret.gam <- function(gf,extra.special=NULL) {
## wrapper to allow gf to be a list of formulae or 
## a single formula. This facilitates general penalized 
## likelihood models in which several linear predictors 
## may be involved...
##
## The list syntax is as follows. The first formula must have a response on
## the lhs, rather than labels. For m linear predictors, there 
## must be m 'base formulae' in linear predictor order. lhs labels will 
## be ignored in a base formula. Empty base formulae have '-1' on rhs.
## Further formulae have up to m labels 1,...,m on the lhs, in a 
## syntax like this: 3 + 5 ~ s(x), which indicates that the same s(x) 
## should be added to both linear predictors 3 and 5. 
## e.g. A bivariate normal model with common expected values might be
## list(y1~-1,y2~-1,1+2~s(x)), whereas if the second component was contaminated 
## by something else we might have list(y1~-1,y2~s(v)-1,1+2~s(x)) 
## 
## For a list argument, this routine returns a list of split.formula objects 
## with an extra field "lpi" indicating the linear predictors to which each 
## contributes...
  if (is.list(gf)) {
    d <- length(gf)
    p.env <- environment(gf[[1]])
    ## make sure all formulae have a response, to avoid
    ## problems with parametric sub formulae of the form ~1
    #if (length(gf[[1]])<3) stop("first formula must specify a response")
    resp <- gf[[1]][2]
    
    ret <- list()
    pav <- av <- rep("",0)
    nlp <- 0 ## count number of linear predictors (may be different from number of formulae)
    for (i in 1:d) {
      textra <- if (i==1) NULL else paste(".",i-1,sep="") ## modify smooth labels to identify the linear predictor  

      lpi <- getNumericResponse(gf[[i]]) ## get linear predictors to which this applies, if explicit
      if (length(lpi)==1) warning("single linear predictor indices are ignored")
      if (length(lpi)>0) gf[[i]][[2]] <- NULL else { ## delete l.p. labels from formula response 
        nlp <- nlp + 1;lpi <- nlp ## this is base formula for l.p. number nlp       
      }
      ret[[i]] <- interpret.gam0(gf[[i]],textra,extra.special=extra.special)
      ret[[i]]$lpi <- lpi ## record of the linear predictors to which this applies
      
      ## make sure all parametric formulae have a response, to avoid
      ## problems with parametric sub formulae of the form ~1
      respi <- rep("",0) ## no extra response terms
      if (length(ret[[i]]$pf)==2) { 
        ret[[i]]$pf[3] <- ret[[i]]$pf[2];ret[[i]]$pf[2] <- resp
        respi <- rep("",0)
      } else if (i>1) respi <- ret[[i]]$response ## extra response terms
      av <- c(av,ret[[i]]$fake.names,respi) ## accumulate all required variable names 
      pav <- c(pav,ret[[i]]$pred.names) ## predictors only 
    } 
    av <- unique(av) ## strip out duplicate variable names
    pav <- unique(pav)
    if (length(av)>0) {
      ## work around - reformulate with response = "log(x)" will treat log(x) as a name,
      ## not the call it should be... 
      fff <- formula(paste(ret[[1]]$response,"~ ."))
      ret$fake.formula <- reformulate(av,response=ret[[1]]$response,env=p.env) 
      ret$fake.formula[[2]] <- fff[[2]] ## fix messed up response
    } else ret$fake.formula <- ret[[1]]$fake.formula ## create fake formula containing all variables
    ret$pred.formula <- if (length(pav)>0) reformulate(pav) else ~1 ## predictor only formula
    environment(ret$pred.formula) <- p.env
    ret$response <- ret[[1]]$response 
    ret$nlp <- nlp ## number of linear predictors
    for (i in 1:d) if (max(ret[[i]]$lpi)>nlp||min(ret[[i]]$lpi)<1) stop("linear predictor labels out of range")
    class(ret) <- "split.gam.formula"
    return(ret)
  } else interpret.gam0(gf,extra.special=extra.special)  
} ## interpret.gam
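## List formula example (sketch, not run):
## interpret.gam(list(y1 ~ s(x) - 1, y2 ~ s(z) - 1, 1 + 2 ~ s(v)))
## returns three split formulae whose lpi fields are 1, 2 and c(1,2), the shared
## s(v) term contributing to both linear predictors.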


fixDependence <- function(X1,X2,tol=.Machine$double.eps^.5,rank.def=0,strict=FALSE)
# model matrix X2 may be linearly dependent on X1. This 
# routine finds which columns of X2 should be zeroed to 
# fix this. If rank.def>0 then it is taken as the known degree 
# of dependence of X2 on X1 and tol is ignored.
{ qr1 <- qr(X1,LAPACK=TRUE)
  R11 <- abs(qr.R(qr1)[1,1])
  r<-ncol(X1);n<-nrow(X1)
  if (strict) { ## only delete columns of X2 individually dependent on X1
    ## Project columns of X2 into space of X1 and look at difference
    ## to original X2 to check for deficiency...  
    QtX2 <- qr.qty(qr1,X2)
    QtX2[-(1:r),] <- 0
    mdiff <- colMeans(abs(X2 - qr.qy(qr1,QtX2)))
    if (rank.def>0) ind <- (1:ncol(X2))[rank(mdiff) <= rank.def] else
    ind <- (1:ncol(X2))[mdiff < R11*tol]
    if (length(ind)<1) ind <- NULL
  } else { ## make X2 full rank given X1
    QtX2 <- qr.qty(qr1,X2)[(r+1):n,] # Q'X2
    qr2 <- qr(QtX2,LAPACK=TRUE)
    R <- qr.R(qr2)
    # now final diagonal block of R may be zero, indicating rank 
    # deficiency.
    r0 <- r <- nrow(R)
    if (rank.def > 0 && rank.def <= nrow(R)) r0 <- r - rank.def else ## degree of rank def known
      while (r0>0 && mean(abs(R[r0:r,r0:r]))< R11*tol) r0 <- r0 -1 ## compute rank def
    r0 <- r0 + 1
    if (r0>r) return(NULL) else
    ind <- qr2$pivot[r0:r] # the columns of X2 to zero in order to get independence
  }
  ind
} ## fixDependence
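## e.g. (sketch, not run): if the first column of X2 duplicates a column of X1 then
## fixDependence should flag it for zeroing...
## set.seed(4); X1 <- cbind(1, runif(20)); X2 <- cbind(X1[,2], runif(20))
## fixDependence(X1, X2) ## 1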


augment.smX <- function(sm,nobs,np) {
## augments a smooth model matrix with a square root penalty matrix for
## identifiability constraint purposes.
  ns <- length(sm$S) ## number of penalty matrices
  if (ns==0) { ## nothing to do
    return(rbind(sm$X,matrix(0,np,ncol(sm$X))))
  }
  ind <- colMeans(abs(sm$S[[1]]))!=0
  sqrmaX  <- mean(abs(sm$X[,ind]))^2
  alpha <- sqrmaX/mean(abs(sm$S[[1]][ind,ind]))
  St <- sm$S[[1]]*alpha
  if (ns>1) for (i in 2:ns) { 
    ind <- colMeans(abs(sm$S[[i]]))!=0
    alpha <- sqrmaX/mean(abs(sm$S[[i]][ind,ind]))
    St <- St +  sm$S[[i]]*alpha
  }
  rS <- mroot(St,rank=ncol(St)) ## get sqrt of penalty
  X <- rbind(sm$X,matrix(0,np,ncol(sm$X))) ## create augmented model matrix
  X[nobs+sm$p.ind,] <- t(rS) ## add in 
  X ## scaled augmented model matrix
} ## augment.smX

gam.side <- function(sm,Xp,tol=.Machine$double.eps^.5,with.pen=FALSE)
# works through a list of smooths, sm, aiming to identify nested or partially
# nested terms, and impose identifiability constraints on them.
# Xp is the parametric model matrix. It is needed in order to check whether
# there is a constant (or equivalent) in the model. If there is, then this needs 
# to be included when working out side constraints, otherwise dependencies can be 
# missed. 
# Note that with.pen is quite extreme, since you then pretty much only pick
# up dependencies in the null spaces
{ if (!with.pen) { ## check that's possible and reset if not!
    with.pen <- nrow(Xp) < ncol(Xp) + sum(unlist(lapply(sm,function(x) ncol(x$X))))
  }
  m <- length(sm)
  if (m==0) return(sm)
  v.names<-array("",0);maxDim<-1
  for (i in 1:m) { ## collect all term names and max smooth `dim'
    vn <- sm[[i]]$term
    ## need to include by variables in names
    if (sm[[i]]$by!="NA") vn <- paste(vn,sm[[i]]$by,sep="")
    ## need to distinguish levels of factor by variables...
    if (!is.null(sm[[i]]$by.level))  vn <- paste(vn,sm[[i]]$by.level,sep="")
    sm[[i]]$vn <- vn ## use this record to identify variables from now
    v.names <- c(v.names,vn)
    if (sm[[i]]$dim > maxDim) maxDim <- sm[[i]]$dim
  } 
  lv <- length(v.names)   
  v.names <- unique(v.names)
  if (lv == length(v.names)) return(sm) ## no repeats => no nesting

  ## Only get this far if there is nesting.
  ## Need to test for intercept or equivalent in Xp
  intercept <- FALSE
  if (ncol(Xp)) {
    ## first check columns directly...
    if (sum(apply(Xp,2,sd)<.Machine$double.eps^.75)>0) intercept <- TRUE else {
      ## no constant column, so need to check span of Xp...
      f <- rep(1,nrow(Xp))
      ff <- qr.fitted(qr(Xp),f)
      if (max(abs(ff-f))<.Machine$double.eps^.75) intercept <- TRUE 
    }
  }

  sm.id <- as.list(v.names)
  names(sm.id) <- v.names
  for (i in 1:length(sm.id)) sm.id[[i]]<-array(0,0)
  sm.dim <- sm.id
  for (d in 1:maxDim) {
    for (i in 1:m) {
      if (sm[[i]]$dim==d&&sm[[i]]$side.constrain) for (j in 1:d) { ## work through terms
        term<-sm[[i]]$vn[j]
        a <- sm.id[[term]]
        la <- length(a)+1
        sm.id[[term]][la] <- i   ## record smooth i.d. for this variable
        sm.dim[[term]][la] <- d  ## ... and smooth dim.
      }
    }
  }
  ## so now each unique variable name has an associated array of 
  ## the smooths of which it is an argument, arranged in ascending 
  ## order of dimension. Smooths for which side.constrain==FALSE are excluded.
  if (maxDim==1) warning("model has repeated 1-d smooths of same variable.")

  ## Now set things up to enable term specific model matrices to be
  ## augmented with square root penalties, on the fly...
  if (with.pen) {
    k <- 1
    for (i in 1:m) { ## create parameter indices for each term
      k1 <- k + ncol(sm[[i]]$X) - 1
      sm[[i]]$p.ind <- k:k1
      k <- k1 + 1
    }
    np <- k-1 ## number of penalized parameters
  }
  nobs <- nrow(sm[[1]]$X) ## number of observations
  
  for (d in 1:maxDim) { ## work up through dimensions 
    for (i in 1:m) { ## work through smooths
      if (sm[[i]]$dim == d&&sm[[i]]$side.constrain) { ## check for nesting
        if (with.pen) X1 <- matrix(c(rep(1,nobs),rep(0,np)),nobs+np,as.integer(intercept)) else
        X1 <- matrix(1,nobs,as.integer(intercept))
	X1comp <- rep(0,0) ## list of components of X1 to avoid duplication
        for (j in 1:d) { ## work through variables
          b <- sm.id[[sm[[i]]$vn[j]]] # list of smooths dependent on this variable
          k <- (1:length(b))[b==i] ## locate current smooth in list 
          if (k>1) for (l in 1:(k-1)) if (!b[l] %in% X1comp) { ## collect X columns
           X1comp <- c(X1comp,b[l]) ## keep track of components to avoid adding same one twice
           if (with.pen) { ## need to use augmented model matrix in testing 
              if (is.null(sm[[b[l]]]$Xa)) sm[[b[l]]]$Xa <- augment.smX(sm[[b[l]]],nobs,np)
              X1 <- cbind(X1,sm[[b[l]]]$Xa)
            } else X1 <- cbind(X1,sm[[b[l]]]$X) ## penalties not considered
          }
        } ## Now X1 contains columns for all lower dimensional terms
        if (ncol(X1)==as.integer(intercept)) ind <- NULL else {
          if (with.pen) {
             if (is.null(sm[[i]]$Xa)) sm[[i]]$Xa <- augment.smX(sm[[i]],nobs,np)
             ind <- fixDependence(X1,sm[[i]]$Xa,tol=tol) 
          } else ind <- fixDependence(X1,sm[[i]]$X,tol=tol)        
        }
        ## ... the columns to zero to ensure independence
        if (!is.null(ind)) { 
          sm[[i]]$X <- sm[[i]]$X[,-ind]
          ## work through list of penalty matrices, applying constraints...
          nsmS <- length(sm[[i]]$S)
          if (nsmS>0) for (j in nsmS:1) { ## working down so that dropping is painless
            sm[[i]]$S[[j]] <- sm[[i]]$S[[j]][-ind,-ind]
            if (sum(sm[[i]]$S[[j]]!=0)==0) rank <- 0 else
            rank <- qr(sm[[i]]$S[[j]],tol=tol,LAPACK=FALSE)$rank
            sm[[i]]$rank[j] <- rank ## replace previous rank with new rank
            if (rank == 0) { ## drop the penalty
              sm[[i]]$rank <- sm[[i]]$rank[-j]
              sm[[i]]$S[[j]] <- NULL
              sm[[i]]$S.scale <- sm[[i]]$S.scale[-j]
              if (!is.null(sm[[i]]$L)) sm[[i]]$L <- sm[[i]]$L[-j,,drop=FALSE]
            }
          } ## penalty matrices finished
          ## Now we need to establish null space rank for the term
          mi <- length(sm[[i]]$S)
          if (mi>0) {
            St <- sm[[i]]$S[[1]]/norm(sm[[i]]$S[[1]],type="F")
            if (mi>1) for (j in 2:mi) St <- St + 
                  sm[[i]]$S[[j]]/norm(sm[[i]]$S[[j]],type="F")
            es <- eigen(St,symmetric=TRUE,only.values=TRUE)
            sm[[i]]$null.space.dim <- sum(es$values<max(es$values)*.Machine$double.eps^.75) 
          } ## rank found

          if (!is.null(sm[[i]]$L)) {
            ind <- as.numeric(colSums(sm[[i]]$L!=0))!=0
            sm[[i]]$L <- sm[[i]]$L[,ind,drop=FALSE] ## retain only those sps that influence something!
          }

          sm[[i]]$df <- ncol(sm[[i]]$X)
          attr(sm[[i]],"del.index") <- ind
          ## Now deal with case in which prediction constraints differ from fit constraints
          if (!is.null(sm[[i]]$Xp)) { ## need to get deletion indices under prediction parameterization
            ## Note that: i) it doesn't matter what the identifiability con on X1 is
            ##            ii) the degree of rank deficiency can't be changed by an identifiability con
            if (with.pen) { 
              smi <- sm[[i]]  ## clone smooth 
              smi$X <- smi$Xp ## copy prediction Xp to X slot.
              smi$S <- smi$Sp ## and make sure penalty parameterization matches. 
              Xpa <- augment.smX(smi,nobs,np)
              ind <- fixDependence(X1,Xpa,rank.def=length(ind)) 
            } else ind <- fixDependence(X1,sm[[i]]$Xp,rank.def=length(ind)) 
            sm[[i]]$Xp <- sm[[i]]$Xp[,-ind,drop=FALSE]
            attr(sm[[i]],"del.index") <- ind ## over-writes original
          }
        }
        sm[[i]]$vn <- NULL
      } ## end if (sm[[i]]$dim == d)
    } ## end i in 1:m loop
  }  
  if (with.pen) for (i in 1:m) { ## remove working variables that were added
    sm[[i]]$p.ind <- NULL
    if (!is.null(sm[[i]]$Xa)) sm[[i]]$Xa <- NULL
  }
  sm
} ## gam.side
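## Illustrative case (sketch): in a model like y ~ s(x) + s(z) + s(x,z) the 1-d
## smooths are nested within s(x,z), so gam.side zeroes those columns of the
## s(x,z) basis that are confounded with s(x), s(z) and the intercept.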

clone.smooth.spec <- function(specb,spec) {
## produces a version of base smooth.spec, `specb', but with 
## the variables relating to `spec'. Used by `gam.setup' in handling 
## of linked smooths.
 ## check dimensions same...
 if (specb$dim!=spec$dim) stop("`id' linked smooths must have same number of arguments") 
 ## Now start cloning...
 if (inherits(specb,c("tensor.smooth.spec","t2.smooth.spec"))) { ##`te' or `t2' generated base smooth.spec
    specb$term <- spec$term
    specb$label <- spec$label 
    specb$by <- spec$by
    k <- 1
    for (i in 1:length(specb$margin)) {
      if (is.null(spec$margin)) { ## sloppy user -- have to construct margin info...
         for (j in 1:length(specb$margin[[i]]$term)) {
           specb$margin[[i]]$term[j] <- spec$term[k]
           k <- k + 1
         }
         specb$margin[[i]]$label <- ""
 
      } else { ## second term was at least `te'/`t2', so margin cloning is easy
        specb$margin[[i]]$term <- spec$margin[[i]]$term
        specb$margin[[i]]$label <- spec$margin[[i]]$label
        specb$margin[[i]]$xt <- spec$margin[[i]]$xt
      }
    }

  } else { ## `s' generated case
    specb$term <- spec$term
    specb$label <- spec$label 
    specb$by <- spec$by
    specb$xt <- spec$xt ## don't generally know what's in here => don't clone
  }
  specb ## return clone
} ## clone.smooth.spec


parametricPenalty <- function(pterms,assign,paraPen,sp0) {
## routine to process any penalties on the parametric part of the model.
## paraPen is a list whose items have names corresponding to the 
## term.labels in pterms. Each list item may have named elements 
## L, rank and sp. All other elements should be penalty coefficient matrices.
  S <- list()     ## penalty matrix list
  off <- rep(0,0) ## offset array
  rank <- rep(0,0) ## rank array
  sp <- rep(0,0)    ## smoothing param array
  full.sp.names <- rep("",0) ## names for sp's multiplying penalties (not underlying)
  L <- matrix(0,0,0) 
  k <- 0
  tind <- unique(assign) ## unique term indices
  n.t <- length(tind)
  if (n.t>0) for (j in 1:n.t) if (tind[j]>0) {
    term.label <- attr(pterms[tind[j]],"term.label")
    P <- paraPen[[term.label]] ## get any penalty information for this term
    if (!is.null(P)) { ## then there is information
      ind <- (1:length(assign))[assign==tind[j]] ## index of coefs involved here
      Li <- P$L;P$L <- NULL
      spi <- P$sp;P$sp <- NULL
      ranki <- P$rank;P$rank <- NULL
      ## remaining terms should be penalty matrices...
      np <- length(P)

      if (!is.null(ranki)&&length(ranki)!=np) stop("`rank' has wrong length in `paraPen'") 
      if (np) for (i in 1:np) { ## unpack penalty matrices, offsets and ranks
        k <- k + 1
        S[[k]] <- P[[i]]
        off[k] <- min(ind) ## index of first coef penalized by this term
        if ( ncol(P[[i]])!=nrow(P[[i]])||nrow(P[[i]])!=length(ind)) stop(" a parametric penalty has wrong dimension")
        if (is.null(ranki)) {
          ev <- eigen(S[[k]],symmetric=TRUE,only.values=TRUE)$values
          rank[k] <- sum(ev>max(ev)*.Machine$double.eps*10) ## estimate rank
        } else rank[k] <- ranki[i]
      }
      ## now deal with L matrices
      if (np) { ## only do this stuff if there are any penalties!
        if (is.null(Li)) Li <- diag(np)
        if (nrow(Li)!=np) stop("L has wrong dimension in `paraPen'")
        L <- rbind(cbind(L,matrix(0,nrow(L),ncol(Li))),
                   cbind(matrix(0,nrow(Li),ncol(L)),Li))
        ind <- (length(sp)+1):(length(sp)+ncol(Li))
        ind2 <- (length(sp)+1):(length(sp)+nrow(Li)) ## used to produce names for full sp array
        if (is.null(spi)) {
          sp[ind] <- -1 ## auto-initialize
        } else {
          if (length(spi)!=ncol(Li)) stop("`sp' dimension wrong in `paraPen'")
          sp[ind] <- spi
        }
        ## add smoothing parameter names....
        if (length(ind)>1) names(sp)[ind] <- paste(term.label,ind-ind[1]+1,sep="") 
        else names(sp)[ind] <- term.label
        
        if (length(ind2)>1) full.sp.names[ind2] <- paste(term.label,ind2-ind2[1]+1,sep="") 
        else full.sp.names[ind2] <- term.label
      }
    } ## end !is.null(P)  
  } ## looped through all terms
  if (k==0) return(NULL)
  if (!is.null(sp0)) {
    if (length(sp0)<length(sp)) stop("`sp' too short")
    sp0 <- sp0[1:length(sp)]
    sp[sp<0] <- sp0[sp<0]
  }
  ## S is list of penalty matrices, off[i] is index of first coefficient penalized by each S[[i]]
  ## sp is array of underlying smoothing parameter (-ve to estimate), L is matrix mapping log
  ## underlying smoothing parameters to log smoothing parameters, rank[i] is the rank of S[[i]].
  list(S=S,off=off,sp=sp,L=L,rank=rank,full.sp.names=full.sp.names)
} ## parametricPenalty
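## e.g. (sketch, not run): supplying paraPen = list(Z = list(diag(ncol(Z)))) in a gam
## call ridge-penalizes the coefficients of the parametric term Z, the corresponding
## smoothing parameter being estimated (sp defaults to -1, i.e. 'estimate').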

getNumericResponse <- function(form) {
## takes a formula for which the lhs contains numbers, but no variable 
## names, and returns an array of those numbers. For example if 'form' 
## is '1+26~s(x)', this will return the numeric vector c(1,26). 
## A zero length vector is returned if the lhs contains no numbers,
## or contains variable names.  
## This is useful for setting up models in which
## multiple linear predictors share terms. The lhs numbers can then 
## indicate which linear predictors the rhs will contribute to.

  ## if the response contains variables or there is no
  ## response then return nothing...

  if (length(form)==2||length(all.vars(form[[2]]))>0) return(rep(0,0))

  ## deparse turns lhs into a string; strsplit extracts the characters 
  ## corresponding to numbers; unlist deals with the fact that deparse 
  ## will split long lines resulting in multiple list items from 
  ## strsplit; as.numeric converts the numbers; na.omit drops NAs
  ## resulting from "" elements; unique & round are obvious... 

  round(unique(na.omit(as.numeric(unlist(strsplit(deparse(form[[2]]), "[^0-9]+"))))))  

} ## getNumericResponse
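## e.g. getNumericResponse(1 + 26 ~ s(x)) returns c(1, 26), while
## getNumericResponse(y ~ s(x)) returns a zero length vector.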

olid <- function(X,nsdf,pstart,flpi,lpi) {
## X is a model matrix, made up of nf=length(nsdf) column blocks.
## The ith block starts at column pstart[i] and its first nsdf[i]
## columns are unpenalized. X is used to define nlp=length(lpi)
## linear predictors. lpi[[i]] gives the columns of X used in the 
## ith linear predictor. flpi[j] gives the linear predictor(s)
## to which the jth block of X belongs. The problem is that the 
## unpenalized blocks need not be identifiable when used in combination. 
## This function returns a vector dind of columns of X to drop for 
## identifiability, along with modified lpi, pstart and nsdf vectors.
  nlp <- length(lpi) ## number of linear predictors
  n <- nrow(X) 
  nf <- length(nsdf) ## number of formulae blocks
  Xp <- matrix(0,n*nlp,sum(nsdf))
  start <- 1
  ii <- 1:n
  tind <- rep(0,0) ## complete index of all parametric columns in X
  ## create a block matrix, Xp, with the same identifiability properties as
  ## unpenalized part of model...
  for (i in 1:nf) {
    stop <- start - 1 + nsdf[i]
    if (stop>=start) {
      ind <- pstart[i] + 1:nsdf[i] - 1
      for (k in flpi[[i]]) {
        Xp[ii+(k-1)*n,start:stop] <- X[,ind]
      }
      tind <- c(tind,ind)
      start <- start + nsdf[i]
    }
  }
  ## rank deficiency of Xp will reveal number of redundant parametric 
  ## terms, and a pivoted QR will reveal which to drop to restore
  ## full rank...
  qrx <- qr(Xp,LAPACK=TRUE,tol=0.0) ## unidentifiable columns get pivoted to final cols
  r <- Rrank(qr.R(qrx)) ## get rank from R factor of pivoted QR
  if (r==ncol(Xp)) { ## full rank, all fine, drop nothing
    dind <- rep(0,0)
  } else { ## reduced rank, drop some columns
    dind <- tind[sort(qrx$pivot[(r+1):ncol(Xp)],decreasing=TRUE)] ## columns to drop
    ## now we need to adjust nsdf, pstart and lpi
    for (d in dind) { ## working down through drop indices
      ## following commented out code is useful should it ever prove necessary to 
      ## adjust pstart and nsdf, but at present these are only used in prediction, 
      ## and it is cleaner to leave them unchanged, and simply drop using dind during prediction.
      #k <- if (d>=pstart[nf]) nlp else which(d >= pstart[1:(nf-1)] & d < pstart[2:nf])
      #nsdf[k] <- nsdf[k] - 1 ## one less unpenalized column in this block
      #if (k<nf) pstart[(k+1):nf] <-  pstart[(k+1):nf] - 1 ## later block starts move down 1 
      for (i in 1:nlp) { 
        k <- which(d == lpi[[i]])
        if (length(k)>0) lpi[[i]] <- lpi[[i]][-k] ## drop row
        k <- which(lpi[[i]]>d)
        if (length(k)>0) lpi[[i]][k] <- lpi[[i]][k] - 1 ## close up
      }
    } ## end of drop index loop
  }
  list(dind=dind,lpi=lpi) ##,pstart=pstart,nsdf=nsdf)
} ## olid


gam.setup.list <- function(formula,pterms,
                    data=stop("No data supplied to gam.setup"),knots=NULL,sp=NULL,
                    min.sp=NULL,H=NULL,absorb.cons=TRUE,sparse.cons=0,select=FALSE,idLinksBases=TRUE,
                    scale.penalty=TRUE,paraPen=NULL,gamm.call=FALSE,drop.intercept=NULL,apply.by=TRUE,modCon=0) {
## version of gam.setup for when gam is called with a list of formulae, 
## specifying several linear predictors...
## key difference to gam.setup is an attribute to the model matrix, "lpi", which is a list
## of column indices for each linear predictor 
  if (!is.null(paraPen)) stop("paraPen not supported for multi-formula models")
  if (!absorb.cons) stop("absorb.cons must be TRUE for multi-formula models")
  d <- length(pterms) ## number of formulae
  if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, d)
  if (length(drop.intercept) != d) stop("length(drop.intercept) should be equal to number of model formulas")

  lp.overlap <- if (formula$nlp<d) TRUE else FALSE ## predictors share terms

  G <- gam.setup(formula[[1]],pterms[[1]],
              data,knots,sp,min.sp,H,absorb.cons,sparse.cons,select,
              idLinksBases,scale.penalty,paraPen,gamm.call,drop.intercept[1],apply.by=apply.by,list.call=TRUE,modCon=modCon)
  G$pterms <- pterms
  
  G$offset <- list(G$offset)
  #G$contrasts <- list(G$contrasts)
  G$xlevels <- list(G$xlevels)
  G$assign <- list(G$assign)
  used.sp <- length(G$lsp0)
  if (!is.null(sp)&&used.sp>0) sp <- sp[-(1:used.sp)] ## need to strip off already used sp's
  if (!is.null(min.sp)&&nrow(G$L)>0) min.sp <- min.sp[-(1:nrow(G$L))]  

  ## formula[[1]] always relates to the base formula of the first linear predictor...

  flpi <- lpi <- list()
  for (i in 1:formula$nlp) lpi[[i]] <- rep(0,0)
  lpi[[1]] <- 1:ncol(G$X) ## lpi[[j]] is index of cols for jth linear predictor 
  flpi[[1]] <- formula[[1]]$lpi ## used in identifiability testing by olid, later  

  pof <- ncol(G$X) ## counts the model matrix columns produced so far
  pstart <- rep(0,d) ## indexes where parametric columns start in each formula block of X
  pstart[1] <- 1
  if (d>1) for (i in 2:d) {
    if (is.null(formula[[i]]$response)) {  ## keep gam.setup happy
      formula[[i]]$response <- formula$response 
      mv.response <- FALSE
    } else mv.response <- TRUE
    #spind <- if (is.null(sp)) 1 else (length(G$S)+1):length(sp)
    formula[[i]]$pfok <- 1 ## empty formulae OK here!
    um <- gam.setup(formula[[i]],pterms[[i]],
              data,knots,sp,min.sp,#sp[spind],min.sp[spind],
	      H,absorb.cons,sparse.cons,select,
              idLinksBases,scale.penalty,paraPen,gamm.call,drop.intercept[i],apply.by=apply.by,list.call=TRUE,modCon=modCon)
    used.sp <- length(um$lsp0)	      
    if (!is.null(sp)&&used.sp>0) sp <- sp[-(1:used.sp)] ## need to strip off already used sp's
    if (!is.null(min.sp)&&nrow(um$L)>0) min.sp <- min.sp[-(1:nrow(um$L))]  

    flpi[[i]] <- formula[[i]]$lpi
    for (j in formula[[i]]$lpi) { ## loop through all l.p.s to which this term contributes
      lpi[[j]] <- c(lpi[[j]],pof + 1:ncol(um$X)) ## add these cols to lpi[[j]]
      ##lpi[[i]] <- pof + 1:ncol(um$X) ## old code
    }
    if (mv.response) G$y <- cbind(G$y,um$y)
    if (i>formula$nlp&&!is.null(um$offset)) {
      stop("shared offsets not allowed")
    }
    G$offset[[i]] <- um$offset
    #G$contrasts[[i]] <- um$contrasts
    if (!is.null(um$contrasts)) G$contrasts <- c(G$contrasts,um$contrasts)
    G$xlevels[[i]] <- um$xlevels
    G$assign[[i]] <- um$assign
    G$rank <- c(G$rank,um$rank)
    pstart[i] <- pof+1
    G$X <- cbind(G$X,um$X) ## extend model matrix
    ## deal with the smooths...
    k <- G$m
    if (um$m) for (j in 1:um$m) {
      um$smooth[[j]]$first.para <- um$smooth[[j]]$first.para + pof
      um$smooth[[j]]$last.para <- um$smooth[[j]]$last.para + pof
      k <- k + 1 
      G$smooth[[k]] <- um$smooth[[j]]
    }
    ## L, S and off...
    ks <- length(G$S)
    M <- length(um$S)
 
    if (!is.null(um$L)||!is.null(G$L)) {
      if (is.null(G$L)) G$L <- diag(1,nrow=ks)
      if (is.null(um$L)) um$L <- diag(1,nrow=M)
      G$L <- rbind(cbind(G$L,matrix(0,nrow(G$L),ncol(um$L))),cbind(matrix(0,nrow(um$L),ncol(G$L)),um$L))
    }

    G$off <- c(G$off,um$off+pof)
    if (M) for (j in 1:M) {
      ks <- ks + 1
      G$S[[ks]] <- um$S[[j]]
    }
 
    G$m <- G$m + um$m ## number of smooths
    ##G$nsdf <- G$nsdf + um$nsdf ## or list??
    G$nsdf[i] <- um$nsdf
    if (!is.null(um$P)||!is.null(G$P)) {
      if (is.null(G$P)) G$P <- diag(1,nrow=pof)
      k <- ncol(um$X)
      if (is.null(um$P)) um$P <- diag(1,nrow=k)
      G$P <- rbind(cbind(G$P,matrix(0,pof,k)),cbind(matrix(0,k,pof),um$P))
    }
    G$cmX <- c(G$cmX,um$cmX)
    if (um$nsdf>0) um$term.names[1:um$nsdf] <- paste(um$term.names[1:um$nsdf],i-1,sep=".")
    G$term.names <- c(G$term.names,um$term.names)
    G$lsp0 <- c(G$lsp0,um$lsp0)
    G$sp <- c(G$sp,um$sp)
    pof <- ncol(G$X)
  } ## formula loop end

  ## If there is overlap then there is a danger of lack of identifiability of the 
  ## parametric terms, especially if there are factors present in shared components. 
  ## The following code deals with this possibility... 
  if (lp.overlap) {
    rt <- olid(G$X,G$nsdf,pstart,flpi,lpi)
    if (length(rt$dind)>0) { ## then columns have to be dropped
      warning("dropping unidentifiable parametric terms from model",call.=FALSE)
      G$X <- G$X[,-rt$dind] ## drop cols 
      G$cmX <- G$cmX[-rt$dind]
      G$term.names <- G$term.names[-rt$dind]
      ## adjust indexing in smooth list, noting that coefs of smooths 
      ## are never dropped by dind 
      for (i in 1:length(G$smooth)) {
        k <- sum(rt$dind < G$smooth[[i]]$first.para)
        G$smooth[[i]]$first.para <- G$smooth[[i]]$first.para - k
        G$smooth[[i]]$last.para <- G$smooth[[i]]$last.para - k
      }
      for (i in 1:length(G$off)) G$off[i] <- G$off[i] - sum(rt$dind < G$off[i])
      ## replace various indices with updated versions...
      # pstart <- rt$pstart; G$nsdf <- rt$nsdf ## these two only needed in predict.gam - cleaner to leave unchanged
      lpi <- rt$lpi
      attr(G$nsdf,"drop.ind") <- rt$dind ## store drop index
    } 
  }
  attr(lpi,"overlap") <- lp.overlap
  attr(G$X,"lpi") <- lpi
  attr(G$nsdf,"pstart") <- pstart ##unlist(lapply(lpi,min))

  ## assemble a global indicator array for non-linear parameters... 
  G$g.index <- rep(FALSE,ncol(G$X))
  n.sp0 <- 0
  if (length(G$smooth)) for (i in 1:length(G$smooth)) {
    if (!is.null(G$smooth[[i]]$g.index)) G$g.index[G$smooth[[i]]$first.para:G$smooth[[i]]$last.para] <- G$smooth[[i]]$g.index
    n.sp <- length(G$smooth[[i]]$S)
    if (n.sp) {
      G$smooth[[i]]$first.sp <- n.sp0 + 1
      n.sp0 <- G$smooth[[i]]$last.sp <- n.sp0 + n.sp
    }
  }  
  if (!any(G$g.index)) G$g.index <- NULL  

  G
} ## gam.setup.list



gam.setup <- function(formula,pterms,
                     data=stop("No data supplied to gam.setup"),knots=NULL,sp=NULL,
                    min.sp=NULL,H=NULL,absorb.cons=TRUE,sparse.cons=0,select=FALSE,idLinksBases=TRUE,
                    scale.penalty=TRUE,paraPen=NULL,gamm.call=FALSE,drop.intercept=FALSE,
                    diagonal.penalty=FALSE,apply.by=TRUE,list.call=FALSE,modCon=0) 
## set up the model matrix, penalty matrices and auxiliary information about the smoothing bases
## needed for a gam fit.
## elements of returned object:
## * m - number of smooths
## * min.sp - minimum smoothing parameters
## * H supplied H matrix
## * pearson.extra, dev.extra, n.true --- entries to hold these quantities
## * pterms - terms object for parametric terms
## * intercept TRUE if intercept present
## * offset - the model offset
## * nsdf - number of strictly parametric coefs
## * contrasts 
## * xlevels - records levels of factors
## * assign - indexes which parametric model matrix columns map to which term in pterms
## * smooth - list of smooths
## * S - penalties (non-zero block only)
## * off - first coef penalized by each element of S
## * cmX - col mean of X
## * P - maps parameters in fit constraint parameterization to those in prediction parameterization
## * X - model matrix
## * sp
## * rank
## * n.paraPen
## * L 
## * lsp0
## * y - response
## * C - constraint matrix - only if absorb.cons==FALSE
## * n - dim(y)
## * w - weights
## * term.names
## * nP
{ # split the formula if the object being passed is a formula, otherwise it's already split

  if (inherits(formula,"split.gam.formula")) split <- formula else
  if (inherits(formula,"formula")) split <- interpret.gam(formula) 
  else stop("First argument is no sort of formula!") 
  
  if (length(split$smooth.spec)==0) {
    if (split$pfok==0) stop("You've got no model....")
    m <- 0
  } else  m <- length(split$smooth.spec) # number of smooth terms
  
  G <- list(m=m,min.sp=min.sp,H=H,pearson.extra=0,
            dev.extra=0,n.true=-1,pterms=pterms) ## dev.extra gets added to deviance if REML/ML used in gam.fit3
  
  if (is.null(attr(data,"terms"))) # then data is not a model frame
  mf <- model.frame(split$pf,data,drop.unused.levels=FALSE) # must be false or can end up with wrong prediction matrix!
  else mf <- data # data is already a model frame

  G$intercept <-  attr(attr(mf,"terms"),"intercept")>0

  ## get any model offset. Complicated by possibility of offsets in multiple formulae...
  if (list.call) {
    offi <- attr(pterms,"offset")
    if (!is.null(offi)) {
      G$offset <- mf[[names(attr(pterms,"dataClasses"))[offi]]]
    }
  } else G$offset <- model.offset(mf)   # get any model offset including from offset argument
  
  if (!is.null(G$offset))  G$offset <- as.numeric(G$offset) 

  # construct strictly parametric model matrix.... 
  if (drop.intercept) attr(pterms,"intercept") <- 1 ## ensure there is an intercept to drop
  X <- model.matrix(pterms,mf)
  if (drop.intercept) { ## some extended families require intercept to be dropped 
    xat <- attributes(X);ind <- xat$assign>0 ## index of non intercept columns 
    X <- X[,ind,drop=FALSE] ## some extended families need to drop intercept
    xat$assign <- xat$assign[ind];xat$dimnames[[2]]<-xat$dimnames[[2]][ind];
    xat$dim[2] <- xat$dim[2]-1;attributes(X) <- xat
    G$intercept <- FALSE
  } 
  rownames(X) <- NULL ## save memory
  
  G$nsdf <- ncol(X)
  G$contrasts <- attr(X,"contrasts")
  G$xlevels <- .getXlevels(pterms,mf)
  G$assign <- attr(X,"assign") # used to tell which coeffs relate to which pterms

  ## now deal with any user supplied penalties on the parametric part of the model...
  PP <- parametricPenalty(pterms,G$assign,paraPen,sp)
  if (!is.null(PP)) { ## strip out supplied sps already used
    ind <- 1:length(PP$sp)
    if (!is.null(sp)) sp <- sp[-ind]
    if (!is.null(min.sp)) { 
      PP$min.sp <- min.sp[ind]
      min.sp <- min.sp[-ind]
    } 
  }
  
  # next work through smooth terms (if any) extending model matrix.....
  
  G$smooth <- list()
  G$S <- list()
 
  if (gamm.call) { ## flag that this is a call from gamm --- some smoothers need to know!
    if (m>0) for (i in 1:m) attr(split$smooth.spec[[i]],"gamm") <- TRUE
  }

  if (m>0 && idLinksBases) { ## search smooth.spec[[]] for terms linked by common id's
    id.list <- list() ## id information list
    for (i in 1:m) if (!is.null(split$smooth.spec[[i]]$id)) {
      id <- as.character(split$smooth.spec[[i]]$id)
      if (length(id.list)&&id%in%names(id.list)) { ## it's an existing id
        ni <- length(id.list[[id]]$sm.i) ## number of terms so far with this id
        id.list[[id]]$sm.i[ni+1] <- i    ## adding smooth.spec index to this id's list
        ## clone smooth.spec from base smooth spec....
        base.i <- id.list[[id]]$sm.i[1]
         
        split$smooth.spec[[i]] <- clone.smooth.spec(split$smooth.spec[[base.i]],
                                                      split$smooth.spec[[i]])
        
        ## add data for this term to the data list for basis setup...
        temp.term <- split$smooth.spec[[i]]$term
       
        ## note cbind deliberate in next line, as construction will handle matrix argument 
        ## correctly... 
        for (j in 1:length(temp.term)) id.list[[id]]$data[[j]] <- cbind(id.list[[id]]$data[[j]],
                                                          get.var(temp.term[j],data,vecMat=FALSE))
       
       } else { ## new id
        id.list[[id]] <- list(sm.i=i) ## start the array of indices of smooths with this id
        id.list[[id]]$data <- list()
        ## need to collect together all data for which this basis will be used,
        ## for basis setup...
        term <- split$smooth.spec[[i]]$term
        for (j in 1:length(term)) id.list[[id]]$data[[j]] <- get.var(term[j],data,vecMat=FALSE)
      } ## new id finished
    }
  } ## id.list complete

  G$off<-array(0,0)
  first.para<-G$nsdf+1
  sm <- list()
  newm <- 0
  if (m>0) for (i in 1:m) {
    # idea here is that terms are set up in accordance with information given in split$smooth.spec
    # appropriate basis constructor is called depending on the class of the smooth
    # constructor returns penalty matrices, model matrix and basis specific information
  
    id <- split$smooth.spec[[i]]$id
    if (is.null(id)||!idLinksBases) { ## regular evaluation
      sml <- smoothCon(split$smooth.spec[[i]],data,knots,absorb.cons,scale.penalty=scale.penalty,
                       null.space.penalty=select,sparse.cons=sparse.cons,
                       diagonal.penalty=diagonal.penalty,apply.by=apply.by,modCon=modCon) 
    } else { ## it's a smooth with an id, so basis setup data differs from model matrix data
      names(id.list[[id]]$data) <- split$smooth.spec[[i]]$term ## give basis data suitable names
      sml <- smoothCon(split$smooth.spec[[i]],id.list[[id]]$data,knots,
                       absorb.cons,n=nrow(data),dataX=data,scale.penalty=scale.penalty,
                       null.space.penalty=select,sparse.cons=sparse.cons,
                       diagonal.penalty=diagonal.penalty,apply.by=apply.by,modCon=modCon)
    }
    #for (j in 1:length(sml)) {
    #  newm <- newm + 1
    #  sm[[newm]] <- sml[[j]]
    #}
    ind <- 1:length(sml)
    sm[ind+newm] <- sml[ind]
    newm <- newm + length(sml)
  }
  
  G$m <- m <- newm ## number of actual smooths

  ## at this stage, it is necessary to impose any side conditions required
  ## for identifiability
  if (m>0) { 
    sm <- gam.side(sm,X,tol=.Machine$double.eps^.5)
    if (!apply.by) for (i in 1:length(sm)) { ## restore any by-free model matrices
      if (!is.null(sm[[i]]$X0)) { ## there is a by-free matrix to restore 
        ind <- attr(sm[[i]],"del.index") ## columns, if any to delete
        sm[[i]]$X <- if (is.null(ind)) sm[[i]]$X0 else sm[[i]]$X0[,-ind,drop=FALSE] 
      }
    }
  }

  ## The matrix, L, mapping the underlying log smoothing parameters to the
  ## log of the smoothing parameter multiplying the S[[i]] must be
  ## worked out...
  idx <- list() ## idx[[id]]$c contains index of first col in L relating to id
  L <- matrix(0,0,0)
  lsp.names <- sp.names <- rep("",0) ## need a list of names to identify sps in global sp array
  if (m>0) for (i in 1:m) {
    id <- sm[[i]]$id
    ## get the L matrix for this smooth...
    length.S <- if (is.null(sm[[i]]$updateS)) length(sm[[i]]$S) else sm[[i]]$n.sp ## deals with possibility of non-linear penalty
    Li <- if (is.null(sm[[i]]$L)) diag(length.S) else sm[[i]]$L 
     
    if (length.S > 0) { ## there are smoothing parameters to name
       if (length.S == 1) lspn <- sm[[i]]$label else {
          Sname <- names(sm[[i]]$S)
          lspn <- if (is.null(Sname)) paste(sm[[i]]$label,1:length.S,sep="") else
                  paste(sm[[i]]$label,Sname,sep="") ## names for all sp's
       }
       spn <- lspn[1:ncol(Li)] ## names for actual working sps
    }

    ## extend the global L matrix...
    if (is.null(id)||is.null(idx[[id]])) { ## new `id'     
      if (!is.null(id)) { ## create record in `idx'
        idx[[id]]$c <- ncol(L)+1   ## starting column in L for this `id'
        idx[[id]]$nc <- ncol(Li)   ## number of columns relating to this `id'
      }
      L <- rbind(cbind(L,matrix(0,nrow(L),ncol(Li))),
                 cbind(matrix(0,nrow(Li),ncol(L)),Li))
      if (length.S > 0) { ## there are smoothing parameters to name
        sp.names <- c(sp.names,spn) ## extend the sp name vector
        lsp.names <- c(lsp.names,lspn) ## extend full.sp name vector
      }
    } else { ## it's a repeat id => shares existing sp's
      L0 <- matrix(0,nrow(Li),ncol(L))
      if (ncol(Li)>idx[[id]]$nc) {
        stop("Later terms sharing an `id' can not have more smoothing parameters than the first such term")
      }
      L0[,idx[[id]]$c:(idx[[id]]$c+ncol(Li)-1)] <- Li
      L <- rbind(L,L0)
      if (length.S > 0) { ## there are smoothing parameters to name
        lsp.names <- c(lsp.names,lspn) ## extend full.sp name vector
      }
    }
  }

  ## create the model matrix...

  Xp <- NULL ## model matrix under prediction constraints, if given
  if (m>0) for (i in 1:m) {
    n.para <- ncol(sm[[i]]$X)
    # define which elements in the parameter vector this smooth relates to....
    sm[[i]]$first.para<-first.para     
    first.para <- first.para+n.para
    sm[[i]]$last.para <- first.para-1
    ## termwise offset handling ...
    Xoff <- attr(sm[[i]]$X,"offset")
    if (!is.null(Xoff)) { 
      if (is.null(G$offset)) G$offset <- Xoff
      else G$offset <- G$offset + Xoff
    }
    ## model matrix accumulation ...
    
    ## alternative version under alternative constraint first (prediction only)
    if (is.null(sm[[i]]$Xp)) {
      if (!is.null(Xp)) Xp <- cbind2(Xp,sm[[i]]$X)
    } else { 
      if (is.null(Xp)) Xp <- X
      Xp <- cbind2(Xp,sm[[i]]$Xp);sm[[i]]$Xp <- NULL
    }
    ## now version to use for fitting ...
    X <- cbind2(X,sm[[i]]$X);sm[[i]]$X<-NULL
   
    G$smooth[[i]] <- sm[[i]]   
  }

  if (is.null(Xp)) {
    G$cmX <- colMeans(X) ## useful for componentwise CI construction 
  } else {
    G$cmX <- colMeans(Xp)
    ## transform from fit params to prediction params...
    ## G$P <- qr.coef(qr(Xp),X) ## old code assumes always full rank!!
    
    qrx <- qr(Xp,LAPACK=TRUE)
    R <- qr.R(qrx)
    p <- ncol(R)
    rank <- Rrank(R) ## rank of Xp/R    
    QtX <- qr.qty(qrx,X)[1:rank,]
    if (rank<p) { ## rank deficient  
      R <- R[1:rank,]
      qrr <- qr(t(R),tol=0)
      R <- qr.R(qrr)
      G$P <- forwardsolve(t(R),QtX)
    } else {
      G$P <- backsolve(R,QtX)
    }
    if (rank<p) {
      G$P <- qr.qy(qrr,rbind(G$P,matrix(0,p-rank,p)))
    }
    G$P[qrx$pivot,] <- G$P
  }
  ## cmX relates to computation of CIs incorporating uncertainty about the mean
  ## It may make more sense to incorporate all uncertainty about the mean,
  ## rather than just the uncertainty in the fixed effects mean. This means
  ## incorporating the mean of random effects and unconstrained smooths. Hence
  ## comment out the following.
  #if (G$nsdf>0) G$cmX[-(1:G$nsdf)] <- 0 ## zero the smooth parts here 
  #else G$cmX <- G$cmX * 0
  G$X <- X;rm(X)
  n.p <- ncol(G$X) 
  # deal with penalties


  ## min.sp must be length nrow(L) to make sense
  ## sp must be length ncol(L) --- need to partition
  ## L into columns relating to free log smoothing parameters,
  ## and columns, L0, corresponding to values supplied in sp.
  ## lsp0 = L0%*%log(sp[sp>=0]) [need to fudge sp==0 case by
  ## setting log(0) to log(effective zero) computed case-by-case]

  ## following deals with supplied and estimated smoothing parameters...
  ## first process the `sp' array supplied to `gam'...
  
  if (!is.null(sp)) { # then user has supplied fixed smoothing parameters
   ok <- TRUE 
   if (length(sp) < ncol(L)) { 
      warning("Supplied smoothing parameter vector is too short - ignored.")
      ok <- FALSE
    }
    if (sum(is.na(sp))) { 
      warning("NA's in supplied smoothing parameter vector - ignoring.")
      ok <- FALSE
    }
  } else ok <- FALSE
  G$sp <- if (ok) sp[1:ncol(L)] else rep(-1,ncol(L))
  
  names(G$sp) <- sp.names

  ## now work through the smooths searching for any `sp' elements
  ## supplied in `s' or `te' terms.... This relies on `idx' created 
  ## above...
  
  k <- 1 ## current location in `sp' array
  if (m>0) for (i in 1:m) {
    id <- sm[[i]]$id
    if (is.null(sm[[i]]$L)) Li <- diag(length(sm[[i]]$S)) else Li <- sm[[i]]$L 
    if (is.null(id)) { ## it's a smooth without an id
      spi <- sm[[i]]$sp
      if (!is.null(spi)) { ## sp supplied in `s' or `te'
        if (length(spi)!=ncol(Li)) stop("incorrect number of smoothing parameters supplied for a smooth term")
        G$sp[k:(k+ncol(Li)-1)] <- spi
      }       
      k <- k + ncol(Li) 
    } else { ## smooth has an id
      spi <- sm[[i]]$sp
      if (is.null(idx[[id]]$sp.done)) { ## not already dealt with these sp's
        if (!is.null(spi)) { ## sp supplied in `s' or `te'
          if (length(spi)!=ncol(Li)) stop("incorrect number of smoothing parameters supplied for a smooth term")
          G$sp[idx[[id]]$c:(idx[[id]]$c+idx[[id]]$nc-1)] <- spi
        }
        idx[[id]]$sp.done <- TRUE ## only makes sense to use supplied `sp' from defining term
        k <- k + idx[[id]]$nc 
      }
    }
  } ## finished processing `sp' vectors supplied in `s' or `te' terms

  ## copy initial sp's back into smooth objects, so there is a record of
  ## fixed and free...
  k <- 1 
  if (length(idx)) for (i in 1:length(idx)) idx[[i]]$sp.done <- FALSE
  if (m>0) for (i in 1:m) { ## work through all smooths
    id <- sm[[i]]$id 
    if (!is.null(id)) { ## smooth with id
      if (idx[[id]]$nc>0) { ## only copy if there are sp's
        G$smooth[[i]]$sp <- G$sp[idx[[id]]$c:(idx[[id]]$c+idx[[id]]$nc-1)]
      }   
      if (!idx[[id]]$sp.done) { ## only update k on first encounter with this smooth
        idx[[id]]$sp.done <- TRUE
        k <- k + idx[[id]]$nc
      }
    
    } else { ## no id, just work through sp 
      if (is.null(sm[[i]]$L)) nc <- length(sm[[i]]$S) else nc <- ncol(sm[[i]]$L)
      if (nc>0) G$smooth[[i]]$sp <- G$sp[k:(k+nc-1)]
      k <- k + nc
    }
  } ## now all elements of G$smooth have a record of initial sp. 


  if (!is.null(min.sp)) { # then minimum s.p.'s supplied
    if (length(min.sp)<nrow(L)) stop("length of min.sp is wrong.")
    if (nrow(L)>0) min.sp <- min.sp[1:nrow(L)]
    if (sum(is.na(min.sp))) stop("NA's in min.sp.")
    if (sum(min.sp<0)) stop("elements of min.sp must be non negative.")
  }

  k.sp <- 0 # count through sp and S
  G$rank <- array(0,0)
  if (m>0) for (i in 1:m) {
    sm<-G$smooth[[i]]
    if (length(sm$S)>0)
    for (j in 1:length(sm$S)) {  # work through penalty matrices
      k.sp <- k.sp+1
      G$off[k.sp] <- sm$first.para 
      G$S[[k.sp]] <- sm$S[[j]]
      G$rank[k.sp]<-sm$rank[j]
      if (!is.null(min.sp)) {
        if (is.null(H)) H<-matrix(0,n.p,n.p)
        H[sm$first.para:sm$last.para,sm$first.para:sm$last.para] <-
        H[sm$first.para:sm$last.para,sm$first.para:sm$last.para]+min.sp[k.sp]*sm$S[[j]] 
      }           
    } 
  }
 
  ## need to modify L, lsp.names, G$S, G$sp, G$rank and G$off to include any penalties
  ## on parametric stuff, at this point....
  if (!is.null(PP)) { ## deal with penalties on parametric terms
    L <- rbind(cbind(L,matrix(0,nrow(L),ncol(PP$L))),
                 cbind(matrix(0,nrow(PP$L),ncol(L)),PP$L))
    G$off <- c(PP$off,G$off)
    G$S <- c(PP$S,G$S)
    G$rank <- c(PP$rank,G$rank)
    G$sp <- c(PP$sp,G$sp)
    lsp.names <- c(PP$full.sp.names,lsp.names)
    G$n.paraPen <- length(PP$off)
    if (!is.null(PP$min.sp)) { ## deal with minimum sps
      if (is.null(H)) H <- matrix(0,n.p,n.p)
      for (i in 1:length(PP$S)) {
        ind <- PP$off[i]:(PP$off[i]+ncol(PP$S[[i]])-1)
        H[ind,ind] <- H[ind,ind] + PP$min.sp[i] * PP$S[[i]]
      }
    } ## min.sp stuff finished
  } else G$n.paraPen <- 0


  ## Now remove columns of L and rows of sp relating to fixed 
  ## smoothing parameters, and use removed elements to create lsp0

  fix.ind <- G$sp>=0

  if (sum(fix.ind)) {
    lsp0 <- G$sp[fix.ind]
    ind <- lsp0==0 ## find the zero s.p.s
    ef0 <- indi <- (1:length(ind))[ind]
    if (length(indi)>0) for (i in 1:length(indi)) {
      ## find "effective zero" to replace each zero s.p. with
      ii <- G$off[i]:(G$off[i]+ncol(G$S[[i]])-1) 
      ef0[i] <- norm(G$X[,ii],type="F")^2/norm(G$S[[i]],type="F")*.Machine$double.eps*.1
    }
    lsp0[!ind] <- log(lsp0[!ind])
    lsp0[ind] <- log(ef0) ##log(.Machine$double.xmin)*1000 ## zero fudge
    lsp0 <- as.numeric(L[,fix.ind,drop=FALSE]%*%lsp0)

    L <- L[,!fix.ind,drop=FALSE]  
    G$sp <- G$sp[!fix.ind]
  } else {lsp0 <- rep(0,nrow(L))}

  G$H <- H

  if (ncol(L)==nrow(L)&&!sum(L!=diag(ncol(L)))) L <- NULL ## it's just the identity

  G$L <- L;G$lsp0 <- lsp0
  names(G$lsp0) <- lsp.names ## names of all smoothing parameters (not just underlying)

  if (absorb.cons==FALSE) {  ## need to accumulate constraints 
    G$C <- matrix(0,0,n.p)
    if (m>0) {
      for (i in 1:m) {
        if (is.null(G$smooth[[i]]$C)) n.con<-0 
        else n.con<- nrow(G$smooth[[i]]$C)
        C <- matrix(0,n.con,n.p)
        C[,G$smooth[[i]]$first.para:G$smooth[[i]]$last.para]<-G$smooth[[i]]$C
        G$C <- rbind(G$C,C)
        G$smooth[[i]]$C <- NULL
      }
      rm(C)
    }
  } ## absorb.cons == FALSE
 
  G$y <- drop(data[[split$response]])
  ydim <- dim(G$y)
  if (!is.null(ydim)&&length(ydim)<2) dim(G$y) <- NULL
         
  ##data[[deparse(split$full.formula[[2]],backtick=TRUE)]]
  
  G$n <- nrow(data)

  if (is.null(data$"(weights)")) G$w <- rep(1,G$n)
  else G$w <- data$"(weights)"  

  ## Create names for model coefficients... 

  if (G$nsdf > 0) term.names <- colnames(G$X)[1:G$nsdf] else term.names<-array("",0)
  n.smooth <- length(G$smooth)
  ## create coef names, if smooth has any coefs, and create a global indicator of non-linear parameters
  ## g.index, if needed
  n.sp0 <- 0
  if (n.smooth) for (i in 1:n.smooth) {
    k <- 1
    jj <- G$smooth[[i]]$first.para:G$smooth[[i]]$last.para
    if (G$smooth[[i]]$df > 0) for (j in jj) {
      term.names[j] <- paste(G$smooth[[i]]$label,".",as.character(k),sep="")
      k <- k+1
    }
    n.sp <- length(G$smooth[[i]]$S)
    if (n.sp) { ## record sp this relates to in full sp vector
      G$smooth[[i]]$first.sp <- n.sp0 + 1
      n.sp0 <- G$smooth[[i]]$last.sp <- n.sp0 + n.sp
    }
    if (!is.null(G$smooth[[i]]$g.index)) {
      if (is.null(G$g.index)) G$g.index <- rep(FALSE,n.p)
      G$g.index[jj] <- G$smooth[[i]]$g.index
    }
    
  }
  G$term.names <- term.names

  ## Deal with non-linear parameterizations...
 

  G$pP <- PP ## return paraPen object, if present

  G
} ## gam.setup



formula.gam <- function(x, ...)
# formula.lm and formula.glm reconstruct the formula from x$terms, this is 
# problematic because of the way mgcv handles s() and te() terms 
{ x$formula
}

BC <- function(y,lambda=1) {
  y <- if (lambda==0) log(y) else (y^lambda-1)/lambda
}

corBC <- function(lambda,y,mu) {
  n <- length(y)
  qn <- qnorm((1:n-.5)/n)
  -cor(qn,sort(BC(y,lambda)-BC(mu,lambda)))
}
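
## Usage sketch for corBC (illustrative only; 'b' denotes an assumed fitted model):
## a Box-Cox lambda maximizing normality of the transformed residuals can be
## found by minimizing corBC, e.g.
## lambda.hat <- optimize(corBC, interval=c(-2,2), y=b$y, mu=fitted(b))$minimum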

neico4 <- function(nei,dd,dd1) {
## nei is neighbourhood structure. dd are leave one out perturbations.
## dd1 is the same, but computed from a different residual definition.
## This routine computes the most basic covariance
## matrix estimate, based on the observed correlation within neighbourhoods
## and an assumption of zero correlation outside them. Somehow it is a testament
## to my extreme slowness that it took me 6 months to come up with this
## skull-thumpingly obvious solution having tried every other hare-brained
## scheme first.
  n <- length(nei$i)
  i1 <- 0
  W <- dd*0 ## dbeta/dy (y-mu)
  for (i in 1:n) {
    i0 <- i1+1
    i1 <- nei$m[i]
    ii <- nei$k[i0:i1] ## neighbours of i
    W[nei$i[i],] <-  W[nei$i[i],] + colSums(dd[ii,,drop=FALSE]) 
  }
  V <- t(dd1)%*%W
} ## neico4
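
## Worked sketch (illustrative): if every point is its own neighbourhood,
## nei <- list(i=1:n, m=1:n, k=1:n), then W above reduces to dd and
## neico4(nei, dd, dd) is simply crossprod(dd), the basic jackknife estimate.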

pdef <- function(V,eps = .Machine$double.eps^.9) {
## find a positive definite matrix close to symmetric matrix V by eigenvalue thresholding
  ev <- eigen(V,symmetric=TRUE)
  thresh <- max(abs(ev$values))*eps
  ii <- ev$values<thresh
  if (any(ii)) {
    ev$values[ii] <- thresh
    V <- ev$vectors %*% (ev$values*t(ev$vectors))
  }
  V
} ## pdef
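
## Sketch (illustrative): pdef(matrix(c(1,2,2,1),2,2)) has eigenvalues 3 and -1;
## the negative one is replaced by max(abs(eigenvalues))*eps, so the returned
## matrix is (numerically) positive definite.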

gam.outer <- function(lsp,fscale,family,control,method,optimizer,criterion,scale,gamma,G,start=NULL,nei=NULL,...)
# function for smoothing parameter estimation by outer optimization. i.e.
# P-IRLS scheme iterated to convergence for each trial set of smoothing
# parameters.
# MAJOR STEPS:
#  1. Call appropriate smoothing parameter optimizer, and extract fitted model
#    `object'
#  2. Call `gam.fit3.post.proc' to get parameter covariance matrices, edf etc to
#     add to `object' 
{ if (is.na(optimizer[2])) optimizer[2] <- "newton"
  if (!optimizer[2]%in%c("newton","bfgs","nlm","optim")) stop("unknown outer optimization method.")

  if (length(lsp)==0) { ## no sp estimation to do -- run a fit instead
    optimizer[2] <- "no.sps" ## will cause gam2objective to be called, below
  }
  nbGetTheta <- substr(family$family[1],1,17)=="Negative Binomial" && length(family$getTheta())>1
  if (nbGetTheta) stop("Please provide a single value for theta or use nb to estimate it")
 
  ## some preparations for the other methods, which all use gam.fit3...
 
  family <- fix.family.link(family)
  family <- fix.family.var(family)
  if (method%in%c("REML","ML","P-REML","P-ML")) family <- fix.family.ls(family)
  
  if (optimizer[1]=="efs"&& optimizer[2] != "no.sps" ) { ## experimental extended efs
    ##warning("efs is still experimental!")
    if (inherits(family,"general.family")) {
      object <- efsud(x=G$X,y=G$y,lsp=lsp,Sl=G$Sl,weights=G$w,offset=G$offset,family=family,
                     control=control,Mp=G$Mp,start=start)
    } else {
      family <- fix.family.ls(family)
      object <- efsudr(x=G$X,y=G$y,lsp=lsp,Eb=G$Eb,UrS=G$UrS,weights=G$w,family=family,offset=G$offset,
                       start=start,
                       U1=G$U1, intercept = TRUE,scale=scale,Mp=G$Mp,control=control,n.true=G$n.true,...)
    }
    object$gcv.ubre <- object$REML
  } else if (optimizer[2]=="newton"||optimizer[2]=="bfgs"){ ## the gam.fit3 method 
    if (optimizer[2]=="bfgs") 
    b <- bfgs(lsp=lsp,X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,L=G$L,lsp0=G$lsp0,offset=G$offset,U1=G$U1,Mp = G$Mp,
                family=family,weights=G$w,control=control,gamma=gamma,scale=scale,conv.tol=control$newton$conv.tol,
                maxNstep= control$newton$maxNstep,maxSstep=control$newton$maxSstep,maxHalf=control$newton$maxHalf, 
                printWarn=FALSE,scoreType=criterion,null.coef=G$null.coef,start=start,
                pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,nei=nei,...) else
    b <- newton(lsp=lsp,X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,L=G$L,lsp0=G$lsp0,offset=G$offset,U1=G$U1,Mp=G$Mp,
                family=family,weights=G$w,control=control,gamma=gamma,scale=scale,conv.tol=control$newton$conv.tol,
                maxNstep= control$newton$maxNstep,maxSstep=control$newton$maxSstep,maxHalf=control$newton$maxHalf, 
                printWarn=FALSE,scoreType=criterion,null.coef=G$null.coef,start=start,
                pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,
		edge.correct=control$edge.correct,nei=nei,...)                
                
    object <- b$object
    object$REML <- object$REML1 <- object$REML2 <-
    object$GACV <- object$D2 <- object$P2 <- object$UBRE2 <- object$trA2 <- 
    object$GACV1 <- object$GACV2 <- object$GCV2 <- object$D1 <- object$P1 <- NULL
    object$sp <- as.numeric(exp(b$lsp))
    object$gcv.ubre <- b$score
    b <- list(conv=b$conv,iter=b$iter,grad=b$grad,hess=b$hess,score.hist=b$score.hist) ## return info
    object$outer.info <- b   
  } else { ## methods calling gam.fit3 
    args <- list(X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,offset=G$offset,U1=G$U1,Mp=G$Mp,family=family,
             weights=G$w,control=control,scoreType=criterion,gamma=gamma,scale=scale,
             L=G$L,lsp0=G$lsp0,null.coef=G$null.coef,n.true=G$n.true,Sl=G$Sl,start=start,nei=nei)
  
    if (optimizer[2]=="nlm") {
       b <- nlm(gam4objective, lsp, typsize = lsp, fscale = fscale, 
            stepmax = control$nlm$stepmax, ndigit = control$nlm$ndigit,
	    gradtol = control$nlm$gradtol, steptol = control$nlm$steptol, 
            iterlim = control$nlm$iterlim,
	    check.analyticals=control$nlm$check.analyticals,
            args=args,...)
      lsp <- b$estimate
      
    } else if (optimizer[2]=="optim") {
      b<-optim(par=lsp,fn=gam2objective,gr=gam2derivative,method="L-BFGS-B",control=
         list(fnscale=fscale,factr=control$optim$factr,lmm=min(5,length(lsp))),args=args,...)
      lsp <- b$par
    } else b <- NULL
    obj <- gam2objective(lsp,args,printWarn=TRUE,...) # final model fit, with warnings 
    object <- attr(obj,"full.fit")
    attr(obj,"full.fit") <- NULL
    object$gcv.ubre <- obj 
    object$outer.info <- b
    object$sp <- exp(lsp)
  } # end of methods calling gam.fit3
  
  
  if (scale>0) { 
    object$scale.estimated <- FALSE; object$scale <- scale} else {
    object$scale <- object$scale.est;object$scale.estimated <- TRUE
  } 
  
  object$control <- control
  object$method <- method
  if (inherits(family,"general.family")) {
    mv <- gam.fit5.post.proc(object,G$Sl,G$L,G$lsp0,G$S,G$off,gamma)
    ## object$coefficients <- Sl.initial.repara(G$Sl,object$coefficients,inverse=TRUE)
  } else mv <- gam.fit3.post.proc(G$X,G$L,G$lsp0,G$S,G$off,object,gamma)

  object[names(mv)] <- mv
  if (!is.null(nei)) {
    if (isTRUE(all.equal(sort(nei$i),1:nrow(G$X))) && length(nei$mi)==nrow(G$X) && criterion=="NCV") { ## each point predicted on its own
      loocv <- length(nei$k)==length(nei$i) && isTRUE(all.equal(nei$i,nei$k)) && length(nei$m) == length(nei$k) ## leave one out CV,
      if (is.logical(nei$jackknife)&&nei$jackknife) loocv <- TRUE ## straight jackknife requested
      if (nei$jackknife < 0) nei$jackknife <- TRUE ## signal cov matrix calc
    } else {
      loocv <- FALSE
      if (nei$jackknife < 0) nei$jackknife <- FALSE
    }  
  }
  if (!is.null(nei)&&(criterion!="NCV"||nei$jackknife)) { ## returning NCV when other criterion used for sp selection, or computing perturbations
    if (!is.null(nei$QNCV)&&nei$QNCV) family$qapprox <- TRUE
    if (is.null(family$qapprox)) family$qapprox <- FALSE
    lsp <- if (is.null(G$L)) log(object$sp) + G$lsp0 else G$L%*%log(object$sp)+G$lsp0
    if (object$scale.estimated && criterion %in% c("REML","ML","EFS")) lsp <- lsp[-length(lsp)] ## drop log scale estimate
    if (is.null(nei$gamma)) nei$gamma <- 1 ## a major application of this NCV is to select gamma - so it must not itself change with gamma!
    if (criterion!="NCV") nei$jackknife <- FALSE ## no cov matrix stuff.
    if (nei$jackknife) {
      n <- length(nei$i)
      nei1 <- if (loocv) nei else list(i=1:n,mi=1:n,m=1:n,k=1:n) ## set up for LOO CV or requested straight jackknife
      nei1$jackknife <- 10 ## signal that cross-validated beta perturbations are required
    } else nei1 <- nei ## called with another criterion
    b <- gam.fit3(x=G$X, y=G$y, sp=lsp,Eb=G$Eb,UrS=G$UrS,
                 offset = G$offset,U1=G$U1,Mp=G$Mp,family = family,weights=G$w,deriv=0,
                 control=control,gamma=nei$gamma, 
		 scale=scale,printWarn=FALSE,start=start,scoreType="NCV",null.coef=G$null.coef,
                 pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,nei=nei1,...)
    object$NCV <- as.numeric(b$NCV)
    if (nei$jackknife) { ## need to compute direct cov matrix estimate
      dd <- attr(b$NCV,"dd") ## leave-one-out parameter perturbations
      if (!loocv) { ## need to account for assumed independence structure
        rsd0 <- residuals.gam(object)
        ## compute cross validated residuals...
        eta.cv <- attr(object$gcv.ubre,"eta.cv")
        fitted0 <- object$fitted.values 
        if (inherits(object$family,"general.family")) {
          for (j in 1:ncol(eta.cv)) {
            object$fitted.values[,j] <-
	      if (j <= length(G$offset)&&!is.null(G$offset[[j]])) family$linfo[[j]]$linkinv(eta.cv[,j]+G$offset[[j]]) else 
                family$linfo[[j]]$linkinv(eta.cv[,j])
          }	
        } else {
          object$fitted.values <- object$family$linkinv(eta.cv+G$offset)
        }
        rsd <- residuals.gam(object)
        object$fitted.values <- fitted0
        ii <- !is.finite(rsd);if (any(ii)) rsd[ii] <- rsd0[ii]
        dd1 <- dd*rsd/rsd0 ## correct to avoid underestimation (over-estimation if CV residuals used alone)	

        Vcv <- pdef(neicov(dd1,nei))  #*n/(n-sum(object$edf)) ## cross validated V (too large)
	V0 <- pdef(neicov(dd,nei))  #*n/(n-sum(object$edf))   ## direct V (too small)
	Vj <- (Vcv+V0)/2       ## combination less bad
	alpha <- max(sum(diag(Vj))/sum(diag(object$Ve)),1) ## inverse learning rate - does lower limit make sense? 
	alpha1 <- max(sum(Vj*object$Ve)/sum(object$Ve^2),1)
	Vcv <- Vcv + (object$Vp-object$Ve)*alpha1 ## bias correct conservative (too large)
	Vj <- Vj + (object$Vp-object$Ve)*alpha ## bias correct
	attr(Vj,"Vcv") <- Vcv ## conservative as attribute
      } else { ## LOO or straight jackknife requested
        Vj <- pdef(crossprod(dd)) ## straight jackknife is fine.
	alpha <- sum(diag(Vj))/sum(diag(object$Ve)) ## inverse learning rate (do not impose lower limit)
	Vj <- Vj + (object$Vp-object$Ve)*alpha ## bias correct
      }
      attr(object$Ve,"Vp") <- object$Vp ## keep original
      attr(object$Ve,"inv.learn") <- alpha
      object$Vp <- Vj     
    }
  }
  ## note: use of the following (Vc) in place of Vp appears to mess up p-values for smooths,
  ##       but doesn't change r.e. p-values of course. 
  #if (!is.null(mv$Vc)) object$Vc <- mv$Vc 
  #if (!is.null(mv$edf2)) object$edf2 <- mv$edf2
  #object$Vp <- mv$Vb
  #object$V.sp <- mv$V.sp
  #object$hat<-mv$hat
  #object$Ve <- mv$Ve
  #object$edf<-mv$edf
  #object$edf1 <- mv$edf1
  ##object$F <- mv$F ## DoF matrix --- probably not needed
  #object$R <- mv$R ## qr.R(sqrt(W)X)
  object$aic <- object$aic + 2*sum(object$edf)
  object$nsdf <- G$nsdf
  object$K <-  object$D1 <-  object$D2 <-  object$P <-  object$P1 <-  object$P2 <- object$dw.drho <-  
  object$GACV <-  object$GACV1 <-  object$GACV2 <-  object$REML <-  object$REML1 <-  object$REML2 <-  
  object$GCV<-object$GCV1<- object$GCV2 <- object$UBRE <-object$UBRE1 <- object$UBRE2 <- object$trA <-
  object$trA1<- object$trA2 <- object$alpha <- object$alpha1 <- object$scale.est <- NULL
  object$sig2 <- object$scale
  
  object
} ## gam.outer

get.null.coef <- function(G,start=NULL,etastart=NULL,mustart=NULL,...) {
## Get an estimate of the coefs corresponding to maximum reasonable deviance...
  y <- G$y
  weights <- G$w
  nobs <- G$n ## ignore codetools warning!!
  ##start <- etastart <- mustart <- NULL
  family <- G$family
  eval(family$initialize) ## have to do this to ensure y numeric
  y <- as.numeric(y)
  mum <- mean(y)+0*y
  etam <- family$linkfun(mum)
  null.coef <- qr.coef(qr(G$X),etam)
  null.coef[is.na(null.coef)] <- 0;
  ## get a suitable function scale for optimization routines
  null.scale <- sum(family$dev.resids(y,mum,weights))/nrow(G$X) 
  list(null.coef=null.coef,null.scale=null.scale)
}

estimate.gam <- function (G,method,optimizer,control,in.out,scale,gamma,start=NULL,nei=NULL,...) {
## Do gam estimation and smoothness selection...

  if (!is.null(G$family$method) && !method %in% G$family$method) method <- G$family$method[1] 

  if (method %in% c("QNCV","NCV")||!is.null(nei)) {
    optimizer <- c("outer","bfgs")
    if (method=="QNCV") { method <- "NCV";G$family$qapprox <- TRUE } else G$family$qapprox <- FALSE
    if (is.null(nei)) nei <- list(i=1:G$n,mi=1:G$n,m=1:G$n,k=1:G$n) ## LOOCV
    if (is.null(nei$k)||is.null(nei$m)) nei$k <- nei$m <- nei$mi <- nei$i <- 1:G$n 
    if (is.null(nei$i)) if (length(nei$m)==G$n) nei$mi <- nei$i <- 1:G$n else stop("unclear which points NCV neighbourhoods belong to")
    if (length(nei$mi)!=length(nei$m)) stop("for NCV number of dropped and predicted neighbourhoods must match")
    nei$m <- round(nei$m); nei$mi <- round(nei$mi); nei$k <- round(nei$k); nei$i <- round(nei$i); 
    if (min(nei$i)<1||max(nei$i)>G$n||min(nei$k)<1||max(nei$k)>G$n) stop("nei indexes non-existent points")
    if (nei$m[1]<1||max(nei$m)>length(nei$k)||length(nei$m)<2||any(diff(nei$m)<1)) stop('nei$m faulty')
    if (nei$mi[1]<1||max(nei$mi)>length(nei$i)||length(nei$mi)<2||any(diff(nei$mi)<1)) stop('nei$mi faulty')
    if (is.null(nei$jackknife)) nei$jackknife <- -1
  }  
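  ## Illustrative sketch of the 'nei' structure checked above (assumed example,
  ## 6 data rows in two neighbourhoods/folds of 3, each fold predicting the
  ## points it drops): nei <- list(k=1:6, m=c(3,6), i=1:6, mi=c(3,6)).
  ## k/m define the dropped neighbourhoods, i/mi the corresponding predicted points.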

  if (inherits(G$family,"extended.family")) { ## then there are some restrictions...
    if (!(method%in%c("REML","ML","NCV"))) method <- "REML"
    if (inherits(G$family,"general.family")) {
       if (!(method%in%c("REML","NCV"))||optimizer[1]=="efs") method <- "REML"
       if (method=="NCV"&&is.null(G$family$ncv)) {
         warning("family lacks a Neighbourhood Cross Validation method")
         method <- "REML"
       }	 
       G$Sl <- Sl.setup(G) ## prepare penalty sequence
      
       G$X <- Sl.initial.repara(G$Sl,G$X,both.sides=FALSE) ## re-parameterize accordingly
 
       if (!is.null(start)) start <- Sl.initial.repara(G$Sl,start,inverse=FALSE,both.sides=FALSE)

       ## make sure optimizer appropriate for available derivatives
       if (!is.null(G$family$available.derivs)) {
         if (G$family$available.derivs==1 && optimizer[1]!="efs")  optimizer <- c("outer","bfgs")
	 if (G$family$available.derivs==0) optimizer <- "efs"
       }	 
    }
  }

  if (!optimizer[1]%in%c("outer","efs")) stop("unknown optimizer")
  if (optimizer[1]=="efs") method <- "REML"
  if (!method%in%c("GCV.Cp","GACV.Cp","REML","P-REML","ML","P-ML","NCV")) stop("unknown smoothness selection criterion") 
  G$family <- fix.family(G$family)
  G$rS <- mini.roots(G$S,G$off,ncol(G$X),G$rank)
 
  reml <- method%in%c("REML","P-REML","ML","P-ML","NCV")
  
  Ssp <- totalPenaltySpace(G$S,G$H,G$off,ncol(G$X))
  G$Eb <- Ssp$E       ## balanced penalty square root for rank determination purposes 
  G$U1 <- cbind(Ssp$Y,Ssp$Z) ## eigen space basis
  G$Mp <- ncol(Ssp$Z) ## null space dimension
  G$UrS <- list()     ## need penalty matrices in overall penalty range space...
  if (length(G$S)>0) for (i in 1:length(G$S)) G$UrS[[i]] <- t(Ssp$Y)%*%G$rS[[i]] else i <- 0
  if (!is.null(G$H)) { ## then the sqrt fixed penalty matrix H is needed for (RE)ML 
      G$UrS[[i+1]] <- t(Ssp$Y)%*%mroot(G$H)
  }


  # is outer looping needed ?
  outer.looping <- (!G$am ||reml||method=="GACV.Cp"||method=="NCV"||!is.null(nei)) 

  ## sort out exact sp selection criterion to use

  fam.name <- G$family$family[1]

  if (scale==0) { ## choose criterion for performance iteration
    if (fam.name == "binomial"||fam.name == "poisson") G$sig2<-1 #ubre
      else G$sig2 <- -1 #gcv
  } else {G$sig2 <- scale}


  if (reml||method=="NCV") { ## then RE(ML) selection, but which variant?
   criterion <- method
   if (fam.name == "binomial"||fam.name == "poisson") scale <- 1
   if (inherits(G$family,"extended.family") && scale <=0) {
     scale <- if (is.null(G$family$scale)) 1 else G$family$scale
   }
  } else {
    if (scale==0) { 
      if (fam.name=="binomial"||fam.name=="poisson") scale <- 1 #ubre
      else scale <- -1 #gcv
    }
    if (scale > 0) criterion <- "UBRE"
    else {
      if (method=="GCV.Cp") criterion <- "GCV" else criterion <- "GACV"
    }
  }


  if (substr(fam.name,1,17)=="Negative Binomial") { 
    scale <- 1; ## no choice
    if (method%in%c("GCV.Cp","GACV.Cp")) criterion <- "UBRE"
  }
  ## Reset P-ML or P-REML in known scale parameter cases
  if (scale>0) {
    if (method=="P-ML") criterion <- method <- "ML" else 
    if (method=="P-REML")  criterion <- method <- "REML"
  } 

  family <- G$family; nb.fam.reset <- FALSE
  
  if (substr(G$family$family[1],1,17)=="Negative Binomial") { ## initialize sensibly
      scale <- G$sig2 <- 1
      G$family <- negbin(max(family$getTheta()),link=family$link)
      nb.fam.reset <- TRUE
  }
  
  ## extended family may need to manipulate G...
    
  if (!is.null(G$family$preinitialize)) {
    if (inherits(G$family,"general.family")) {
      Gmod <- G$family$preinitialize(G) ## modifies some elements of G
      for (gnam in names(Gmod)) G[[gnam]] <- Gmod[[gnam]] ## copy these into G  
    } else {
      ## extended family - usually just initializes theta and possibly y
      if (!is.null(attr(G$family$preinitialize,"needG"))) attr(G$family,"G") <- G ## more complicated
      pini <- G$family$preinitialize(G$y,G$family)
      attr(G$family,"G") <- NULL
      if (!is.null(pini$family)) G$family <- pini$family
      if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta)
      if (!is.null(pini$y)) G$y <- pini$y
    }
  }

  if (length(G$sp)>0) lsp2 <- log(initial.spg(G$X,G$y,G$w,G$family,G$S,G$rank,G$off,
                                  offset=G$offset,L=G$L,lsp0=G$lsp0,E=G$Eb,...))
  else lsp2 <- rep(0,0)

  if (!outer.looping) { ## additive GCV/UBRE
    object <- am.fit(G,control=control,gamma=gamma,...)
    lsp <- log(object$sp) 
  } else {
    if (!is.null(in.out)) { # initial s.p.s and scale provided
      ok <- TRUE ## run a few basic checks
      if (is.null(in.out$sp)||is.null(in.out$scale)) ok <- FALSE
      if (length(in.out$sp)!=length(G$sp)) ok <- FALSE
      if (!ok) stop("in.out incorrect: see documentation")
      lsp <- log(in.out$sp) 
    } else { 
      lsp <- lsp2
    } 

    if (nb.fam.reset) G$family <- family ## restore, in case manipulated for negative binomial 
       
    ## Get an estimate of the coefs corresponding to maximum reasonable deviance,
    ## and an estimate of the function scale, suitable for optimizers that need this.
    ## Doesn't make sense for general families that have to initialize coefs directly.
  
    null.stuff  <- if (inherits(G$family,"general.family")) list() else { 
      if (is.null(G$family$get.null.coef)) get.null.coef(G,...) else G$family$get.null.coef(G,...)
    }

    scale.as.sp <- (criterion%in%c("REML","ML")||(criterion=="NCV"&&inherits(G$family,"extended.family")))&&scale<=0
 
    if (scale.as.sp) { ## log(scale) to be estimated as a smoothing parameter
      if (is.null(in.out)) {
          log.scale <- log(null.stuff$null.scale/10)
      } else {
          log.scale <- log(in.out$scale)
      }
      lsp <- c(lsp,log.scale) ## append log initial scale estimate to lsp
      ## extend G$L, if present...
      if (!is.null(G$L)) { 
        G$L <- cbind(rbind(G$L,rep(0,ncol(G$L))),c(rep(0,nrow(G$L)),1))
      }
      if (!is.null(G$lsp0)) G$lsp0 <- c(G$lsp0,0)
    } 
    ## check if there are extra parameters to estimate
    if (inherits(G$family,"extended.family")&&!inherits(G$family,"general.family")&&G$family$n.theta>0) {
      th0 <- G$family$getTheta() ## additional (initial) parameters of likelihood 
      nth <- length(th0)
      nlsp <- length(lsp)
      ind <- 1:nlsp + nth ## only used if nlsp>0
      lsp <- c(th0,lsp) ## append to start of lsp
      ## extend G$L, G$lsp0 if present...
      if (!is.null(G$L)&&nth>0) { 
        L <- rbind(cbind(diag(nth),matrix(0,nth,ncol(G$L))),
                   cbind(matrix(0,nrow(G$L),nth),G$L))
        #sat <- attr(G$L,"scale")
        G$L <- L
        #attr(G$L,"scale") <- sat
        #attr(G$L,"not.sp") <- nth ## first not.sp params are not smoothing params
      }
      if (!is.null(G$lsp0)) G$lsp0 <- c(th0*0,G$lsp0)
    } else nth <- 0

   
    G$null.coef <- null.stuff$null.coef

    object <- gam.outer(lsp,fscale=null.stuff$null.scale, ##abs(object$gcv.ubre)+object$sig2/length(G$y),
                        family=G$family,control=control,criterion=criterion,method=method,
                        optimizer=optimizer,scale=scale,gamma=gamma,G=G,start=start,nei=nei,...)
    
    if (scale.as.sp)  object$sp <- object$sp[-length(object$sp)] ## drop scale estimate from sp array
    
    if (inherits(G$family,"extended.family")&&nth>0) object$sp <- object$sp[-(1:nth)] ## drop theta params
 
  } ## finished outer looping

  ## correct null deviance if there's an offset [Why not correct calc in gam.fit/3???]....

  if (!inherits(G$family,"extended.family")&&G$intercept&&any(G$offset!=0)) object$null.deviance <-
         glm(object$y~offset(G$offset),family=object$family,weights=object$prior.weights)$deviance

  object$method <- if (method=="NCV"&&G$family$qapprox) "QNCV" else criterion

  object$smooth<-G$smooth

  names(object$edf) <- G$term.names
  names(object$edf1) <- G$term.names

  if (inherits(family,"general.family")) {
    object$coefficients <- Sl.initial.repara(G$Sl,object$coefficients,inverse=TRUE)
  }
  
  ## extended family may need to manipulate fit object. Code
  ## will need to include the following line if G$X used...
  ## G$X <- Sl.initial.repara(G$Sl,G$X,inverse=TRUE,cov=FALSE,both.sides=FALSE)
  if (!is.null(G$family$postproc)) {
    if (inherits(G$family,"general.family")) eval(G$family$postproc) else {
      posr <- G$family$postproc(family=object$family,y=G$y,prior.weights=object$prior.weights,
              fitted=object$fitted.values,linear.predictors=object$linear.predictors,offset=G$offset,
	      intercept=G$intercept)
      if (!is.null(posr$family)) object$family$family <- posr$family
      if (!is.null(posr$deviance)) object$deviance <- posr$deviance
      if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance	      
    }
  } 

  if (!is.null(G$P)) { ## matrix transforming from fit to prediction parameterization
    object$coefficients <- as.numeric(G$P %*% object$coefficients)
    object$Vp <- G$P %*% object$Vp %*% t(G$P)
    object$Ve <- G$P %*% object$Ve %*% t(G$P)
    rownames(object$Vp) <- colnames(object$Vp) <- G$term.names
    rownames(object$Ve) <- colnames(object$Ve) <- G$term.names
  }
  names(object$coefficients) <- G$term.names 
  
  object
} ## end estimate.gam

variable.summary <- function(pf,dl,n) {
## routine to summarize all the variables in dl, which is a list
## containing raw input variables to a model (i.e. no functions applied)
## pf is a formula containing the strictly parametric part of the
## model for the variables in dl. A list is returned, with names given by 
## the variables. For variables in the parametric part, then the list elements
## may be:
## * a 1 column matrix with elements set to the column medians, if variable 
##   is a matrix.
## * a 3 figure summary (min,median,max) for a numeric variable.
## * a factor variable, set to the most commonly occurring level (all levels retained).
## --- classes are as original data type, but anything not numeric, factor or matrix
## is coerced to numeric. 
## For non-parametric variables, any matrices are coerced to numeric, otherwise as 
## parametric.      
## medians in the above are always observed values (to deal with variables coerced to 
## factors in the model formulae in a nice way).
## variables with fewer than `n' entries are discarded
   v.n <- length(dl)
   ## if (v.n) for (i in 1:v.n) if (length(dl[[i]])<n) dl[[i]] <- NULL 
   
   v.name <- v.name1 <- names(dl)
   if (v.n) ## need to strip out names of any variables that are too short.
   { k <- 0 ## counter for retained variables
     for (i in 1:v.n) if (length(dl[[i]])>=n) { 
       k <- k+1
       v.name[k] <- v.name1[i] ## save names of variables of correct length
     }
     if (k>0) v.name <- v.name[1:k] else v.name <- rep("",k)
   }

   ## v.name <- names(dl)    ## the variable names
   p.name <- all.vars(pf[-2]) ## variables in parametric part (not response)
   vs <- list()
   v.n <- length(v.name)
   if (v.n>0) for (i in 1:v.n) {
     if (v.name[i]%in%p.name) para <- TRUE else para <- FALSE ## is variable in the parametric part?

     if (para&&is.matrix(dl[[v.name[i]]])&&ncol(dl[[v.name[i]]])>1) { ## parametric matrix --- a special case
       x <- matrix(apply(dl[[v.name[i]]],2,quantile,probs=0.5,type=3,na.rm=TRUE),1,ncol(dl[[v.name[i]]])) ## nearest to median entries
     } else { ## anything else
       x <- dl[[v.name[i]]]
       if (is.character(x)) x <- as.factor(x)
       if (is.factor(x)) {
         x <- x[!is.na(x)]
         lx <- levels(x)
         freq <- tabulate(x)
         ii <- min((1:length(lx))[freq==max(freq)])
         x <- factor(lx[ii],levels=lx) 
       } else {
         x <- as.numeric(x)
         x <- c(min(x,na.rm=TRUE),as.numeric(quantile(x,probs=.5,type=3,na.rm=TRUE)) ,max(x,na.rm=TRUE)) ## 3 figure summary
       }
     }
     vs[[v.name[i]]] <- x
   }
   vs
} ## end variable.summary
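
## e.g. (illustrative) variable.summary(y ~ x, list(x=1:10, z=runif(10)), 10)
## returns a list with element x = c(1, 5, 10) (min, median, max; x is in the
## parametric part) and a corresponding 3 figure summary for z.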


nanei <- function(nb,k) {
## nb is an NCV neighbourhood defining list, k an array of points to drop.
## this function adjusts nb to remove the dropped points and adjust the
## indices accordingly, so that the structure works with a data frame
## from which rows in k have been dropped.
  if (!length(k)) return(nb)
  if (is.null(nb$k)||is.null(nb$m)||is.null(nb$mi)||is.null(nb$i)) stop("full nei list needed if data incomplete")
  ## first work on dropped folds...
  kk <- which(nb$k %in% k) ## position of dropped in nb$k
  ## adjust m for the fact that points are to be dropped...
  nb$m <- nb$m - cumsum(tabulate(findInterval(kk-1,nb$m)+1,nbins=length(nb$m)))
  ii <- which(diff(c(0,nb$m))==0) ## identify zero length folds
  if (length(ii)) nb$m <- nb$m[-ii] ## drop zero length folds
  nb$k <- nb$k[-kk] ## drop the elements of k
  nb$k <- nb$k - findInterval(nb$k,sort(k)) ## and shift indices to account for dropped

  ## now the prediction folds...
  kk <- which(nb$i %in% k) ## position of dropped in nb$i
  ## adjust mi for the fact that points are to be dropped...
  nb$mi <- nb$mi - cumsum(tabulate(findInterval(kk-1,nb$mi)+1,nbins=length(nb$mi)))
  ii <- which(diff(c(0,nb$mi))==0) ## identify zero length folds
  if (length(ii)) nb$mi <- nb$mi[-ii] ## drop zero length folds
  nb$i <- nb$i[-kk] ## drop the elements of k
  nb$i <- nb$i - findInterval(nb$i,sort(k)) ## and shift indices to account for dropped
  nb 
} ## nanei
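
## Worked sketch (illustrative): with leave-one-out folds nb$k = 1:4, nb$m = 1:4
## and dropped point k = 3, the code above gives nb$k = 1:3, nb$m = 1:3, i.e.
## point 4 is relabelled 3 and the now empty third fold is removed; nb$i and
## nb$mi are adjusted in the same way.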

## don't be tempted to change to control=list(...) --- messes up passing on other stuff via ...

gam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action,offset=NULL,
                method="GCV.Cp",optimizer=c("outer","newton"),control=list(),#gam.control(),
                scale=0,select=FALSE,knots=NULL,sp=NULL,min.sp=NULL,H=NULL,gamma=1,fit=TRUE,
                paraPen=NULL,G=NULL,in.out=NULL,drop.unused.levels=TRUE,drop.intercept=NULL,
		nei=NULL,discrete=FALSE,...) {
## Routine to fit a GAM to some data. The model is stated in the formula, which is then 
## interpreted to figure out which bits relate to smooth terms and which to parametric terms.
## Basic steps:
## 1. Formula is split up into parametric and non-parametric parts,
##    and a fake formula constructed to be used to pick up data for
##    model frame. pterms "terms" object(s) created for parametric 
##    components, model frame created along with terms object.
## 2. 'gam.setup' called to do most of basis construction and other
##    elements of model setup.
## 3. 'estimate.gam' is called to estimate the model. This performs further 
##    pre- and post- fitting steps and calls either 'gam.fit' (performance
##    iteration) or 'gam.outer' (default method). 'gam.outer' calls the actual 
##    smoothing parameter optimizer ('newton' by default) and then any post 
##    processing. The optimizer calls 'gam.fit3/4/5' to estimate the model 
##    coefficients and obtain derivatives w.r.t. the smoothing parameters.
## 4. Finished 'gam' object assembled.
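## Illustrative call (assumes a user data frame 'dat' with columns y, x0, x1):
##   b <- gam(y ~ s(x0) + te(x1, x0), data = dat, method = "REML")
##   summary(b); plot(b, pages = 1)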
   control <- do.call("gam.control",control)
   if (is.null(G) && discrete) { ## get bam to do the setup
     cl <- match.call() ## NOTE: check all arguments more carefully
     cl[[1]] <- quote(bam)
     cl$fit = FALSE
     G <- eval(cl,parent.frame()) ## NOTE: cl probably needs modifying in G to work properly (with fit=FALSE reset?? also below??)
   }
   if (is.null(G)) {
    ## create model frame..... 
    gp <- interpret.gam(formula) # interpret the formula 
    cl <- match.call() # call needed in gam object for update to work
    mf <- match.call(expand.dots=FALSE)
    mf$formula <- gp$fake.formula 
    mf$family <- mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$min.sp<-mf$H<-mf$select <- mf$drop.intercept <- mf$nei <-
                 mf$gamma<-mf$method<-mf$fit<-mf$paraPen<-mf$G<-mf$optimizer <- mf$in.out <- mf$discrete <- mf$...<-NULL
    mf$drop.unused.levels <- drop.unused.levels
    mf[[1]] <- quote(stats::model.frame) ## as.name("model.frame")
    pmf <- mf
    mf <- eval(mf, parent.frame()) # the model frame now contains all the data 
    if (nrow(mf)<2) stop("Not enough (non-NA) data to do anything meaningful")
    terms <- attr(mf,"terms")

    if (!is.null(nei)) { ## check if data dropped
      k <- attr(mf,"na.action")
      if (!is.null(k)) { ## need to adjust nei for dropped data
        nei <- nanei(nei,as.numeric(k))
      }
    }
    ## summarize the *raw* input variables
    ## note can't use get_all_vars here -- buggy with matrices
    vars <- all_vars1(gp$fake.formula[-2]) ## drop response here
    inp <- parse(text = paste("list(", paste(vars, collapse = ","),")"))

    ## allow a bit of extra flexibility in what `data' is allowed to be (as model.frame actually does)
    if (!is.list(data)&&!is.data.frame(data)) data <- as.data.frame(data) 

    dl <- eval(inp, data, parent.frame())
    names(dl) <- vars ## list of all variables needed
    var.summary <- variable.summary(gp$pf,dl,nrow(mf)) ## summarize the input data
    rm(dl) ## save space    

    ## pterms are terms objects for the parametric model components used in 
    ## model setup - don't try obtaining by evaluating pf in mf - doesn't
    ## work in general (e.g. with offset)...

    if (is.list(formula)) { ## then there are several linear predictors
      environment(formula) <- environment(formula[[1]]) ## e.g. termplots needs this
      pterms <- list()
      tlab <- rep("",0)
      for (i in 1:length(formula)) {
        pmf$formula <- gp[[i]]$pf 
        pterms[[i]] <- attr(eval(pmf, parent.frame()),"terms")
        tlabi <- attr(pterms[[i]],"term.labels")
        if (i>1&&length(tlabi)>0) tlabi <- paste(tlabi,i-1,sep=".")
        tlab <- c(tlab,tlabi)
      }
      attr(pterms,"term.labels") <- tlab ## labels for all parametric terms, distinguished by predictor
    } else { ## single linear predictor case
      pmf$formula <- gp$pf
      pmf <- eval(pmf, parent.frame()) # pmf contains all data for parametric part
      pterms <- attr(pmf,"terms") ## pmf only used for this
    }

    if (is.character(family)) family <- eval(parse(text=family))
    if (is.function(family)) family <- family()
    if (is.null(family$family)) stop("family not recognized")
  
    if (family$family[1]=="gaussian" && family$link=="identity") am <- TRUE
    else am <- FALSE
    
    if (!control$keepData) rm(data) ## save space

    ## check whether family requires intercept to be dropped...
    # drop.intercept <- if (is.null(family$drop.intercept) || !family$drop.intercept) FALSE else TRUE
    # drop.intercept <- as.logical(family$drop.intercept)
    if (is.null(family$drop.intercept)) { ## family does not provide information
      lengthf <- if (is.list(formula)) length(formula) else 1
      if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, lengthf) else {
        drop.intercept <- rep(drop.intercept,length=lengthf) ## force drop.intercept to correct length
	if (sum(drop.intercept)) family$drop.intercept <- drop.intercept ## ensure prediction works
      }
    } else drop.intercept <- as.logical(family$drop.intercept) ## family overrides argument
    
    if (inherits(family,"general.family")&&!is.null(family$presetup)) eval(family$presetup)

    gsname <- if (is.list(formula)) "gam.setup.list" else "gam.setup" 

    G <- do.call(gsname,list(formula=gp,pterms=pterms,
                 data=mf,knots=knots,sp=sp,min.sp=min.sp,
                 H=H,absorb.cons=TRUE,sparse.cons=0,select=select,
                 idLinksBases=control$idLinksBases,scale.penalty=control$scalePenalty,
                 paraPen=paraPen,drop.intercept=drop.intercept))
    
    G$var.summary <- var.summary
    G$family <- family
   
    if ((is.list(formula)&&(is.null(family$nlp)||family$nlp!=gp$nlp))||
        (!is.list(formula)&&!is.null(family$nlp)&&(family$nlp>1))) stop("incorrect number of linear predictors for family")
       
    G$terms<-terms;
    G$mf<-mf;G$cl<-cl;
    G$am <- am

    if (is.null(G$offset)) G$offset<-rep(0,G$n)
     
    G$min.edf <- G$nsdf ## -dim(G$C)[1]
    if (G$m) for (i in 1:G$m) G$min.edf<-G$min.edf+G$smooth[[i]]$null.space.dim

    G$formula <- formula
    G$pred.formula <- gp$pred.formula
    environment(G$formula)<-environment(formula)
  } else { ## G not null
    if (!is.null(sp)&&any(sp>=0)) { ## request to modify smoothing parameters
      if (is.null(G$L)) G$L <- diag(length(G$sp))
      if (length(sp)!=ncol(G$L)) stop('length of sp must be number of free smoothing parameters in original model')
      ind <- sp>=0 ## which smoothing parameters are now fixed
      spind <- log(sp[ind]); 
      spind[!is.finite(spind)] <- -30 ## set any zero parameters to effective zero
      G$lsp0 <- G$lsp0 + drop(G$L[,ind,drop=FALSE] %*% spind) ## add fix to lsp0
      G$L <- G$L[,!ind,drop=FALSE] ## drop the cols of G
      G$sp <- rep(-1,ncol(G$L))
    }
  }

  if (!fit) {
    class(G) <- "gam.prefit"
    return(G)
  }  


  ## if (ncol(G$X)>nrow(G$X)) warning("Model has more coefficients than data") ##stop("Model has more coefficients than data") 

  G$conv.tol <- control$mgcv.tol      # tolerance for mgcv
  G$max.half <- control$mgcv.half # max number of step halvings in Newton update for mgcv

  object <- estimate.gam(G,method,optimizer,control,in.out,scale,gamma,nei=nei,...)

  
  if (!is.null(G$L)) { 
    object$full.sp <- as.numeric(exp(G$L%*%log(object$sp)+G$lsp0))
    names(object$full.sp) <- names(G$lsp0)
  }
  names(object$sp) <- names(G$sp)
  object$paraPen <- G$pP
  object$formula <- G$formula
  ## store any lpi attribute of G$X for use in predict.gam...
  if (is.list(object$formula)) attr(object$formula,"lpi") <- attr(G$X,"lpi")
  object$var.summary <- G$var.summary 
  object$cmX <- G$cmX ## column means of model matrix --- useful for CIs
  object$model<-G$mf # store the model frame
  object$na.action <- attr(G$mf,"na.action") # how to deal with NA's
  object$control <- control
  object$terms <- G$terms
  object$pred.formula <- G$pred.formula

  attr(object$pred.formula,"full") <- reformulate(all.vars(object$terms))
  
  object$pterms <- G$pterms
  object$assign <- G$assign # applies only to pterms
  object$contrasts <- G$contrasts
  object$xlevels <- G$xlevels
  object$offset <- G$offset
  if (!is.null(G$Xcentre)) object$Xcentre <- G$Xcentre
  if (control$keepData) object$data <- data
  object$df.residual <- nrow(G$X) - sum(object$edf)
  object$min.edf <- G$min.edf
  if (G$am&&!(method%in%c("REML","ML","P-ML","P-REML"))) object$optimizer <- "magic" else object$optimizer <- optimizer
  object$call <- G$cl # needed for update() to work
  class(object) <- c("gam","glm","lm")
  if (is.null(object$deviance)) object$deviance <- sum(residuals(object,"deviance")^2)
  names(object$gcv.ubre) <- method
  ## The following lines avoid potentially very large objects in hidden environments being stored
  ## with fitted gam objects. The downside is that functions like 'termplot' that rely on searching in
  ## the environment of the formula can fail...
  environment(object$formula) <- environment(object$pred.formula) <-
  environment(object$terms) <- environment(object$pterms) <- .GlobalEnv
  if (!is.null(object$model))  environment(attr(object$model,"terms"))  <- .GlobalEnv
  if (!is.null(attr(object$pred.formula,"full"))) environment(attr(object$pred.formula,"full")) <- .GlobalEnv
  object
} ## gam



print.gam<-function (x,...) 
# default print function for gam objects
{ print(x$family)
  cat("Formula:\n")
  if (is.list(x$formula)) for (i in 1:length(x$formula)) print(x$formula[[i]]) else
     print(x$formula)
  n.smooth<-length(x$smooth)
  if (n.smooth==0)
  cat("Total model degrees of freedom",sum(x$edf),"\n")
  else
  { edf<-0
    cat("\nEstimated degrees of freedom:\n")
    for (i in 1:n.smooth)
    edf[i]<-sum(x$edf[x$smooth[[i]]$first.para:x$smooth[[i]]$last.para])
    edf.str <- format(round(edf,digits=4),digits=3,scientific=FALSE)
    for (i in 1:n.smooth) {   
    cat(edf.str[i]," ",sep="")
      if (i%%7==0) cat("\n")
    }
    cat(" total =",round(sum(x$edf),digits=2),"\n")
  }
  if (!is.null(x$method)&&!(x$method%in%c("PQL","lme.ML","lme.REML")))  
  cat("\n",x$method," score: ",x$gcv.ubre,"     ",sep="")
  if (!is.null(x$rank) && x$rank< length(x$coefficients)) cat("rank: ",x$rank,"/",length(x$coefficients),sep="") 
  cat("\n")
  invisible(x)
} ## print.gam

gam.control <- function (nthreads=1,ncv.threads=1,irls.reg=0.0,epsilon = 1e-7, maxit = 200,
                         mgcv.tol=1e-7,mgcv.half=15,trace =FALSE,
                         rank.tol=.Machine$double.eps^0.5,
                         nlm=list(),optim=list(),newton=list(),#outerPIsteps=0,
                         idLinksBases=TRUE,scalePenalty=TRUE,efs.lspmax=15,efs.tol=.1,
                         keepData=FALSE,scale.est="fletcher",edge.correct=FALSE) 
# Control structure for a gam. 
# irls.reg is the regularization parameter to use in the GAM fitting IRLS loop.
# epsilon is the tolerance to use in the IRLS MLE loop. maxit is the number 
# of IRLS iterations to use. mgcv.tol is the tolerance to use in the mgcv call within each IRLS. mgcv.half is the 
# number of step halvings to employ in the mgcv search for the optimal GCV score, before giving up 
# on a search direction. trace turns on or off some debugging information.
# rank.tol is the tolerance to use for rank determination.
# outerPIsteps was the number of performance iteration steps used to initialize
#                         outer iteration (unused for a while) 
{   scale.est <- match.arg(scale.est,c("fletcher","pearson","deviance"))
    if (!is.logical(edge.correct)&&(!is.numeric(edge.correct)||edge.correct<0)) stop(
        "edge.correct must be logical or a positive number")
    if (!is.numeric(nthreads) || nthreads <1) stop("nthreads must be a positive integer")
    if (!is.numeric(ncv.threads) || ncv.threads <1) stop("ncv.threads must be a positive integer") 
    if (!is.numeric(irls.reg) || irls.reg <0.0) stop("IRLS regularizing parameter must be a non-negative number.")
    if (!is.numeric(epsilon) || epsilon <= 0) 
        stop("value of epsilon must be > 0")
    if (!is.numeric(maxit) || maxit <= 0) 
        stop("maximum number of iterations must be > 0")
    if (rank.tol<0||rank.tol>1) 
    { rank.tol=.Machine$double.eps^0.5
      warning("silly value supplied for rank.tol: reset to square root of machine precision.")
    }
    # work through nlm defaults
    if (is.null(nlm$ndigit)||nlm$ndigit<2) nlm$ndigit <- max(2,ceiling(-log10(epsilon)))
    nlm$ndigit <- round(nlm$ndigit)
    ndigit <- floor(-log10(.Machine$double.eps))
    if (nlm$ndigit>ndigit) nlm$ndigit <- ndigit
    if (is.null(nlm$gradtol)) nlm$gradtol <- epsilon*10
    nlm$gradtol <- abs(nlm$gradtol)
    ## note that nlm will stop after hitting stepmax 5 consecutive times
    ## hence should not be set too small ... 
    if (is.null(nlm$stepmax)||nlm$stepmax==0) nlm$stepmax <- 2
    nlm$stepmax <- abs(nlm$stepmax)
    if (is.null(nlm$steptol)) nlm$steptol <- 1e-4
    nlm$steptol <- abs(nlm$steptol)
    if (is.null(nlm$iterlim)) nlm$iterlim <- 200
    nlm$iterlim <- abs(nlm$iterlim)
    ## Should be reset for a while anytime derivative code altered...
    if (is.null(nlm$check.analyticals)) nlm$check.analyticals <- FALSE
    nlm$check.analyticals <- as.logical(nlm$check.analyticals) 

    # and newton defaults
    if (is.null(newton$conv.tol)) newton$conv.tol <- 1e-6
    if (is.null(newton$maxNstep)) newton$maxNstep <- 5
    if (is.null(newton$maxSstep)) newton$maxSstep <- 2
    if (is.null(newton$maxHalf)) newton$maxHalf <- 30
    if (is.null(newton$use.svd)) newton$use.svd <- FALSE

    # and optim defaults
    if (is.null(optim$factr)) optim$factr <- 1e7
    optim$factr <- abs(optim$factr)
    if (efs.tol<=0) efs.tol <- .1

    list(nthreads=round(nthreads),ncv.threads=round(ncv.threads),irls.reg=irls.reg,epsilon = epsilon, maxit = maxit,
         trace = trace, mgcv.tol=mgcv.tol,mgcv.half=mgcv.half,
         rank.tol=rank.tol,nlm=nlm,
         optim=optim,newton=newton,#outerPIsteps=outerPIsteps,
         idLinksBases=idLinksBases,scalePenalty=scalePenalty,efs.lspmax=efs.lspmax,efs.tol=efs.tol,
         keepData=as.logical(keepData[1]),scale.est=scale.est,edge.correct=edge.correct)
    
} ## gam.control
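
## Illustrative usage (assumes a user data frame 'dat'): non-default control
## settings are supplied to 'gam' as a list and completed by gam.control, e.g.
##   b <- gam(y ~ s(x), data = dat,
##            control = list(trace = TRUE, newton = list(conv.tol = 1e-8)))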



mgcv.get.scale<-function(Theta,weights,good,mu,mu.eta.val,G)
# Get the scale implied by the current fit and a trial -ve binomial Theta. The
# mu and mu.eta.val used in the fit are re-used here, rather than those implied by it....
{ variance<- negbin(Theta)$variance
  w<-sqrt(weights[good]*mu.eta.val[good]^2/variance(mu)[good])
  wres<-w*(G$y-G$X%*%G$p)
  sum(wres^2)/(G$n-sum(G$edf))
}


mgcv.find.theta<-function(Theta,T.max,T.min,weights,good,mu,mu.eta.val,G,tol)
# searches for -ve binomial theta between given limits to get scale=1 
{ scale<-mgcv.get.scale(Theta,weights,good,mu,mu.eta.val,G)
  T.hi<-T.low<-Theta
  while (scale<1&&T.hi<T.max) 
  { T.hi<-T.hi*2
    T.hi<-min(T.hi,T.max)
    scale<-mgcv.get.scale(T.hi,weights,good,mu,mu.eta.val,G)
  } 
  if (isTRUE(all.equal(T.hi,T.max)) && scale<1) return(T.hi)
  T.low<-T.hi
  while (scale>=1&&T.low>T.min)
  { T.low<-T.low/2 
    T.low<-max(T.low,T.min)
    scale<-mgcv.get.scale(T.low,weights,good,mu,mu.eta.val,G)
  } 
  if (isTRUE(all.equal(T.low,T.min)) && scale>1) return(T.low)
  # (T.low,T.hi) now brackets scale=1. 
  while (abs(scale-1)>tol)
  { Theta<-(T.low+T.hi)/2
    scale<-mgcv.get.scale(Theta,weights,good,mu,mu.eta.val,G)
    if (scale<1) T.low<-Theta
    else T.hi<-Theta
  }
  Theta
}


full.score <- function(sp,G,family,control,gamma,...)
# function suitable for calling from nlm in order to polish gam fit
# so that actual minimum of score is found in generalized cases
{ .Deprecated(msg="Internal mgcv function full.score is no longer used and will be removed soon.")
  if (is.null(G$L)) {
    G$sp<-exp(sp);
  } else {
    G$sp <- as.numeric(exp(G$L%*%sp + G$lsp0))
  }
  # set up single fixed penalty....
  q<-NCOL(G$X)
  if (is.null(G$H)) G$H<-matrix(0,q,q)
  for (i in 1:length(G$S))
  { j<-ncol(G$S[[i]])
    off1<-G$off[i];off2<-off1+j-1
    G$H[off1:off2,off1:off2]<-G$H[off1:off2,off1:off2]+G$sp[i]*G$S[[i]]
  }
  G$S<-list() # have to reset since length of this is used as number of penalties
  G$L <- NULL
  xx<-gam.fit(G,family=family,control=control,gamma=gamma,...)
  res <- xx$gcv.ubre.dev
  attr(res,"full.gam.object")<-xx
  res
} ## full.score

am.fit <- function (G, #start = NULL, etastart = NULL, 
    #mustart = NULL,# family = gaussian(), 
    control = gam.control(),gamma=1,...) {
    ## fit additive model using 'magic' returning gam type object.

    family <- gaussian()
    intercept<-G$intercept
    n <- nobs <- NROW(G$y) ## n just there to keep codetools happy
    y <- G$y # original data
    X <- G$X # original design matrix
    if (ncol(G$X) == 0) stop("Model seems to contain no terms")
    olm <- G$am   # only need 1 iteration as it's a pure additive model.
    
    # obtain average element sizes for the penalties
    n.S <- length(G$S)
    if (n.S>0) {
      S.size <- rep(0,n.S)
      for (i in 1:n.S) S.size[i] <- mean(abs(G$S[[i]])) 
    }
    weights<-G$w # original weights

    n.score <- sum(weights!=0) ## n to use in GCV score (i.e. don't count points with no influence)   

    offset<-G$offset 

    variance <- family$variance;dev.resids <- family$dev.resids
    aic <- family$aic
    if (NCOL(y) > 1) stop("y must be univariate unless binomial")
    
    scale <- G$sig2

    msp <- G$sp
    magic.control<-list(tol=G$conv.tol,step.half=G$max.half,#maxit=control$maxit+control$globit,
                          rank.tol=control$rank.tol)

    ## solve the penalized LS problem ...
    good <- weights > 0
    G$y <- y[good] - offset[good]
    G$X <- X[good,,drop=FALSE]
    G$w <- sqrt(weights[good]) ## note magic assumes sqrt weights
    mr <- magic(G$y,G$X,msp,G$S,G$off,L=G$L,lsp0=G$lsp0,G$rank,G$H,matrix(0,0,ncol(G$X)),
                G$w,gamma=gamma,G$sig2,G$sig2<0,
                ridge.parameter=control$irls.reg,control=magic.control,n.score=n.score,nthreads=control$nthreads)
    coef <- mr$b;msp <- mr$sp;G$sig2 <- mr$scale;G$gcv.ubre <- mr$score;

    if (any(!is.finite(coef))) warning("Non-finite coefficients")
  
    mu <- eta <- drop(X %*% coef + offset) 
    dev <- sum(dev.resids(y, mu, weights))
    
    residuals <- rep(NA, nobs)
    residuals[good] <- G$y - (mu - offset)[good]
       
    wtdmu <- if (intercept) sum(weights * y)/sum(weights) else offset
    nulldev <- sum(dev.resids(y, wtdmu, weights))
    n.ok <- nobs - sum(weights == 0)
    nulldf <- n.ok - as.integer(intercept)
    
    ## Extract a little more information from the fit....

    mv <- magic.post.proc(G$X,mr,w=G$w^2)
    G$Vp<-mv$Vb;G$hat<-mv$hat;
    G$Ve <- mv$Ve # frequentist cov. matrix
    G$edf<-mv$edf
    G$conv<-mr$gcv.info
    G$sp<-msp
    rank<-G$conv$rank

    ## use MLE of scale in Gaussian case - best estimate otherwise. 
    dev1 <- if (scale>0) scale*sum(weights) else dev 

    aic.model <- aic(y, n, mu, weights, dev1) + 2 * sum(G$edf)
    if (scale < 0) { ## deviance based GCV
      gcv.ubre.dev <- n.score*dev/(n.score-gamma*sum(G$edf))^2
    } else { # deviance based UBRE, which is just AIC
      gcv.ubre.dev <- dev/n.score + 2 * gamma * sum(G$edf)/n.score - G$sig2
    }

    list(coefficients = as.vector(coef), residuals = residuals, fitted.values = mu, 
        family = family,linear.predictors = eta, deviance = dev,
        null.deviance = nulldev, iter = 1, weights = weights, prior.weights = weights,  
        df.null = nulldf, y = y, converged = TRUE,sig2=G$sig2,edf=G$edf,edf1=mv$edf1,hat=G$hat,
        R=mr$R,
        boundary = FALSE,sp = G$sp,nsdf=G$nsdf,Ve=G$Ve,Vp=G$Vp,rV=mr$rV,mgcv.conv=G$conv,
        gcv.ubre=G$gcv.ubre,aic=aic.model,rank=rank,gcv.ubre.dev=gcv.ubre.dev,scale.estimated = (scale < 0))
} ## am.fit


gam.fit <- function (G, start = NULL, etastart = NULL, 
    mustart = NULL, family = gaussian(), 
    control = gam.control(),gamma=1,
    fixedSteps=(control$maxit+1),...) 
# fitting function for a gam, modified from glm.fit.
# note that smoothing parameter estimates from one irls iterate are carried over to the next irls iterate
# unless the range of s.p.s is large enough that numerical problems might be encountered (want to avoid 
# completely flat parts of gcv/ubre score). In the latter case autoinitialization is requested.
# fixedSteps < its default causes at most fixedSteps iterations to be taken,
# without warning if convergence has not been achieved. This is useful for
# obtaining starting values for outer iteration.
{   .Deprecated(msg="Internal mgcv function gam.fit is no longer used - see gam.fit3.")
    intercept<-G$intercept
    conv <- FALSE
    n <- nobs <- NROW(G$y) ## n just there to keep codetools happy
    nvars <- NCOL(G$X) # check this needed
    y<-G$y # original data
    X<-G$X # original design matrix
    if (nvars == 0) stop("Model seems to contain no terms")
    olm <- G$am   # only need 1 iteration as it's a pure additive model.
    if (!olm)  .Deprecated(msg="performance iteration with gam is deprecated, use bam instead")
    find.theta<-FALSE # any supplied -ve binomial theta treated as known, G$sig2 is scale parameter
    if (substr(family$family[1],1,17)=="Negative Binomial")
    { Theta <- family$getTheta()
      if (length(Theta)==1) { ## Theta fixed
        find.theta <- FALSE
        G$sig2 <- 1
      } else {
        if (length(Theta)>2)
        warning("Discrete Theta search not available with performance iteration")
        Theta <- range(Theta)
        T.max <- Theta[2]          ## upper search limit
        T.min <- Theta[1]          ## lower search limit
        Theta <- sqrt(T.max*T.min) ## initial value
        find.theta <- TRUE
      }
      nb.link<-family$link # negative.binomial family, there's a choice of links
    }

    
    # obtain average element sizes for the penalties
    n.S<-length(G$S)
    if (n.S>0)
    { S.size<-0
      for (i in 1:n.S) S.size[i]<-mean(abs(G$S[[i]])) 
    }
    weights<-G$w # original weights

    n.score <- sum(weights!=0) ## n to use in GCV score (i.e. don't count points with no influence)   

    offset<-G$offset 

    variance <- family$variance;dev.resids <- family$dev.resids
    aic <- family$aic
    linkinv <- family$linkinv;linkfun <- family$linkfun;mu.eta <- family$mu.eta
    if (!is.function(variance) || !is.function(linkinv)) 
        stop("illegal `family' argument")
    valideta <- family$valideta
    if (is.null(valideta)) 
        valideta <- function(eta) TRUE
    validmu <- family$validmu
    if (is.null(validmu)) 
        validmu <- function(mu) TRUE
    if (is.null(mustart))   # new from version 1.5.0 
    { eval(family$initialize)}
    else 
    { mukeep <- mustart
      eval(family$initialize)
      mustart <- mukeep
    }
    if (NCOL(y) > 1) 
        stop("y must be univariate unless binomial")
    
    coefold <- NULL                 # 1.5.0
    eta <- if (!is.null(etastart))  # 1.5.0
        etastart

    else if (!is.null(start)) 
    if (length(start) != nvars) 
    stop(gettextf("Length of start should equal %d and correspond to initial coefs.",nvars)) 
    else 
    { coefold<-start                        #1.5.0
      offset+as.vector(if (NCOL(G$X) == 1)
       G$X * start
       else G$X %*% start)
    }
    else family$linkfun(mustart)
    mu <- linkinv(eta)
    if (!(validmu(mu) && valideta(eta))) 
        stop("Can't find valid starting values: please specify some")
    devold <- sum(dev.resids(y, mu, weights))
   
    boundary <- FALSE
    scale <- G$sig2

    msp <- G$sp
    magic.control<-list(tol=G$conv.tol,step.half=G$max.half,#maxit=control$maxit+control$globit,
                          rank.tol=control$rank.tol)

    for (iter in 1:(control$maxit)) 
    {
        good <- weights > 0
        varmu <- variance(mu)[good]
        if (any(is.na(varmu))) 
            stop("NAs in V(mu)")
        if (any(varmu == 0)) 
            stop("0s in V(mu)")
        mu.eta.val <- mu.eta(eta)
        if (any(is.na(mu.eta.val[good]))) 
            stop("NAs in d(mu)/d(eta)")
        good <- (weights > 0) & (mu.eta.val != 0) # note good modified here => must re-calc each iter
        if (all(!good)) {
            conv <- FALSE
            warning(gettextf("No observations informative at iteration %d", 
                iter))
            break
        }
   
        mevg <- mu.eta.val[good];mug <- mu[good];yg <- y[good]
        weg <- weights[good];##etag <- eta[good]
        var.mug<-variance(mug)

        G$y <- z <- (eta - offset)[good] + (yg - mug)/mevg
        w <- sqrt((weg * mevg^2)/var.mug)
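        ## i.e. the usual IRLS working quantities: working response z_i = (eta_i - o_i) +
        ## (y_i - mu_i)/(dmu/deta)_i and weight w_i = sqrt(w^prior_i (dmu/deta)_i^2 / V(mu_i)),
        ## so each magic() call below is one penalized weighted least squares (Fisher scoring) step.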
        
        G$w<-w
        G$X<-X[good,,drop=FALSE]  # truncated design matrix       
     
        # must set G$sig2 to scale parameter or -1 here....
        G$sig2 <- scale


        if (sum(!is.finite(G$y))+sum(!is.finite(G$w))>0) 
        stop("iterative weights or data non-finite in gam.fit - regularization may help. See ?gam.control.")

        ## solve the working weighted penalized LS problem ...

        mr <- magic(G$y,G$X,msp,G$S,G$off,L=G$L,lsp0=G$lsp0,G$rank,G$H,matrix(0,0,ncol(G$X)),  #G$C,
                    G$w,gamma=gamma,G$sig2,G$sig2<0,
                    ridge.parameter=control$irls.reg,control=magic.control,n.score=n.score,nthreads=control$nthreads)
        G$p<-mr$b;msp<-mr$sp;G$sig2<-mr$scale;G$gcv.ubre<-mr$score;

        if (find.theta) {# then family is negative binomial with unknown theta - estimate it here from G$sig2
            ##  need to get edf array
          mv <- magic.post.proc(G$X,mr,w=G$w^2)
          G$edf <- mv$edf

          Theta<-mgcv.find.theta(Theta,T.max,T.min,weights,good,mu,mu.eta.val,G,.Machine$double.eps^0.5)
          family<-do.call("negbin",list(theta=Theta,link=nb.link))
          variance <- family$variance;dev.resids <- family$dev.resids
          aic <- family$aic
          family$Theta <- Theta ## save Theta estimate in family
        }

        if (any(!is.finite(G$p))) {
            conv <- FALSE   
            warning(gettextf("Non-finite coefficients at iteration %d",iter))
            break
        }

		
        start <- G$p
        eta <- drop(X %*% start) # 1.5.0
        mu <- linkinv(eta <- eta + offset)
        eta <- linkfun(mu) # force eta/mu consistency even if linkinv truncates
        dev <- sum(dev.resids(y, mu, weights))
        if (control$trace) 
            message(gettextf("Deviance = %s Iterations - %d", dev, iter, domain = "R-mgcv"))
        boundary <- FALSE
        if (!is.finite(dev)) {
            if (is.null(coefold))
            stop("no valid set of coefficients has been found:please supply starting values",
            call. = FALSE)
            warning("Step size truncated due to divergence",call.=FALSE)
            ii <- 1
            while (!is.finite(dev)) {
                if (ii > control$maxit) 
                  stop("inner loop 1; can't correct step size")
                ii <- ii + 1
                start <- (start + coefold)/2
                eta<-drop(X %*% start)
                mu <- linkinv(eta <- eta + offset)
                eta <- linkfun(mu) 
                dev <- sum(dev.resids(y, mu, weights))
            }
            boundary <- TRUE
            if (control$trace) 
                cat("Step halved: new deviance =", dev, "\n")
        }
        if (!(valideta(eta) && validmu(mu))) {
            warning("Step size truncated: out of bounds.",call.=FALSE)
            ii <- 1
            while (!(valideta(eta) && validmu(mu))) {
                if (ii > control$maxit) 
                  stop("inner loop 2; can't correct step size")
                ii <- ii + 1
                start <- (start + coefold)/2
                eta<-drop(X %*% start)
                mu <- linkinv(eta <- eta + offset)
                eta<-linkfun(mu)
            }
            boundary <- TRUE
            dev <- sum(dev.resids(y, mu, weights))
            if (control$trace) 
                cat("Step halved: new deviance =", dev, "\n")
        }

        ## Test for convergence here ...

        if (abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon || olm ||
            iter >= fixedSteps) {
            conv <- TRUE
            coef <- start #1.5.0
            break
        }
        else {
            devold <- dev
            coefold <- coef<-start
        }
    }
    if (!conv) 
    { warning("Algorithm did not converge") 
    }
    if (boundary) 
        warning("Algorithm stopped at boundary value")
    eps <- 10 * .Machine$double.eps
    if (family$family[1] == "binomial") {
        if (any(mu > 1 - eps) || any(mu < eps)) 
            warning("fitted probabilities numerically 0 or 1 occurred")
    }
    if (family$family[1] == "poisson") {
        if (any(mu < eps)) 
            warning("fitted rates numerically 0 occurred")
    }
    
    residuals <- rep(NA, nobs)
    residuals[good] <- z - (eta - offset)[good]
    
    wt <- rep(0, nobs)
    wt[good] <- w^2
   
    wtdmu <- if (intercept) sum(weights * y)/sum(weights) else linkinv(offset)
    nulldev <- sum(dev.resids(y, wtdmu, weights))
    n.ok <- nobs - sum(weights == 0)
    nulldf <- n.ok - as.integer(intercept)
    
    ## Extract a little more information from the fit....

    mv <- magic.post.proc(G$X,mr,w=G$w^2)
    G$Vp<-mv$Vb;G$hat<-mv$hat;
    G$Ve <- mv$Ve # frequentist cov. matrix
    G$edf<-mv$edf
    G$conv<-mr$gcv.info
    G$sp<-msp
    rank<-G$conv$rank

    ## use MLE of scale in Gaussian case - best estimate otherwise. 
    dev1 <- if (scale>0) scale*sum(weights) else if (family$family=="gaussian") dev else G$sig2*sum(weights)  

    aic.model <- aic(y, n, mu, weights, dev1) + 2 * sum(G$edf)
    if (scale < 0) { ## deviance based GCV
      gcv.ubre.dev <- n.score*dev/(n.score-gamma*sum(G$edf))^2
    } else { # deviance based UBRE, which is just AIC
      gcv.ubre.dev <- dev/n.score + 2 * gamma * sum(G$edf)/n.score - G$sig2
    }
  
	list(coefficients = as.vector(coef), residuals = residuals, fitted.values = mu, 
        family = family,linear.predictors = eta, deviance = dev,
        null.deviance = nulldev, iter = iter, weights = wt, prior.weights = weights,  
        df.null = nulldf, y = y, converged = conv,sig2=G$sig2,edf=G$edf,edf1=mv$edf1,hat=G$hat,
        ##F=mv$F,
        R=mr$R,
        boundary = boundary,sp = G$sp,nsdf=G$nsdf,Ve=G$Ve,Vp=G$Vp,rV=mr$rV,mgcv.conv=G$conv,
        gcv.ubre=G$gcv.ubre,aic=aic.model,rank=rank,gcv.ubre.dev=gcv.ubre.dev,scale.estimated = (scale < 0))
} ## gam.fit


model.matrix.gam <- function(object,...)
{ if (!inherits(object,"gam")) stop("`object' is not of class \"gam\"")
  predict(object,type="lpmatrix",...)
}
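## Usage sketch ('b' and 'dat' are hypothetical): the returned matrix maps coefficients to
## the linear predictor, so (absent any offset)
##   b <- gam(y ~ s(x), data = dat)
##   Xp <- model.matrix(b)
##   range(Xp %*% coef(b) - b$linear.predictors) ## approximately zero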

get.na.action <- function(na.action) {
## get the name of the na.action whether function or text string.
## avoids deparse(substitute(na.action)) which is easily broken by 
## nested calls.
  if (is.character(na.action)) {
    if (na.action%in%c("na.omit","na.exclude","na.pass","na.fail")) 
      return(na.action) else stop("unrecognised na.action")
  } 
  if (!is.function(na.action)) stop("na.action not character or function")
  a <- try(na.action(c(0,NA)),silent=TRUE)
  if (inherits(a,"try-error")) return("na.fail")
  if (inherits((attr(a,"na.action")),"omit")) return("na.omit")
  if (inherits((attr(a,"na.action")),"exclude")) return("na.exclude")
  return("na.pass")
} ## get.na.action
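## e.g. (illustrative calls to this internal helper, not from the original)
##   get.na.action("na.omit")  ## "na.omit"
##   get.na.action(na.exclude) ## "na.exclude" - the function is recognised by its effect on c(0,NA)
##   get.na.action(na.pass)    ## "na.pass"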


predict.gam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,exclude=NULL,
                       block.size=NULL,newdata.guaranteed=FALSE,na.action=na.pass,
                       unconditional=FALSE,iterms.type=NULL,...) {

# This function is used for predicting from a GAM. 'object' is a gam object, newdata a dataframe to
# be used in prediction......
#
# Type == "link"     - for linear predictor (may be several for extended case)
#      == "response" - for fitted values: may be several if several linear predictors,
#                      and may return something other than inverse link of l.p. for some families
#      == "terms"    - for individual terms on scale of linear predictor 
#      == "iterms"   - exactly as "terms" except that se's include uncertainty about mean  
#      == "lpmatrix" - for matrix mapping parameters to l.p. - has "lpi" attribute if multiple l.p.s
#      == "newdata"  - returns newdata after pre-processing 
# Steps are:
#  1. Set newdata to object$model if no newdata supplied
#  2. split up newdata into manageable blocks if too large
#  3. Obtain parametric model matrix (safely!)
#  4. Work through smooths calling prediction.matrix constructors for each term
#  5. Work out required quantities
# 
# The splitting into blocks enables blocks of compiled code to be called efficiently
# using smooth class specific prediction matrix constructors, without having to 
# build up potentially enormous prediction matrices.
# if newdata.guaranteed == TRUE then the data.frame is assumed complete and
# ready to go, so that only factor levels are checked for sanity.
# 
# if `terms' is non null then it should be a list of terms to be returned 
# when type=="terms" or "iterms". 
# if `object' has an attribute `para.only' then only parametric terms of order
# 1 are returned for type=="terms"/"iterms" : i.e. only what termplot can handle.
#
# if no new data is supplied then na.action does nothing, otherwise 
# if na.action == "na.pass" then NA predictors result in NA predictions (as lm
#                   or glm)
#              == "na.omit" or "na.exclude" then NA predictors result in
#                       dropping
# if GC is TRUE then gc() is called after each block is processed

  ## para acts by adding all smooths to the exclude list. 
  ## it also causes any lp matrix to be smaller than it would otherwise have been.
  #if (para) exclude <- c(exclude,unlist(lapply(object$smooth,function(x) x$label)))
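  ## Usage sketch (hypothetical fit 'b', prediction data frame 'nd' holding the covariates):
  ##   predict(b, nd)                                ## linear predictor (default type="link")
  ##   predict(b, nd, type="response", se.fit=TRUE)  ## fitted values and s.e.s on response scale
  ##   Xp <- predict(b, nd, type="lpmatrix")         ## then Xp %*% coef(b) reproduces the l.p.
  ##   predict(b, nd, type="terms", terms="s(x0)")   ## contribution of one smooth ('s(x0)' hypothetical)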

  if (unconditional) {
    if (is.null(object$Vc)) warning("Smoothness uncertainty corrected covariance not available") else 
    object$Vp <- object$Vc
  }

  if (type!="link"&&type!="terms"&&type!="iterms"&&type!="response"&&type!="lpmatrix"&&type!="newdata")  { 
    warning("Unknown type, reset to terms.")
    type<-"terms"
  }
  if (!inherits(object,"gam")) stop("predict.gam can only be used to predict from gam objects")

  ## to mimic behaviour of predict.lm, some resetting is required ...
  if (missing(newdata)) na.act <- object$na.action else {
    if (is.null(na.action)) na.act <- NULL 
    else {
      na.txt <- if (is.character(na.action)||is.function(na.action)) get.na.action(na.action) else "na.pass"
      #if (is.character(na.action))
      #na.txt <- na.action else ## substitute(na.action) else
      #if (is.function(na.action)) na.txt <- deparse(substitute(na.action))
      if (na.txt=="na.pass") na.act <- "na.exclude" else
      if (na.txt=="na.exclude") na.act <- "na.omit" else
      na.act <- na.action
    }
  } ## ... done

  # get data from which to predict.....  
  nd.is.mf <- FALSE # need to flag if supplied newdata is already a model frame 
  ## get name of response...
  # yname <- all.vars(object$terms)[attr(object$terms,"response")]
  yname <- attr(attr(object$terms,"dataClasses"),"names")[attr(object$terms,"response")]
  if (newdata.guaranteed==FALSE) {
    if (missing(newdata)) { # then "fake" an object suitable for prediction 
      newdata <- object$model
      new.data.ok <- FALSE
      nd.is.mf <- TRUE
      response <- newdata[[yname]] ## ok even with "cbind(foo,bar)" as yname 
    } else {  # do an R ``standard'' evaluation to pick up data
      new.data.ok <- TRUE
      if (is.data.frame(newdata)&&!is.null(attr(newdata,"terms"))) { # it's a model frame
        if (sum(!(names(object$model)%in%names(newdata)))) stop(
        "newdata is a model.frame: it should contain all required variables\n")
         nd.is.mf <- TRUE
	 response <- newdata[[yname]]
      } else {
        ## Following is non-standard to allow convenient splitting into blocks
        ## below, and to allow checking that all variables are in newdata ...

        ## get names of required variables, less response, but including offset variable
        ## see ?terms.object and ?terms for more information on terms objects
        # yname <- all.vars(object$terms)[attr(object$terms,"response")] ## redundant
        resp <- get.var(yname,newdata,FALSE)
        naresp <- FALSE
        #if (!is.null(object$family$predict)&&!is.null(newdata[[yname]])) {
	if (!is.null(object$family$predict)&&!is.null(resp)) {
          ## response provided, and potentially needed for prediction (e.g. Cox PH family)
          if (!is.null(object$pred.formula)) object$pred.formula <- attr(object$pred.formula,"full")
          response <- TRUE
          Terms <- terms(object)
          #resp <- newdata[[yname]]
	  if (is.matrix(resp)) {
            if (sum(is.na(rowSums(resp)))>0) stop("no NAs allowed in response data for this model")
          } else { ## vector response
            if (sum(is.na(resp))>0) {
              naresp <- TRUE ## there are NAs in supplied response
              ## replace them with a numeric code, so that rows are not dropped below
              rar <- range(resp,na.rm=TRUE)
              thresh <- rar[1]*1.01-rar[2]*.01
              resp[is.na(resp)] <- thresh
              newdata[[yname]] <- thresh 
            }
	  }  
        } else { ## response not provided
          response <- FALSE 
          Terms <- delete.response(terms(object))
        }
        allNames <- if (is.null(object$pred.formula)) all.vars(Terms) else all.vars(object$pred.formula)
        if (length(allNames) > 0) { 
          ff <- if (is.null(object$pred.formula)) reformulate(allNames) else  object$pred.formula
          if (sum(!(allNames%in%names(newdata)))) { 
            warning("not all required variables have been supplied in  newdata!\n")
          }
          ## note that `xlev' argument not used here, otherwise `as.factor' in 
          ## formula can cause a problem ... levels reset later.
          newdata <- eval(model.frame(ff,data=newdata,na.action=na.act),parent.frame())
          if (naresp) newdata[[yname]][newdata[[yname]]<=thresh] <- NA ## reinstate as NA  
        } ## otherwise it's intercept only and newdata can be left alone
        na.act <- attr(newdata,"na.action")
        #response <- if (response) newdata[[yname]] else NULL
	response <- if (response) get.var(yname,newdata,FALSE) else NULL
      }
    }
  } else { ## newdata.guaranteed == TRUE
    na.act <- NULL
    new.data.ok=TRUE ## it's guaranteed!
    if (!is.null(attr(newdata,"terms"))) nd.is.mf <- TRUE
    #response <- newdata[[yname]]
    response <- get.var(yname,newdata,FALSE)
  }


  ## now check the factor levels and split into blocks...

  if (new.data.ok) {
    ## check factor levels are right ...
    names(newdata)->nn # new data names
    colnames(object$model)->mn # original names
    for (i in 1:length(newdata)) 
    if (nn[i]%in%mn && is.factor(object$model[,nn[i]])) { # then so should newdata[[i]] be 
      levm <- levels(object$model[,nn[i]]) ## original levels
      ## need to avoid dropping NAs if they are a factor level in original model
      levn <- if (any(is.na(levm))) levels(factor(newdata[[i]],exclude=NULL)) else levels(factor(newdata[[i]])) ## new levels
      if (sum(!levn%in%levm)>0) { ## check not trying to sneak in new levels 
        msg <- paste("factor levels",paste(levn[!levn%in%levm],collapse=", "),"not in original fit",collapse="")
        warning(msg)
      }
      ## set prediction levels to fit levels...
      if (is.matrix(newdata[[i]])) {
        dum <- factor(newdata[[i]],levels=levm,exclude=NULL)
	dim(dum) <- dim(newdata[[i]])
	newdata[[i]] <- dum
      } else newdata[[i]] <- factor(newdata[[i]],levels=levm,exclude=NULL)
    }
    if (type=="newdata") return(newdata)

    # split prediction into blocks, to avoid running out of memory
    if (length(newdata)==1) newdata[[2]] <- newdata[[1]] # avoids data frame losing its labels and dimensions below!
    if (is.null(dim(newdata[[1]]))) np <- length(newdata[[1]]) 
    else np <- dim(newdata[[1]])[1] 
    nb <- length(object$coefficients)
    if (is.null(block.size)) block.size <- 1000
    if (block.size < 1) block.size <- np
  } else { # no new data, just use object$model
    np <- nrow(object$model)
    nb <- length(object$coefficients)
  }
  
  if (type=="lpmatrix") block.size <- NULL ## nothing gained by blocking in this case - and offset handling easier this way

  ## split prediction into blocks, to avoid running out of memory
  if (is.null(block.size)) { 
    ## use one block as predicting using model frame
    ## and no block size supplied... 
    n.blocks <- 1
    b.size <- array(np,1)
  } else {
    n.blocks <- np %/% block.size
    b.size <- rep(block.size,n.blocks)
    last.block <- np-sum(b.size)
    if (last.block>0) {
      n.blocks <- n.blocks+1  
      b.size[n.blocks] <- last.block
    }
  }
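  ## e.g. (worked illustration, not from the original) with np = 2500 rows and the default
  ## block.size = 1000 this gives n.blocks = 3 and b.size = c(1000,1000,500).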


  # setup prediction arrays...
  ## in multi-linear predictor models, lpi[[i]][j] is the column of model matrix contributing the jth col to lp i 
  lpi <- if (is.list(object$formula)) attr(object$formula,"lpi") else NULL
  nlp <- if (is.null(lpi)) 1 else length(lpi)  ## number of linear predictors
  n.smooth<-length(object$smooth)
  if (type=="lpmatrix") {
    H <- matrix(0,np,nb)
  } else if (type=="terms"||type=="iterms") { 
    term.labels <- attr(object$pterms,"term.labels")
    para.only <- attr(object,"para.only")
    if (is.null(para.only)) para.only <- FALSE  # if TRUE then only return information on parametric part
    n.pterms <- length(term.labels)
    fit <- array(0,c(np,n.pterms+as.numeric(para.only==0)*n.smooth))
    if (se.fit) se <- fit
    ColNames <- term.labels
  } else { ## "response" or "link"
    ## get number of linear predictors, in case it's more than 1...
    #if (is.list(object$formula)) {
    #  nlp <- length(lpi) ## number of linear predictors
    #} else nlp <- 1 
   
    fit <- if (nlp>1) matrix(0,np,nlp) else array(0,np)
    if (se.fit) se <- fit
    fit1 <- NULL ## "response" returned by fam$fv can be non-vector 
  }
  stop <- 0
  if (is.list(object$pterms)) { ## multiple linear predictors
    if (type=="iterms") {
      warning("type iterms not available for multiple predictor cases")
      type <- "terms"
    }
    pstart <- attr(object$nsdf,"pstart") ## starts of parametric blocks in coef vector
    pind <- rep(0,0) ## index of parametric coefs
    Terms <- list();pterms <- object$pterms
    for (i in 1:length(object$nsdf)) {
      Terms[[i]] <- delete.response(object$pterms[[i]])
      if (object$nsdf[i]>0) pind <- c(pind,pstart[i]-1+1:object$nsdf[i])
    }
  } else { ## normal single predictor case
    Terms <- list(delete.response(object$pterms)) ## make into a list anyway
    pterms <- list(object$pterms)
    pstart <- 1
    pind <- 1:object$nsdf ## index of parametric coefficients
  }

  ## check if extended family required intercept to be dropped...
  #drop.intercept <- FALSE 
  #if (!is.null(object$family$drop.intercept)&&object$family$drop.intercept) {
  #  drop.intercept <- TRUE;
  #  ## make sure intercept explicitly included, so it can be cleanly dropped...
  #  for (i in 1:length(Terms)) attr(Terms[[i]],"intercept") <- 1 
  #} 
  drop.intercept <- object$family$drop.intercept
  if (is.null(drop.intercept)) {
    drop.intercept <- rep(FALSE, length(Terms))
  } else {
    ## make sure intercept explicitly included, so it can be cleanly dropped...
    for (i in 1:length(Terms)) {
      if (drop.intercept[i] == TRUE) attr(Terms[[i]],"intercept") <- 1 
    }
  }
  ## index of any parametric terms that have to be dropped
  ## this is used to help with identifiability in multi-
  ## formula models...

  drop.ind <- attr(object$nsdf,"drop.ind") 

  ####################################
  ## Actual prediction starts here...
  ####################################

  s.offset <- NULL # to accumulate any smooth term specific offset
  any.soff <- FALSE # indicator of term specific offset existence
  if (n.blocks > 0) for (b in 1:n.blocks) { # work through prediction blocks
    start <- stop+1
    stop <- start + b.size[b] - 1
    if (n.blocks==1) data <- newdata else data <- newdata[start:stop,]
    X <- matrix(0,b.size[b],nb+length(drop.ind))
    Xoff <- matrix(0,b.size[b],n.smooth) ## term specific offsets 
    offs <- list()
    for (i in 1:length(Terms)) { ## loop for parametric components (1 per lp)
      ## implements safe prediction for parametric part as described in
      ## http://developer.r-project.org/model-fitting-functions.txt
      if (new.data.ok) {
        if (nd.is.mf) mf <- model.frame(data,xlev=object$xlevels) else {
          mf <- model.frame(Terms[[i]],data,xlev=object$xlevels)
          if (!is.null(cl <- attr(pterms[[i]],"dataClasses"))) .checkMFClasses(cl,mf)
        }
	## next line is just a work around to prevent a spurious warning (e.g. R 3.6) from
	## model.matrix if contrast relates to a term in mf which is not
	## part of Terms[[i]] (model.matrix doc actually defines contrast w.r.t. mf,
	## not Terms[[i]])...
	oc <- if (length(object$contrasts)==0) object$contrasts else
	      object$contrasts[names(object$contrasts)%in%attr(Terms[[i]],"term.labels")]
        Xp <- model.matrix(Terms[[i]],mf,contrasts=oc) 
      } else { 
        Xp <- model.matrix(Terms[[i]],object$model)
        mf <- newdata # needed in case of offset, below
      }
      if (!is.null(terms)||!is.null(exclude)) { ## work out which parts of Xp to zero
        assign <- attr(Xp,"assign") ## assign[i] is the term to which Xp[,i] relates
	if (min(assign)==0&&("(Intercept)"%in%exclude||(!is.null(terms)&&!"(Intercept)"%in%terms))) Xp[,which(assign==0)] <- 0
	tlab <- attr(Terms[[i]],"term.labels")
	ii <- which(assign%in%which(tlab%in%exclude))
	if (length(ii)) Xp[,ii] <- 0
	if (!is.null(terms)) {
	  ii <- which(assign%in%which(!tlab%in%terms))
	  if (length(ii)) Xp[,ii] <- 0
	}   
      }
      offi <- attr(Terms[[i]],"offset")
      if (is.null(offi)) offs[[i]] <- 0 else { ## extract offset
        offs[[i]] <- mf[[names(attr(Terms[[i]],"dataClasses"))[offi+1]]]
      }
      if (drop.intercept[i]) { 
        xat <- attributes(Xp);ind <- xat$assign>0 
        Xp <- Xp[,xat$assign>0,drop=FALSE] ## some extended families need to drop intercept
        xat$assign <- xat$assign[ind];xat$dimnames[[2]]<-xat$dimnames[[2]][ind];
        xat$dim[2] <- xat$dim[2]-1;attributes(Xp) <- xat 
      }
      if (object$nsdf[i]>0) X[,pstart[i]-1 + 1:object$nsdf[i]] <- Xp
    } ## end of parametric loop
 ##   if (length(offs)==1) offs <- offs[[1]] ## messes up later handling
     
    if (!is.null(drop.ind)) X <- X[,-drop.ind]

    if (n.smooth) for (k in 1:n.smooth) { ## loop through smooths
      klab <- object$smooth[[k]]$label
      if ((is.null(terms)||(klab%in%terms))&&(is.null(exclude)||!(klab%in%exclude))) {
        Xfrag <- PredictMat(object$smooth[[k]],data)		 
        X[,object$smooth[[k]]$first.para:object$smooth[[k]]$last.para] <- Xfrag
        Xfrag.off <- attr(Xfrag,"offset") ## any term specific offsets?
        if (!is.null(Xfrag.off)) { Xoff[,k] <- Xfrag.off; any.soff <- TRUE }
      }
      if (type=="terms"||type=="iterms") ColNames[n.pterms+k] <- klab
    } ## smooths done

    

    if (!is.null(object$Xcentre)) { ## Apply any column centering
      X <- sweep(X,2,object$Xcentre)
    }

    # Now have prediction matrix, X, for this block, need to do something with it...

    if (type=="lpmatrix") { 
      H[start:stop,] <- X
      if (any.soff) s.offset <- rbind(s.offset,Xoff)
    } else if (type=="terms"||type=="iterms") { ## split results into terms
      lass <- if (is.list(object$assign)) object$assign else list(object$assign)
      k <- 0
      for (j in 1:length(lass)) if (length(lass[[j]])) { ## work through assign list
        ind <- 1:length(lass[[j]]) ## index vector for coefs involved
        nptj <- max(lass[[j]]) ## number of terms involved here
        if (nptj>0) for (i in 1:nptj) { ## work through parametric part
          k <- k + 1 ## counts total number of parametric terms
          ii <- ind[lass[[j]]==i] + pstart[j] - 1 
          fit[start:stop,k] <- X[,ii,drop=FALSE]%*%object$coefficients[ii]
          if (se.fit) se[start:stop,k] <-
          sqrt(pmax(0,rowSums((X[,ii,drop=FALSE]%*%object$Vp[ii,ii])*X[,ii,drop=FALSE])))
        }
      } ## assign list done
      if (n.smooth&&!para.only) {
        for (k in 1:n.smooth) # work through the smooth terms 
        { first <- object$smooth[[k]]$first.para; last <- object$smooth[[k]]$last.para
          fit[start:stop,n.pterms+k] <- X[,first:last,drop=FALSE] %*% object$coefficients[first:last] + Xoff[,k]
          if (se.fit) { # diag(Z%*%V%*%t(Z))^0.5; Z=X[,first:last]; V is sub-matrix of Vp
            if (type=="iterms"&& attr(object$smooth[[k]],"nCons")>0) { ## termwise se to "carry the intercept
              ## some general families, add parameters after cmX created, which are irrelevant to cmX... 
              if (length(object$cmX) < ncol(X)) object$cmX <- c(object$cmX,rep(0,ncol(X)-length(object$cmX)))
              if (!is.null(iterms.type)&&iterms.type==2) object$cmX[-(1:object$nsdf)] <- 0 ## variability of fixed effects mean only
              X1 <- matrix(object$cmX,nrow(X),ncol(X),byrow=TRUE)
              meanL1 <- object$smooth[[k]]$meanL1
              if (!is.null(meanL1)) X1 <- X1 / meanL1              
              X1[,first:last] <- X[,first:last]
              se[start:stop,n.pterms+k] <- sqrt(pmax(0,rowSums((X1%*%object$Vp)*X1)))
            } else se[start:stop,n.pterms+k] <- ## terms strictly centred
            sqrt(pmax(0,rowSums((X[,first:last,drop=FALSE]%*%
                          object$Vp[first:last,first:last,drop=FALSE])*X[,first:last,drop=FALSE])))
          } ## end if (se.fit)
        } 
        colnames(fit) <- ColNames
        if (se.fit) colnames(se) <- ColNames
      } else {
        if (para.only&&is.list(object$pterms)) { 
          ## have to use term labels that match original data, or termplot fails 
          ## to plot. This only applies for 'para.only==1' calls which are 
          ## designed for use from termplot called from plot.gam
          term.labels <- unlist(lapply(object$pterms,attr,"term.labels"))
        }
        colnames(fit) <- term.labels
        if (se.fit) colnames(se) <- term.labels
        if (para.only) { 
          # retain only terms of order 1 - this is to make termplot work
          order <- if (is.list(object$pterms)) unlist(lapply(object$pterms,attr,"order")) else attr(object$pterms,"order")
          term.labels <- term.labels[order==1]
          ## fit <- as.matrix(as.matrix(fit)[,order==1])
          fit <- fit[,order==1,drop=FALSE]
          colnames(fit) <- term.labels
          if (se.fit) { ## se <- as.matrix(as.matrix(se)[,order==1])
            se <- se[,order==1,drop=FALSE]
            colnames(se) <- term.labels 
          }
        } 
      } 
    } else { ## "link" or "response" case
      fam <- object$family
      k <- attr(attr(object$model,"terms"),"offset")
      if (nlp>1) { ## multiple linear predictor case
        if (is.null(fam$predict)||type=="link") {
          ##pstart <- c(pstart,ncol(X)+1)
          ## get index of smooths with an offset...
          off.ind <- (1:n.smooth)[as.logical(colSums(abs(Xoff)))]
          for (j in 1:nlp) { ## looping over the model formulae
            ind <- lpi[[j]] ##pstart[j]:(pstart[j+1]-1)
            fit[start:stop,j] <- X[,ind,drop=FALSE]%*%object$coefficients[ind] + offs[[j]]
            if (length(off.ind)) for (i in off.ind) { ## add any term specific offsets
              if (object$smooth[[i]]$first.para%in%ind)  fit[start:stop,j] <- fit[start:stop,j] + Xoff[,i]
            }
            if (se.fit) se[start:stop,j] <- 
            sqrt(pmax(0,rowSums((X[,ind,drop=FALSE]%*%object$Vp[ind,ind,drop=FALSE])*X[,ind,drop=FALSE])))
            ## model offset only handled for first predictor... fixed
            ##if (j==1&&!is.null(k))  fit[start:stop,j] <- fit[start:stop,j] + model.offset(mf)
            if (type=="response") { ## need to transform lp to response scale
              linfo <- object$family$linfo[[j]] ## link information
              if (se.fit) se[start:stop,j] <- se[start:stop,j]*abs(linfo$mu.eta(fit[start:stop,j]))
              fit[start:stop,j] <- linfo$linkinv(fit[start:stop,j])
            }
          } ## end of lp loop
        } else { ## response case with own predict code
          #lpi <- list();pst <- c(pstart,ncol(X)+1)
          #for (i in 1:(length(pst)-1)) lpi[[i]] <- pst[i]:(pst[i+1]-1)
          attr(X,"lpi") <- lpi  
          ffv <- fam$predict(fam,se.fit,y= if (is.matrix(response)) response[start:stop,] else response[start:stop],
                             X=X,beta=object$coefficients,off=offs,Vb=object$Vp)
          if (is.matrix(fit)&&!is.matrix(ffv[[1]])) {
            fit <- fit[,1]; if (se.fit) se <- se[,1]
          }
          if (is.matrix(ffv[[1]])&&(!is.matrix(fit)||ncol(ffv[[1]])!=ncol(fit))) {
            fit <- matrix(0,np,ncol(ffv[[1]])); if (se.fit) se <- fit
          }
          if (is.matrix(fit)) {
            fit[start:stop,] <- ffv[[1]]
            if (se.fit) se[start:stop,] <- ffv[[2]]
          } else {
            fit[start:stop] <- ffv[[1]]
            if (se.fit) se[start:stop] <- ffv[[2]]
          }
        } ## end of own response prediction code
      } else { ## single linear predictor
        offs <- if (is.null(k)) rowSums(Xoff) else rowSums(Xoff) + model.offset(mf)
        fit[start:stop] <- X%*%object$coefficients + offs
        if (se.fit) se[start:stop] <- sqrt(pmax(0,rowSums((X%*%object$Vp)*X)))
        if (type=="response") { # transform    
          linkinv <- fam$linkinv
          if (is.null(fam$predict)) {
            dmu.deta <- fam$mu.eta  
            if (se.fit) se[start:stop]<-se[start:stop]*abs(dmu.deta(fit[start:stop])) 
            fit[start:stop] <- linkinv(fit[start:stop])
          } else { ## family has its own prediction code for response case
            ffv <- fam$predict(fam,se.fit,y= if (is.matrix(response)) response[start:stop,] else response[start:stop],
                               X=X,beta=object$coefficients,off=offs,Vb=object$Vp)
            if (is.null(fit1)&&is.matrix(ffv[[1]])) {
              fit1 <- matrix(0,np,ncol(ffv[[1]]))
              if (se.fit) se1 <- fit1
            }
            if (is.null(fit1)) {
              fit[start:stop] <- ffv[[1]]
              if (se.fit) se[start:stop] <- ffv[[2]]
            } else {
              fit1[start:stop,] <- ffv[[1]]
              if (se.fit) se1[start:stop,] <- ffv[[2]]
            }
          }
        }
      } ## single lp done
    } ## end of link or response case 
    rm(X)
  } ## end of prediction block loop

  if ((type=="terms"||type=="iterms")&&(!is.null(terms)||!is.null(exclude))) { # return only terms requested via `terms'
    cnames <- colnames(fit)
    if (!is.null(terms)) {
      if (sum(!(terms %in%cnames))) 
        warning("non-existent terms requested - ignoring")
      else { 
        fit <- fit[,terms,drop=FALSE]
        if (se.fit) {
           se <- se[,terms,drop=FALSE]
        }
      }
    }
    if (!is.null(exclude)) {
      if (sum(!(exclude %in%cnames))) 
        warning("non-existent exclude terms requested - ignoring")
      else { 
        exclude <- which(cnames%in%exclude) ## convert to numeric column index
        fit <- fit[,-exclude,drop=FALSE]
        if (se.fit) {
          se <- se[,-exclude,drop=FALSE]
        }
      }
    }
  }

  if (type=="response"&&!is.null(fit1)) {
    fit <- fit1
    if (se.fit) se <- se1
  }

  rn <- rownames(newdata)
  if (type=="lpmatrix") { 
    colnames(H) <- names(object$coefficients);rownames(H)<-rn
    if (!is.null(s.offset)) { 
      s.offset <- napredict(na.act,s.offset)
      attr(H,"offset") <- s.offset ## term specific offsets...
    }
    #if (!is.null(attr(attr(object$model,"terms"),"offset"))) {
    #  attr(H,"model.offset") <- napredict(na.act,model.offset(mf)) 
    #}
    if (!is.null(offs)) {
      offs <- offs[1:nlp]
      for (i in 1:nlp) offs[[i]] <- napredict(na.act,offs[[i]])
      attr(H,"model.offset") <- if (nlp==1) offs[[1]] else offs
    }
    H <- napredict(na.act,H)
    if (length(object$nsdf)>1) { ## add "lpi" attribute if more than one l.p.
      #lpi <- list();pst <- c(pstart,ncol(H)+1)
      #for (i in 1:(length(pst)-1)) lpi[[i]] <- pst[i]:(pst[i+1]-1)  
      attr(H,"lpi") <- lpi
    }
  } else { 
    if (se.fit) { 
      if (is.null(nrow(fit))) {
        names(fit) <- rn
        names(se) <- rn
        fit <- napredict(na.act,fit)
        se <- napredict(na.act,se) 
      } else { 
        rownames(fit)<-rn
        rownames(se)<-rn
        fit <- napredict(na.act,fit)
        se <- napredict(na.act,se)
      }
      H<-list(fit=fit,se.fit=se) 
    } else { 
      H <- fit
      if (is.null(nrow(H))) names(H) <- rn else
      rownames(H)<-rn
      H <- napredict(na.act,H)
    }
  }
  if ((type=="terms"||type=="iterms")&&attr(object$terms,"intercept")==1) attr(H,"constant") <- object$coefficients[1]
  H # ... and return
} ## end of predict.gam



concurvity <- function(b,full=TRUE) {
## b is a gam object
## full==TRUE means that dependence of each term on rest of model 
##            is considered.
## full==FALSE => pairwise comparison.
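## Usage sketch (hypothetical fit): b <- gam(y ~ s(x0) + s(x1), data = dat)
##   concurvity(b)              ## 3 (worst/observed/estimate) by n.terms matrix of measures
##   concurvity(b, full=FALSE)  ## list of 3 term-by-term matrices of pairwise measures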
  if (!inherits(b,"gam")) stop("requires an object of class gam")
  m <- length(b$smooth)
  if (m<1) stop("nothing to do for this model")
  X <- model.matrix(b)
  X <- X[rowSums(is.na(X))==0,]
  ## this step speeds up remaining computation...
  X <- qr.R(qr(X,tol=0,LAPACK=FALSE)) 
  stop <- start <- rep(1,m)
  lab <- rep("",m)
  for (i in 1:m) { ## loop through smooths
    start[i] <- b$smooth[[i]]$first.para
    stop[i] <- b$smooth[[i]]$last.para
    lab[i] <- b$smooth[[i]]$label
  }
  if (min(start)>1) { ## append parametric terms
    start <- c(1,start)
    stop <- c(min(start)-1,stop)
    lab <- c("para",lab)
    m <- m + 1
  }

  n.measures <- 3
  measure.names <- c("worst","observed","estimate")

  ##n <- nrow(X)
  if (full) { ## get dependence of each smooth on all the rest...
    conc <- matrix(0,n.measures,m)
    for (i in 1:m) {
      Xi <- X[,-(start[i]:stop[i]),drop=FALSE]
      Xj <- X[,start[i]:stop[i],drop=FALSE]
      r <- ncol(Xi) 
      R <- qr.R(qr(cbind(Xi,Xj),LAPACK=FALSE,tol=0))[,-(1:r),drop=FALSE] ## No pivoting!!  
       
      ##u worst case...
      Rt <- qr.R(qr(R,tol=0)) 
      conc[1,i] <- svd(forwardsolve(t(Rt),t(R[1:r,,drop=FALSE])))$d[1]^2
       
      ## observed...
      beta <- b$coef[start[i]:stop[i]]
      conc[2,i] <- sum((R[1:r,,drop=FALSE]%*%beta)^2)/sum((Rt%*%beta)^2)

      ## less pessimistic...
      conc[3,i] <- sum(R[1:r,]^2)/sum(R^2)
    }
    colnames(conc) <- lab
    rownames(conc) <- measure.names
  } else { ## pairwise measures
    conc <- list()
    for (i in 1:n.measures) conc[[i]] <- matrix(1,m,m) ## concurvity matrix
    for (i in 1:m) { ## concurvity calculation loop
      Xi <- X[,start[i]:stop[i],drop=FALSE]
      r <- ncol(Xi)
      for (j in 1:m) if (i!=j) { 
        Xj <- X[,start[j]:stop[j],drop=FALSE]
        R <- qr.R(qr(cbind(Xi,Xj),LAPACK=FALSE,tol=0))[,-(1:r),drop=FALSE] ## No pivoting!!  
        
        ## worst case...
        Rt <- qr.R(qr(R,tol=0)) 
        conc[[1]][i,j] <- svd(forwardsolve(t(Rt),t(R[1:r,,drop=FALSE])))$d[1]^2
       
        ## observed...
        beta <- b$coef[start[j]:stop[j]]
        conc[[2]][i,j] <- sum((R[1:r,,drop=FALSE]%*%beta)^2)/sum((Rt%*%beta)^2)

        ## less pessimistic...
        conc[[3]][i,j] <- sum(R[1:r,]^2)/sum(R^2)
        
        ## Alternative less pessimistic
       # log.det.R <- sum(log(abs(diag(R[(r+1):nrow(R),,drop=FALSE]))))
       # log.det.Rt <- sum(log(abs(diag(Rt))))
       # conc[[4]][i,j] <- 1 - exp(log.det.R-log.det.Rt) 
        rm(Xj,R,Rt)
      }
    } ## end of conc loop
    for (i in 1:n.measures) rownames(conc[[i]]) <- colnames(conc[[i]]) <- lab
    names(conc) <- measure.names
  } ## end of pairwise
  conc ## 
} ## end of concurvity


residuals.gam <-function(object, type = "deviance",...)
## calculates residuals for gam object 
{ ## if family has its own residual function, then use that...
  if (!is.null(object$family$residuals)) {
    res <- object$family$residuals(object,type,...)
    res <- naresid(object$na.action,res)
    return(res)
  }
  type <- match.arg(type,c("deviance", "pearson","scaled.pearson", "working", "response"))
  #if (sum(type %in% c("deviance", "pearson","scaled.pearson", "working", "response") )==0) 
  #      stop(paste(type," residuals not available"))
  ## default computations...
  y <- object$y
  mu <- object$fitted.values
  wts <- object$prior.weights
  if (type == "working") { 
    res <- object$residuals 
  } else if (type == "response") {
    res <- y - mu 
  } else if (type == "deviance") {
    res <- object$family$dev.resids(y,mu,wts)
    s <- attr(res,"sign")
    if (is.null(s)) s <- sign(y-mu)
    res <- sqrt(pmax(res,0)) * s 
  } else { ## some sort of Pearson
    var <- object$family$variance
    if (is.null(var)) {
      warning("Pearson residuals not available for this family - returning deviance residuals")
      return(residuals.gam(object))
    }
    res <- (y-mu)*sqrt(wts)/sqrt(var(mu))
    if (type == "scaled.pearson") res <- res/sqrt(object$sig2)
  }
  res <- naresid(object$na.action,res)
  res
} ## residuals.gam
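## Usage sketch (hypothetical fitted gam 'b'):
##   residuals(b)                    ## deviance residuals (the default)
##   residuals(b, type = "pearson")  ## (y - mu) * sqrt(w) / sqrt(V(mu))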


## Start of anova and summary (with contributions from Henric Nilsson) ....

psum.chisq <- function(q,lb,df=rep(1,length(lb)),nc=rep(0,length(lb)),sigz=0,
                            lower.tail=FALSE,tol=2e-5,nlim=100000,trace=FALSE) {
## compute Pr(\sum_j lb[j] X_j + sigz Z > q) (or the lower tail probability if lower.tail=TRUE)
## where X_j ~ chisq(df[j],nc[j]), Z~N(0,1) and nc is a vector of non-centrality parameters.
## lb can be either sign. df should be integer. 
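## Sanity-check sketch (illustrative, not from the original): with unit weights the sum is an
## ordinary chi-squared, so psum.chisq(q,lb=1) should agree with pchisq(q,df=1,lower.tail=FALSE)
## and psum.chisq(q,lb=c(1,1)) with pchisq(q,df=2,lower.tail=FALSE).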
  p <- q
  r <- length(lb)
  if (length(df)==1) df <- rep(df,r)
  if (length(nc)==1) nc <- rep(nc,r)
  if (length(df)!=r||length(nc)!=r) stop("lengths of lb, df and nc must match")
  df <- round(df)
  if (any(df<1)) stop("df must be positive integers")
  if (all(lb==0)) stop("at least one element of lb must be non-zero")
  if (sigz<0) sigz <- 0
  for (i in 1:length(q)) {  
    oo <- .C(C_davies,as.double(lb),as.double(nc),as.integer(df),as.integer(r),as.double(sigz),
             c=as.double(q[i]),as.integer(nlim),as.double(tol),trace=as.double(rep(0,7)),
             ifault=as.integer(0))
    if (oo$ifault!=0) {
      if (oo$ifault==2) { 
        warning("danger of round-off error")
        p[i] <- if (lower.tail) oo$c else 1 - oo$c
      } else { 
        warning("failure of Davies method, falling back on Liu et al approximtion")
        p[i] <- if (all(nc==0)) liu2(q[i],lb,h=df) else NA
      }
    } else p[i] <- if (lower.tail) oo$c else 1 - oo$c
  } 
  if (trace) {
    attr(p,"trace") <- oo$trace
    attr(p,"ifault") <- oo$ifault
  }
  p
} ##psum.chisq

liu2 <- function(x, lambda, h = rep(1,length(lambda)),lower.tail=FALSE) {
# Evaluate Pr[sum_i \lambda_i \chi^2_{h_i} > x] approximately (lower tail if lower.tail=TRUE).
# Code adapted from CompQuadForm package of Pierre Lafaye de Micheaux 
# and directly from....
# H. Liu, Y. Tang, H.H. Zhang, A new chi-square approximation to the 
# distribution of non-negative definite quadratic forms in non-central 
# normal variables, Computational Statistics and Data Analysis, Volume 53, 
# (2009), 853-856. Actually, this is just Pearson (1959) given that
# the chi^2 variables are central. 
# Note that this can be rubbish in lower tail (e.g. lambda=c(1.2,.3), x = .15)
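# e.g. (worked check, not from the original) liu2(3,c(1,1)) reproduces
# pchisq(3,df=2,lower.tail=FALSE) exactly, since with equal weights the approximating
# chi-squared distribution coincides with the true one.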
  
#  if (FALSE) { ## use Davies exact method in place of Liu et al/ Pearson approx.
#    require(CompQuadForm)
#    r <- x
#    for (i in 1:length(x)) r[i] <- davies(x[i],lambda,h)$Qq
#    return(pmin(r,1))
#  }

  if (length(h) != length(lambda)) stop("lambda and h should have the same length!")
 
  lh <- lambda*h
  muQ <- sum(lh)
  
  lh <- lh*lambda
  c2 <- sum(lh)
  
  lh <- lh*lambda
  c3 <- sum(lh)
  
  xpos <- x > 0
  res <- 1 + 0 * x
  if (sum(xpos)==0 || c2 <= 0) return(res)

  s1 <- c3/c2^1.5
  s2 <- sum(lh*lambda)/c2^2

  sigQ <- sqrt(2*c2)

  t <- (x[xpos]-muQ)/sigQ

  if (s1^2>s2) {
    a <- 1/(s1-sqrt(s1^2-s2))
    delta <- s1*a^3-a^2
    l <- a^2-2*delta
  } else {
    a <- 1/s1
    delta <- 0
    if (c3==0) return(res)
    l <- c2^3/c3^2
  }

  muX <- l+delta
  sigX <- sqrt(2)*a
  res[xpos] <- pchisq(t*sigX+muX,df=l,ncp=delta,lower.tail=lower.tail)
  res
} ## liu2



simf <- function(x,a,df,nq=50) {
## suppose T = sum(a_i \chi^2_1)/(chi^2_df/df). We need
## Pr[T>x] = Pr(sum(a_i \chi^2_1) > x *chi^2_df/df). Quadrature 
## used here. So, e.g.
## 1-pf(4/3,3,40);simf(4,rep(1,3),40);1-pchisq(4,3)
## DEPRECATED in favour of psum.chisq...
  p <- (1:nq-.5)/nq
  q <- qchisq(p,df)
  x <- x*q/df
  pr <- sum(liu2(x,a)) ## Pearson/Liu approx to chi^2 mixture
  pr/nq 
}

packing.ind <- function(first,last,p,n) {
## Let A be a matrix with n rows, and B be a p by p matrix.
## A[first:last,first:last] is to be filled using B. If
## B is of the correct dimension then A[first:last,first:last] <- B
## but if p is a submultiple of (last-first+1) then the leading
## block diagonal of the block is to be filled repeatedly with
## B. In either case this routine returns an index ii such that
## A[ii] <- B fills the block appropriately
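## e.g. (worked illustration, not from the original)
##   A <- matrix(0,6,6); B <- matrix(1:4,2,2)
##   A[packing.ind(3,6,2,6)] <- B  ## fills A[3:4,3:4] and A[5:6,5:6] with B (B recycled)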
  if (last-first == p-1) {
    ii <- first:last
    return(rep(ii,p) + n*rep(ii-1,each=p))
  } else {
    k <- round((last-first+1)/p)
    if (k*p!=last-first+1) stop("incorrect sub-block size")
    ii <- rep(1:p,p) + n*rep(1:p-1,each=p)
    p2 <- p*p
    ind <- rep(0,p2*k)
    jj <- 1:(p2)
    for (i in 1:k) {
      bs <- first + (i-1)*p -1 
      ind[jj] <- ii + bs + bs*n
      jj <- jj + p2
    }
    return(ind)
  }
} ## packing.ind

recov <- function(b,re=rep(0,0),m=0) {
## b is a fitted gam object. re is an array of indices of 
## smooth terms to be treated as fully random....
## Returns frequentist Cov matrix based on the given
## mapping from data to params, but with dist of data
## corresponding to that implied by treating terms indexed
## by re as random effects... (would be usual frequentist 
## if nothing treated as random)
## if m>0, then this indexes a term, not in re, whose
## unpenalized cov matrix is required, with the elements of re
## dropped.
  if (!inherits(b,"gam")) stop("recov works with fitted gam objects only") 
  if (is.null(b$full.sp)) sp <- b$sp else sp <- b$full.sp
  if (length(re)<1) { 
    if (m>0) {
      ## annoyingly, need total penalty  
      np <- length(coef(b))
      k <- 1;S1 <- matrix(0,np,np)
      for (i in 1:length(b$smooth)) { 
        ns <- length(b$smooth[[i]]$S)
        #ind <- b$smooth[[i]]$first.para:b$smooth[[i]]$last.para
        if (ns>0) {
          ii <- packing.ind(b$smooth[[i]]$first.para,b$smooth[[i]]$last.para,ncol(b$smooth[[i]]$S[[1]]),np) 
          for (j in 1:ns) {
            #S1[ind,ind] <- S1[ind,ind] + sp[k]*b$smooth[[i]]$S[[j]]
            S1[ii] <- S1[ii] + sp[k]*as.numeric(b$smooth[[i]]$S[[j]])
            k <- k + 1
          }
        }
      }
      LRB <- rbind(b$R,t(mroot(S1)))
      ii <- b$smooth[[m]]$first.para:b$smooth[[m]]$last.para 
      ## ii is cols of LRB related to smooth m, which need 
      ## to be moved to the end...
      LRB <- cbind(LRB[,-ii],LRB[,ii])
      ii <- (ncol(LRB)-length(ii)+1):ncol(LRB)
      Rm <- qr.R(qr(LRB,tol=0,LAPACK=FALSE))[ii,ii] ## unpivoted QR
    } else Rm <- NULL
    return(list(Ve=(t(b$Ve)+b$Ve)*.5,Rm=Rm)) 
  }

  if (m%in%re) stop("m can't be in re")
  ## partition R into R1 ("fixed") and R2 ("random"), with S1 and S2
  p <- length(b$coefficients)
  rind <- rep(FALSE,p) ## random coefficient index
  for (i in 1:length(re)) {
    rind[b$smooth[[re[i]]]$first.para:b$smooth[[re[i]]]$last.para] <- TRUE
  }
  p2 <- sum(rind) ## number random
  p1 <- p - p2 ## number fixed
  map <- rep(0,p) ## remaps param indices to indices in split version
  map[rind] <- 1:p2 ## random
  map[!rind] <- 1:p1 ## fixed
  
  ## split R...
  R1 <- b$R[,!rind]  ## fixed effect columns
  R2 <- b$R[,rind]   ## random effect columns
  ## ever since I've known you, I've had a problem,

  ## assemble S1 and S2
  S1 <- matrix(0,p1,p1);S2 <- matrix(0,p2,p2)
 
  k <- 1
  for (i in 1:length(b$smooth)) { 
    ns <- length(b$smooth[[i]]$S)
    #ind <- map[b$smooth[[i]]$first.para:b$smooth[[i]]$last.para]
    is.random <- i%in%re
    if (ns>0) { 
     ii <- packing.ind(map[b$smooth[[i]]$first.para],map[b$smooth[[i]]$last.para],ncol(b$smooth[[i]]$S[[1]]),if (is.random) p2 else p1)
     for (j in 1:ns) {
        #if (is.random) S2[ind,ind] <- S2[ind,ind] +  sp[k]*b$smooth[[i]]$S[[j]] else
        #   S1[ind,ind] <- S1[ind,ind] + sp[k]*b$smooth[[i]]$S[[j]]
        if (is.random) S2[ii] <- S2[ii] +  sp[k]*as.numeric(b$smooth[[i]]$S[[j]]) else
           S1[ii] <- S1[ii] + sp[k]*as.numeric(b$smooth[[i]]$S[[j]])
        k <- k + 1
      }
    }
  }
  ## pseudoinvert S2
  if (nrow(S2)==1) {
    S2[1,1] <- 1/sqrt(S2[1,1])
  } else if (max(abs(diag(diag(S2))-S2))==0) {
    ds2 <- diag(S2)
    ind <- ds2 > max(ds2)*.Machine$double.eps^.8
    ds2[ind] <- 1/ds2[ind];ds2[!ind] <- 0
    diag(S2) <- sqrt(ds2)
  } else {
    ev <- eigen((S2+t(S2))/2,symmetric=TRUE)
    ind <- ev$values > max(ev$values)*.Machine$double.eps^.8
    ev$values[ind] <- 1/ev$values[ind];ev$values[!ind] <- 0 
    ## S2 <- ev$vectors%*%(ev$values*t(ev$vectors))
    S2 <- sqrt(ev$values)*t(ev$vectors)
  }
  ## choleski of cov matrix....
  ## L <- chol(diag(p)+R2%*%S2%*%t(R2)) ## L'L = I + R2 S2^- R2'
  L <- chol(diag(p) + crossprod(S2%*%t(R2)))  

  ## now we need the square root of the unpenalized
  ## cov matrix for m
  if (m>0) {
      ## llr version
      LRB <- rbind(L%*%R1,t(mroot(S1)))
      ii <- map[b$smooth[[m]]$first.para:b$smooth[[m]]$last.para] 
      ## ii is cols of LRB related to smooth m, which need 
      ## to be moved to the end...
      LRB <- cbind(LRB[,-ii],LRB[,ii])
      ii <- (ncol(LRB)-length(ii)+1):ncol(LRB) ## need to pick up final block
      Rm <- qr.R(qr(LRB,tol=0,LAPACK=FALSE))[ii,ii,drop=FALSE] ## unpivoted QR
  } else Rm <- NULL

  list(Ve= crossprod(L%*%b$R%*%b$Vp)/b$sig2, ## Frequentist cov matrix
       Rm=Rm)
 # mapi <- (1:p)[!rind] ## indexes mapi[j] is index of total coef vector to which jth row/col of Vb/e relates
  
} ## end of recov


reTest <- function(b,m) {
## Test the mth smooth for equality to zero
## and accounting for all random effects in model 
  
  ## check that smooth penalty matrices are full size.  
  ## e.g. "fs" type smooths estimated by gamm do not 
  ## have full sized S matrices, and we can't compute 
  ## p-values here - actually we can, see recov!
  #if (ncol(b$smooth[[m]]$S[[1]]) != b$smooth[[m]]$last.para-b$smooth[[m]]$first.para+1) {
  #  return(list(stat=NA,pval=NA,rank=NA)) 
  #}

  ## find indices of random effects other than m
  rind <- rep(0,0)
  for (i in 1:length(b$smooth)) if (!is.null(b$smooth[[i]]$random)&&b$smooth[[i]]$random&&i!=m) rind <- c(rind,i)
  ## get frequentist cov matrix of effects treating smooth terms in rind as random
  rc <- recov(b,rind,m) 
  Ve <- rc$Ve
  ind <- b$smooth[[m]]$first.para:b$smooth[[m]]$last.para
  B <- mroot(Ve[ind,ind,drop=FALSE]) ## BB'=Ve
 
  Rm <- rc$Rm
  
  b.hat <- coef(b)[ind]
  d <- Rm%*%b.hat
  stat <- sum(d^2)/b$sig2
  ev <- eigen(crossprod(Rm%*%B)/b$sig2,symmetric=TRUE,only.values=TRUE)$values
  ev[ev<0] <- 0
  rank <- sum(ev>max(ev)*.Machine$double.eps^.8)
  
  if (b$scale.estimated) {
    #pval <- simf(stat,ev,b$df.residual)
    k <- max(1,round(b$df.residual))
    pval <- psum.chisq(0,c(ev,-stat/k),df=c(rep(1,length(ev)),k))
  } else { 
    #pval <- liu2(stat,ev)
    pval <- psum.chisq(stat,ev) 
  }
  list(stat=stat,pval=pval,rank=rank)
} ## end reTest



testStat <- function(p,X,V,rank=NULL,type=0,res.df= -1) {
## Implements Wood (2013) Biometrika 100(1), 221-228
## The type argument specifies the type of truncation to use.
## on entry `rank' should be an edf estimate
## 0. Default using the fractionally truncated pinv.
## 1. Round down to k if k<= rank < k+0.05, otherwise up.
## res.df is residual dof used to estimate scale. <=0 implies
## fixed scale.
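## In outline (a summary of the approach, not verbatim from the reference): writing f = X p and
## V_f = X V X', the statistic is d = f' V_f^{r-} f, where V_f^{r-} is a rank r (possibly
## fractionally truncated) pseudoinverse and r is the supplied edf; d is then referred to the
## implied chi-squared mixture (or F, when res.df > 0) reference distribution.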

  qrx <- qr(X,tol=0)
  R <- qr.R(qrx)
  V <- R%*%V[qrx$pivot,qrx$pivot,drop=FALSE]%*%t(R)
  V <- (V + t(V))/2
  ed <- eigen(V,symmetric=TRUE)
  ## remove possible ambiguity from statistic...
  siv <- sign(ed$vectors[1,]);siv[siv==0] <- 1
  ed$vectors <- sweep(ed$vectors,2,siv,"*")

  k <- max(0,floor(rank)) 
  nu <- abs(rank - k)     ## fractional part of supplied edf
  if (type==1) { ## round up if more than .05 above lower
    if (rank > k + .05||k==0) k <- k + 1
    nu <- 0;rank <- k
  }

  if (nu>0) k1 <- k+1 else k1 <- k

  ## check that actual rank is not below supplied rank+1
  r.est <- sum(ed$values > max(ed$values)*.Machine$double.eps^.9)
  if (r.est<k1) {k1 <- k <- r.est;nu <- 0;rank <- r.est}

  ## Get the eigenvectors...
  # vec <- qr.qy(qrx,rbind(ed$vectors,matrix(0,nrow(X)-ncol(X),ncol(X))))
  vec <- ed$vectors
  if (k1<ncol(vec)) vec <- vec[,1:k1,drop=FALSE]

  ## deal with the fractional part of the pinv...
  if (nu>0&&k>0) {
     if (k>1) vec[,1:(k-1)] <- t(t(vec[,1:(k-1)])/sqrt(ed$val[1:(k-1)]))
     b12 <- .5*nu*(1-nu)
     if (b12<0) b12 <- 0
     b12 <- sqrt(b12)
     B <- matrix(c(1,b12,b12,nu),2,2)
     ev <- diag(ed$values[k:k1]^-.5,nrow=k1-k+1)
     B <- ev%*%B%*%ev
     eb <- eigen(B,symmetric=TRUE)
     rB <- eb$vectors%*%diag(sqrt(eb$values))%*%t(eb$vectors)
     vec1 <- vec
     vec1[,k:k1] <- t(rB%*%diag(c(-1,1))%*%t(vec[,k:k1]))
     vec[,k:k1] <- t(rB%*%t(vec[,k:k1]))
  } else {
    vec1 <- vec <- if (k==0) t(t(vec)*sqrt(1/ed$val[1])) else
            t(t(vec)/sqrt(ed$val[1:k]))
    if (k==1) rank <- 1
  }
  ## there is an ambiguity in the choice of test statistic, leading to slight
  ## differences in the p-value computation depending on which of 2 alternatives 
  ## is arbitrarily selected. Following allows both to be computed and p-values
  ## averaged (can't average test stat as dist then unknown) 
  d <- t(vec)%*%(R%*%p)
  d <- sum(d^2) 
  d1 <- t(vec1)%*%(R%*%p)
  d1 <- sum(d1^2)
  ##d <- d1 ## uncomment to avoid averaging

  rank1 <- rank ## rank for lower tail pval computation below

  ## note that for <1 edf, d is not weighted by EDF, and is instead 
  ## simply referred to a chi-squared 1

  if (nu>0) { ## mixture of chi^2 ref dist
     if (k1==1) rank1 <- val <- 1 else { 
       val <- rep(1,k1) ##ed$val[1:k1]
       rp <- nu+1
       val[k] <- (rp + sqrt(rp*(2-rp)))/2
       val[k1] <- (rp - val[k])
     }
   
     if (res.df <= 0) pval <- (psum.chisq(d,val)+psum.chisq(d1,val))/2 else {  ## (liu2(d,val) + liu2(d1,val))/2 else
       k0 <- max(1,round(res.df))
       pval <- (psum.chisq(0,c(val,-d/k0),df=c(rep(1,length(val)),k0)) + psum.chisq(0,c(val,-d1/k0),df=c(rep(1,length(val)),k0)) )/2 
       #pval <- (simf(d,val,res.df) + simf(d1,val,res.df))/2
     }
  } else { pval <- 2 }
  ## integer case still needs computing, 
  ## OLD: also liu/pearson approx only good in 
  ## upper tail. In lower tail, 2 moment approximation is better (Can check this 
  ## by simply plotting the whole interesting range as a contour plot!)
  ##if (pval > .5) 
  if (pval > 1) { 
    if (res.df <= 0) pval <- (pchisq(d,df=rank1,lower.tail=FALSE)+pchisq(d1,df=rank1,lower.tail=FALSE))/2 else
    pval <- (pf(d/rank1,rank1,res.df,lower.tail=FALSE)+pf(d1/rank1,rank1,res.df,lower.tail=FALSE))/2
  }
  list(stat=d,pval=min(1,pval),rank=rank)
} ## end of testStat




summary.gam <- function (object, dispersion = NULL, freq = FALSE,re.test = TRUE, ...) {
## summary method for gam object - provides approximate p values 
## for terms + other diagnostics
## Improved by Henric Nilsson
## * freq determines whether a frequentist or Bayesian cov matrix is 
##   used for parametric terms. Usually the default (FALSE) will result
##   in reasonable results with paraPen.
## If a smooth has a field 'random' and it is set to TRUE then 
## it is treated as a random effect for some p-value dist calcs 
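## Usage sketch (hypothetical fit 'b'): summary(b) prints the parametric coefficient table and
## the approximate significance table for smooths; summary(b)$s.table returns the latter directly.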


  pinv<-function(V,M,rank.tol=1e-6) {
  ## a local pseudoinverse function
    D <- eigen(V,symmetric=TRUE)
    M1<-length(D$values[D$values>rank.tol*D$values[1]])
    if (M>M1) M<-M1 # avoid problems with zero eigen-values
  
    if (M+1<=length(D$values)) D$values[(M+1):length(D$values)]<-1
    D$values<- 1/D$values
    if (M+1<=length(D$values)) D$values[(M+1):length(D$values)]<-0
    res <- D$vectors%*%(D$values*t(D$vectors))  ##D$u%*%diag(D$d)%*%D$v
    attr(res,"rank") <- M
    res
  } ## end of pinv
  
  if (is.null(object$R)) { ## Factor from QR decomp of sqrt(W)X
    warning("p-values for any terms that can be penalized to zero will be unreliable: refit model to fix this.")
    useR <- FALSE
  } else useR <- TRUE

  p.table <- pTerms.table <- s.table <- NULL

  if (freq) covmat <- object$Ve else covmat <- object$Vp
  name <- names(object$edf)
  dimnames(covmat) <- list(name, name)
  covmat.unscaled <- covmat/object$sig2
  est.disp <- object$scale.estimated
  if (!is.null(dispersion)) { 
    covmat <- dispersion * covmat.unscaled
    object$Ve <- object$Ve*dispersion/object$sig2 ## freq
    object$Vp <- object$Vp*dispersion/object$sig2 ## Bayes
    est.disp <- FALSE
  } else dispersion <- object$sig2
 

  ## Now the individual parametric coefficient p-values...

  se <- diag(covmat)^0.5
  residual.df<-length(object$y)-sum(object$edf)
  if (sum(object$nsdf) > 0) { # individual parameters
    if (length(object$nsdf)>1) { ## several linear predictors 
      pstart <- attr(object$nsdf,"pstart")
      ind <- rep(0,0)
      for (i in 1:length(object$nsdf)) if (object$nsdf[i]>0) ind <- 
          c(ind,pstart[i]:(pstart[i]+object$nsdf[i]-1))
    } else { pstart <- 1;ind <- 1:object$nsdf} ## only one lp
    p.coeff <- object$coefficients[ind]
    p.se <- se[ind]
    p.t<-p.coeff/p.se
    if (!est.disp) {
      p.pv <- 2*pnorm(abs(p.t),lower.tail=FALSE)
      p.table <- cbind(p.coeff, p.se, p.t, p.pv)   
      dimnames(p.table) <- list(names(p.coeff), c("Estimate", "Std. Error", "z value", "Pr(>|z|)"))
    } else {
      p.pv <- 2*pt(abs(p.t),df=residual.df,lower.tail=FALSE)
      p.table <- cbind(p.coeff, p.se, p.t, p.pv)
      dimnames(p.table) <- list(names(p.coeff), c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
    }    
  } else {p.coeff <- p.t <- p.pv <- array(0,0)}

  ## Next the p-values for parametric terms, so that factors are treated whole... 
  
  pterms <- if (is.list(object$pterms)) object$pterms else list(object$pterms)
  if (!is.list(object$assign)) object$assign <- list(object$assign)
  npt <- length(unlist(lapply(pterms,attr,"term.labels")))
  if (npt>0)  pTerms.df <- pTerms.chi.sq <- pTerms.pv <- array(0,npt)
  term.labels <- rep("",0)
  k <- 0 ## total term counter
  for (j in 1:length(pterms)) {
    tlj <- attr(pterms[[j]],"term.labels") 
    nt <- length(tlj)
    if (j>1 && nt>0) tlj <- paste(tlj,j-1,sep=".")
    term.labels <- c(term.labels,tlj)
    if (nt>0) { # individual parametric terms
      np <- length(object$assign[[j]])
      ind <- pstart[j] - 1 + 1:np 
      Vb <- covmat[ind,ind,drop=FALSE]
      bp <- array(object$coefficients[ind],np)
    
      for (i in 1:nt) { 
        k <- k + 1
        ind <- object$assign[[j]]==i
        b <- bp[ind];V <- Vb[ind,ind]
        ## pseudo-inverse needed in case of truncation of parametric space 
        if (length(b)==1) { 
          V <- 1/V 
          pTerms.df[k] <- nb <- 1      
          pTerms.chi.sq[k] <- V*b*b
        } else {
          V <- pinv(V,length(b),rank.tol=.Machine$double.eps^.5)
          pTerms.df[k] <- nb <- attr(V,"rank")      
          pTerms.chi.sq[k] <- t(b)%*%V%*%b
        }
        if (!est.disp)
        pTerms.pv[k] <- pchisq(pTerms.chi.sq[k],df=nb,lower.tail=FALSE)
        else
        pTerms.pv[k] <- pf(pTerms.chi.sq[k]/nb,df1=nb,df2=residual.df,lower.tail=FALSE)      
      } ## for (i in 1:nt)
    } ## if (nt>0)
  }

  if (npt) {
    attr(pTerms.pv,"names") <- term.labels
    if (!est.disp) {      
      pTerms.table <- cbind(pTerms.df, pTerms.chi.sq, pTerms.pv)   
      dimnames(pTerms.table) <- list(term.labels, c("df", "Chi.sq", "p-value"))
    } else {
      pTerms.table <- cbind(pTerms.df, pTerms.chi.sq/pTerms.df, pTerms.pv)   
      dimnames(pTerms.table) <- list(term.labels, c("df", "F", "p-value"))
    }
  } else { pTerms.df<-pTerms.chi.sq<-pTerms.pv<-array(0,0)}

  ## Now deal with the smooth terms....

  m <- length(object$smooth) # number of smooth terms
  
  df <- edf1 <- edf <- s.pv <- chi.sq <- array(0, m)
  if (m>0) { # form test statistics for each smooth
    ## Bayesian p-values required 
    if (useR)  X <- object$R else {
      sub.samp <- max(1000,2*length(object$coefficients)) 
      if (nrow(object$model)>sub.samp) { ## subsample to get X for p-values calc.
        kind <- temp.seed(11)
        #seed <- try(get(".Random.seed",envir=.GlobalEnv),silent=TRUE) ## store RNG seed
        #if (inherits(seed,"try-error")) {
        #  runif(1)
        #  seed <- get(".Random.seed",envir=.GlobalEnv)
        #}
        #kind <- RNGkind(NULL)
        #RNGkind("default","default")
        #set.seed(11) ## ensure repeatability
        ind <- sample(1:nrow(object$model),sub.samp,replace=FALSE)  ## sample these rows from X
        X <- predict(object,object$model[ind,],type="lpmatrix")
        #RNGkind(kind[1],kind[2])
        #assign(".Random.seed",seed,envir=.GlobalEnv) ## RNG behaves as if it had not been used
        temp.seed(kind)
      } else { ## don't need to subsample 
        X <- model.matrix(object)
      }
      X <- X[!is.na(rowSums(X)),] ## exclude NA's (possible under na.exclude)    
    } ## end of else branch (useR==FALSE): X constructed from model frame
    ii <- 0
    for (i in 1:m) { ## loop through smooths

      start <- object$smooth[[i]]$first.para;stop <- object$smooth[[i]]$last.para

      V <- object$Vp[start:stop,start:stop,drop=FALSE] ## Bayesian
      
      p <- object$coefficients[start:stop]  # params for smooth
      edf1i <- edfi <- sum(object$edf[start:stop]) # edf for this smooth
      ## extract alternative edf estimate for this smooth, if possible...
      if (!is.null(object$edf1)) edf1i <-  sum(object$edf1[start:stop])
      Xt <- X[,start:stop,drop=FALSE]  
      fx <- if (inherits(object$smooth[[i]],"tensor.smooth")&&
                !is.null(object$smooth[[i]]$fx)) all(object$smooth[[i]]$fx) else object$smooth[[i]]$fixed
      if (!fx&&object$smooth[[i]]$null.space.dim==0&&!is.null(object$R)) { ## random effect or fully penalized term
        res <- if (re.test) reTest(object,i) else NULL
      } else { ## Inverted Nychka interval statistics
       
        if (est.disp) rdf <- residual.df else rdf <- -1
        res <- testStat(p,Xt,V,min(ncol(Xt),edf1i),type=0,res.df = rdf)
      }
      if (!is.null(res)) {
        ii <- ii + 1
        df[ii] <- res$rank
        chi.sq[ii] <- res$stat
        s.pv[ii] <- res$pval 
        edf1[ii] <- edf1i 
        edf[ii] <- edfi 
        names(chi.sq)[ii]<- object$smooth[[i]]$label
      }
    } 
    if (ii==0) df <- edf1 <- edf <- s.pv <- chi.sq <- array(0, 0) else {
      df <- df[1:ii];chi.sq <- chi.sq[1:ii];edf1 <- edf1[1:ii]
      edf <- edf[1:ii];s.pv <- s.pv[1:ii]
    }
    if (!est.disp) {
      s.table <- cbind(edf, df, chi.sq, s.pv)      
      dimnames(s.table) <- list(names(chi.sq), c("edf", "Ref.df", "Chi.sq", "p-value"))
    } else {
      s.table <- cbind(edf, df, chi.sq/df, s.pv)      
      dimnames(s.table) <- list(names(chi.sq), c("edf", "Ref.df", "F", "p-value"))
    }
  }
  w <- as.numeric(object$prior.weights)
  mean.y <- sum(w*object$y)/sum(w)
  w <- sqrt(w)
  nobs <- nrow(object$model)
  r.sq <- if (inherits(object$family,"general.family")||!is.null(object$family$no.r.sq)) NULL else  
          1 - var(w*(as.numeric(object$y)-object$fitted.values))*(nobs-1)/(var(w*(as.numeric(object$y)-mean.y))*residual.df) 
  dev.expl<-(object$null.deviance-object$deviance)/object$null.deviance
  if (object$method%in%c("REML","ML")) object$method <- paste("-",object$method,sep="")
  ret<-list(p.coeff=p.coeff,se=se,p.t=p.t,p.pv=p.pv,residual.df=residual.df,m=m,chi.sq=chi.sq,
       s.pv=s.pv,scale=dispersion,r.sq=r.sq,family=object$family,formula=object$formula,n=nobs,
       dev.expl=dev.expl,edf=edf,dispersion=dispersion,pTerms.pv=pTerms.pv,pTerms.chi.sq=pTerms.chi.sq,
       pTerms.df = pTerms.df, cov.unscaled = covmat.unscaled, cov.scaled = covmat, p.table = p.table,
       pTerms.table = pTerms.table, s.table = s.table,method=object$method,sp.criterion=object$gcv.ubre,
       rank=object$rank,np=length(object$coefficients))
 
  class(ret)<-"summary.gam"
  ret
} ## end summary.gam
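## Illustrative usage sketch only (not package code): the simulated data and object
## names below are hypothetical, chosen to show how the tables assembled above are
## typically obtained...
## set.seed(2); dat <- gamSim(1,n=200,dist="normal",scale=2)
## b <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),data=dat)
## summary(b)            ## parametric and smooth term tables as built above
## summary(b,freq=TRUE)  ## use frequentist Ve in place of Bayesian Vp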

print.summary.gam <- function(x, digits = max(3, getOption("digits") - 3), 
                              signif.stars = getOption("show.signif.stars"), ...)
# print method for gam summary method. Improved by Henric Nilsson
{ print(x$family)
  cat("Formula:\n") 

  if (is.list(x$formula)) for (i in 1:length(x$formula)) print(x$formula[[i]]) else
     print(x$formula)

  if (length(x$p.coeff)>0)
  { cat("\nParametric coefficients:\n")
    printCoefmat(x$p.table, digits = digits, signif.stars = signif.stars, na.print = "NA", ...)
  }
  cat("\n")
  if(x$m>0)
  { cat("Approximate significance of smooth terms:\n")
    printCoefmat(x$s.table, digits = digits, signif.stars = signif.stars, has.Pvalue = TRUE, na.print = "NA",cs.ind=1, ...)
  }
  cat("\n")
  if (!is.null(x$rank) && x$rank< x$np) cat("Rank: ",x$rank,"/",x$np,"\n",sep="") 
  if (!is.null(x$r.sq)) cat("R-sq.(adj) = ",formatC(x$r.sq,digits=3,width=5),"  ")
  if (length(x$dev.expl)>0) cat("Deviance explained = ",formatC(x$dev.expl*100,digits=3,width=4),"%",sep="")
  cat("\n")
  if (!is.null(x$method)&&!(x$method%in%c("PQL","lme.ML","lme.REML")))  
    cat(x$method," = ",formatC(x$sp.criterion,digits=5),sep="")
 
  cat("  Scale est. = ",formatC(x$scale,digits=5,width=8,flag="-"),"  n = ",x$n,"\n",sep="")
  invisible(x)
} ## print.summary.gam


anova.gam <- function (object, ..., dispersion = NULL, test = NULL,  freq=FALSE)
# improved by Henric Nilsson
{   # adapted from anova.glm: R stats package
    dotargs <- list(...)
    named <- if (is.null(names(dotargs)))
        rep(FALSE, length(dotargs))
    else (names(dotargs) != "")
    if (any(named))
        warning("The following arguments to anova.glm(..) are invalid and dropped: ",
            paste(deparse(dotargs[named]), collapse = ", "))
    dotargs <- dotargs[!named]
    is.glm <- unlist(lapply(dotargs, function(x) inherits(x,
        "glm")))
    dotargs <- dotargs[is.glm]
    if (length(dotargs) > 0) {
      if (!is.null(test)&&!test%in%c("Chisq","LRT","F")) stop("un-supported test")
      ## check for multiple formulae to avoid problems...
      if (is.list(object$formula)) object$formula <- object$formula[[1]]
      ## reset df.residual to value appropriate for GLRT...
      n <- if (is.matrix(object$y)) nrow(object$y) else length(object$y)
      dfc <- if (is.null(object$edf2)) 0 else sum(object$edf2) - sum(object$edf) 
      object$df.residual <- n - sum(object$edf1) - dfc
      ## reset the deviance to -2*logLik for general families...
      if (inherits(object$family,"extended.family")) {  
        min.df <- object$df.residual
        disp <- if (is.null(dispersion)) object$sig2 else dispersion
        object$deviance <- -2 * as.numeric(logLik(object)) ## deviance returned is not always suitable for e.g. F test
        if (!is.null(test)) test <- "Chisq"
      }
      ## repeat above 3 steps for each element of dotargs...
      for (i in 1:length(dotargs)) {
        if (is.list(dotargs[[i]]$formula)) dotargs[[i]]$formula <- dotargs[[i]]$formula[[1]]
        dfc <- if (is.null(dotargs[[i]]$edf2)) 0 else sum(dotargs[[i]]$edf2) - sum(dotargs[[i]]$edf) 
        dotargs[[i]]$df.residual <- n - sum(dotargs[[i]]$edf1) - dfc
        if (inherits(dotargs[[i]]$family,"extended.family")) {
          if (dotargs[[i]]$df.residual < min.df) {
            min.df <- dotargs[[i]]$df.residual
            disp <- if (is.null(dispersion))  dotargs[[i]]$sig2 else dispersion
          }
          dotargs[[i]]$deviance <- -2 * as.numeric(logLik(dotargs[[i]]))
        } 
      } 
      if (inherits(object$family,"extended.family")) { ## multiply by dispersion as anova.glm will divide by it!
         object$deviance <- object$deviance * disp
         for (i in 1:length(dotargs)) dotargs[[i]]$deviance <- dotargs[[i]]$deviance * disp
      } 
      return(anova(structure(c(list(object), dotargs), class="glmlist"), 
            dispersion = dispersion, test = test))
    } 
    if (!is.null(test)) warning("test argument ignored")
    if (!inherits(object,"gam")) stop("anova.gam called with non gam object")
    sg <- summary(object, dispersion = dispersion, freq = freq)
    class(sg) <- "anova.gam"
    sg
} ## anova.gam
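## Hedged usage sketch (hypothetical simulated data as in the summary.gam example above):
## b0 <- gam(y~s(x0)+s(x1),data=dat,method="ML")
## b1 <- gam(y~s(x0)+s(x1)+s(x2),data=dat,method="ML")
## anova(b1)               ## single model: Wald-type tables via summary.gam
## anova(b0,b1,test="F")   ## two models: approximate GLRT via the anova.glm route above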


print.anova.gam <- function(x, digits = max(3, getOption("digits") - 3), ...)
{ # print method for class anova.gam resulting from single
  # gam model calls to anova. Improved by Henric Nilsson.
  print(x$family)
  cat("Formula:\n") 
  if (is.list(x$formula)) for (i in 1:length(x$formula)) print(x$formula[[i]]) else
  print(x$formula)
  if (length(x$pTerms.pv)>0)
  { cat("\nParametric Terms:\n")
    printCoefmat(x$pTerms.table, digits = digits, signif.stars = FALSE, has.Pvalue = TRUE, na.print = "NA", ...)
  }
  cat("\n")
  if(x$m>0)
  { cat("Approximate significance of smooth terms:\n")
    printCoefmat(x$s.table, digits = digits, signif.stars = FALSE, has.Pvalue = TRUE, na.print = "NA", ...)
  }
  invisible(x)
} ## print.anova.gam

## End of improved anova and summary code. 


pen.edf <- function(x) {
## obtains the edf associated with each penalty. That is the edf 
## of the group of coefficients penalized by each penalty.
## hard to interpret for overlapping penalties. brilliant for t2
## smooths!
  if (!inherits(x,"gam")) stop("not a gam object")
  if (length(x$smooth)==0) return(NULL)
  k <- 0 ## penalty counter
  edf <- rep(0,0)
  edf.name <- rep("",0)
  for (i in 1:length(x$smooth)) { ## work through smooths
    if (length(x$smooth[[i]]$S)>0) {
      pind <- x$smooth[[i]]$first.para:x$smooth[[i]]$last.para ## range of coefs relating to this term
      Snames <- names(x$smooth[[i]]$S)
      if (is.null(Snames)) Snames <- as.character(1:length(x$smooth[[i]]$S))
      if (length(Snames)==1) Snames <- ""
      for (j in 1:length(x$smooth[[i]]$S)) {
        ind <- rowSums(x$smooth[[i]]$S[[j]]!=0)!=0 ## index of penalized coefs (within pind)
        k <- k+1
        edf[k] <- sum(x$edf[pind[ind]]) 
        edf.name[k] <- paste(x$smooth[[i]]$label,Snames[j],sep="")
      }    
    }
  } ## finished all penalties
  names(edf) <- edf.name
  if (k==0) return(NULL)
  edf
} ## end of pen.edf
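## Sketch only (hypothetical data as above): a t2 smooth has one penalty per marginal
## component, so pen.edf splits its edf accordingly...
## b <- gam(y~t2(x0,x2),data=dat)
## pen.edf(b)  ## one edf entry per penalty of the t2 term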



cooks.distance.gam <- function(model,...)
{ res <- residuals(model,type="pearson")
  dispersion <- model$sig2
  hat <- model$hat
  p <- sum(model$edf)
  (res/(1 - hat))^2 * hat/(dispersion * p)
}
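## Sketch (hypothetical fitted gam 'b'): Cook's distances from Pearson residuals,
## leverages and total edf, exactly as computed above...
## cd <- cooks.distance(b)
## which(cd > 4/length(cd))  ## a common rough flagging rule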


sp.vcov <- function(x,edge.correct=TRUE,reg=1e-3) {
## get cov matrix of smoothing parameters, if available
  if (!inherits(x,"gam")) stop("argument is not a gam object")
  if (x$method%in%c("ML","P-ML","REML","P-REML","fREML")&&!is.null(x$outer.info$hess)) {
    hess <- x$outer.info$hess
    p <- ncol(hess)
    if (edge.correct&&!is.null(attr(hess,"hess1"))) {
      V <- solve(attr(hess,"hess1")+diag(p)*reg) 
      attr(V,"lsp") <- attr(hess,"lsp1")
      return(V)
    } else return(solve(hess+diag(p)*reg))
  } else return(NULL)
}
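## Sketch (hypothetical data): the Hessian is only stored for (RE)ML type fits,
## so other methods return NULL...
## b <- gam(y~s(x0)+s(x1),data=dat,method="REML")
## V <- sp.vcov(b)               ## cov matrix of log smoothing parameters (+ log scale)
## if (!is.null(V)) sqrt(diag(V))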

gam.vcomp <- function(x,rescale=TRUE,conf.lev=.95) {
## Routine to convert smoothing parameters to variance components
## in a fitted `gam' object.
  if (!inherits(x,"gam")) stop("requires an object of class gam")
  if (!is.null(x$reml.scale)&&is.finite(x$reml.scale)) scale <- x$reml.scale else scale <- x$sig2
  if (length(x$sp)==0) return()
  if (rescale) { ## undo any rescaling of S[[i]] that may have been done
    m <- length(x$smooth)
    if (is.null(x$paraPen)) { 
      k <- 1;
      if (is.null(x$full.sp)) kf <- -1 else kf <- 1 ## place holder in full sp vector
    } else { ## don't rescale paraPen related stuff
      k <- sum(x$paraPen$sp<0)+1 ## count free sp's for paraPen
      if (is.null(x$full.sp)) kf <- -1 else kf <- length(x$paraPen$full.sp.names)+1
    }
    idx <- rep("",0) ## vector of ids used
    idxi <- rep(0,0) ## indexes ids in smooth list
    if (m>0) for (i in 1:m) { ## loop through all smooths
      if (!is.null(x$smooth[[i]]$id)) { ## smooth has an id
        if (x$smooth[[i]]$id%in%idx) { 
          ok <- FALSE ## id already dealt with --- ignore smooth
        } else {
          idx <- c(idx,x$smooth[[i]]$id) ## add id to id list
          idxi <- c(idxi,i) ## so smooth[[idxi[k]]] is prototype for idx[k]
          ok <- TRUE
        } 
      } else { ok <- TRUE} ## no id so proceed
      if (ok) { 
       if (length(x$smooth[[i]]$S.scale)!=length(x$smooth[[i]]$S))
         warning("S.scale vector doesn't match S list - please report to maintainer")
        for (j in 1:length(x$smooth[[i]]$S.scale)) {
          if (x$smooth[[i]]$sp[j]<0) { ## sp not supplied
            x$sp[k] <- x$sp[k] / x$smooth[[i]]$S.scale[j]
            k <- k + 1
            if (kf>0) {
              x$full.sp[kf] <- x$full.sp[kf] / x$smooth[[i]]$S.scale[j]
              kf <- kf + 1
            }
          } else { ## sp supplied
            x$full.sp[kf] <- x$full.sp[kf] / x$smooth[[i]]$S.scale[j]
            kf <- kf + 1
          } 
        }
      } else { ## this id already dealt with, but full.sp not scaled yet 
        ii <- idxi[idx%in%x$smooth[[i]]$id] ## smooth prototype
        for (j in 1:length(x$smooth[[ii]]$S.scale)) {
          x$full.sp[kf] <- x$full.sp[kf] / x$smooth[[ii]]$S.scale[j]
          kf <- kf + 1
        }
      } 
    } ## finished rescaling
  }
  ## variance components (original scale)
  vc <- c(scale/x$sp)
  names(vc) <- names(x$sp)

  if (is.null(x$full.sp)) vc.full <- NULL else { 
    vc.full <- c(scale/x$full.sp)
    names(vc.full) <- names(x$full.sp)
  }
  ## If a Hessian exists, get CI's for variance components...

  if (x$method%in%c("ML","P-ML","REML","P-REML","fREML")&&!is.null(x$outer.info$hess)) {
    if (is.null(x$family$n.theta)||x$family$n.theta<=0) H <- x$outer.info$hess ## the hessian w.r.t. log sps and log scale
    else {
      ind <- 1:x$family$n.theta
      H <- x$outer.info$hess[-ind,-ind,drop=FALSE]
    }

    if (ncol(H)>length(x$sp)) scale.est <- TRUE else scale.est <- FALSE
    
    ## get derivs of log sqrt var comps wrt log sp and log scale....
    J <- matrix(0,nrow(H),ncol(H)) 
    if (scale.est) { 
      diag(J) <- -.5 # -2
      J[,ncol(J)] <- .5 # 2
      vc <- c(vc,scale);names(vc) <- c(names(x$sp),"scale")
    } else {
      diag(J) <- -0.5 #-2
    }
    #H <- t(J)%*%H%*%J ## hessian of log sqrt variances
    eh <- eigen(H,symmetric=TRUE)
    ind <- eh$values>max(eh$values)*.Machine$double.eps^.75 ## index of non zero eigenvalues 
    rank <- sum(ind) ## rank of hessian
    iv <- eh$values*0;iv[ind] <- 1/eh$values[ind]
    V <- eh$vectors%*%(iv*t(eh$vectors)) ## cov matrix for log sp's (and log scale)
    V <- J%*%V%*%t(J) ## cov matrix for log sqrt variances
    lsd <- log(sqrt(vc)) ## log sqrt variances
    sd.lsd <- sqrt(diag(V))
    if (conf.lev<=0||conf.lev>=1) conf.lev <- 0.95
    crit <- qnorm(1-(1-conf.lev)/2)
    ll <- lsd - crit * sd.lsd
    ul <- lsd + crit * sd.lsd
    res <- cbind(exp(lsd),exp(ll),exp(ul))
    rownames(res) <- names(vc)
    colnames(res) <- c("std.dev","lower","upper")
    cat("\n")
    cat(paste("Standard deviations and",conf.lev,"confidence intervals:\n\n"))
    print(res)
    cat("\nRank: ");cat(rank);cat("/");cat(ncol(H));cat("\n")
    if (!is.null(vc.full)) { 
      cat("\nAll smooth components:\n")
      print(sqrt(vc.full))
      res <- list(all=sqrt(vc.full),vc=res)
    }
    invisible(res)
  } else {
    if (is.null(vc.full)) return(sqrt(vc)) else return(list(vc=sqrt(vc),all=sqrt(vc.full)))
  } 
} ## end of gam.vcomp
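## Sketch (hypothetical REML fit 'b' as in the sp.vcov example): smoothing parameters
## re-expressed as standard deviations, with Wald intervals on the log sd scale...
## gam.vcomp(b)               ## prints sd's and confidence intervals
## gam.vcomp(b,conf.lev=.99)  ## wider intervals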


gam.sandwich <- function(b,freq=FALSE) {
## computes sandwich estimator of variance
  B2 <- if (freq) 0 else b$Vp - b$Ve ## Bayes squared bias estimate
  X <- model.matrix(b)
  m <- nrow(X); m <- m/(m-sum(b$edf))
  if (inherits(b$family,"extended.family")) {
    if (inherits(b$family,"general.family")) {
       if (is.null(b$family$sandwich)) stop("no sandwich estimate available for this model")
       Vs <- m*b$Vp%*%b$family$sandwich(b$y,X,b$coefficients,b$prior.weights,b$family,offset=attr(X,"offset"))%*%b$Vp + B2
    } else {
      dd <- dDeta(b$y,b$fitted.values,b$prior.weights,b$family$getTheta(),b$family,deriv=0)
      Vs <- m*b$Vp%*%crossprod(0.5/b$sig2*dd$Deta*X)%*%b$Vp + B2 
    }
  } else { ## exponential family
    mu <- b$fitted.values
    w <- b$family$mu.eta(b$linear.predictors)*(b$y - mu)/(b$sig2*b$family$variance(mu))
    Vs <- m*b$Vp%*%crossprod(w*X)%*%b$Vp + B2
  }
  Vs
} ## gam.sandwich


vcov.gam <- function(object, sandwich=FALSE, freq = FALSE, dispersion = NULL,unconditional=FALSE, ...)
## supplied by Henric Nilsson <henric.nilsson@statisticon.se> 
{ if (sandwich) vc <- gam.sandwich(object,freq) else { 
    if (freq)
      vc <- object$Ve
    else { 
      vc <- if (unconditional&&!is.null(object$Vc)) object$Vc else object$Vp
    }
  }
  if (!is.null(dispersion))
    vc <- dispersion * vc / object$sig2
  name <- names(object$edf)
  dimnames(vc) <- list(name, name)
  vc
}
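## Sketch of the options above (hypothetical fit 'b'):
## vcov(b)                      ## Bayesian Vp (default)
## vcov(b,freq=TRUE)            ## frequentist Ve
## vcov(b,unconditional=TRUE)   ## smoothing parameter corrected Vc, if stored
## vcov(b,sandwich=TRUE)        ## sandwich estimate via gam.sandwich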




influence.gam <- function(model,...) { model$hat }




logLik.gam <- function (object,...)
{  # based on logLik.glm - is ordering of p correction right???
   # if (length(list(...)))
   #     warning("extra arguments discarded")
   
    ##fam <- family(object)$family
    sc.p <- as.numeric(object$scale.estimated)
    p <- sum(object$edf) + sc.p
    val <- p - object$aic/2
    #if (fam %in% c("gaussian", "Gamma", "inverse.gaussian","Tweedie"))
    #    p <- p + 1
    if (!is.null(object$edf2)) p <- sum(object$edf2) + sc.p
    np <- length(object$coefficients) + sc.p 
    if (p > np) p <- np 
    if (inherits(object$family,"extended.family")&&!is.null(object$family$n.theta)) p <- p + object$family$n.theta 
    attr(val, "df") <- p
    class(val) <- "logLik"
    val
} ## logLik.gam
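## Sketch (hypothetical fit 'b'): the "df" attribute set above is what AIC uses...
## logLik(b); AIC(b)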



# From here on is the code for magic.....

mroot <- function(A,rank=NULL,method="chol")
# finds the smallest square root of A, or the best approximate square root of 
# given rank. B is returned where BB'=A. A assumed non-negative definite. 
# Current methods "chol", "svd". "svd" is much slower, but much better at getting the 
# correct rank if it isn't known in advance. 
{ if (is.null(rank)) rank <- 0 
  if (!isTRUE(all.equal(A,t(A)))) stop("Supplied matrix not symmetric")
  if (method=="svd") { 
    um <- La.svd(A)
    if (sum(um$d!=sort(um$d,decreasing=TRUE))>0) 
    stop("singular values not returned in order")
    if (rank < 1) # have to work out rank
    { rank <- dim(A)[1]
      if (um$d[1]<=0) rank <- 1 else
      while (rank>0&&(um$d[rank]/um$d[1]<.Machine$double.eps||
                           all.equal(um$u[,rank],um$vt[rank,])!=TRUE)) rank<-rank-1 
      if (rank==0) stop("Something wrong - matrix probably not +ve semi definite")    
    }
    d<-um$d[1:rank]^0.5
    return(t(t(um$u[,1:rank])*as.vector(d))) # note recycling rule used for efficiency
  } else
  if (method=="chol") { 
    ## don't want to be warned it's not +ve def...
    L <- suppressWarnings(chol(A,pivot=TRUE,tol=0))
    piv <- order(attr(L,"pivot"))
    ## chol does not work as documented (reported), have to explicitly zero
    ## the trailing block...
    r <- attr(L,"rank")
    p <- ncol(L)
    if (r < p) L[(r+1):p,(r+1):p] <- 0
    if (rank < 1) rank <- r
    L <- L[,piv,drop=FALSE]; L <- t(L[1:rank,,drop=FALSE])
    return(L)
  } else
  stop("method not recognised.")
} ## mroot
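## Quick check sketch (hypothetical A): for a rank 3, 6x6 non-negative definite A,
## the returned B should have 3 columns and satisfy B%*%t(B)=A...
## A <- tcrossprod(matrix(rnorm(18),6,3))
## B <- mroot(A,method="svd"); ncol(B)   ## 3
## range(B%*%t(B)-A)                     ## ~ 0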



magic.post.proc <- function(X,object,w=NULL)
# routine to take list returned by magic and extract:
# Vb the estimated bayesian parameter covariance matrix. rV%*%t(rV)*scale
# Ve the frequentist parameter estimator covariance matrix.
# edf the array of estimated degrees of freedom per parameter Vb%*%t(X)%*%W%*%X /scale
# hat the leading diagonal of the hat/influence matrix 
# NOTE: W=diag(w) if w non-matrix, otherwise w is a matrix square root. 
# flop count is O(nq^2) if X is n by q... this is why routine not part of magic
{ ## V<-object$rV%*%t(object$rV)
  V <- tcrossprod(object$rV)
  if (!is.null(w)) 
  { if (is.matrix(w)) WX <- X <- w%*%X else 
    WX <- as.vector(w)*X # use recycling rule to form diag(w)%*%X cheaply 
    
  } else {WX <- X}
  ##if (nthreads <= 1) M <- WX%*%V else M <- pmmult(WX,V,tA=FALSE,tB=FALSE,nt=nthreads)
  M <- WX%*%V  ## O(np^2) part
  ##Ve <- (V%*%t(X))%*%M*object$scale # frequentist cov. matrix
  XWX <- crossprod(object$R) #t(X)%*%WX
  F <- Ve <- V%*%XWX
  edf1 <- rowSums(t(Ve)*Ve) ## this is diag(FF), where F is edf matrix
  Ve <- Ve%*%V*object$scale ## frequentist cov matrix
  B <- X*M
  rm(M)
  hat <- rowSums(B) #apply(B,1,sum) # diag(X%*%V%*%t(WX))
  edf <- colSums(B) #apply(B,2,sum) # diag(V%*%t(X)%*%WX)
  Vb <- V*object$scale;rm(V)
  list(Ve=Ve,Vb=Vb,hat=hat,edf=edf,edf1=2*edf-edf1,F=F)
} ## magic.post.proc
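## Sketch, following a call to magic (defined later in this file); y, X, S and off
## are hypothetical, set up as in the commented example after magic...
## mg <- magic(y,X,sp=-1,S=S,off=off)
## pp <- magic.post.proc(X,mg)   ## unweighted fit, so w left NULL
## sum(pp$edf); range(pp$hat)    ## total edf and leverages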

single.sp <- function(X,S,target=.5,tol=.Machine$double.eps*100)
## function to find smoothing parameter corresponding to particular 
## target e.d.f. for a single smoothing parameter problem. 
## X is model matrix; S is penalty matrix; target is target 
## average e.d.f. per penalized term.
{ R <- qr.R(qr(X)) ### BUG? pivoting?
  te <- try(RS <- backsolve(R,S,transpose=TRUE),silent=TRUE)
  if (inherits(te,"try-error")) return(-1)
  te <- try(RSR <- backsolve(R,t(RS),transpose=TRUE),silent=TRUE)
  if (inherits(te,"try-error")) return(-1)
  RSR <- (RSR+t(RSR))/2
  d <- eigen(RSR,symmetric=TRUE)$values
  d <- d[d>max(d)*tol]
  ff <- function(lambda,d,target) { 
    mean(1/(1+exp(lambda)*d))-target
  }
  lower <- 0
  while (ff(lower,d,target) <= 0) lower <- lower - 1
  upper <- lower
  while (ff(upper,d,target) > 0) upper <- upper + 1
  exp(uniroot(ff,c(lower,upper),d=d,target=target)$root)
}


initial.spg <- function(x,y,weights,family,S,rank,off,offset=NULL,L=NULL,lsp0=NULL,type=1,
                        start=NULL,mustart=NULL,etastart=NULL,E=NULL,...) {
## initial smoothing parameter values based on approximate matching 
## of Frob norm of XWX and S. If L is non null then it is assumed
## that the sps multiplying S elements are given by L%*%sp+lsp0 and 
## an appropriate regression step is used to find `sp' itself.
## This routine evaluates initial guesses at W.
  ## Get the initial weights...
  if (length(S)==0) return(rep(0,0))
  ## start <- etastart <- mustart <- NULL
  nobs <- nrow(x) ## ignore codetools warning - required for initialization
  if (is.null(mustart)) mukeep <- NULL else mukeep <- mustart 
  eval(family$initialize) 
  if (inherits(family,"general.family")) { ## Cox, gamlss etc...   
    lbb <- family$ll(y,x,start,weights,family,offset=offset,deriv=1)$lbb ## initial Hessian 
    ## initially work out the number of times that each coefficient is penalized
    pcount <- rep(0,ncol(lbb))
    for (i in 1:length(S)) {
      ind <- off[i]:(off[i]+ncol(S[[i]])-1)
      dlb <- -diag(lbb[ind,ind,drop=FALSE])
      indp <- rowSums(abs(S[[i]]))>max(S[[i]])*.Machine$double.eps^.75 & dlb!=0
      ind <- ind[indp] ## drop indices of unpenalized
      pcount[ind] <- pcount[ind] + 1 ## add up times penalized
    }

    lambda <- rep(0,length(S))
    ## choose lambda so that corresponding elements of lbb and S[[i]]
    ## are roughly in balance...
    for (i in 1:length(S)) {
      ind <- off[i]:(off[i]+ncol(S[[i]])-1)
      lami <- 1
      #dlb <- -diag(lbb[ind,ind])
      dlb <- abs(diag(lbb[ind,ind,drop=FALSE])) 
      dS <- diag(S[[i]])
      pc <- pcount[ind]
      ## get index of elements doing any actual penalization...
      ind <- rowSums(abs(S[[i]]))>max(S[[i]])*.Machine$double.eps^.75 & dlb!=0 ## dlb > 0
      ## drop elements that are not penalizing
      dlb <- dlb[ind]/pc[ind] ## idea is to share out between penalties
      dS <- dS[ind]
      rm <- max(length(dS)/rank[i],1) ## rough correction for rank deficiency in penalty
      #while (mean(dlb/(dlb + lami * dS * rm)) > 0.4) lami <- lami*5
      #while (mean(dlb/(dlb + lami * dS * rm )) < 0.4) lami <- lami/5 
      while (sqrt(mean(dlb/(dlb + lami * dS * rm))*mean(dlb)/mean(dlb+lami*dS*rm)) > 0.4) lami <- lami*5
      while (sqrt(mean(dlb/(dlb + lami * dS * rm))*mean(dlb)/mean(dlb+lami*dS*rm)) < 0.4) lami <- lami/5
      lambda[i] <- lami 
      ## norm(lbb[ind,ind])/norm(S[[i]])
    }
  } else { ## some sort of conventional regression
    if (is.null(mukeep)) {
      if (!is.null(start)) etastart <- drop(x%*%start)
      if (!is.null(etastart)) mustart <- family$linkinv(etastart)
    } else mustart <- mukeep
    if (inherits(family,"extended.family")) {
      theta <- family$getTheta()
      ## use 'as.numeric' - 'drop' can leave result as 1D array...
      Ddo <- family$Dd(y,mustart,theta,weights)
      mu.eta2 <-family$mu.eta(family$linkfun(mustart))^2 
      w <- .5 * as.numeric(Ddo$Dmu2 * mu.eta2)
      if (any(w<0)) w <- .5 * as.numeric(Ddo$EDmu2 * mu.eta2) 
      #w <- .5 * as.numeric(family$Dd(y,mustart,theta,weights)$EDmu2*family$mu.eta(family$linkfun(mustart))^2)  
    } else w <- as.numeric(weights*family$mu.eta(family$linkfun(mustart))^2/family$variance(mustart))
    w <- sqrt(w)
    if (type==1) { ## what PI would have used
      lambda <-  initial.sp(w*x,S,off)
    } else { ## balance frobenius norms
      csX <- colSums((w*x)^2) 
      lambda <- rep(0,length(S))
      for (i in 1:length(S)) {
        ind <- off[i]:(off[i]+ncol(S[[i]])-1)
        lambda[i] <- sum(csX[ind])/sqrt(sum(S[[i]]^2))
      }
    }
  }
  if (!is.null(L)) {
    lsp <- log(lambda)
    if (is.null(lsp0)) lsp0 <- rep(0,nrow(L))
    lsp <- as.numeric(coef(lm(lsp~L-1+offset(lsp0))))
    lambda <- exp(lsp)
  }

  lambda ## initial values

}

initial.sp <- function(X,S,off,expensive=FALSE,XX=FALSE)
# Find initial smoothing parameter guesstimates based on model matrix X 
# and penalty list S. off[i] is the index of the first parameter to
# which S[[i]] applies, since S[[i]]'s only store non-zero submatrix of 
# penalty coefficient matrix.
# if XX==TRUE then X contains X'X, not X!
{ n.p <- length(S) 
  if (XX) expensive <- FALSE 
  def.sp <- array(0,n.p)
  if (n.p) { 
    ldxx <- if (XX) diag(X) else colSums(X*X) # yields diag(t(X)%*%X)
    ldss <- ldxx*0       # storage for combined penalty l.d. 
    if (expensive) St <- matrix(0,ncol(X),ncol(X)) 
    pen <- rep(FALSE,length(ldxx)) # index of what actually gets penalized
    for (i in 1:n.p) { # loop over penalties
      maS <- max(abs(S[[i]])) 
      rsS <- rowMeans(abs(S[[i]]))
      csS <- colMeans(abs(S[[i]]))
      dS <- diag(abs(S[[i]])) ## new 1.8-4
      thresh <- .Machine$double.eps^.8 * maS ## .Machine$double.eps*maS*10
      ind <- rsS > thresh & csS > thresh & dS > thresh # only these columns really penalize
      ss <- diag(S[[i]])[ind] # non-zero elements of l.d. S[[i]]
      start <- off[i];finish <- start+ncol(S[[i]])-1
      xx <- ldxx[start:finish]
      xx <- xx[ind]
      pen[start:finish] <- pen[start:finish]|ind
      sizeXX <- mean(xx)
      sizeS <- mean(ss)
      if (sizeS <= 0) stop(gettextf("S[[%d]] matrix is not +ve definite.", i)) 
      def.sp[i] <- sizeXX/ sizeS # relative s.p. estimate
      ## accumulate leading diagonal of \sum sp[i]*S[[i]]
      ldss[start:finish] <- ldss[start:finish] + def.sp[i]*diag(S[[i]]) 
      
      if (expensive) St[start:finish,start:finish] <- 
                     St[start:finish,start:finish] + def.sp[i]*S[[i]]
    }
    if (expensive) { ## does full search for overall s.p.
      msp <- single.sp(X,St)           
      if (msp>0) def.sp <- def.sp*msp  
    } else {
      ind <- ldss > 0 & pen & ldxx > 0 # base following only on penalized terms
      ldxx<-ldxx[ind];ldss<-ldss[ind]
      while (mean(ldxx/(ldxx+ldss))>.4) { def.sp <- def.sp*10;ldss <- ldss*10 }
      while (mean(ldxx/(ldxx+ldss))<.4) { def.sp <- def.sp/10;ldss <- ldss/10 }
    }
  } 
  as.numeric(def.sp)
} ## initial.sp
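## Sketch (hypothetical X, S and off as in the magic example below):
## lam0 <- initial.sp(X,S,off)  ## rough lambdas balancing diag(X'X) against diag(S[[i]])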



 
magic <- function(y,X,sp,S,off,L=NULL,lsp0=NULL,rank=NULL,H=NULL,C=NULL,w=NULL,gamma=1,scale=1,gcv=TRUE,
                ridge.parameter=NULL,control=list(tol=1e-6,step.half=25,
                rank.tol=.Machine$double.eps^0.5),extra.rss=0,n.score=length(y),nthreads=1)
# Wrapper for C routine magic. Deals with constraints, weights and square roots of 
# penalties. 
# y is data vector, X is model matrix, sp is array of smoothing parameters,
# S is a list of penalty matrices, each stored as the smallest square submatrix
# containing all its non-zero entries, off[i] is the location on the leading diagonal of the
# total penalty matrix of element (1,1) of S[[i]], rank is an array of penalty 
# ranks, L is a matrix mapping the log underlying smoothing parameters to the 
# smoothing parameters that actually multiply the penalties. i.e. the 
# log smoothing parameters are L%*%sp + lsp0
# H is any fixed penalty, C is a linear constraint matrix and w is the 
# weight vector. gamma is the dof inflation factor, scale is the scale parameter, only 
# used with UBRE, gcv TRUE means use GCV, if false, use UBRE.  
# Return list includes rV such that cov(b)=rV%*%t(rV)*scale and the leading diagonal
# of rV%*%t(rV)%*%t(X)%*%X gives the edf for each parameter.
# NOTE: W is assumed to be square root of inverse of covariance matrix. i.e. if
# W=diag(w) RSS is ||W(y-Xb)||^2  
# If `ridge.parameter' is a positive number then it is assumed to be the multiplier
# for a ridge penalty to be applied during fitting. 
# `extra.rss' is an additive constant by which the RSS is modified in the
#  GCV/UBRE or scale calculations, n.score is the `n' to use in the GCV/UBRE
#  score calculations (useful for dealing with huge datasets).
{ if (is.null(control)) control <- list()
  if (is.null(control$tol)) control$tol <- 1e-6
  if (is.null(control$step.half)) control$step.half <- 25
  if (is.null(control$rank.tol)) control$rank.tol <- .Machine$double.eps^0.5

  n.p<-length(S)
  n.b<-dim(X)[2] # number of parameters
  # get initial estimates of smoothing parameters, using better method than is
  # built in to C code. This must be done before application of general 
  # constraints.
  if (n.p) def.sp <- initial.sp(X,S,off) else def.sp <- sp

  if (!is.null(L)) { ## have to estimate appropriate starting coefs
    if (!inherits(L,"matrix")) stop("L must be a matrix.")
    if (nrow(L)<ncol(L)) stop("L must have at least as many rows as columns.")
    if (nrow(L)!=n.p||ncol(L)!=length(sp)) stop("L has inconsistent dimensions.")
    if (is.null(lsp0)) lsp0 <- rep(0,nrow(L))
    if (ncol(L)) def.sp <- exp(as.numeric(coef(lm(log(def.sp)~L-1+offset(lsp0)))))
  }

  # get square roots of penalties using supplied ranks or estimated 
  if (n.p>0)
  { for (i in 1:n.p) 
    { if (is.null(rank)) B <- mroot(S[[i]],method="svd") 
      else B <- mroot(S[[i]],rank=rank[i],method="chol")
      m <- dim(B)[2]
      R<-matrix(0,n.b,m)
      R[off[i]:(off[i]+dim(B)[1]-1),]<-B
      S[[i]]<-R
    }
    rm(B);rm(R)
  }
  # if there are constraints then need to form null space of constraints Z 
  # (from final columns of Q, from QR=C'). Then form XZ and Z'S_i^0.5 for all i 
  # and Z'HZ.
  # On return from mgcv2 set parameters to Zb (apply Q to [0,b']').   
  ##Xo<-X
  if (!is.null(C)) # then impose constraints 
   { n.con<-dim(C)[1]
    ns.qr<-qr(t(C)) # last n.b-n.con columns of Q are the null space of C
    X<-t(qr.qty(ns.qr,t(X)))[,(n.con+1):n.b,drop=FALSE] # last n.b-n.con cols of XQ (=(Q'X')')
    # need to work through penalties forming Z'S_i^0.5 's
    if (n.p>0) for (i in 1:n.p) { 
      S[[i]]<-qr.qty(ns.qr,S[[i]])[(n.con+1):n.b,,drop=FALSE]
      ## following essential given assumptions of the C code...
      if (ncol(S[[i]])>nrow(S[[i]])) { ## no longer have a min col square root.
        S[[i]] <- t(qr.R(qr(t(S[[i]])))) ## better!
      }
    }
    # and Z'HZ too
    if (!is.null(H))
    { H<-qr.qty(ns.qr,H)[(n.con+1):n.b,,drop=FALSE] # Z'H
      H<-t(qr.qty(ns.qr,t(H))[(n.con+1):n.b,,drop=FALSE]) # Z'HZ = (Z'[Z'H]')' 
    }
    full.rank=n.b-n.con
  } else full.rank=n.b
  # now deal with weights....
  if (!is.null(w))
  { if (is.matrix(w))
    { if (dim(w)[1]!=dim(w)[2]||dim(w)[2]!=dim(X)[1]) stop("dimensions of supplied w wrong.")
      y<-w%*%y
      X<-w%*%X
    } else
    { if (length(y)!=length(w)) stop("w different length from y!")
      y<-y*w
      X<-as.vector(w)*X # use recycling rule to form diag(w)%*%X cheaply
    }
  }
  if (is.null(dim(X))) { # lost dimensions as result of being single columned! 
    n <- length(y)
    if (n!=length(X)) stop("X lost dimensions in magic!!")
    dim(X) <- c(n,1)
  }
  # call real mgcv engine...
  Si<-array(0,0);cS<-0
  if (n.p>0) for (i in 1:n.p) { 
    Si <- c(Si,S[[i]]);
    cS[i] <- dim(S[[i]])[2]
  }
  rdef <- ncol(X) - nrow(X)
  if (rdef>0) { ## need to zero pad model matrix
    n.score <- n.score ## force evaluation *before* y lengthened
    X <- rbind(X,matrix(0,rdef,ncol(X)))
    y <- c(y,rep(0,rdef))
  }

  icontrol<-as.integer(gcv);icontrol[2]<-length(y);q<-icontrol[3]<-dim(X)[2];
  if (!is.null(ridge.parameter)&&ridge.parameter>0)
  { if(is.null(H)) H<-diag(ridge.parameter,q) else H<-H+diag(ridge.parameter,q)}
  icontrol[4]<-as.integer(!is.null(H));icontrol[5]<- n.p;icontrol[6]<-control$step.half
  if (is.null(L)) { icontrol[7] <- -1;L <- diag(n.p) } else icontrol[7]<-ncol(L)
  if (is.null(lsp0)) lsp0 <- rep(0,nrow(L))
 
  b<-array(0,icontrol[3])
  # argument names in call refer to returned values.
  if (nthreads<1) nthreads <- 1 ## can't set up storage without knowing nthreads
  if (nthreads>1) extra.x <- q^2 * nthreads else extra.x <- 0 
  um<-.C(C_magic,as.double(y),X=as.double(c(X,rep(0,extra.x))),sp=as.double(sp),as.double(def.sp),
          as.double(Si),as.double(H),as.double(L),
          lsp0=as.double(lsp0),score=as.double(gamma),scale=as.double(scale),info=as.integer(icontrol),as.integer(cS),
          as.double(control$rank.tol),rms.grad=as.double(control$tol),b=as.double(b),rV=double(q*q),
          as.double(extra.rss),as.integer(n.score),as.integer(nthreads))
  res<-list(b=um$b,scale=um$scale,score=um$score,sp=um$sp,sp.full=as.numeric(exp(L%*%log(um$sp))))
  res$R <- matrix(um$X[1:q^2],q,q)
  res$rV<-matrix(um$rV[1:(um$info[1]*q)],q,um$info[1])
  gcv.info<-list(full.rank=full.rank,rank=um$info[1],fully.converged=as.logical(um$info[2]),
      hess.pos.def=as.logical(um$info[3]),iter=um$info[4],score.calls=um$info[5],rms.grad=um$rms.grad)
  res$gcv.info<-gcv.info
  if (!is.null(C)) { # need image of constrained parameter vector in full space
    b <- c(rep(0,n.con),res$b)
    res$b <- qr.qy(ns.qr,b) # Zb 
    b <- matrix(0,n.b,dim(res$rV)[2])
    b[(n.con+1):n.b,] <- res$rV 
    res$rV <- qr.qy(ns.qr,b)# ZrV
  } 
  res
} ## magic
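## Illustrative direct call, a sketch only (simulated data; smoothCon is used here just
## to build a model matrix and penalty - any equivalent X and S would do)...
## set.seed(0); n <- 200; x <- runif(n); y <- sin(2*pi*x)+rnorm(n)*0.3
## sm <- smoothCon(s(x,bs="cr",k=20),data=data.frame(x=x))[[1]]
## X <- sm$X; S <- list(sm$S[[1]]); off <- 1
## fit <- magic(y,X,sp=-1,S=S,off=off)   ## negative sp => auto-initialized, GCV selected
## fv <- X%*%fit$b; fit$sp; fit$scale    ## fitted values, selected sp, scale estimate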


print.mgcv.version <- function()
{ library(help=mgcv)$info[[1]] -> version
  version <- version[pmatch("Version",version)]
  um <- strsplit(version," ")[[1]]
  version <- um[nchar(um)>0][2]
  hello <- paste("This is mgcv ",version,". For overview type 'help(\"mgcv-package\")'.",sep="")
  packageStartupMessage(hello)
}

set.mgcv.options <- function()
## function used to set option values used in notLog
## and notExp...
{ ##runif(1) ## ensure there is a seed (can be removed by user!)
  options(mgcv.vc.logrange=25)
}

.onLoad <- function(...) {
  set.mgcv.options()
}

.onAttach <- function(...) { 
  print.mgcv.version()
  set.mgcv.options()
}

.onUnload <- function(libpath) library.dynam.unload("mgcv", libpath)



###############################################################################
### ISSUES.....
#
#* Could use R_CheckUserInterrupt() to allow user interrupt of
#  mgcv code. (6.12) But then what about memory?
#
#* predict.gam and plot.gam "iterms" and `seWithMean' options
#  don't deal properly with case in which centering constraints
#  are not conventional sum to zero ones.
#
# * add randomized residuals (see Mark B email)?
#
# * sort out all the different scale parameters floating around, and explain the 
#   sp variance link better.
