## R/mgcv.r

##  R routines for the package mgcv (c) Simon Wood 2000-2016
##  With contributions from Henric Nilsson

Rrank <- function(R,tol=.Machine$double.eps^.9) {
## Finds rank of upper triangular matrix R by estimating the condition
## number of the leading rank by rank block, and reducing rank until this is 
## acceptably low... assumes R is from a column pivoted decomposition
 m <- nrow(R)
 rank <- min(m,ncol(R))
 ok <- TRUE
  while (ok) {
    Rcond <- .C(C_R_cond,R=as.double(R),r=as.integer(m),c=as.integer(rank),
             work=as.double(rep(0,4*m)),Rcond=as.double(1))$Rcond
    if (Rcond*tol<1) ok <- FALSE else rank <- rank - 1  
  }
  rank
}
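## Illustrative check (not part of the package code; assumes mgcv loaded):
## X <- cbind(1,1:10,2*(1:10)) ## 3 columns, but rank 2
## R <- qr.R(qr(X,LAPACK=TRUE)) ## LAPACK QR is pivoted, as Rrank assumes
## Rrank(R) ## 2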
 
slanczos <- function(A,k=10,kl=-1,tol=.Machine$double.eps^.5,nt=1) {
## Computes truncated eigen decomposition of symmetric A by 
## Lanczos iteration. If kl < 0 then k largest magnitude
## eigenvalues returned, otherwise k highest and kl lowest.
## Eigenvectors are always returned too.
## set.seed(1);n <- 1000;A <- matrix(runif(n*n),n,n);A <- A+t(A);er <- slanczos(A,10)
## um <- eigen(A,symmetric=TRUE);ind <- c(1:5,(n-5+1):n)
## range(er$values-um$values[ind]);range(abs(er$vectors)-abs(um$vectors[,ind]))
## It seems that when k (or k+kl) is beyond 10-15% of n
## then you might as well use eigen(A,symmetric=TRUE), but the
## extra cost is the expensive accumulation of eigenvectors.
## Should re-write whole thing using LAPACK routines for eigenvectors. 
   if (tol<=0||tol>.01) stop("silly tolerance supplied")
   k <- round(k);kl <- round(kl)
   if (k<0) stop("argument k must be non-negative.")
   m <- k + max(0,kl) 
   n <- nrow(A) 
   if (m<1) return(list(values=rep(0,0),vectors=matrix(0,n,0),iter=0)) 
   if (n != ncol(A)) stop("A not square")
   if (m>n) stop("Can not have more eigenvalues than nrow(A)")
   oo <- .C(C_Rlanczos,A=as.double(A),U=as.double(rep(0,n*m)),D=as.double(rep(0,m)),
            n=as.integer(n),m=as.integer(k),ml=as.integer(kl),tol=as.double(tol),nt=as.integer(nt))
   list(values = oo$D,vectors = matrix(oo$U,n,m),iter=oo$n) 
}

rig <- function(n,mean,scale) {
## Inverse Gaussian deviates generated by Algorithm 5.7 of
## Gentle (2003). scale = 1/lambda. 
  if (length(n)>1) n <- length(n)
  x <- y <- rnorm(n)^2
  mys <- mean*scale*y
  mu <- 0*y + mean ## y is there to ensure mu is a vector
  mu2 <- mu^2;
  ind <- mys < .Machine$double.eps^-.5 ## cut off for tail computation
  x[ind] <- mu[ind]*(1 + 0.5*(mys[ind] - sqrt(mys[ind]*4+mys[ind]^2)))
  x[!ind] <- mu[!ind]/mys[!ind] ## tail term (derived from Taylor of sqrt(1+eps) etc)
  #my <- mean*y; sc <- 0*y + scale
  #ind <- my > 1 ## cancellation error can be severe, without splitting
  #x[!ind] <- mu[!ind]*(1 + 0.5*sc[!ind]*(my[!ind] - sqrt(4*my[!ind]/sc[!ind] + my[!ind]^2)))
  ## really the sqrt in the next term should be expanded beyond first order and then
  ## worked on - otherwise too many exact zeros?
  #x[ind] <- pmax(0,mu[ind]*(1+my[ind]*.5*sc[ind]*(1-sqrt(1+ 4/(sc[ind]*my[ind])))))
  ind <- runif(n) > mean/(mean+x)
  x[ind] <- mu2[ind]/x[ind]
  x ## E(x) = mean; var(x) = scale*mean^3
}
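## Quick moment check (illustrative, not package code): with mean=2, scale=0.5
## expect sample mean approx 2 and sample variance approx scale*mean^3 = 4...
## set.seed(1); z <- rig(100000,mean=2,scale=0.5); mean(z); var(z)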

strip.offset <- function(x)
# sole purpose is to take a model frame and rename any "offset(a.name)"
# columns "a.name"
{ na <- names(x)
  for (i in 1:length(na)) {
    if (substr(na[i],1,7)=="offset(") 
      na[i] <- substr(na[i],8,nchar(na[i])-1)
  }
  names(x) <- na
  x
}
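## e.g. (illustrative, not package code):
## mf <- model.frame(y~x+offset(lo),data.frame(y=1:3,x=3:1,lo=0))
## names(mf)               ## "y" "x" "offset(lo)"
## names(strip.offset(mf)) ## "y" "x" "lo"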


pcls <- function(M)
# Function to perform penalized constrained least squares.
# Problem to be solved is:
#
#  minimise      ||W^0.5 (y - Xp)||^2 + p'Bp
#  subject to    Ain p >= b  & C p = "constant"
#
# where B = \sum_{i=1}^m \theta_i S_i and W=diag(w)
# on entry this routine requires a list M, with the following elements:
# M$X - the design matrix for this problem.
# M$p - a feasible initial parameter vector - note that this should not be set up to
#       lie exactly on all the inequality constraints - which can easily happen if M$p=0!
# M$y - response variable
# M$w - weight vector: W= diag(M$w)
# M$Ain - matrix of inequality constraints
# M$bin - b above
# M$C  - fixed constraint matrix
# M$S  - List of (minimal) penalty matrices
# M$off - used for unpacking M$S
# M$sp - array of theta_i's 
# Ain, bin and p are not in the object needed to call mgcv....
#
{ nar<-c(length(M$y),length(M$p),dim(M$Ain)[1],dim(M$C)[1])
  ## sanity checking ...
  if (nrow(M$X)!=nar[1]) stop("nrow(M$X) != length(M$y)") 
  if (ncol(M$X)!=nar[2]) stop("ncol(M$X) != length(M$p)")
  if (length(M$w)!=nar[1]) stop("length(M$w) != length(M$y)")
  if (nar[3]!=length(M$bin)) stop("nrow(M$Ain) != length(M$bin)")
  if (nrow(M$Ain)>0) {
    if (ncol(M$Ain)!=nar[2]) stop("ncol(M$Ain) != length(M$p)") 
    res <- as.numeric(M$Ain%*%M$p) - as.numeric(M$bin)
    if (sum(res<0)>0) stop("initial parameters not feasible")
    res <- abs(res)
    if (sum(res<.Machine$double.eps^.5)>0) 
      warning("initial point very close to some inequality constraints")
    res <- mean(res)
    if (res<.Machine$double.eps^.5) 
      warning("initial parameters very close to inequality constraints")
  }
  
  if (nrow(M$C)>0) if (ncol(M$C)!=nar[2]) stop("ncol(M$C) != length(M$p)")  
  if (length(M$S)!=length(M$off)) stop("M$S and M$off have different lengths")
  if (length(M$S)!=length(M$sp)) stop("M$sp has different length to M$S and M$off")
  
  
  # pack the S array for mgcv call 
  m<-length(M$S)
  Sa<-array(0,0);df<-0
  if (m>0) for (i in 1:m)
  { Sa<-c(Sa,M$S[[i]])
    df[i]<-nrow(M$S[[i]])
    if (M$off[i]+df[i]-1>nar[2]) stop(gettextf("M$S[%d] is too large given M$off[%d]", i, i))
  }
  qra.exist <- FALSE
  if (ncol(M$X)>nrow(M$X)) {
    if (m>0) stop("Penalized model matrix must have no more columns than rows") 
    else { ## absorb M$C constraints
      qra <- qr(t(M$C))
      j <- nrow(M$C);k <- ncol(M$X)
      M$X <- t(qr.qty(qra,t(M$X))[(j+1):k,])
      M$Ain <- t(qr.qty(qra,t(M$Ain))[(j+1):k,])
      M$C <- matrix(0,0,0)
      M$p <- rep(0,ncol(M$X)) 
      nar[2] <- length(M$p)
      qra.exist <- TRUE
      if  (ncol(M$X)>nrow(M$X)) stop("Model matrix not full column rank")
    }
  }
  o<-.C(C_RPCLS,as.double(M$X),as.double(M$p),as.double(M$y),as.double(M$w),as.double(M$Ain),as.double(M$bin)
        ,as.double(M$C),as.double(Sa),as.integer(M$off),as.integer(df),as.double(M$sp),
        as.integer(length(M$off)),as.integer(nar))
  p <- array(o[[2]],length(M$p))
  if (qra.exist) p <- qr.qy(qra,c(rep(0,j),p))
  p
} ## pcls
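## Minimal unconstrained sketch (not from the package docs, setup details are
## assumptions -- see ?pcls): ridge type penalized LS, no Ain or C constraints.
## Note zero offset in M$off means the penalty starts at the first coefficient.
## set.seed(1);n <- 100;X <- cbind(1,runif(n));y <- X%*%c(1,2)+rnorm(n)
## M <- list(X=X,y=as.numeric(y),w=rep(1,n),p=rep(0,2),Ain=matrix(0,0,2),
##           bin=rep(0,0),C=matrix(0,0,0),S=list(diag(2)),off=0,sp=.1)
## pcls(M) ## penalized coefficient estimates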

all_vars1 <- function(form) {
## version of all.vars that doesn't split up terms like x$y into x and y
  vars <- all.vars(form)
  vn <- all.names(form)
  vn <- vn[vn%in%c(vars,"$","[[")] ## actual variable related names
  if ("[["%in%vn) stop("can't handle [[ in formula")
  ii <- which(vn%in%"$") ## index of '$'
  if (length(ii)) { ## assemble variable names
    vn1 <- if (ii[1]>1) vn[1:(ii[1]-1)]
    go <- TRUE
    k <- 1
    while (go) {
      n <- 2; 
      while(k<length(ii) && ii[k]==ii[k+1]-1) { k <- k + 1;n <- n + 1 }
      vn1 <- c(vn1,paste(vn[ii[k]+1:n],collapse="$"))
      if (k==length(ii)) {
        go <- FALSE
	ind <- if (ii[k]+n<length(vn)) (ii[k]+n+1):length(vn) else rep(0,0) 
      } else {
        k <- k +  1
	ind <- if (ii[k-1]+n<ii[k]-1) (ii[k-1]+n+1):(ii[k]-1) else rep(0,0)
      }
      vn1 <- c(vn1,vn[ind])
    }
  } else vn1 <- vn
  vn1
} ## all_vars1
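## e.g. (illustrative):
## all.vars(~dat$x+z)  ## "dat" "x" "z" -- splits dat$x up
## all_vars1(~dat$x+z) ## "dat$x" "z"  -- keeps it whole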

interpret.gam0 <- function(gf,textra=NULL,extra.special=NULL)
# interprets a gam formula of the generic form:
#   y~x0+x1+x3*x4 + s(x5)+ s(x6,x7) ....
# and returns:
# 1. a model formula for the parametric part: pf (and pfok indicating whether it has terms)
# 2. a list of descriptors for the smooths: smooth.spec
# this function does the work, and is called by interpret.gam
# 'textra' is optional text to add to term labels
# 'extra.special' is label of extra smooth within formula.
{ p.env <- environment(gf) # environment of formula
  tf <- terms.formula(gf,specials=c("s","te","ti","t2",extra.special)) # specials attribute indicates which terms are smooth
 
  terms <- attr(tf,"term.labels") # labels of the model terms 
  nt <- length(terms) # how many terms?
  
  if (attr(tf,"response") > 0) {  # start the replacement formulae
    response <- as.character(attr(tf,"variables")[2])
  } else { 
    response <- NULL
  }
  sp <- attr(tf,"specials")$s     # array of indices of smooth terms 
  tp <- attr(tf,"specials")$te    # indices of tensor product terms
  tip <- attr(tf,"specials")$ti   # indices of tensor product pure interaction terms
  t2p <- attr(tf,"specials")$t2   # indices of type 2 tensor product terms
  zp <- if (is.null(extra.special)) NULL else attr(tf,"specials")[[extra.special]]
  off <- attr(tf,"offset") # location of offset in formula

  ## have to translate sp, tp, tip, t2p (zp) so that they relate to terms,
  ## rather than elements of the formula...
  vtab <- attr(tf,"factors") # cross tabulation of vars to terms
  if (length(sp)>0) for (i in 1:length(sp)) {
    ind <- (1:nt)[as.logical(vtab[sp[i],])]
    sp[i] <- ind # the term that smooth relates to
  }
  if (length(tp)>0) for (i in 1:length(tp)) {
    ind <- (1:nt)[as.logical(vtab[tp[i],])]
    tp[i] <- ind # the term that smooth relates to
  } 
  if (length(tip)>0) for (i in 1:length(tip)) {
    ind <- (1:nt)[as.logical(vtab[tip[i],])]
    tip[i] <- ind # the term that smooth relates to
  } 
  if (length(t2p)>0) for (i in 1:length(t2p)) {
    ind <- (1:nt)[as.logical(vtab[t2p[i],])]
    t2p[i] <- ind # the term that smooth relates to
  }
  if (length(zp)>0) for (i in 1:length(zp)) {
    ind <- (1:nt)[as.logical(vtab[zp[i],])]
    zp[i] <- ind # the term that smooth relates to
  } ## re-referencing is complete

  k <- kt <- kti <- kt2 <- ks <- kz <- kp <- 1 # counters for terms in the 2 formulae
  len.sp <- length(sp)
  len.tp <- length(tp)
  len.tip <- length(tip)
  len.t2p <- length(t2p)
  len.zp <- length(zp)
  ns <- len.sp + len.tp + len.tip + len.t2p + len.zp # number of smooths
  pav <- av <- rep("",0)
  smooth.spec <- list()
  #mgcvat <- "package:mgcv" %in% search() ## is mgcv in search path?
  mgcvns <- loadNamespace('mgcv')
  if (nt) for (i in 1:nt) { # work through all terms
    if (k <= ns&&((ks<=len.sp&&sp[ks]==i)||(kt<=len.tp&&tp[kt]==i)||(kz<=len.zp&&zp[kz]==i)||
                  (kti<=len.tip&&tip[kti]==i)||(kt2<=len.t2p&&t2p[kt2]==i))) { # it's a smooth
      ## have to evaluate in the environment of the formula or you can't find variables 
      ## supplied as smooth arguments, e.g. k <- 5;gam(y~s(x,k=k)), fails,
      ## but if you don't specify namespace of mgcv then stuff like 
      ## loadNamespace('mgcv'); k <- 10; mgcv::interpret.gam(y~s(x,k=k)) fails (can't find s)
      ## eval(parse(text=terms[i]),envir=p.env,enclos=loadNamespace('mgcv')) fails??
      ## following may supply namespace of mgcv explicitly if not on search path...
      ## If 's' etc are masked then we can fail even if mgcv on search path, hence paste
      ## of explicit mgcv reference into first attempt...
    
      st <- try(eval(parse(text=paste("mgcv::",terms[i],sep="")),envir=p.env),silent=TRUE)
      if (inherits(st,"try-error")) st <- 
            eval(parse(text=terms[i]),enclos=p.env,envir=mgcvns)
     
      if (!is.null(textra)) { ## modify the labels on smooths with textra
        pos <- regexpr("(",st$label,fixed=TRUE)[1]
        st$label <- paste(substr(st$label,start=1,stop=pos-1),textra,
                    substr(st$label,start=pos,stop=nchar(st$label)),sep="")
      }
      smooth.spec[[k]] <- smooth.info(st) ## smooth.info supplies any extra specification info for class
      if (ks<=len.sp&&sp[ks]==i) ks <- ks + 1 else # counts s() terms
      if (kt<=len.tp&&tp[kt]==i) kt <- kt + 1 else # counts te() terms
      if (kti<=len.tip&&tip[kti]==i) kti <- kti + 1 else # counts ti() terms
      if (kt2<=len.t2p&&t2p[kt2]==i) kt2 <- kt2 + 1 # counts t2() terms
      else kz <- kz + 1
      k <- k + 1      # counts smooth terms 
    } else {          # parametric
      av[kp] <- terms[i] ## element kp on rhs of parametric
      kp <- kp+1    # counts parametric terms
    }
  }    
  if (!is.null(off)) { ## deal with offset 
    av[kp] <- as.character(attr(tf,"variables")[1+off])
    kp <- kp+1          
  }

  pf <- paste(response,"~",paste(av,collapse=" + "))
  if (attr(tf,"intercept")==0) {
    pf <- paste(pf,"-1",sep="")
    if (kp>1) pfok <- 1 else pfok <- 0
  } else { 
    pfok <- 1;if (kp==1) { 
      pf <- paste(pf,"1"); 
    }
  }

  fake.formula <- pf

  if (length(smooth.spec)>0) 
  for (i in 1:length(smooth.spec)) {
    nt <- length(smooth.spec[[i]]$term)
    ff1 <- paste(smooth.spec[[i]]$term[1:nt],collapse="+")
    fake.formula <- paste(fake.formula,"+",ff1)
    if (smooth.spec[[i]]$by!="NA") {
      fake.formula <- paste(fake.formula,"+",smooth.spec[[i]]$by)
      av <- c(av,smooth.spec[[i]]$term,smooth.spec[[i]]$by)
    } else av <- c(av,smooth.spec[[i]]$term)
  }
  fake.formula <- as.formula(fake.formula,p.env)
  if (length(av)) {
    pred.formula <- as.formula(paste("~",paste(av,collapse="+")))
    pav <- all.vars(pred.formula) ## trick to strip out 'offset(x)' etc...
    pred.formula <- reformulate(pav,env=p.env) 
  } else  pred.formula <- ~1
  ret <- list(pf=as.formula(pf,p.env),pfok=pfok,smooth.spec=smooth.spec,
            fake.formula=fake.formula,response=response,fake.names=av,
            pred.names=pav,pred.formula=pred.formula)
  #environment(ret$fake.formula)  <- environment(ret$pred.formula) <- p.env	    
  class(ret) <- "split.gam.formula"
  ret
} ## interpret.gam0

interpret.gam <- function(gf,extra.special=NULL) {
## wrapper to allow gf to be a list of formulae or 
## a single formula. This facilitates general penalized 
## likelihood models in which several linear predictors 
## may be involved...
##
## The list syntax is as follows. The first formula must have a response on
## the lhs, rather than labels. For m linear predictors, there 
## must be m 'base formulae' in linear predictor order. lhs labels will 
## be ignored in a base formula. Empty base formulae have '-1' on rhs.
## Further formulae have up to m labels 1,...,m on the lhs, in a 
## syntax like this: 3 + 5 ~ s(x), which indicates that the same s(x) 
## should be added to both linear predictors 3 and 5. 
## e.g. A bivariate normal model with common expected values might be
## list(y1~-1,y2~-1,1+2~s(x)), whereas if the second component was contaminated 
## by something else we might have list(y1~-1,y2~s(v)-1,1+2~s(x)) 
## 
## For a list argument, this routine returns a list of split.gam.formula objects 
## with an extra field "lpi" indicating the linear predictors to which each 
## contributes...
  if (is.list(gf)) {
    d <- length(gf)
    p.env <- environment(gf[[1]])
    ## make sure all formulae have a response, to avoid
    ## problems with parametric sub formulae of the form ~1
    #if (length(gf[[1]])<3) stop("first formula must specify a response")
    resp <- gf[[1]][2]
    
    ret <- list()
    pav <- av <- rep("",0)
    nlp <- 0 ## count number of linear predictors (may be different from number of formulae)
    for (i in 1:d) {
      textra <- if (i==1) NULL else paste(".",i-1,sep="") ## modify smooth labels to identify to predictor  

      lpi <- getNumericResponse(gf[[i]]) ## get linear predictors to which this applies, if explicit
      if (length(lpi)==1) warning("single linear predictor indices are ignored")
      if (length(lpi)>0) gf[[i]][[2]] <- NULL else { ## delete l.p. labels from formula response 
        nlp <- nlp + 1;lpi <- nlp ## this is base formula for l.p. number nlp       
      }
      ret[[i]] <- interpret.gam0(gf[[i]],textra,extra.special=extra.special)
      ret[[i]]$lpi <- lpi ## record of the linear predictors to which this applies
      
      ## make sure all parametric formulae have a response, to avoid
      ## problems with parametric sub formulae of the form ~1
      respi <- rep("",0) ## no extra response terms
      if (length(ret[[i]]$pf)==2) { 
        ret[[i]]$pf[3] <- ret[[i]]$pf[2];ret[[i]]$pf[2] <- resp
        respi <- rep("",0)
      } else if (i>1) respi <- ret[[i]]$response ## extra response terms
      av <- c(av,ret[[i]]$fake.names,respi) ## accumulate all required variable names 
      pav <- c(pav,ret[[i]]$pred.names) ## predictors only 
    } 
    av <- unique(av) ## strip out duplicate variable names
    pav <- unique(pav)
    if (length(av)>0) {
      ## work around - reformulate with response = "log(x)" will treat log(x) as a name,
      ## not the call it should be... 
      fff <- formula(paste(ret[[1]]$response,"~ ."))
      ret$fake.formula <- reformulate(av,response=ret[[1]]$response,env=p.env) 
      ret$fake.formula[[2]] <- fff[[2]] ## fix messed up response
    } else ret$fake.formula <- ret[[1]]$fake.formula ## create fake formula containing all variables
    ret$pred.formula <- if (length(pav)>0) reformulate(pav) else ~1 ## predictor only formula
    environment(ret$pred.formula) <- p.env
    ret$response <- ret[[1]]$response 
    ret$nlp <- nlp ## number of linear predictors
    for (i in 1:d) if (max(ret[[i]]$lpi)>nlp||min(ret[[i]]$lpi)<1) stop("linear predictor labels out of range")
    class(ret) <- "split.gam.formula"
    return(ret)
  } else interpret.gam0(gf,extra.special=extra.special)  
} ## interpret.gam
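## e.g. (illustrative, single formula case):
## sg <- interpret.gam(y~s(x)+z)
## sg$pf            ## y ~ z -- the strictly parametric part
## sg$fake.formula  ## y ~ z + x -- smooths replaced by their covariates
## sg$smooth.spec[[1]]$term ## "x"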


fixDependence <- function(X1,X2,tol=.Machine$double.eps^.5,rank.def=0,strict=FALSE)
# model matrix X2 may be linearly dependent on X1. This 
# routine finds which columns of X2 should be zeroed to 
# fix this. If rank.def>0 then it is taken as the known degree 
# of dependence of X2 on X1 and tol is ignored.
{ qr1 <- qr(X1,LAPACK=TRUE)
  R11 <- abs(qr.R(qr1)[1,1])
  r<-ncol(X1);n<-nrow(X1)
  if (strict) { ## only delete columns of X2 individually dependent on X1
    ## Project columns of X2 into space of X1 and look at difference
    ## to original X2 to check for deficiency...  
    QtX2 <- qr.qty(qr1,X2)
    QtX2[-(1:r),] <- 0
    mdiff <- colMeans(abs(X2 - qr.qy(qr1,QtX2)))
    if (rank.def>0) ind <- (1:ncol(X2))[rank(mdiff) <= rank.def] else
    ind <- (1:ncol(X2))[mdiff < R11*tol]
    if (length(ind)<1) ind <- NULL
  } else { ## make X2 full rank given X1
    QtX2 <- qr.qty(qr1,X2)[(r+1):n,] # Q'X2
    qr2 <- qr(QtX2,LAPACK=TRUE)
    R <- qr.R(qr2)
    # now final diagonal block of R may be zero, indicating rank 
    # deficiency.
    r0 <- r <- nrow(R)
    if (rank.def > 0 && rank.def <= nrow(R)) r0 <- r - rank.def else ## degree of rank def known
      while (r0>0 && mean(abs(R[r0:r,r0:r]))< R11*tol) r0 <- r0 -1 ## compute rank def
    r0 <- r0 + 1
    if (r0>r) return(NULL) else
    ind <- qr2$pivot[r0:r] # the columns of X2 to zero in order to get independence
  }
  ind
} ## fixDependence
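## e.g. (illustrative): first column of X2 lies in the span of X1...
## X1 <- cbind(1,1:10); X2 <- cbind(1:10,(1:10)^2)
## fixDependence(X1,X2) ## 1 -- zero X2[,1] to remove the dependence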


augment.smX <- function(sm,nobs,np) {
## augments a smooth model matrix with a square root penalty matrix for
## identifiability constraint purposes.
  ns <- length(sm$S) ## number of penalty matrices
  if (ns==0) { ## nothing to do
    return(rbind(sm$X,matrix(0,np,ncol(sm$X))))
  }
  ind <- colMeans(abs(sm$S[[1]]))!=0
  sqrmaX  <- mean(abs(sm$X[,ind]))^2
  alpha <- sqrmaX/mean(abs(sm$S[[1]][ind,ind]))
  St <- sm$S[[1]]*alpha
  if (ns>1) for (i in 2:ns) { 
    ind <- colMeans(abs(sm$S[[i]]))!=0
    alpha <- sqrmaX/mean(abs(sm$S[[i]][ind,ind]))
    St <- St +  sm$S[[i]]*alpha
  }
  rS <- mroot(St,rank=ncol(St)) ## get sqrt of penalty
  X <- rbind(sm$X,matrix(0,np,ncol(sm$X))) ## create augmented model matrix
  X[nobs+sm$p.ind,] <- t(rS) ## add in the square root penalty block
  X ## scaled augmented model matrix
} ## augment.smX

gam.side <- function(sm,Xp,tol=.Machine$double.eps^.5,with.pen=FALSE)
# works through a list of smooths, sm, aiming to identify nested or partially
# nested terms, and impose identifiability constraints on them.
# Xp is the parametric model matrix. It is needed in order to check whether
# there is a constant (or equivalent) in the model. If there is, then this needs 
# to be included when working out side constraints, otherwise dependencies can be 
# missed. 
# Note that with.pen is quite extreme, since you then pretty much only pick
# up dependencies in the null spaces
{ if (!with.pen) { ## check that's possible and reset if not!
    with.pen <- nrow(Xp) < ncol(Xp) + sum(unlist(lapply(sm,function(x) ncol(x$X))))
  }
  m <- length(sm)
  if (m==0) return(sm)
  v.names<-array("",0);maxDim<-1
  for (i in 1:m) { ## collect all term names and max smooth `dim'
    vn <- sm[[i]]$term
    ## need to include by variables in names
    if (sm[[i]]$by!="NA") vn <- paste(vn,sm[[i]]$by,sep="")
    ## need to distinguish levels of factor by variables...
    if (!is.null(sm[[i]]$by.level))  vn <- paste(vn,sm[[i]]$by.level,sep="")
    sm[[i]]$vn <- vn ## use this record to identify variables from now
    v.names <- c(v.names,vn)
    if (sm[[i]]$dim > maxDim) maxDim <- sm[[i]]$dim
  } 
  lv <- length(v.names)   
  v.names <- unique(v.names)
  if (lv == length(v.names)) return(sm) ## no repeats => no nesting

  ## Only get this far if there is nesting.
  ## Need to test for intercept or equivalent in Xp
  intercept <- FALSE
  if (ncol(Xp)) {
    ## first check columns directly...
    if (sum(apply(Xp,2,sd)<.Machine$double.eps^.75)>0) intercept <- TRUE else {
      ## no constant column, so need to check span of Xp...
      f <- rep(1,nrow(Xp))
      ff <- qr.fitted(qr(Xp),f)
      if (max(abs(ff-f))<.Machine$double.eps^.75) intercept <- TRUE 
    }
  }

  sm.id <- as.list(v.names)
  names(sm.id) <- v.names
  for (i in 1:length(sm.id)) sm.id[[i]]<-array(0,0)
  sm.dim <- sm.id
  for (d in 1:maxDim) {
    for (i in 1:m) {
      if (sm[[i]]$dim==d&&sm[[i]]$side.constrain) for (j in 1:d) { ## work through terms
        term<-sm[[i]]$vn[j]
        a <- sm.id[[term]]
        la <- length(a)+1
        sm.id[[term]][la] <- i   ## record smooth i.d. for this variable
        sm.dim[[term]][la] <- d  ## ... and smooth dim.
      }
    }
  }
  ## so now each unique variable name has an associated array of 
  ## the smooths of which it is an argument, arranged in ascending 
  ## order of dimension. Smooths for which side.constrain==FALSE are excluded.
  if (maxDim==1) warning("model has repeated 1-d smooths of same variable.")

  ## Now set things up to enable term specific model matrices to be
  ## augmented with square root penalties, on the fly...
  if (with.pen) {
    k <- 1
    for (i in 1:m) { ## create parameter indices for each term
      k1 <- k + ncol(sm[[i]]$X) - 1
      sm[[i]]$p.ind <- k:k1
      k <- k1 + 1
    }
    np <- k-1 ## number of penalized parameters
  }
  nobs <- nrow(sm[[1]]$X) ## number of observations
  
  for (d in 1:maxDim) { ## work up through dimensions 
    for (i in 1:m) { ## work through smooths
      if (sm[[i]]$dim == d&&sm[[i]]$side.constrain) { ## check for nesting
        if (with.pen) X1 <- matrix(c(rep(1,nobs),rep(0,np)),nobs+np,as.integer(intercept)) else
        X1 <- matrix(1,nobs,as.integer(intercept))
	X1comp <- rep(0,0) ## list of components of X1 to avoid duplication
        for (j in 1:d) { ## work through variables
          b <- sm.id[[sm[[i]]$vn[j]]] # list of smooths dependent on this variable
          k <- (1:length(b))[b==i] ## locate current smooth in list 
          if (k>1) for (l in 1:(k-1)) if (!b[l] %in% X1comp) { ## collect X columns
           X1comp <- c(X1comp,b[l]) ## keep track of components to avoid adding same one twice
           if (with.pen) { ## need to use augmented model matrix in testing 
              if (is.null(sm[[b[l]]]$Xa)) sm[[b[l]]]$Xa <- augment.smX(sm[[b[l]]],nobs,np)
              X1 <- cbind(X1,sm[[b[l]]]$Xa)
            } else X1 <- cbind(X1,sm[[b[l]]]$X) ## penalties not considered
          }
        } ## Now X1 contains columns for all lower dimensional terms
        if (ncol(X1)==as.integer(intercept)) ind <- NULL else {
          if (with.pen) {
             if (is.null(sm[[i]]$Xa)) sm[[i]]$Xa <- augment.smX(sm[[i]],nobs,np)
             ind <- fixDependence(X1,sm[[i]]$Xa,tol=tol) 
          } else ind <- fixDependence(X1,sm[[i]]$X,tol=tol)        
        }
        ## ... the columns to zero to ensure independence
        if (!is.null(ind)) { 
          sm[[i]]$X <- sm[[i]]$X[,-ind]
          ## work through list of penalty matrices, applying constraints...
          nsmS <- length(sm[[i]]$S)
          if (nsmS>0) for (j in nsmS:1) { ## working down so that dropping is painless
            sm[[i]]$S[[j]] <- sm[[i]]$S[[j]][-ind,-ind]
            if (sum(sm[[i]]$S[[j]]!=0)==0) rank <- 0 else
            rank <- qr(sm[[i]]$S[[j]],tol=tol,LAPACK=FALSE)$rank
            sm[[i]]$rank[j] <- rank ## replace previous rank with new rank
            if (rank == 0) { ## drop the penalty
              sm[[i]]$rank <- sm[[i]]$rank[-j]
              sm[[i]]$S[[j]] <- NULL
              sm[[i]]$S.scale <- sm[[i]]$S.scale[-j]
              if (!is.null(sm[[i]]$L)) sm[[i]]$L <- sm[[i]]$L[-j,,drop=FALSE]
            }
          } ## penalty matrices finished
          ## Now we need to establish null space rank for the term
          mi <- length(sm[[i]]$S)
          if (mi>0) {
            St <- sm[[i]]$S[[1]]/norm(sm[[i]]$S[[1]],type="F")
            if (mi>1) for (j in 2:mi) St <- St + 
                  sm[[i]]$S[[j]]/norm(sm[[i]]$S[[j]],type="F")
            es <- eigen(St,symmetric=TRUE,only.values=TRUE)
            sm[[i]]$null.space.dim <- sum(es$values<max(es$values)*.Machine$double.eps^.75) 
          } ## rank found

          if (!is.null(sm[[i]]$L)) {
            ind <- as.numeric(colSums(sm[[i]]$L!=0))!=0
            sm[[i]]$L <- sm[[i]]$L[,ind,drop=FALSE] ## retain only those sps that influence something!
          }

          sm[[i]]$df <- ncol(sm[[i]]$X)
          attr(sm[[i]],"del.index") <- ind
          ## Now deal with case in which prediction constraints differ from fit constraints
          if (!is.null(sm[[i]]$Xp)) { ## need to get deletion indices under prediction parameterization
            ## Note that: i) it doesn't matter what the identifiability con on X1 is
            ##            ii) the degree of rank deficiency can't be changed by an identifiability con
            if (with.pen) { 
              smi <- sm[[i]]  ## clone smooth 
              smi$X <- smi$Xp ## copy prediction Xp to X slot.
              smi$S <- smi$Sp ## and make sure penalty parameterization matches. 
              Xpa <- augment.smX(smi,nobs,np)
              ind <- fixDependence(X1,Xpa,rank.def=length(ind)) 
            } else ind <- fixDependence(X1,sm[[i]]$Xp,rank.def=length(ind)) 
            sm[[i]]$Xp <- sm[[i]]$Xp[,-ind,drop=FALSE]
            attr(sm[[i]],"del.index") <- ind ## over-writes original
          }
        }
        sm[[i]]$vn <- NULL
      } ## end if (sm[[i]]$dim == d)
    } ## end i in 1:m loop
  }  
  if (with.pen) for (i in 1:m) { ## remove working variables that were added
    sm[[i]]$p.ind <- NULL
    if (!is.null(sm[[i]]$Xa)) sm[[i]]$Xa <- NULL
  }
  sm
} ## gam.side

clone.smooth.spec <- function(specb,spec) {
## produces a version of base smooth.spec, `specb', but with 
## the variables relating to `spec'. Used by `gam.setup' in handling 
## of linked smooths.
 ## check dimensions same...
 if (specb$dim!=spec$dim) stop("`id' linked smooths must have same number of arguments") 
 ## Now start cloning...
 if (inherits(specb,c("tensor.smooth.spec","t2.smooth.spec"))) { ##`te' or `t2' generated base smooth.spec
    specb$term <- spec$term
    specb$label <- spec$label 
    specb$by <- spec$by
    k <- 1
    for (i in 1:length(specb$margin)) {
      if (is.null(spec$margin)) { ## sloppy user -- have to construct margin info...
         for (j in 1:length(specb$margin[[i]]$term)) {
           specb$margin[[i]]$term[j] <- spec$term[k]
           k <- k + 1
         }
         specb$margin[[i]]$label <- ""
 
      } else { ## second term was at least `te'/`t2', so margin cloning is easy
        specb$margin[[i]]$term <- spec$margin[[i]]$term
        specb$margin[[i]]$label <- spec$margin[[i]]$label
        specb$margin[[i]]$xt <- spec$margin[[i]]$xt
      }
    }

  } else { ## `s' generated case
    specb$term <- spec$term
    specb$label <- spec$label 
    specb$by <- spec$by
    specb$xt <- spec$xt ## don't generally know what's in here => don't clone
  }
  specb ## return clone
} ## clone.smooth.spec


parametricPenalty <- function(pterms,assign,paraPen,sp0) {
## routine to process any penalties on the parametric part of the model.
## paraPen is a list whose items have names corresponding to the 
## term.labels in pterms. Each list item may have named elements 
## L, rank and sp. All other elements should be penalty coefficient matrices.
  S <- list()     ## penalty matrix list
  off <- rep(0,0) ## offset array
  rank <- rep(0,0) ## rank array
  sp <- rep(0,0)    ## smoothing param array
  full.sp.names <- rep("",0) ## names for sp's multiplying penalties (not underlying)
  L <- matrix(0,0,0) 
  k <- 0
  tind <- unique(assign) ## unique term indices
  n.t <- length(tind)
  if (n.t>0) for (j in 1:n.t) if (tind[j]>0) {
    term.label <- attr(pterms[tind[j]],"term.labels")
    P <- paraPen[[term.label]] ## get any penalty information for this term
    if (!is.null(P)) { ## then there is information
      ind <- (1:length(assign))[assign==tind[j]] ## index of coefs involved here
      Li <- P$L;P$L <- NULL
      spi <- P$sp;P$sp <- NULL
      ranki <- P$rank;P$rank <- NULL
      ## remaining terms should be penalty matrices...
      np <- length(P)

      if (!is.null(ranki)&&length(ranki)!=np) stop("`rank' has wrong length in `paraPen'") 
      if (np) for (i in 1:np) { ## unpack penalty matrices, offsets and ranks
        k <- k + 1
        S[[k]] <- P[[i]]
        off[k] <- min(ind) ## index of first coef penalized by this term
        if (ncol(P[[i]])!=nrow(P[[i]])||nrow(P[[i]])!=length(ind)) stop("a parametric penalty has wrong dimension")
        if (is.null(ranki)) {
          ev <- eigen(S[[k]],symmetric=TRUE,only.values=TRUE)$values
          rank[k] <- sum(ev>max(ev)*.Machine$double.eps*10) ## estimate rank
        } else rank[k] <- ranki[i]
      }
      ## now deal with L matrices
      if (np) { ## only do this stuff if there are any penalties!
        if (is.null(Li)) Li <- diag(np)
        if (nrow(Li)!=np) stop("L has wrong dimension in `paraPen'")
        L <- rbind(cbind(L,matrix(0,nrow(L),ncol(Li))),
                   cbind(matrix(0,nrow(Li),ncol(L)),Li))
        ind <- (length(sp)+1):(length(sp)+ncol(Li))
        ind2 <- (length(sp)+1):(length(sp)+nrow(Li)) ## used to produce names for full sp array
        if (is.null(spi)) {
          sp[ind] <- -1 ## auto-initialize
        } else {
          if (length(spi)!=ncol(Li)) stop("`sp' dimension wrong in `paraPen'")
          sp[ind] <- spi
        }
        ## add smoothing parameter names....
        if (length(ind)>1) names(sp)[ind] <- paste(term.label,ind-ind[1]+1,sep="") 
        else names(sp)[ind] <- term.label
        
        if (length(ind2)>1) full.sp.names[ind2] <- paste(term.label,ind2-ind2[1]+1,sep="") 
        else full.sp.names[ind2] <- term.label
      }
    } ## end !is.null(P)  
  } ## looped through all terms
  if (k==0) return(NULL)
  if (!is.null(sp0)) {
    if (length(sp0)<length(sp)) stop("`sp' too short")
    sp0 <- sp0[1:length(sp)]
    sp[sp<0] <- sp0[sp<0]
  }
  ## S is a list of penalty matrices, off[i] is the index of the first coefficient
  ## penalized by S[[i]], sp is the array of underlying smoothing parameters (-ve
  ## to estimate), L is the matrix mapping log underlying smoothing parameters to
  ## log smoothing parameters, and rank[i] is the rank of S[[i]].
  list(S=S,off=off,sp=sp,L=L,rank=rank,full.sp.names=full.sp.names)
} ## parametricPenalty
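## From the user side this is driven by gam's paraPen argument, e.g. (a sketch,
## data setup is an illustrative assumption): ridge penalty on the coefficients
## of a 2 column matrix term X0...
## set.seed(1);dat <- data.frame(y=rnorm(100));dat$X0 <- matrix(runif(200),100,2)
## b <- gam(y~X0,paraPen=list(X0=list(diag(2))),data=dat,method="REML")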

getNumericResponse <- function(form) {
## takes a formula for which the lhs contains numbers, but no variable 
## names, and returns an array of those numbers. For example if 'form' 
## is '1+26~s(x)', this will return the numeric vector c(1,26). 
## A zero length vector is returned if the lhs contains no numbers,
## or contains variable names.  
## This is useful for setting up models in which
## multiple linear predictors share terms. The lhs numbers can then 
## indicate which linear predictors the rhs will contribute to.

  ## if the response contains variables or there is no
  ## response then return nothing...

  if (length(form)==2||length(all.vars(form[[2]]))>0) return(rep(0,0))

  ## deparse turns lhs into a string; strsplit extracts the characters 
  ## corresponding to numbers; unlist deals with the fact that deparse 
  ## will split long lines resulting in multiple list items from 
  ## strsplit; as.numeric converts the numbers; na.omit drops NAs
  ## resulting from "" elements; unique & round are obvious... 

  round(unique(na.omit(as.numeric(unlist(strsplit(deparse(form[[2]]), "[^0-9]+"))))))  

} ## getNumericResponse
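## e.g. (illustrative):
## getNumericResponse(1+3~s(x)) ## c(1,3)
## getNumericResponse(y~s(x))   ## numeric(0) -- lhs is a variable name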

olid <- function(X,nsdf,pstart,flpi,lpi) {
## X is a model matrix, made up of nf=length(nsdf) column blocks.
## The ith block starts at column pstart[i] and its first nsdf[i]
## columns are unpenalized. X is used to define nlp=length(lpi)
## linear predictors. lpi[[i]] gives the columns of X used in the 
## ith linear predictor. flpi[j] gives the linear predictor(s)
## to which the jth block of X belongs. The problem is that the 
## unpenalized blocks need not be identifiable when used in combination. 
## This function returns a vector dind of columns of X to drop for 
## identifiability, along with a correspondingly modified lpi list.
  nlp <- length(lpi) ## number of linear predictors
  n <- nrow(X) 
  nf <- length(nsdf) ## number of formulae blocks
  Xp <- matrix(0,n*nlp,sum(nsdf))
  start <- 1
  ii <- 1:n
  tind <- rep(0,0) ## complete index of all parametric columns in X
  ## create a block matrix, Xp, with the same identifiability properties as
  ## unpenalized part of model...
  for (i in 1:nf) {
    stop <- start - 1 + nsdf[i]
    if (stop>=start) {
      ind <- pstart[i] + 1:nsdf[i] - 1
      for (k in flpi[[i]]) {
        Xp[ii+(k-1)*n,start:stop] <- X[,ind]
      }
      tind <- c(tind,ind)
      start <- start + nsdf[i]
    }
  }
  ## rank deficiency of Xp will reveal number of redundant parametric 
  ## terms, and a pivoted QR will reveal which to drop to restore
  ## full rank...
  qrx <- qr(Xp,LAPACK=TRUE,tol=0.0) ## unidentifiable columns get pivoted to final cols
  r <- Rrank(qr.R(qrx)) ## get rank from R factor of pivoted QR
  if (r==ncol(Xp)) { ## full rank, all fine, drop nothing
    dind <- rep(0,0)
  } else { ## reduced rank, drop some columns
    dind <- tind[sort(qrx$pivot[(r+1):ncol(Xp)],decreasing=TRUE)] ## columns to drop
    ## now we need to adjust nsdf, pstart and lpi
    for (d in dind) { ## working down through drop indices
      ## following commented out code is useful should it ever prove necessary to 
      ## adjust pstart and nsdf, but at present these are only used in prediction, 
      ## and it is cleaner to leave them unchanged, and simply drop using dind during prediction.
      #k <- if (d>=pstart[nf]) nlp else which(d >= pstart[1:(nf-1)] & d < pstart[2:nf])
      #nsdf[k] <- nsdf[k] - 1 ## one less unpenalized column in this block
      #if (k<nf) pstart[(k+1):nf] <-  pstart[(k+1):nf] - 1 ## later block starts move down 1 
      for (i in 1:nlp) { 
        k <- which(d == lpi[[i]])
        if (length(k)>0) lpi[[i]] <- lpi[[i]][-k] ## drop row
        k <- which(lpi[[i]]>d)
        if (length(k)>0) lpi[[i]][k] <- lpi[[i]][k] - 1 ## close up
      }
    } ## end of drop index loop
  }
  list(dind=dind,lpi=lpi) ##,pstart=pstart,nsdf=nsdf)
} ## olid


gam.setup.list <- function(formula,pterms,
                    data=stop("No data supplied to gam.setup"),knots=NULL,sp=NULL,
                    min.sp=NULL,H=NULL,absorb.cons=TRUE,sparse.cons=0,select=FALSE,idLinksBases=TRUE,
                    scale.penalty=TRUE,paraPen=NULL,gamm.call=FALSE,drop.intercept=NULL,apply.by=TRUE,modCon=0) {
## version of gam.setup for when gam is called with a list of formulae, 
## specifying several linear predictors...
## key difference to gam.setup is an attribute to the model matrix, "lpi", which is a list
## of column indices for each linear predictor 
  if (!is.null(paraPen)) stop("paraPen not supported for multi-formula models")
  if (!absorb.cons) stop("absorb.cons must be TRUE for multi-formula models")
  d <- length(pterms) ## number of formulae
  if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, d)
  if (length(drop.intercept) != d) stop("length(drop.intercept) should be equal to number of model formulas")

  lp.overlap <- if (formula$nlp<d) TRUE else FALSE ## predictors share terms

  G <- gam.setup(formula[[1]],pterms[[1]],
              data,knots,sp,min.sp,H,absorb.cons,sparse.cons,select,
              idLinksBases,scale.penalty,paraPen,gamm.call,drop.intercept[1],apply.by=apply.by,list.call=TRUE,modCon=modCon)
  G$pterms <- pterms
  
  G$offset <- list(G$offset)
  #G$contrasts <- list(G$contrasts)
  G$xlevels <- list(G$xlevels)
  G$assign <- list(G$assign)
  used.sp <- length(G$lsp0)
  if (!is.null(sp)&&used.sp>0) sp <- sp[-(1:used.sp)] ## need to strip off already used sp's
  if (!is.null(min.sp)&&nrow(G$L)>0) min.sp <- min.sp[-(1:nrow(G$L))]  

  ## formula[[1]] always relates to the base formula of the first linear predictor...

  flpi <- lpi <- list()
  for (i in 1:formula$nlp) lpi[[i]] <- rep(0,0)
  lpi[[1]] <- 1:ncol(G$X) ## lpi[[j]] is index of cols for jth linear predictor 
  flpi[[1]] <- formula[[1]]$lpi ## used in identifiability testing by olid, later  

  pof <- ncol(G$X) ## counts the model matrix columns produced so far
  pstart <- rep(0,d) ## indexes where parametric columns start in each formula block of X
  pstart[1] <- 1
  if (d>1) for (i in 2:d) {
    if (is.null(formula[[i]]$response)) {  ## keep gam.setup happy
      formula[[i]]$response <- formula$response 
      mv.response <- FALSE
    } else mv.response <- TRUE
    #spind <- if (is.null(sp)) 1 else (length(G$S)+1):length(sp)
    formula[[i]]$pfok <- 1 ## empty formulae OK here!
    um <- gam.setup(formula[[i]],pterms[[i]],
              data,knots,sp,min.sp,#sp[spind],min.sp[spind],
	      H,absorb.cons,sparse.cons,select,
              idLinksBases,scale.penalty,paraPen,gamm.call,drop.intercept[i],apply.by=apply.by,list.call=TRUE,modCon=modCon)
    used.sp <- length(um$lsp0)	      
    if (!is.null(sp)&&used.sp>0) sp <- sp[-(1:used.sp)] ## need to strip off already used sp's
    if (!is.null(min.sp)&&nrow(um$L)>0) min.sp <- min.sp[-(1:nrow(um$L))]  

    flpi[[i]] <- formula[[i]]$lpi
    for (j in formula[[i]]$lpi) { ## loop through all l.p.s to which this term contributes
      lpi[[j]] <- c(lpi[[j]],pof + 1:ncol(um$X)) ## add these cols to lpi[[j]]
      ##lpi[[i]] <- pof + 1:ncol(um$X) ## old code
    }
    if (mv.response) G$y <- cbind(G$y,um$y)
    if (i>formula$nlp&&!is.null(um$offset)) {
      stop("shared offsets not allowed")
    }
    G$offset[[i]] <- um$offset
    #G$contrasts[[i]] <- um$contrasts
    if (!is.null(um$contrasts)) G$contrasts <- c(G$contrasts,um$contrasts)
    G$xlevels[[i]] <- um$xlevels
    G$assign[[i]] <- um$assign
    G$rank <- c(G$rank,um$rank)
    pstart[i] <- pof+1
    G$X <- cbind(G$X,um$X) ## extend model matrix
    ## deal with the smooths...
    k <- G$m
    if (um$m) for (j in 1:um$m) {
      um$smooth[[j]]$first.para <- um$smooth[[j]]$first.para + pof
      um$smooth[[j]]$last.para <- um$smooth[[j]]$last.para + pof
      k <- k + 1 
      G$smooth[[k]] <- um$smooth[[j]]
    }
    ## L, S and off...
    ks <- length(G$S)
    M <- length(um$S)
 
    if (!is.null(um$L)||!is.null(G$L)) {
      if (is.null(G$L)) G$L <- diag(1,nrow=ks)
      if (is.null(um$L)) um$L <- diag(1,nrow=M)
      G$L <- rbind(cbind(G$L,matrix(0,nrow(G$L),ncol(um$L))),cbind(matrix(0,nrow(um$L),ncol(G$L)),um$L))
    }

    G$off <- c(G$off,um$off+pof)
    if (M) for (j in 1:M) {
      ks <- ks + 1
      G$S[[ks]] <- um$S[[j]]
    }
 
    G$m <- G$m + um$m ## number of smooths
    ##G$nsdf <- G$nsdf + um$nsdf ## or list??
    G$nsdf[i] <- um$nsdf
    if (!is.null(um$P)||!is.null(G$P)) {
      if (is.null(G$P)) G$P <- diag(1,nrow=pof)
      k <- ncol(um$X)
      if (is.null(um$P)) um$P <- diag(1,nrow=k)
      G$P <- rbind(cbind(G$P,matrix(0,pof,k)),cbind(matrix(0,k,pof),um$P))
    }
    G$cmX <- c(G$cmX,um$cmX)
    if (um$nsdf>0) um$term.names[1:um$nsdf] <- paste(um$term.names[1:um$nsdf],i-1,sep=".")
    G$term.names <- c(G$term.names,um$term.names)
    G$lsp0 <- c(G$lsp0,um$lsp0)
    G$sp <- c(G$sp,um$sp)
    pof <- ncol(G$X)
  } ## formula loop end

  ## If there is overlap then there is a danger of lack of identifiability of the 
  ## parametric terms, especially if there are factors present in shared components. 
  ## The following code deals with this possibility... 
  if (lp.overlap) {
    rt <- olid(G$X,G$nsdf,pstart,flpi,lpi)
    if (length(rt$dind)>0) { ## then columns have to be dropped
      warning("dropping unidentifiable parametric terms from model",call.=FALSE)
      G$X <- G$X[,-rt$dind] ## drop cols 
      G$cmX <- G$cmX[-rt$dind]
      G$term.names <- G$term.names[-rt$dind]
      ## adjust indexing in smooth list, noting that coefs of smooths 
      ## are never dropped by dind 
      for (i in 1:length(G$smooth)) {
        k <- sum(rt$dind < G$smooth[[i]]$first.para)
        G$smooth[[i]]$first.para <- G$smooth[[i]]$first.para - k
        G$smooth[[i]]$last.para <- G$smooth[[i]]$last.para - k
      }
      for (i in 1:length(G$off)) G$off[i] <- G$off[i] - sum(rt$dind < G$off[i])
      ## replace various indices with updated versions...
      # pstart <- rt$pstart; G$nsdf <- rt$nsdf ## these two only needed in predict.gam - cleaner to leave unchanged
      lpi <- rt$lpi
      attr(G$nsdf,"drop.ind") <- rt$dind ## store drop index
    } 
  }
  attr(lpi,"overlap") <- lp.overlap
  attr(G$X,"lpi") <- lpi
  attr(G$nsdf,"pstart") <- pstart ##unlist(lapply(lpi,min))

  ## assemble a global indicator array for non-linear parameters... 
  G$g.index <- rep(FALSE,ncol(G$X))
  n.sp0 <- 0
  if (length(G$smooth)) for (i in 1:length(G$smooth)) {
    if (!is.null(G$smooth[[i]]$g.index)) G$g.index[G$smooth[[i]]$first.para:G$smooth[[i]]$last.para] <- G$smooth[[i]]$g.index
    n.sp <- length(G$smooth[[i]]$S)
    if (n.sp) {
      G$smooth[[i]]$first.sp <- n.sp0 + 1
      n.sp0 <- G$smooth[[i]]$last.sp <- n.sp0 + n.sp
    }
  }  
  if (!any(G$g.index)) G$g.index <- NULL  

  G
} ## gam.setup.list
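## A user level call exercising this (a sketch, based on the gaulss family,
## which takes one formula for the mean and one for the standard deviation):
## dat <- gamSim(1,n=200) ## gamSim is part of mgcv
## b <- gam(list(y~s(x0)+s(x1),~s(x2)),family=gaulss(),data=dat)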



gam.setup <- function(formula,pterms,
                     data=stop("No data supplied to gam.setup"),knots=NULL,sp=NULL,
                    min.sp=NULL,H=NULL,absorb.cons=TRUE,sparse.cons=0,select=FALSE,idLinksBases=TRUE,
                    scale.penalty=TRUE,paraPen=NULL,gamm.call=FALSE,drop.intercept=FALSE,
                    diagonal.penalty=FALSE,apply.by=TRUE,list.call=FALSE,modCon=0) 
## set up the model matrix, penalty matrices and auxiliary information about the smoothing bases
## needed for a gam fit.
## elements of returned object:
## * m - number of smooths
## * min.sp - minimum smoothing parameters
## * H supplied H matrix
## * pearson.extra, dev.extra, n.true --- entries to hold these quantities
## * pterms - terms object for parametric terms
## * intercept TRUE if intercept present
## * offset - the model offset
## * nsdf - number of strictly parametric coefs
## * contrasts 
## * xlevels - records levels of factors
## * assign - indexes which parametric model matrix columns map to which term in pterms
## * smooth - list of smooths
## * S - penalties (non-zero block only)
## * off - first coef penalized by each element of S
## * cmX - col mean of X
## * P - maps parameters in fit constraint parameterization to those in prediction parameterization
## * X - model matrix
## * sp
## * rank
## * n.paraPen
## * L 
## * lsp0
## * y - response
## * C - constraint matrix - only if absorb.cons==FALSE
## * n - dim(y)
## * w - weights
## * term.names
## * nP
{ # split the formula if the object being passed is a formula, otherwise it's already split

  if (inherits(formula,"split.gam.formula")) split <- formula else
  if (inherits(formula,"formula")) split <- interpret.gam(formula) 
  else stop("First argument is no sort of formula!") 
  
  if (length(split$smooth.spec)==0) {
    if (split$pfok==0) stop("You've got no model....")
    m <- 0
  } else  m <- length(split$smooth.spec) # number of smooth terms
  
  G <- list(m=m,min.sp=min.sp,H=H,pearson.extra=0,
            dev.extra=0,n.true=-1,pterms=pterms) ## dev.extra gets added to deviance if REML/ML used in gam.fit3
  
  if (is.null(attr(data,"terms"))) # then data is not a model frame
  mf <- model.frame(split$pf,data,drop.unused.levels=FALSE) # must be false or can end up with wrong prediction matrix!
  else mf <- data # data is already a model frame

  G$intercept <-  attr(attr(mf,"terms"),"intercept")>0

  ## get any model offset. Complicated by possibility of offsets in multiple formulae...
  if (list.call) {
    offi <- attr(pterms,"offset")
    if (!is.null(offi)) {
      G$offset <- mf[[names(attr(pterms,"dataClasses"))[offi]]]
    }
  } else G$offset <- model.offset(mf)   # get any model offset including from offset argument
  
  if (!is.null(G$offset))  G$offset <- as.numeric(G$offset) 

  # construct strictly parametric model matrix.... 
  if (drop.intercept) attr(pterms,"intercept") <- 1 ## ensure there is an intercept to drop
  X <- model.matrix(pterms,mf)
  if (drop.intercept) { ## some extended families require intercept to be dropped 
    xat <- attributes(X);ind <- xat$assign>0 ## index of non intercept columns 
    X <- X[,ind,drop=FALSE] ## some extended families need to drop intercept
    xat$assign <- xat$assign[ind];xat$dimnames[[2]]<-xat$dimnames[[2]][ind];
    xat$dim[2] <- xat$dim[2]-1;attributes(X) <- xat
    G$intercept <- FALSE
  } 
  rownames(X) <- NULL ## save memory
  
  G$nsdf <- ncol(X)
  G$contrasts <- attr(X,"contrasts")
  G$xlevels <- .getXlevels(pterms,mf)
  G$assign <- attr(X,"assign") # used to tell which coeffs relate to which pterms

  ## now deal with any user supplied penalties on the parametric part of the model...
  PP <- parametricPenalty(pterms,G$assign,paraPen,sp)
  if (!is.null(PP)) { ## strip out supplied sps already used
    ind <- 1:length(PP$sp)
    if (!is.null(sp)) sp <- sp[-ind]
    if (!is.null(min.sp)) { 
      PP$min.sp <- min.sp[ind]
      min.sp <- min.sp[-ind]
    } 
  }
  
  # next work through smooth terms (if any) extending model matrix.....
  
  G$smooth <- list()
  G$S <- list()
 
  if (gamm.call) { ## flag that this is a call from gamm --- some smoothers need to know!
    if (m>0) for (i in 1:m) attr(split$smooth.spec[[i]],"gamm") <- TRUE
  }

  if (m>0 && idLinksBases) { ## search smooth.spec[[]] for terms linked by common id's
    id.list <- list() ## id information list
    for (i in 1:m) if (!is.null(split$smooth.spec[[i]]$id)) {
      id <- as.character(split$smooth.spec[[i]]$id)
      if (length(id.list)&&id%in%names(id.list)) { ## it's an existing id
        ni <- length(id.list[[id]]$sm.i) ## number of terms so far with this id
        id.list[[id]]$sm.i[ni+1] <- i    ## adding smooth.spec index to this id's list
        ## clone smooth.spec from base smooth spec....
        base.i <- id.list[[id]]$sm.i[1]
         
        split$smooth.spec[[i]] <- clone.smooth.spec(split$smooth.spec[[base.i]],
                                                      split$smooth.spec[[i]])
        
        ## add data for this term to the data list for basis setup...
        temp.term <- split$smooth.spec[[i]]$term
       
        ## note cbind deliberate in next line, as construction will handle matrix argument 
        ## correctly... 
        for (j in 1:length(temp.term)) id.list[[id]]$data[[j]] <- cbind(id.list[[id]]$data[[j]],
                                                          get.var(temp.term[j],data,vecMat=FALSE))
       
       } else { ## new id
        id.list[[id]] <- list(sm.i=i) ## start the array of indices of smooths with this id
        id.list[[id]]$data <- list()
        ## need to collect together all data for which this basis will be used,
        ## for basis setup...
        term <- split$smooth.spec[[i]]$term
        for (j in 1:length(term)) id.list[[id]]$data[[j]] <- get.var(term[j],data,vecMat=FALSE)
      } ## new id finished
    }
  } ## id.list complete

  G$off<-array(0,0)
  first.para<-G$nsdf+1
  sm <- list()
  newm <- 0
  if (m>0) for (i in 1:m) {
    # idea here is that terms are set up in accordance with information given in split$smooth.spec
    # appropriate basis constructor is called depending on the class of the smooth
    # constructor returns penalty matrices, model matrix and basis specific information
  
    id <- split$smooth.spec[[i]]$id
    if (is.null(id)||!idLinksBases) { ## regular evaluation
      sml <- smoothCon(split$smooth.spec[[i]],data,knots,absorb.cons,scale.penalty=scale.penalty,
                       null.space.penalty=select,sparse.cons=sparse.cons,
                       diagonal.penalty=diagonal.penalty,apply.by=apply.by,modCon=modCon) 
    } else { ## it's a smooth with an id, so basis setup data differs from model matrix data
      names(id.list[[id]]$data) <- split$smooth.spec[[i]]$term ## give basis data suitable names
      sml <- smoothCon(split$smooth.spec[[i]],id.list[[id]]$data,knots,
                       absorb.cons,n=nrow(data),dataX=data,scale.penalty=scale.penalty,
                       null.space.penalty=select,sparse.cons=sparse.cons,
                       diagonal.penalty=diagonal.penalty,apply.by=apply.by,modCon=modCon)
    }
    #for (j in 1:length(sml)) {
    #  newm <- newm + 1
    #  sm[[newm]] <- sml[[j]]
    #}
    ind <- 1:length(sml)
    sm[ind+newm] <- sml[ind]
    newm <- newm + length(sml)
  }
  
  G$m <- m <- newm ## number of actual smooths

  ## at this stage, it is necessary to impose any side conditions required
  ## for identifiability
  if (m>0) { 
    sm <- gam.side(sm,X,tol=.Machine$double.eps^.5)
    if (!apply.by) for (i in 1:length(sm)) { ## restore any by-free model matrices
      if (!is.null(sm[[i]]$X0)) { ## there is a by-free matrix to restore 
        ind <- attr(sm[[i]],"del.index") ## columns, if any to delete
        sm[[i]]$X <- if (is.null(ind)) sm[[i]]$X0 else sm[[i]]$X0[,-ind,drop=FALSE] 
      }
    }
  }

  ## The matrix, L, mapping the underlying log smoothing parameters to the
  ## log of the smoothing parameter multiplying the S[[i]] must be
  ## worked out...
  idx <- list() ## idx[[id]]$c contains index of first col in L relating to id
  L <- matrix(0,0,0)
  lsp.names <- sp.names <- rep("",0) ## need a list of names to identify sps in global sp array
  if (m>0) for (i in 1:m) {
    id <- sm[[i]]$id
    ## get the L matrix for this smooth...
    length.S <- if (is.null(sm[[i]]$updateS)) length(sm[[i]]$S) else sm[[i]]$n.sp ## deals with possibility of non-linear penalty
    Li <- if (is.null(sm[[i]]$L)) diag(length.S) else sm[[i]]$L 
     
    if (length.S > 0) { ## there are smoothing parameters to name
       if (length.S == 1) lspn <- sm[[i]]$label else {
          Sname <- names(sm[[i]]$S)
          lspn <- if (is.null(Sname)) paste(sm[[i]]$label,1:length.S,sep="") else
                  paste(sm[[i]]$label,Sname,sep="") ## names for all sp's
       }
       spn <- lspn[1:ncol(Li)] ## names for actual working sps
    }

    ## extend the global L matrix...
    if (is.null(id)||is.null(idx[[id]])) { ## new `id'     
      if (!is.null(id)) { ## create record in `idx'
        idx[[id]]$c <- ncol(L)+1   ## starting column in L for this `id'
        idx[[id]]$nc <- ncol(Li)   ## number of columns relating to this `id'
      }
      L <- rbind(cbind(L,matrix(0,nrow(L),ncol(Li))),
                 cbind(matrix(0,nrow(Li),ncol(L)),Li))
      if (length.S > 0) { ## there are smoothing parameters to name
        sp.names <- c(sp.names,spn) ## extend the sp name vector
        lsp.names <- c(lsp.names,lspn) ## extend full.sp name vector
      }
    } else { ## it's a repeat id => shares existing sp's
      L0 <- matrix(0,nrow(Li),ncol(L))
      if (ncol(Li)>idx[[id]]$nc) {
        stop("Later terms sharing an `id' can not have more smoothing parameters than the first such term")
      }
      L0[,idx[[id]]$c:(idx[[id]]$c+ncol(Li)-1)] <- Li
      L <- rbind(L,L0)
      if (length.S > 0) { ## there are smoothing parameters to name
        lsp.names <- c(lsp.names,lspn) ## extend full.sp name vector
      }
    }
  }

  ## create the model matrix...

  Xp <- NULL ## model matrix under prediction constraints, if given
  if (m>0) for (i in 1:m) {
    n.para <- ncol(sm[[i]]$X)
    # define which elements in the parameter vector this smooth relates to....
    sm[[i]]$first.para<-first.para     
    first.para <- first.para+n.para
    sm[[i]]$last.para <- first.para-1
    ## termwise offset handling ...
    Xoff <- attr(sm[[i]]$X,"offset")
    if (!is.null(Xoff)) { 
      if (is.null(G$offset)) G$offset <- Xoff
      else G$offset <- G$offset + Xoff
    }
    ## model matrix accumulation ...
    
    ## alternative version under alternative constraint first (prediction only)
    if (is.null(sm[[i]]$Xp)) {
      if (!is.null(Xp)) Xp <- cbind2(Xp,sm[[i]]$X)
    } else { 
      if (is.null(Xp)) Xp <- X
      Xp <- cbind2(Xp,sm[[i]]$Xp);sm[[i]]$Xp <- NULL
    }
    ## now version to use for fitting ...
    X <- cbind2(X,sm[[i]]$X);sm[[i]]$X<-NULL
   
    G$smooth[[i]] <- sm[[i]]   
  }

  if (is.null(Xp)) {
    G$cmX <- colMeans(X) ## useful for componentwise CI construction 
  } else {
    G$cmX <- colMeans(Xp)
    ## transform from fit params to prediction params...
    ## G$P <- qr.coef(qr(Xp),X) ## old code assumes always full rank!!
    
    qrx <- qr(Xp,LAPACK=TRUE)
    R <- qr.R(qrx)
    p <- ncol(R)
    rank <- Rrank(R) ## rank of Xp/R    
    QtX <- qr.qty(qrx,X)[1:rank,]
    if (rank<p) { ## rank deficient  
      R <- R[1:rank,]
      qrr <- qr(t(R),tol=0)
      R <- qr.R(qrr)
      G$P <- forwardsolve(t(R),QtX)
    } else {
      G$P <- backsolve(R,QtX)
    }
    if (rank<p) {
      G$P <- qr.qy(qrr,rbind(G$P,matrix(0,p-rank,p)))
    }
    G$P[qrx$pivot,] <- G$P
  }
  ## cmX relates to computation of CIs incorporating uncertainty about the mean
  ## It may make more sense to incorporate all uncertainty about the mean,
  ## rather than just the uncertainty in the fixed effects mean. This means
  ## incorporating the mean of random effects and unconstrained smooths. Hence
  ## comment out the following.
  #if (G$nsdf>0) G$cmX[-(1:G$nsdf)] <- 0 ## zero the smooth parts here 
  #else G$cmX <- G$cmX * 0
  G$X <- X;rm(X)
  n.p <- ncol(G$X) 
  # deal with penalties


  ## min.sp must be length nrow(L) to make sense
  ## sp must be length ncol(L) --- need to partition
  ## L into columns relating to free log smoothing parameters,
  ## and columns, L0, corresponding to values supplied in sp.
  ## lsp0 = L0%*%log(sp[sp>=0]) [need to fudge sp==0 case by
  ## setting log(0) to log(effective zero) computed case-by-case]

  ## following deals with supplied and estimated smoothing parameters...
  ## first process the `sp' array supplied to `gam'...
  
  if (!is.null(sp)) { # then user has supplied fixed smoothing parameters
    ok <- TRUE 
    if (length(sp) < ncol(L)) { 
      warning("Supplied smoothing parameter vector is too short - ignored.")
      ok <- FALSE
    }
    if (sum(is.na(sp))) { 
      warning("NA's in supplied smoothing parameter vector - ignoring.")
      ok <- FALSE
    }
  } else ok <- FALSE
  G$sp <- if (ok) sp[1:ncol(L)] else rep(-1,ncol(L))
  
  names(G$sp) <- sp.names

  ## now work through the smooths searching for any `sp' elements
  ## supplied in `s' or `te' terms.... This relies on `idx' created 
  ## above...
  
  k <- 1 ## current location in `sp' array
  if (m>0) for (i in 1:m) {
    id <- sm[[i]]$id
    if (is.null(sm[[i]]$L)) Li <- diag(length(sm[[i]]$S)) else Li <- sm[[i]]$L 
    if (is.null(id)) { ## it's a smooth without an id
      spi <- sm[[i]]$sp
      if (!is.null(spi)) { ## sp supplied in `s' or `te'
        if (length(spi)!=ncol(Li)) stop("incorrect number of smoothing parameters supplied for a smooth term")
        G$sp[k:(k+ncol(Li)-1)] <- spi
      }       
      k <- k + ncol(Li) 
    } else { ## smooth has an id
      spi <- sm[[i]]$sp
      if (is.null(idx[[id]]$sp.done)) { ## not already dealt with these sp's
        if (!is.null(spi)) { ## sp supplied in `s' or `te'
          if (length(spi)!=ncol(Li)) stop("incorrect number of smoothing parameters supplied for a smooth term")
          G$sp[idx[[id]]$c:(idx[[id]]$c+idx[[id]]$nc-1)] <- spi
        }
        idx[[id]]$sp.done <- TRUE ## only makes sense to use supplied `sp' from defining term
        k <- k + idx[[id]]$nc 
      }
    }
  } ## finished processing `sp' vectors supplied in `s' or `te' terms

  ## copy initial sp's back into smooth objects, so there is a record of
  ## fixed and free...
  k <- 1 
  if (length(idx)) for (i in 1:length(idx)) idx[[i]]$sp.done <- FALSE
  if (m>0) for (i in 1:m) { ## work through all smooths
    id <- sm[[i]]$id 
    if (!is.null(id)) { ## smooth with id
      if (idx[[id]]$nc>0) { ## only copy if there are sp's
        G$smooth[[i]]$sp <- G$sp[idx[[id]]$c:(idx[[id]]$c+idx[[id]]$nc-1)]
      }   
      if (!idx[[id]]$sp.done) { ## only update k on first encounter with this smooth
        idx[[id]]$sp.done <- TRUE
        k <- k + idx[[id]]$nc
      }
    
    } else { ## no id, just work through sp 
      if (is.null(sm[[i]]$L)) nc <- length(sm[[i]]$S) else nc <- ncol(sm[[i]]$L)
      if (nc>0) G$smooth[[i]]$sp <- G$sp[k:(k+nc-1)]
      k <- k + nc
    }
  } ## now all elements of G$smooth have a record of initial sp. 


  if (!is.null(min.sp)) { # then minimum s.p.'s supplied
    if (length(min.sp)<nrow(L)) stop("length of min.sp is wrong.")
    if (nrow(L)>0) min.sp <- min.sp[1:nrow(L)]
    if (sum(is.na(min.sp))) stop("NA's in min.sp.")
    if (sum(min.sp<0)) stop("elements of min.sp must be non negative.")
  }

  k.sp <- 0 # count through sp and S
  G$rank <- array(0,0)
  if (m>0) for (i in 1:m) {
    sm<-G$smooth[[i]]
    if (length(sm$S)>0)
    for (j in 1:length(sm$S)) {  # work through penalty matrices
      k.sp <- k.sp+1
      G$off[k.sp] <- sm$first.para 
      G$S[[k.sp]] <- sm$S[[j]]
      G$rank[k.sp]<-sm$rank[j]
      if (!is.null(min.sp)) {
        if (is.null(H)) H<-matrix(0,n.p,n.p)
        H[sm$first.para:sm$last.para,sm$first.para:sm$last.para] <-
        H[sm$first.para:sm$last.para,sm$first.para:sm$last.para]+min.sp[k.sp]*sm$S[[j]] 
      }           
    } 
  }
 
  ## need to modify L, lsp.names, G$S, G$sp, G$rank and G$off to include any penalties
  ## on parametric stuff, at this point....
  if (!is.null(PP)) { ## deal with penalties on parametric terms
    L <- rbind(cbind(L,matrix(0,nrow(L),ncol(PP$L))),
                 cbind(matrix(0,nrow(PP$L),ncol(L)),PP$L))
    G$off <- c(PP$off,G$off)
    G$S <- c(PP$S,G$S)
    G$rank <- c(PP$rank,G$rank)
    G$sp <- c(PP$sp,G$sp)
    lsp.names <- c(PP$full.sp.names,lsp.names)
    G$n.paraPen <- length(PP$off)
    if (!is.null(PP$min.sp)) { ## deal with minimum sps
      if (is.null(H)) H <- matrix(0,n.p,n.p)
      for (i in 1:length(PP$S)) {
        ind <- PP$off[i]:(PP$off[i]+ncol(PP$S[[i]])-1)
        H[ind,ind] <- H[ind,ind] + PP$min.sp[i] * PP$S[[i]]
      }
    } ## min.sp stuff finished
  } else G$n.paraPen <- 0


  ## Now remove columns of L and rows of sp relating to fixed 
  ## smoothing parameters, and use removed elements to create lsp0
  ## (see the numeric sketch following this function)...

  fix.ind <- G$sp>=0

  if (sum(fix.ind)) {
    lsp0 <- G$sp[fix.ind]
    ind <- lsp0==0 ## find the zero s.p.s
    ef0 <- indi <- (1:length(ind))[ind]
    if (length(indi)>0) for (i in 1:length(indi)) {
      ## find "effective zero" to replace each zero s.p. with
      ii <- G$off[i]:(G$off[i]+ncol(G$S[[i]])-1) 
      ef0[i] <- norm(G$X[,ii],type="F")^2/norm(G$S[[i]],type="F")*.Machine$double.eps*.1
    }
    lsp0[!ind] <- log(lsp0[!ind])
    lsp0[ind] <- log(ef0) ##log(.Machine$double.xmin)*1000 ## zero fudge
    lsp0 <- as.numeric(L[,fix.ind,drop=FALSE]%*%lsp0)

    L <- L[,!fix.ind,drop=FALSE]  
    G$sp <- G$sp[!fix.ind]
  } else {lsp0 <- rep(0,nrow(L))}

  G$H <- H

  if (ncol(L)==nrow(L)&&!sum(L!=diag(ncol(L)))) L <- NULL ## it's just the identity

  G$L <- L;G$lsp0 <- lsp0
  names(G$lsp0) <- lsp.names ## names of all smoothing parameters (not just underlying)

  if (absorb.cons==FALSE) {  ## need to accumulate constraints 
    G$C <- matrix(0,0,n.p)
    if (m>0) {
      for (i in 1:m) {
        if (is.null(G$smooth[[i]]$C)) n.con<-0 
        else n.con<- nrow(G$smooth[[i]]$C)
        C <- matrix(0,n.con,n.p)
        C[,G$smooth[[i]]$first.para:G$smooth[[i]]$last.para]<-G$smooth[[i]]$C
        G$C <- rbind(G$C,C)
        G$smooth[[i]]$C <- NULL
      }
      rm(C)
    }
  } ## absorb.cons == FALSE
 
  G$y <- drop(data[[split$response]])
  ydim <- dim(G$y)
  if (!is.null(ydim)&&length(ydim)<2) dim(G$y) <- NULL
         
  ##data[[deparse(split$full.formula[[2]],backtick=TRUE)]]
  
  G$n <- nrow(data)

  if (is.null(data$"(weights)")) G$w <- rep(1,G$n)
  else G$w <- data$"(weights)"  

  ## Create names for model coefficients... 

  if (G$nsdf > 0) term.names <- colnames(G$X)[1:G$nsdf] else term.names<-array("",0)
  n.smooth <- length(G$smooth)
  ## create coef names, if smooth has any coefs, and create a global indicator of non-linear parameters
  ## g.index, if needed
  n.sp0 <- 0
  if (n.smooth) for (i in 1:n.smooth) {
    k <- 1
    jj <- G$smooth[[i]]$first.para:G$smooth[[i]]$last.para
    if (G$smooth[[i]]$df > 0) for (j in jj) {
      term.names[j] <- paste(G$smooth[[i]]$label,".",as.character(k),sep="")
      k <- k+1
    }
    n.sp <- length(G$smooth[[i]]$S)
    if (n.sp) { ## record sp this relates to in full sp vector
      G$smooth[[i]]$first.sp <- n.sp0 + 1
      n.sp0 <- G$smooth[[i]]$last.sp <- n.sp0 + n.sp
    }
    if (!is.null(G$smooth[[i]]$g.index)) {
      if (is.null(G$g.index)) G$g.index <- rep(FALSE,n.p)
      G$g.index[jj] <- G$smooth[[i]]$g.index
    }
    
  }
  G$term.names <- term.names

  ## Deal with non-linear parameterizations...
 

  G$pP <- PP ## return paraPen object, if present

  G
} ## gam.setup
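
## A minimal numeric sketch of the fixed/free smoothing parameter split
## performed above in gam.setup (illustration only):
## L <- diag(2); sp <- c(2,-1) ## first sp fixed at 2, second left free
## fix.ind <- sp>=0
## lsp0 <- as.numeric(L[,fix.ind,drop=FALSE]%*%log(sp[fix.ind])) ## c(log(2),0)
## L <- L[,!fix.ind,drop=FALSE] ## one column left - one free log sp remains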



formula.gam <- function(x, ...)
# formula.lm and formula.glm reconstruct the formula from x$terms, this is 
# problematic because of the way mgcv handles s() and te() terms 
{ x$formula
}

BC <- function(y,lambda=1) {
## Box-Cox transform of y; lambda=0 gives the log limit.
  if (lambda==0) log(y) else (y^lambda-1)/lambda
}

corBC <- function(lambda,y,mu) {
## minus the correlation between sorted Box-Cox transformed residuals and
## standard normal quantiles - minimize over lambda for a normalizing transform.
  n <- length(y)
  qn <- qnorm((1:n-.5)/n)
  -cor(qn,sort(BC(y,lambda)-BC(mu,lambda)))
}
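
## Illustration only: minimizing corBC over lambda picks the Box-Cox
## transform making the residuals most nearly Gaussian, e.g. assuming y and
## mu are the response and fitted values from some model...
## lambda <- optimize(corBC,c(-2,2),y=y,mu=mu)$minimum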

neico4 <- function(nei,dd,dd1) {
## nei is neighbourhood structure. dd are leave one out perturbations.
## dd1 the same for a different residual
## This routine computes the most basic covariance
## matrix estimate, based on observed correlation within neighbourhoods
## and the assumption of zero correlation outside them. Somehow it is a
## testament to my extreme slowness that it took me 6 months to come up
## with this skull-thumpingly obvious solution having tried every other
## hare-brained scheme first.
  n <- length(nei$i)
  i1 <- 0
  W <- dd*0 ## dbeta/dy (y-mu)
  for (i in 1:n) {
    i0 <- i1+1
    i1 <- nei$m[i]
    ii <- nei$k[i0:i1] ## neighbours of i
    W[nei$i[i],] <-  W[nei$i[i],] + colSums(dd[ii,,drop=FALSE]) 
  }
  t(dd1)%*%W ## return the covariance matrix estimate
} ## neico4
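
## Sketch of the `nei' structure neico4 expects (illustration only): for 4
## points, each with neighbourhood consisting of itself and the next point
## (where one exists), nei <- list(i=1:4,m=c(2,4,6,7),k=c(1,2,2,3,3,4,4)),
## so the neighbours of point 3 are nei$k[(nei$m[2]+1):nei$m[3]], i.e. c(3,4).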

pdef <- function(V,eps = .Machine$double.eps^.9) {
## make symmetric matrix V positive definite, by flooring its eigenvalues
## at a small positive threshold.
  ev <- eigen(V,symmetric=TRUE)
  thresh <- max(abs(ev$values))*eps
  ii <- ev$values<thresh
  if (any(ii)) {
    ev$values[ii] <- thresh
    V <- ev$vectors %*% (ev$values*t(ev$vectors))
  }
  V
} ## pdef
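
## e.g. (illustration only) an indefinite symmetric matrix has its negative
## eigenvalue floored at a tiny positive threshold...
## V <- matrix(c(1,2,2,1),2,2); eigen(V)$values ## 3 -1
## eigen(pdef(V))$values ## 3 and a tiny positive value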

gam.outer <- function(lsp,fscale,family,control,method,optimizer,criterion,scale,gamma,G,start=NULL,nei=NULL,...)
# function for smoothing parameter estimation by outer optimization. i.e.
# P-IRLS scheme iterated to convergence for each trial set of smoothing
# parameters.
# MAJOR STEPS:
#  1. Call appropriate smoothing parameter optimizer, and extract fitted model
#    `object'
#  2. Call `gam.fit3.post.proc' to get parameter covariance matrices, edf etc to
#     add to `object' 
{ if (is.na(optimizer[2])) optimizer[2] <- "newton"
  if (!optimizer[2]%in%c("newton","bfgs","nlm","optim")) stop("unknown outer optimization method.")

  if (length(lsp)==0) { ## no sp estimation to do -- run a fit instead
    optimizer[2] <- "no.sps" ## will cause gam2objective to be called, below
  }
  nbGetTheta <- substr(family$family[1],1,17)=="Negative Binomial" && length(family$getTheta())>1
  if (nbGetTheta) stop("Please provide a single value for theta or use nb to estimate it")
 
  ## some preparations for the other methods, which all use gam.fit3...
 
  family <- fix.family.link(family)
  family <- fix.family.var(family)
  if (method%in%c("REML","ML","P-REML","P-ML")) family <- fix.family.ls(family)
  
  if (optimizer[1]=="efs"&& optimizer[2] != "no.sps" ) { ## experimental extended efs
    ##warning("efs is still experimental!")
    if (inherits(family,"general.family")) {
      object <- efsud(x=G$X,y=G$y,lsp=lsp,Sl=G$Sl,weights=G$w,offset=G$offset,family=family,
                     control=control,Mp=G$Mp,start=start)
    } else {
      family <- fix.family.ls(family)
      object <- efsudr(x=G$X,y=G$y,lsp=lsp,Eb=G$Eb,UrS=G$UrS,weights=G$w,family=family,offset=G$offset,
                       start=start,
                       U1=G$U1, intercept = TRUE,scale=scale,Mp=G$Mp,control=control,n.true=G$n.true,...)
    }
    object$gcv.ubre <- object$REML
  } else if (optimizer[2]=="newton"||optimizer[2]=="bfgs"){ ## the gam.fit3 method 
    if (optimizer[2]=="bfgs") 
    b <- bfgs(lsp=lsp,X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,L=G$L,lsp0=G$lsp0,offset=G$offset,U1=G$U1,Mp = G$Mp,
                family=family,weights=G$w,control=control,gamma=gamma,scale=scale,conv.tol=control$newton$conv.tol,
                maxNstep= control$newton$maxNstep,maxSstep=control$newton$maxSstep,maxHalf=control$newton$maxHalf, 
                printWarn=FALSE,scoreType=criterion,null.coef=G$null.coef,start=start,
                pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,nei=nei,...) else
    b <- newton(lsp=lsp,X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,L=G$L,lsp0=G$lsp0,offset=G$offset,U1=G$U1,Mp=G$Mp,
                family=family,weights=G$w,control=control,gamma=gamma,scale=scale,conv.tol=control$newton$conv.tol,
                maxNstep= control$newton$maxNstep,maxSstep=control$newton$maxSstep,maxHalf=control$newton$maxHalf, 
                printWarn=FALSE,scoreType=criterion,null.coef=G$null.coef,start=start,
                pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,
		edge.correct=control$edge.correct,nei=nei,...)                
                
    object <- b$object
    object$REML <- object$REML1 <- object$REML2 <-
    object$GACV <- object$D2 <- object$P2 <- object$UBRE2 <- object$trA2 <- 
    object$GACV1 <- object$GACV2 <- object$GCV2 <- object$D1 <- object$P1 <- NULL
    object$sp <- as.numeric(exp(b$lsp))
    object$gcv.ubre <- b$score
    b <- list(conv=b$conv,iter=b$iter,grad=b$grad,hess=b$hess,score.hist=b$score.hist) ## return info
    object$outer.info <- b   
  } else { ## methods calling gam.fit3 
    args <- list(X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,offset=G$offset,U1=G$U1,Mp=G$Mp,family=family,
             weights=G$w,control=control,scoreType=criterion,gamma=gamma,scale=scale,
             L=G$L,lsp0=G$lsp0,null.coef=G$null.coef,n.true=G$n.true,Sl=G$Sl,start=start,nei=nei)
  
    if (optimizer[2]=="nlm") {
       b <- nlm(gam4objective, lsp, typsize = lsp, fscale = fscale, 
            stepmax = control$nlm$stepmax, ndigit = control$nlm$ndigit,
	    gradtol = control$nlm$gradtol, steptol = control$nlm$steptol, 
            iterlim = control$nlm$iterlim,
	    check.analyticals=control$nlm$check.analyticals,
            args=args,...)
      lsp <- b$estimate
      
    } else if (optimizer[2]=="optim") {
      b<-optim(par=lsp,fn=gam2objective,gr=gam2derivative,method="L-BFGS-B",control=
         list(fnscale=fscale,factr=control$optim$factr,lmm=min(5,length(lsp))),args=args,...)
      lsp <- b$par
    } else b <- NULL
    obj <- gam2objective(lsp,args,printWarn=TRUE,...) # final model fit, with warnings 
    object <- attr(obj,"full.fit")
    attr(obj,"full.fit") <- NULL
    object$gcv.ubre <- obj 
    object$outer.info <- b
    object$sp <- exp(lsp)
  } # end of methods calling gam.fit3
  
  
  if (scale>0) { 
    object$scale.estimated <- FALSE; object$scale <- scale} else {
    object$scale <- object$scale.est;object$scale.estimated <- TRUE
  } 
  
  object$control <- control
  object$method <- method
  if (inherits(family,"general.family")) {
    mv <- gam.fit5.post.proc(object,G$Sl,G$L,G$lsp0,G$S,G$off,gamma)
    ## object$coefficients <- Sl.initial.repara(G$Sl,object$coefficients,inverse=TRUE)
  } else mv <- gam.fit3.post.proc(G$X,G$L,G$lsp0,G$S,G$off,object,gamma)

  object[names(mv)] <- mv
  if (!is.null(nei)) {
    if (isTRUE(all.equal(sort(nei$i),1:nrow(G$X))) && length(nei$mi)==nrow(G$X) && criterion=="NCV") { ## each point predicted on its own
      loocv <- length(nei$k)==length(nei$i) && isTRUE(all.equal(nei$i,nei$k)) && length(nei$m) == length(nei$k) ## leave one out CV,
      if (is.logical(nei$jackknife)&&nei$jackknife) loocv <- TRUE ## straight jackknife requested
      if (nei$jackknife < 0) nei$jackknife <- TRUE ## signal cov matrix calc
    } else {
      loocv <- FALSE
      if (nei$jackknife < 0) nei$jackknife <- FALSE
    }  
  }
  if (!is.null(nei)&&(criterion!="NCV"||nei$jackknife)) { ## returning NCV when other criterion used for sp selection, or computing perturbations
    if (!is.null(nei$QNCV)&&nei$QNCV) family$qapprox <- TRUE
    if (is.null(family$qapprox)) family$qapprox <- FALSE
    lsp <- if (is.null(G$L)) log(object$sp) + G$lsp0 else G$L%*%log(object$sp)+G$lsp0
    if (object$scale.estimated && criterion %in% c("REML","ML","EFS")) lsp <- lsp[-length(lsp)] ## drop log scale estimate
    if (is.null(nei$gamma)) nei$gamma <- 1 ## a major application of this NCV is to select gamma - so it must not itself change with gamma!
    if (criterion!="NCV") nei$jackknife <- FALSE ## no cov matrix stuff.
    if (nei$jackknife) {
      n <- length(nei$i)
      nei1 <- if (loocv) nei else list(i=1:n,mi=1:n,m=1:n,k=1:n) ## set up for LOO CV or requested straight jackknife
      nei1$jackknife <- 10 ## signal that cross-validated beta perturbations are required
    } else nei1 <- nei ## called with another criterion
    b <- gam.fit3(x=G$X, y=G$y, sp=lsp,Eb=G$Eb,UrS=G$UrS,
                 offset = G$offset,U1=G$U1,Mp=G$Mp,family = family,weights=G$w,deriv=0,
                 control=control,gamma=nei$gamma, 
		 scale=scale,printWarn=FALSE,start=start,scoreType="NCV",null.coef=G$null.coef,
                 pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,nei=nei1,...)
    object$NCV <- as.numeric(b$NCV)
    if (nei$jackknife) { ## need to compute direct cov matrix estimate
      dd <- attr(b$NCV,"dd") ## leave-one-out parameter perturbations
      if (!loocv) { ## need to account for assumed independence structure
        rsd0 <- residuals.gam(object)
        ## compute cross validated residuals...
        eta.cv <- attr(object$gcv.ubre,"eta.cv")
        fitted0 <- object$fitted.values 
        if (inherits(object$family,"general.family")) {
          for (j in 1:ncol(eta.cv)) {
            object$fitted.values[,j] <-
	      if (j <= length(G$offset)&&!is.null(G$offset[[j]])) family$linfo[[j]]$linkinv(eta.cv[,j]+G$offset[[j]]) else 
                family$linfo[[j]]$linkinv(eta.cv[,j])
          }	
        } else {
          object$fitted.values <- object$family$linkinv(eta.cv+G$offset)
        }
        rsd <- residuals.gam(object)
        object$fitted.values <- fitted0
        ii <- !is.finite(rsd);if (any(ii)) rsd[ii] <- rsd0[ii]
        dd1 <- dd*rsd/rsd0 ## correct to avoid underestimation (over-estimation if CV residuals used alone)	

        Vcv <- pdef(neicov(dd1,nei))  #*n/(n-sum(object$edf)) ## cross validated V (too large)
	V0 <- pdef(neicov(dd,nei))  #*n/(n-sum(object$edf))   ## direct V (too small)
	Vj <- (Vcv+V0)/2       ## combination less bad
	alpha <- max(sum(diag(Vj))/sum(diag(object$Ve)),1) ## inverse learning rate - does lower limit make sense? 
	alpha1 <- max(sum(Vj*object$Ve)/sum(object$Ve^2),1)
	Vcv <- Vcv + (object$Vp-object$Ve)*alpha1 ## bias correct conservative (too large)
	Vj <- Vj + (object$Vp-object$Ve)*alpha ## bias correct
	attr(Vj,"Vcv") <- Vcv ## conservative as attribute
      } else { ## LOO or straight jackknife requested
        Vj <- pdef(crossprod(dd)) ## straight jackknife is fine.
	alpha <- sum(diag(Vj))/sum(diag(object$Ve)) ## inverse learning rate (do not impose lower limit)
	Vj <- Vj + (object$Vp-object$Ve)*alpha ## bias correct
      }
      attr(object$Ve,"Vp") <- object$Vp ## keep original
      attr(object$Ve,"inv.learn") <- alpha
      object$Vp <- Vj     
    }
  }
  ## note: use of the following (Vc) in place of Vp appears to mess up p-values for smooths,
  ##       but doesn't change r.e. p-values of course. 
  #if (!is.null(mv$Vc)) object$Vc <- mv$Vc 
  #if (!is.null(mv$edf2)) object$edf2 <- mv$edf2
  #object$Vp <- mv$Vb
  #object$V.sp <- mv$V.sp
  #object$hat<-mv$hat
  #object$Ve <- mv$Ve
  #object$edf<-mv$edf
  #object$edf1 <- mv$edf1
  ##object$F <- mv$F ## DoF matrix --- probably not needed
  #object$R <- mv$R ## qr.R(sqrt(W)X)
  object$aic <- object$aic + 2*sum(object$edf)
  object$nsdf <- G$nsdf
  object$K <-  object$D1 <-  object$D2 <-  object$P <-  object$P1 <-  object$P2 <- object$dw.drho <-  
  object$GACV <-  object$GACV1 <-  object$GACV2 <-  object$REML <-  object$REML1 <-  object$REML2 <-  
  object$GCV<-object$GCV1<- object$GCV2 <- object$UBRE <-object$UBRE1 <- object$UBRE2 <- object$trA <-
  object$trA1<- object$trA2 <- object$alpha <- object$alpha1 <- object$scale.est <- NULL
  object$sig2 <- object$scale
  
  object
} ## gam.outer
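
## The outer optimizer used above is selected via the `optimizer' argument
## to `gam', e.g. (illustration only, assuming a data frame `dat'):
## b <- gam(y~s(x),data=dat,method="REML",optimizer=c("outer","bfgs"))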

get.null.coef <- function(G,start=NULL,etastart=NULL,mustart=NULL,...) {
## Get an estimate of the coefs corresponding to maximum reasonable deviance...
  y <- G$y
  weights <- G$w
  nobs <- G$n ## ignore codetools warning!!
  ##start <- etastart <- mustart <- NULL
  family <- G$family
  eval(family$initialize) ## have to do this to ensure y numeric
  y <- as.numeric(y)
  mum <- mean(y)+0*y ## mean of y, as a vector
  etam <- family$linkfun(mum)
  null.coef <- qr.coef(qr(G$X),etam)
  null.coef[is.na(null.coef)] <- 0;
  ## get a suitable function scale for optimization routines
  null.scale <- sum(family$dev.resids(y,mum,weights))/nrow(G$X) 
  list(null.coef=null.coef,null.scale=null.scale)
}

estimate.gam <- function (G,method,optimizer,control,in.out,scale,gamma,start=NULL,nei=NULL,...) {
## Do gam estimation and smoothness selection...

  if (!is.null(G$family$method) && !method %in% G$family$method) method <- G$family$method[1] 

  if (method %in% c("QNCV","NCV")||!is.null(nei)) {
    optimizer <- c("outer","bfgs")
    if (method=="QNCV") { method <- "NCV";G$family$qapprox <- TRUE } else G$family$qapprox <- FALSE
    if (is.null(nei)) nei <- list(i=1:G$n,mi=1:G$n,m=1:G$n,k=1:G$n) ## LOOCV
    if (is.null(nei$k)||is.null(nei$m)) nei$k <- nei$m <- nei$mi <- nei$i <- 1:G$n 
    if (is.null(nei$i)) if (length(nei$m)==G$n) nei$mi <- nei$i <- 1:G$n else stop("unclear which points NCV neighbourhoods belong to")
    if (length(nei$mi)!=length(nei$m)) stop("for NCV number of dropped and predicted neighbourhoods must match")
    nei$m <- round(nei$m); nei$mi <- round(nei$mi); nei$k <- round(nei$k); nei$i <- round(nei$i); 
    if (min(nei$i)<1||max(nei$i)>G$n||min(nei$k)<1||max(nei$k)>G$n) stop("nei indexes non-existent points")
    if (nei$m[1]<1||max(nei$m)>length(nei$k)||length(nei$m)<2||any(diff(nei$m)<1)) stop('nei$m faulty')
    if (nei$mi[1]<1||max(nei$mi)>length(nei$i)||length(nei$mi)<2||any(diff(nei$mi)<1)) stop('nei$mi faulty')
    if (is.null(nei$jackknife)) nei$jackknife <- -1
  }  

  if (inherits(G$family,"extended.family")) { ## then there are some restrictions...
    if (!(method%in%c("REML","ML","NCV"))) method <- "REML"
    if (inherits(G$family,"general.family")) {
       if (!(method%in%c("REML","NCV"))||optimizer[1]=="efs") method <- "REML"
       if (method=="NCV"&&is.null(G$family$ncv)) {
         warning("family lacks a Neighbourhood Cross Validation method")
         method <- "REML"
       }	 
       G$Sl <- Sl.setup(G) ## prepare penalty sequence
      
       G$X <- Sl.initial.repara(G$Sl,G$X,both.sides=FALSE) ## re-parameterize accordingly
 
       if (!is.null(start)) start <- Sl.initial.repara(G$Sl,start,inverse=FALSE,both.sides=FALSE)

       ## make sure optimizer appropriate for available derivatives
       if (!is.null(G$family$available.derivs)) {
         if (G$family$available.derivs==1 && optimizer[1]!="efs")  optimizer <- c("outer","bfgs")
	 if (G$family$available.derivs==0) optimizer <- "efs"
       }	 
    }
  }

  if (!optimizer[1]%in%c("outer","efs")) stop("unknown optimizer")
  if (optimizer[1]=="efs") method <- "REML"
  if (!method%in%c("GCV.Cp","GACV.Cp","REML","P-REML","ML","P-ML","NCV")) stop("unknown smoothness selection criterion") 
  G$family <- fix.family(G$family)
  G$rS <- mini.roots(G$S,G$off,ncol(G$X),G$rank)
 
  reml <- method%in%c("REML","P-REML","ML","P-ML","NCV")
  
  Ssp <- totalPenaltySpace(G$S,G$H,G$off,ncol(G$X))
  G$Eb <- Ssp$E       ## balanced penalty square root for rank determination purposes 
  G$U1 <- cbind(Ssp$Y,Ssp$Z) ## eigen space basis
  G$Mp <- ncol(Ssp$Z) ## null space dimension
  G$UrS <- list()     ## need penalty matrices in overall penalty range space...
  if (length(G$S)>0) for (i in 1:length(G$S)) G$UrS[[i]] <- t(Ssp$Y)%*%G$rS[[i]] else i <- 0
  if (!is.null(G$H)) { ## then the sqrt fixed penalty matrix H is needed for (RE)ML 
      G$UrS[[i+1]] <- t(Ssp$Y)%*%mroot(G$H)
  }


  # is outer looping needed ?
  outer.looping <- (!G$am ||reml||method=="GACV.Cp"||method=="NCV"||!is.null(nei)) 

  ## sort out exact sp selection criterion to use

  fam.name <- G$family$family[1]

  if (scale==0) { ## choose criterion for performance iteration
    if (fam.name == "binomial"||fam.name == "poisson") G$sig2<-1 #ubre
      else G$sig2 <- -1 #gcv
  } else {G$sig2 <- scale}


  if (reml||method=="NCV") { ## then RE(ML) selection, but which variant?
   criterion <- method
   if (fam.name == "binomial"||fam.name == "poisson") scale <- 1
   if (inherits(G$family,"extended.family") && scale <=0) {
     scale <- if (is.null(G$family$scale)) 1 else G$family$scale
   }
  } else {
    if (scale==0) { 
      if (fam.name=="binomial"||fam.name=="poisson") scale <- 1 #ubre
      else scale <- -1 #gcv
    }
    if (scale > 0) criterion <- "UBRE"
    else {
      if (method=="GCV.Cp") criterion <- "GCV" else criterion <- "GACV"
    }
  }


  if (substr(fam.name,1,17)=="Negative Binomial") { 
    scale <- 1; ## no choice
    if (method%in%c("GCV.Cp","GACV.Cp")) criterion <- "UBRE"
  }
  ## Reset P-ML or P-REML in known scale parameter cases
  if (scale>0) {
    if (method=="P-ML") criterion <- method <- "ML" else 
    if (method=="P-REML")  criterion <- method <- "REML"
  } 

  family <- G$family; nb.fam.reset <- FALSE
  
  if (substr(G$family$family[1],1,17)=="Negative Binomial") { ## initialize sensibly
      scale <- G$sig2 <- 1
      G$family <- negbin(max(family$getTheta()),link=family$link)
      nb.fam.reset <- TRUE
  }
  
  ## extended family may need to manipulate G...
    
  if (!is.null(G$family$preinitialize)) {
    if (inherits(G$family,"general.family")) {
      Gmod <- G$family$preinitialize(G) ## modifies some elements of G
      for (gnam in names(Gmod)) G[[gnam]] <- Gmod[[gnam]] ## copy these into G  
    } else {
      ## extended family - usually just initializes theta and possibly y
      if (!is.null(attr(G$family$preinitialize,"needG"))) attr(G$family,"G") <- G ## more complicated
      pini <- G$family$preinitialize(G$y,G$family)
      attr(G$family,"G") <- NULL
      if (!is.null(pini$family)) G$family <- pini$family
      if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta)
      if (!is.null(pini$y)) G$y <- pini$y
    }
  }

  if (length(G$sp)>0) lsp2 <- log(initial.spg(G$X,G$y,G$w,G$family,G$S,G$rank,G$off,
                                  offset=G$offset,L=G$L,lsp0=G$lsp0,E=G$Eb,...))
  else lsp2 <- rep(0,0)

  if (!outer.looping) { ## additive GCV/UBRE
    object <- am.fit(G,control=control,gamma=gamma,...)
    lsp <- log(object$sp) 
  } else {
    if (!is.null(in.out)) { # initial s.p.s and scale provided
      ok <- TRUE ## run a few basic checks
      if (is.null(in.out$sp)||is.null(in.out$scale)) ok <- FALSE
      if (length(in.out$sp)!=length(G$sp)) ok <- FALSE
      if (!ok) stop("in.out incorrect: see documentation")
      lsp <- log(in.out$sp) 
    } else { 
      lsp <- lsp2
    } 

    if (nb.fam.reset) G$family <- family ## restore, in case manipulated for negative binomial 
       
    ## Get an estimate of the coefs corresponding to maximum reasonable deviance,
    ## and an estimate of the function scale, suitable for optimizers that need this.
    ## Doesn't make sense for general families that have to initialize coefs directly.
  
    null.stuff  <- if (inherits(G$family,"general.family")) list() else { 
      if (is.null(G$family$get.null.coef)) get.null.coef(G,...) else G$family$get.null.coef(G,...)
    }

    scale.as.sp <- (criterion%in%c("REML","ML")||(criterion=="NCV"&&inherits(G$family,"extended.family")))&&scale<=0
 
    if (scale.as.sp) { ## log(scale) to be estimated as a smoothing parameter
      if (is.null(in.out)) {
          log.scale <- log(null.stuff$null.scale/10)
      } else {
          log.scale <- log(in.out$scale)
      }
      lsp <- c(lsp,log.scale) ## append log initial scale estimate to lsp
      ## extend G$L, if present...
      if (!is.null(G$L)) { 
        G$L <- cbind(rbind(G$L,rep(0,ncol(G$L))),c(rep(0,nrow(G$L)),1))
      }
      if (!is.null(G$lsp0)) G$lsp0 <- c(G$lsp0,0)
    } 
    ## check if there are extra parameters to estimate
    if (inherits(G$family,"extended.family")&&!inherits(G$family,"general.family")&&G$family$n.theta>0) {
      th0 <- G$family$getTheta() ## additional (initial) parameters of likelihood 
      nth <- length(th0)
      nlsp <- length(lsp)
      ind <- 1:nlsp + nth ## only used if nlsp>0
      lsp <- c(th0,lsp) ## append to start of lsp
      ## extend G$L, G$lsp0 if present...
      if (!is.null(G$L)&&nth>0) { 
        L <- rbind(cbind(diag(nth),matrix(0,nth,ncol(G$L))),
                   cbind(matrix(0,nrow(G$L),nth),G$L))
        #sat <- attr(G$L,"scale")
        G$L <- L
        #attr(G$L,"scale") <- sat
        #attr(G$L,"not.sp") <- nth ## first not.sp params are not smoothing params
      }
      if (!is.null(G$lsp0)) G$lsp0 <- c(th0*0,G$lsp0)
    } else nth <- 0

   
    G$null.coef <- null.stuff$null.coef

    object <- gam.outer(lsp,fscale=null.stuff$null.scale, ##abs(object$gcv.ubre)+object$sig2/length(G$y),
                        family=G$family,control=control,criterion=criterion,method=method,
                        optimizer=optimizer,scale=scale,gamma=gamma,G=G,start=start,nei=nei,...)
    
    if (scale.as.sp)  object$sp <- object$sp[-length(object$sp)] ## drop scale estimate from sp array
    
    if (inherits(G$family,"extended.family")&&nth>0) object$sp <- object$sp[-(1:nth)] ## drop theta params
 
  } ## finished outer looping

  ## correct null deviance if there's an offset [Why not correct calc in gam.fit/3???]....

  if (!inherits(G$family,"extended.family")&&G$intercept&&any(G$offset!=0)) object$null.deviance <-
         glm(object$y~offset(G$offset),family=object$family,weights=object$prior.weights)$deviance

  object$method <- if (method=="NCV"&&G$family$qapprox) "QNCV" else criterion

  object$smooth<-G$smooth

  names(object$edf) <- G$term.names
  names(object$edf1) <- G$term.names

  if (inherits(family,"general.family")) {
    object$coefficients <- Sl.initial.repara(G$Sl,object$coefficients,inverse=TRUE)
  }
  
  ## extended family may need to manipulate fit object. Code
  ## will need to include the following line if G$X used...
  ## G$X <- Sl.initial.repara(G$Sl,G$X,inverse=TRUE,cov=FALSE,both.sides=FALSE)
  if (!is.null(G$family$postproc)) {
    if (inherits(G$family,"general.family")) eval(G$family$postproc) else {
      posr <- G$family$postproc(family=object$family,y=G$y,prior.weights=object$prior.weights,
              fitted=object$fitted.values,linear.predictors=object$linear.predictors,offset=G$offset,
	      intercept=G$intercept)
      if (!is.null(posr$family)) object$family$family <- posr$family
      if (!is.null(posr$deviance)) object$deviance <- posr$deviance
      if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance	      
    }
  } 

  if (!is.null(G$P)) { ## matrix transforming from fit to prediction parameterization
    object$coefficients <- as.numeric(G$P %*% object$coefficients)
    object$Vp <- G$P %*% object$Vp %*% t(G$P)
    object$Ve <- G$P %*% object$Ve %*% t(G$P)
    rownames(object$Vp) <- colnames(object$Vp) <- G$term.names
    rownames(object$Ve) <- colnames(object$Ve) <- G$term.names
  }
  names(object$coefficients) <- G$term.names 
  
  object
} ## end estimate.gam
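
## Initial smoothing parameters and scale for estimate.gam can be supplied
## through the `in.out' argument of `gam', e.g. (illustration only, assuming
## an earlier fit `b0' to comparable data):
## b <- gam(y~s(x),data=dat,method="REML",in.out=list(sp=b0$sp,scale=b0$sig2))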

variable.summary <- function(pf,dl,n) {
## routine to summarize all the variables in dl, which is a list
## containing raw input variables to a model (i.e. no functions applied)
## pf is a formula containing the strictly parametric part of the
## model for the variables in dl. A list is returned, with names given by 
## the variables. For variables in the parametric part, then the list elements
## may be:
## * a 1 row matrix with elements set to the column medians, if variable 
##   is a matrix.
## * a 3 figure summary (min,median,max) for a numeric variable.
## * a factor variable, set to the most commonly occurring level (all levels retained)
## --- classes are as original data type, but anything not numeric, factor or matrix
## is coerced to numeric. 
## For non-parametric variables, any matrices are coerced to numeric, otherwise as 
## parametric.      
## medians in the above are always observed values (to deal with variables coerced to 
## factors in the model formulae in a nice way).
## variables with fewer than `n' entries are discarded
   v.n <- length(dl)
   ## if (v.n) for (i in 1:v.n) if (length(dl[[i]])<n) dl[[i]] <- NULL 
   
   v.name <- v.name1 <- names(dl)
   if (v.n) ## need to strip out names of any variables that are too short.
   { k <- 0 ## counter for retained variables
     for (i in 1:v.n) if (length(dl[[i]])>=n) { 
       k <- k+1
       v.name[k] <- v.name1[i] ## save names of variables of correct length
     }
     if (k>0) v.name <- v.name[1:k] else v.name <- rep("",k)
   }

   ## v.name <- names(dl)    ## the variable names
   p.name <- all.vars(pf[-2]) ## variables in parametric part (not response)
   vs <- list()
   v.n <- length(v.name)
   if (v.n>0) for (i in 1:v.n) {
     if (v.name[i]%in%p.name) para <- TRUE else para <- FALSE ## is variable in the parametric part?

     if (para&&is.matrix(dl[[v.name[i]]])&&ncol(dl[[v.name[i]]])>1) { ## parametric matrix --- a special case
       x <- matrix(apply(dl[[v.name[i]]],2,quantile,probs=0.5,type=3,na.rm=TRUE),1,ncol(dl[[v.name[i]]])) ## nearest to median entries
     } else { ## anything else
       x <- dl[[v.name[i]]]
       if (is.character(x)) x <- as.factor(x)
       if (is.factor(x)) {
         x <- x[!is.na(x)]
         lx <- levels(x)
         freq <- tabulate(x)
         ii <- min((1:length(lx))[freq==max(freq)])
         x <- factor(lx[ii],levels=lx) 
       } else {
         x <- as.numeric(x)
         x <- c(min(x,na.rm=TRUE),as.numeric(quantile(x,probs=.5,type=3,na.rm=TRUE)) ,max(x,na.rm=TRUE)) ## 3 figure summary
       }
     }
     vs[[v.name[i]]] <- x
   }
   vs
} ## end variable.summary
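
## e.g. (illustration only):
## dl <- list(x=runif(10),f=factor(c(rep("a",4),rep("b",6))))
## vs <- variable.summary(y~x+f,dl,10)
## vs$x is then the c(min,median,max) of x, while vs$f holds the modal
## level "b" (with all levels retained).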


nanei <- function(nb,k) {
## nb is an NCV neighbourhood defining list, k an array of points to drop.
## this function adjusts nb to remove the dropped points and adjust the
## indices accordingly, so that the structure works with a data frame
## from which rows in k have been dropped.
  if (!length(k)) return(nb)
  if (is.null(nb$k)||is.null(nb$m)||is.null(nb$mi)||is.null(nb$i)) stop("full nei list needed if data incomplete")
  ## first work on dropped folds...
  kk <- which(nb$k %in% k) ## position of dropped in nb$k
  ## adjust m for the fact that points are to be dropped...
  nb$m <- nb$m - cumsum(tabulate(findInterval(kk-1,nb$m)+1,nbins=length(nb$m)))
  ii <- which(diff(c(0,nb$m))==0) ## identify zero length folds
  if (length(ii)) nb$m <- nb$m[-ii] ## drop zero length folds
  nb$k <- nb$k[-kk] ## drop the elements of k
  nb$k <- nb$k - findInterval(nb$k,sort(k)) ## and shift indices to account for dropped

  ## now the prediction folds...
  kk <- which(nb$i %in% k) ## position of dropped in nb$i
  ## adjust mi for the fact that points are to be dropped...
  nb$mi <- nb$mi - cumsum(tabulate(findInterval(kk-1,nb$mi)+1,nbins=length(nb$mi)))
  ii <- which(diff(c(0,nb$mi))==0) ## identify zero length folds
  if (length(ii)) nb$mi <- nb$mi[-ii] ## drop zero length folds
  nb$i <- nb$i[-kk] ## drop the elements of k
  nb$i <- nb$i - findInterval(nb$i,sort(k)) ## and shift indices to account for dropped
  nb 
} ## nanei
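
## e.g. dropping point 2 from a leave-one-out structure over 4 points
## (illustration only):
## nb <- list(i=1:4,mi=1:4,m=1:4,k=1:4)
## nanei(nb,2) ## gives i=k=1:3 and m=mi=1:3, indices shifted past the drop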

## don't be tempted to change to control=list(...) --- messes up passing on other stuff via ...

gam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action,offset=NULL,
                method="GCV.Cp",optimizer=c("outer","newton"),control=list(),#gam.control(),
                scale=0,select=FALSE,knots=NULL,sp=NULL,min.sp=NULL,H=NULL,gamma=1,fit=TRUE,
                paraPen=NULL,G=NULL,in.out=NULL,drop.unused.levels=TRUE,drop.intercept=NULL,
		nei=NULL,discrete=FALSE,...) {
## Routine to fit a GAM to some data. The model is stated in the formula, which is then 
## interpreted to figure out which bits relate to smooth terms and which to parametric terms.
## Basic steps:
## 1. Formula is split up into parametric and non-parametric parts,
##    and a fake formula constructed to be used to pick up data for
##    model frame. pterms "terms" object(s) created for parametric 
##    components, model frame created along with terms object.
## 2. 'gam.setup' called to do most of basis construction and other
##    elements of model setup.
## 3. 'estimate.gam' is called to estimate the model. This performs further 
##    pre- and post- fitting steps and calls either 'gam.fit' (performance
##    iteration) or 'gam.outer' (default method). 'gam.outer' calls the actual 
##    smoothing parameter optimizer ('newton' by default) and then any post 
##    processing. The optimizer calls 'gam.fit3/4/5' to estimate the model 
##    coefficients and obtain derivatives w.r.t. the smoothing parameters.
## 4. Finished 'gam' object assembled.
   control <- do.call("gam.control",control)
   if (is.null(G) && discrete) { ## get bam to do the setup
     cl <- match.call() ## NOTE: check all arguments more carefully
     cl[[1]] <- quote(bam)
     cl$fit = FALSE
     G <- eval(cl,parent.frame()) ## NOTE: cl probably needs modifying in G to work properly (with fit=FALSE reset?? also below??)
   }
   if (is.null(G)) {
    ## create model frame..... 
    gp <- interpret.gam(formula) # interpret the formula 
    cl <- match.call() # call needed in gam object for update to work
    mf <- match.call(expand.dots=FALSE)
    mf$formula <- gp$fake.formula 
    mf$family <- mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$min.sp<-mf$H<-mf$select <- mf$drop.intercept <- mf$nei <-
                 mf$gamma<-mf$method<-mf$fit<-mf$paraPen<-mf$G<-mf$optimizer <- mf$in.out <- mf$discrete <- mf$...<-NULL
    mf$drop.unused.levels <- drop.unused.levels
    mf[[1]] <- quote(stats::model.frame) ## as.name("model.frame")
    pmf <- mf
    mf <- eval(mf, parent.frame()) # the model frame now contains all the data 
    if (nrow(mf)<2) stop("Not enough (non-NA) data to do anything meaningful")
    terms <- attr(mf,"terms")

    if (!is.null(nei)) { ## check if data dropped
      k <- attr(mf,"na.action")
      if (!is.null(k)) { ## need to adjust nei for dropped data
        nei <- nanei(nei,as.numeric(k))
      }
    }
    ## summarize the *raw* input variables
    ## note can't use get_all_vars here -- buggy with matrices
    vars <- all_vars1(gp$fake.formula[-2]) ## drop response here
    inp <- parse(text = paste("list(", paste(vars, collapse = ","),")"))

    ## allow a bit of extra flexibility in what `data' is allowed to be (as model.frame actually does)
    if (!is.list(data)&&!is.data.frame(data)) data <- as.data.frame(data) 

    dl <- eval(inp, data, parent.frame())
    names(dl) <- vars ## list of all variables needed
    var.summary <- variable.summary(gp$pf,dl,nrow(mf)) ## summarize the input data
    rm(dl) ## save space    

    ## pterms are terms objects for the parametric model components used in 
    ## model setup - don't try obtaining by evaluating pf in mf - doesn't
    ## work in general (e.g. with offset)...

    if (is.list(formula)) { ## then there are several linear predictors
      environment(formula) <- environment(formula[[1]]) ## e.g. termplots needs this
      pterms <- list()
      tlab <- rep("",0)
      for (i in 1:length(formula)) {
        pmf$formula <- gp[[i]]$pf 
        pterms[[i]] <- attr(eval(pmf, parent.frame()),"terms")
        tlabi <- attr(pterms[[i]],"term.labels")
        if (i>1&&length(tlabi)>0) tlabi <- paste(tlabi,i-1,sep=".")
        tlab <- c(tlab,tlabi)
      }
      attr(pterms,"term.labels") <- tlab ## labels for all parametric terms, distinguished by predictor
    } else { ## single linear predictor case
      pmf$formula <- gp$pf
      pmf <- eval(pmf, parent.frame()) # pmf contains all data for parametric part
      pterms <- attr(pmf,"terms") ## pmf only used for this
    }

    if (is.character(family)) family <- eval(parse(text=family))
    if (is.function(family)) family <- family()
    if (is.null(family$family)) stop("family not recognized")
  
    if (family$family[1]=="gaussian" && family$link=="identity") am <- TRUE
    else am <- FALSE
    
    if (!control$keepData) rm(data) ## save space

    ## check whether family requires intercept to be dropped...
    # drop.intercept <- if (is.null(family$drop.intercept) || !family$drop.intercept) FALSE else TRUE
    # drop.intercept <- as.logical(family$drop.intercept)
    if (is.null(family$drop.intercept)) { ## family does not provide information
      lengthf <- if (is.list(formula)) length(formula) else 1
      if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, lengthf) else {
        drop.intercept <- rep(drop.intercept,length=lengthf) ## force drop.intercept to correct length
	if (sum(drop.intercept)) family$drop.intercept <- drop.intercept ## ensure prediction works
      }
    } else drop.intercept <- as.logical(family$drop.intercept) ## family overrides argument
    
    if (inherits(family,"general.family")&&!is.null(family$presetup)) eval(family$presetup)

    gsname <- if (is.list(formula)) "gam.setup.list" else "gam.setup" 

    G <- do.call(gsname,list(formula=gp,pterms=pterms,
                 data=mf,knots=knots,sp=sp,min.sp=min.sp,
                 H=H,absorb.cons=TRUE,sparse.cons=0,select=select,
                 idLinksBases=control$idLinksBases,scale.penalty=control$scalePenalty,
                 paraPen=paraPen,drop.intercept=drop.intercept))
    
    G$var.summary <- var.summary
    G$family <- family
   
    if ((is.list(formula)&&(is.null(family$nlp)||family$nlp!=gp$nlp))||
        (!is.list(formula)&&!is.null(family$npl)&&(family$npl>1))) stop("incorrect number of linear predictors for family")
       
    G$terms<-terms;
    G$mf<-mf;G$cl<-cl;
    G$am <- am

    if (is.null(G$offset)) G$offset<-rep(0,G$n)
     
    G$min.edf <- G$nsdf ## -dim(G$C)[1]
    if (G$m) for (i in 1:G$m) G$min.edf<-G$min.edf+G$smooth[[i]]$null.space.dim

    G$formula <- formula
    G$pred.formula <- gp$pred.formula
    environment(G$formula)<-environment(formula)
  } else { ## G not null
    if (!is.null(sp)&&any(sp>=0)) { ## request to modify smoothing parameters
      if (is.null(G$L)) G$L <- diag(length(G$sp))
      if (length(sp)!=ncol(G$L)) stop('length of sp must be number of free smoothing parameters in original model')
      ind <- sp>=0 ## which smoothing parameters are now fixed
      spind <- log(sp[ind]); 
      spind[!is.finite(spind)] <- -30 ## set any zero parameters to effective zero
      G$lsp0 <- G$lsp0 + drop(G$L[,ind,drop=FALSE] %*% spind) ## add fix to lsp0
      G$L <- G$L[,!ind,drop=FALSE] ## drop the cols of G
      G$sp <- rep(-1,ncol(G$L))
    }
  }

  if (!fit) {
    class(G) <- "gam.prefit"
    return(G)
  }  


  ## if (ncol(G$X)>nrow(G$X)) warning("Model has more coefficients than data") ##stop("Model has more coefficients than data") 

  G$conv.tol <- control$mgcv.tol      # tolerance for mgcv
  G$max.half <- control$mgcv.half # max number of step halvings in mgcv's Newton update

  object <- estimate.gam(G,method,optimizer,control,in.out,scale,gamma,nei=nei,...)

  
  if (!is.null(G$L)) { 
    object$full.sp <- as.numeric(exp(G$L%*%log(object$sp)+G$lsp0))
    names(object$full.sp) <- names(G$lsp0)
  }
  names(object$sp) <- names(G$sp)
  object$paraPen <- G$pP
  object$formula <- G$formula
  ## store any lpi attribute of G$X for use in predict.gam...
  if (is.list(object$formula)) attr(object$formula,"lpi") <- attr(G$X,"lpi")
  object$var.summary <- G$var.summary 
  object$cmX <- G$cmX ## column means of model matrix --- useful for CIs
  object$model<-G$mf # store the model frame
  object$na.action <- attr(G$mf,"na.action") # how to deal with NA's
  object$control <- control
  object$terms <- G$terms
  object$pred.formula <- G$pred.formula

  attr(object$pred.formula,"full") <- reformulate(all.vars(object$terms))
  
  object$pterms <- G$pterms
  object$assign <- G$assign # applies only to pterms
  object$contrasts <- G$contrasts
  object$xlevels <- G$xlevels
  object$offset <- G$offset
  if (!is.null(G$Xcentre)) object$Xcentre <- G$Xcentre
  if (control$keepData) object$data <- data
  object$df.residual <- nrow(G$X) - sum(object$edf)
  object$min.edf <- G$min.edf
  if (G$am&&!(method%in%c("REML","ML","P-ML","P-REML"))) object$optimizer <- "magic" else object$optimizer <- optimizer
  object$call <- G$cl # needed for update() to work
  class(object) <- c("gam","glm","lm")
  if (is.null(object$deviance)) object$deviance <- sum(residuals(object,"deviance")^2)
  names(object$gcv.ubre) <- method
  ## The following lines avoid potentially very large objects in hidden environments being stored
  ## with fitted gam objects. The downside is that functions like 'termplot' that rely on searching in
  ## the environment of the formula can fail...
  environment(object$formula) <- environment(object$pred.formula) <-
  environment(object$terms) <- environment(object$pterms) <- .GlobalEnv
  if (!is.null(object$model))  environment(attr(object$model,"terms"))  <- .GlobalEnv
  if (!is.null(attr(object$pred.formula,"full"))) environment(attr(object$pred.formula,"full")) <- .GlobalEnv
  object
} ## gam
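
## e.g. (illustration only, using the mgcv test data simulator):
## set.seed(2); dat <- gamSim(1,n=200)
## b <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),data=dat,method="REML")
## summary(b); plot(b,pages=1)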



print.gam<-function (x,...) 
# default print function for gam objects
{ print(x$family)
  cat("Formula:\n")
  if (is.list(x$formula)) for (i in 1:length(x$formula)) print(x$formula[[i]]) else
     print(x$formula)
  n.smooth<-length(x$smooth)
  if (n.smooth==0)
  cat("Total model degrees of freedom",sum(x$edf),"\n")
  else
  { edf<-0
    cat("\nEstimated degrees of freedom:\n")
    for (i in 1:n.smooth)
    edf[i]<-sum(x$edf[x$smooth[[i]]$first.para:x$smooth[[i]]$last.para])
    edf.str <- format(round(edf,digits=4),digits=3,scientific=FALSE)
    for (i in 1:n.smooth) {   
    cat(edf.str[i]," ",sep="")
      if (i%%7==0) cat("\n")
    }
    cat(" total =",round(sum(x$edf),digits=2),"\n")
  }
  if (!is.null(x$method)&&!(x$method%in%c("PQL","lme.ML","lme.REML")))  
  cat("\n",x$method," score: ",x$gcv.ubre,"     ",sep="")
  if (!is.null(x$rank) && x$rank< length(x$coefficients)) cat("rank: ",x$rank,"/",length(x$coefficients),sep="") 
  cat("\n")
  invisible(x)
} ## print.gam

gam.control <- function (nthreads=1,ncv.threads=1,irls.reg=0.0,epsilon = 1e-7, maxit = 200,
                         mgcv.tol=1e-7,mgcv.half=15,trace =FALSE,
                         rank.tol=.Machine$double.eps^0.5,
                         nlm=list(),optim=list(),newton=list(),#outerPIsteps=0,
                         idLinksBases=TRUE,scalePenalty=TRUE,efs.lspmax=15,efs.tol=.1,
                         keepData=FALSE,scale.est="fletcher",edge.correct=FALSE) 
# Control structure for a gam. 
# irls.reg is the regularization parameter to use in the GAM fitting IRLS loop.
# epsilon is the tolerance to use in the IRLS MLE loop. maxit is the number 
# of IRLS iterations to use. mgcv.tol is the tolerance to use in the mgcv call within each IRLS. mgcv.half is the 
# number of step halvings to employ in the mgcv search for the optimal GCV score, before giving up 
# on a search direction. trace turns on or off some de-bugging information.
# rank.tol is the tolerance to use for rank determination
# outerPIsteps was the number of performance iteration steps used to initialize
#                         outer iteration (unused for a while) 
{   scale.est <- match.arg(scale.est,c("fletcher","pearson","deviance"))
    if (!is.logical(edge.correct)&&(!is.numeric(edge.correct)||edge.correct<0)) stop(
        "edge.correct must be logical or a positive number")
    if (!is.numeric(nthreads) || nthreads <1) stop("nthreads must be a positive integer")
    if (!is.numeric(ncv.threads) || ncv.threads <1) stop("ncv.threads must be a positive integer") 
    if (!is.numeric(irls.reg) || irls.reg <0.0) stop("IRLS regularizing parameter must be a non-negative number.")
    if (!is.numeric(epsilon) || epsilon <= 0) 
        stop("value of epsilon must be > 0")
    if (!is.numeric(maxit) || maxit <= 0) 
        stop("maximum number of iterations must be > 0")
    if (rank.tol<0||rank.tol>1) 
    { rank.tol=.Machine$double.eps^0.5
      warning("silly value supplied for rank.tol: reset to square root of machine precision.")
    }
    # work through nlm defaults
    if (is.null(nlm$ndigit)||nlm$ndigit<2) nlm$ndigit <- max(2,ceiling(-log10(epsilon)))
    nlm$ndigit <- round(nlm$ndigit)
    ndigit <- floor(-log10(.Machine$double.eps))
    if (nlm$ndigit>ndigit) nlm$ndigit <- ndigit
    if (is.null(nlm$gradtol)) nlm$gradtol <- epsilon*10
    nlm$gradtol <- abs(nlm$gradtol)
    ## note that nlm will stop after hitting stepmax 5 consecutive times
    ## hence should not be set too small ... 
    if (is.null(nlm$stepmax)||nlm$stepmax==0) nlm$stepmax <- 2
    nlm$stepmax <- abs(nlm$stepmax)
    if (is.null(nlm$steptol)) nlm$steptol <- 1e-4
    nlm$steptol <- abs(nlm$steptol)
    if (is.null(nlm$iterlim)) nlm$iterlim <- 200
    nlm$iterlim <- abs(nlm$iterlim)
    ## Should be reset to TRUE for a while whenever the derivative code is altered...
    if (is.null(nlm$check.analyticals)) nlm$check.analyticals <- FALSE
    nlm$check.analyticals <- as.logical(nlm$check.analyticals) 

    # and newton defaults
    if (is.null(newton$conv.tol)) newton$conv.tol <- 1e-6
    if (is.null(newton$maxNstep)) newton$maxNstep <- 5
    if (is.null(newton$maxSstep)) newton$maxSstep <- 2
    if (is.null(newton$maxHalf)) newton$maxHalf <- 30
    if (is.null(newton$use.svd)) newton$use.svd <- FALSE

    # and optim defaults
    if (is.null(optim$factr)) optim$factr <- 1e7
    optim$factr <- abs(optim$factr)
    if (efs.tol<=0) efs.tol <- .1

    list(nthreads=round(nthreads),ncv.threads=round(ncv.threads),irls.reg=irls.reg,epsilon = epsilon, maxit = maxit,
         trace = trace, mgcv.tol=mgcv.tol,mgcv.half=mgcv.half,
         rank.tol=rank.tol,nlm=nlm,
         optim=optim,newton=newton,#outerPIsteps=outerPIsteps,
         idLinksBases=idLinksBases,scalePenalty=scalePenalty,efs.lspmax=efs.lspmax,efs.tol=efs.tol,
         keepData=as.logical(keepData[1]),scale.est=scale.est,edge.correct=edge.correct)
    
} ## gam.control
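
## e.g. (illustration only) tightening Newton convergence and tracing:
## ctrl <- gam.control(trace=TRUE,newton=list(conv.tol=1e-8))
## b <- gam(y~s(x),data=dat,control=ctrl)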



mgcv.get.scale<-function(Theta,weights,good,mu,mu.eta.val,G)
# Get scale implied by current fit and trial -ve binomial Theta. I've used
# the mu and mu.eta.val from the fit, rather than those implied by it....
{ variance<- negbin(Theta)$variance
  w<-sqrt(weights[good]*mu.eta.val[good]^2/variance(mu)[good])
  wres<-w*(G$y-G$X%*%G$p)
  sum(wres^2)/(G$n-sum(G$edf))
}


mgcv.find.theta<-function(Theta,T.max,T.min,weights,good,mu,mu.eta.val,G,tol)
# searches for -ve binomial theta between given limits to get scale=1 
{ scale<-mgcv.get.scale(Theta,weights,good,mu,mu.eta.val,G)
  T.hi<-T.low<-Theta
  while (scale<1&&T.hi<T.max) 
  { T.hi<-T.hi*2
    T.hi<-min(T.hi,T.max)
    scale<-mgcv.get.scale(T.hi,weights,good,mu,mu.eta.val,G)
  } 
  if (all.equal(T.hi,T.max)==TRUE && scale<1) return(T.hi)
  T.low<-T.hi
  while (scale>=1&&T.low>T.min)
  { T.low<-T.low/2 
    T.low<-max(T.low,T.min)
    scale<-mgcv.get.scale(T.low,weights,good,mu,mu.eta.val,G)
  } 
  if (all.equal(T.low,T.min)==TRUE && scale>1) return(T.low)
  # (T.low,T.hi) now brackets scale=1. 
  while (abs(scale-1)>tol)
  { Theta<-(T.low+T.hi)/2
    scale<-mgcv.get.scale(Theta,weights,good,mu,mu.eta.val,G)
    if (scale<1) T.low<-Theta
    else T.hi<-Theta
  }
  Theta
}


full.score <- function(sp,G,family,control,gamma,...)
# function suitable for calling from nlm in order to polish gam fit
# so that actual minimum of score is found in generalized cases
{ .Deprecated(msg="Internal mgcv function full.score is no longer used and will be removed soon.")
  if (is.null(G$L)) {
    G$sp<-exp(sp);
  } else {
    G$sp <- as.numeric(exp(G$L%*%sp + G$lsp0))
  }
  # set up single fixed penalty....
  q<-NCOL(G$X)
  if (is.null(G$H)) G$H<-matrix(0,q,q)
  for (i in 1:length(G$S))
  { j<-ncol(G$S[[i]])
    off1<-G$off[i];off2<-off1+j-1
    G$H[off1:off2,off1:off2]<-G$H[off1:off2,off1:off2]+G$sp[i]*G$S[[i]]
  }
  G$S<-list() # have to reset since length of this is used as number of penalties
  G$L <- NULL
  xx<-gam.fit(G,family=family,control=control,gamma=gamma,...)
  res <- xx$gcv.ubre.dev
  attr(res,"full.gam.object")<-xx
  res
} ## full.score

am.fit <- function (G, #start = NULL, etastart = NULL, 
    #mustart = NULL,# family = gaussian(), 
    control = gam.control(),gamma=1,...) {
    ## fit additive model using 'magic' returning gam type object.

    family <- gaussian()
    intercept<-G$intercept
    n <- nobs <- NROW(G$y) ## n just there to keep codetools happy
    y <- G$y # original data
    X <- G$X # original design matrix
    if (ncol(G$X) == 0) stop("Model seems to contain no terms")
    olm <- G$am   # only need 1 iteration as it's a pure additive model.
    
    # obtain average element sizes for the penalties
    n.S <- length(G$S)
    if (n.S>0) {
      S.size <- rep(0,n.S)
      for (i in 1:n.S) S.size[i] <- mean(abs(G$S[[i]])) 
    }
    weights<-G$w # original weights

    n.score <- sum(weights!=0) ## n to use in GCV score (i.e. don't count points with no influence)   

    offset<-G$offset 

    variance <- family$variance;dev.resids <- family$dev.resids
    aic <- family$aic
    if (NCOL(y) > 1) stop("y must be univariate unless binomial")
    
    scale <- G$sig2

    msp <- G$sp
    magic.control<-list(tol=G$conv.tol,step.half=G$max.half,#maxit=control$maxit+control$globit,
                          rank.tol=control$rank.tol)

    ## solve the penalized LS problem ...
    good <- weights > 0
    G$y <- y[good] - offset[good]
    G$X <- X[good,,drop=FALSE]
    G$w <- sqrt(weights[good]) ## note magic assumes sqrt weights
    mr <- magic(G$y,G$X,msp,G$S,G$off,L=G$L,lsp0=G$lsp0,G$rank,G$H,matrix(0,0,ncol(G$X)),
                G$w,gamma=gamma,G$sig2,G$sig2<0,
                ridge.parameter=control$irls.reg,control=magic.control,n.score=n.score,nthreads=control$nthreads)
    coef <- mr$b;msp <- mr$sp;G$sig2 <- mr$scale;G$gcv.ubre <- mr$score;

    if (any(!is.finite(coef))) warning("Non-finite coefficients in additive model fit")
  
    mu <- eta <- drop(X %*% coef + offset) 
    dev <- sum(dev.resids(y, mu, weights))
    
    residuals <- rep(NA, nobs)
    residuals[good] <- G$y - (mu - offset)[good]
       
    wtdmu <- if (intercept) sum(weights * y)/sum(weights) else offset
    nulldev <- sum(dev.resids(y, wtdmu, weights))
    n.ok <- nobs - sum(weights == 0)
    nulldf <- n.ok - as.integer(intercept)
    
    ## Extract a little more information from the fit....

    mv <- magic.post.proc(G$X,mr,w=G$w^2)
    G$Vp<-mv$Vb;G$hat<-mv$hat;
    G$Ve <- mv$Ve # frequentist cov. matrix
    G$edf<-mv$edf
    G$conv<-mr$gcv.info
    G$sp<-msp
    rank<-G$conv$rank

    ## use MLE of scale in Gaussian case - best estimate otherwise. 
    dev1 <- if (scale>0) scale*sum(weights) else dev 

    aic.model <- aic(y, n, mu, weights, dev1) + 2 * sum(G$edf)
    if (scale < 0) { ## deviance based GCV
      gcv.ubre.dev <- n.score*dev/(n.score-gamma*sum(G$edf))^2
    } else { # deviance based UBRE, which is just AIC
      gcv.ubre.dev <- dev/n.score + 2 * gamma * sum(G$edf)/n.score - G$sig2
    }

    list(coefficients = as.vector(coef), residuals = residuals, fitted.values = mu, 
        family = family,linear.predictors = eta, deviance = dev,
        null.deviance = nulldev, iter = 1, weights = weights, prior.weights = weights,  
        df.null = nulldf, y = y, converged = TRUE,sig2=G$sig2,edf=G$edf,edf1=mv$edf1,hat=G$hat,
        R=mr$R,
        boundary = FALSE,sp = G$sp,nsdf=G$nsdf,Ve=G$Ve,Vp=G$Vp,rV=mr$rV,mgcv.conv=G$conv,
        gcv.ubre=G$gcv.ubre,aic=aic.model,rank=rank,gcv.ubre.dev=gcv.ubre.dev,scale.estimated = (scale < 0))
} ## am.fit


gam.fit <- function (G, start = NULL, etastart = NULL, 
    mustart = NULL, family = gaussian(), 
    control = gam.control(),gamma=1,
    fixedSteps=(control$maxit+1),...) 
# fitting function for a gam, modified from glm.fit.
# note that smoothing parameter estimates from one irls iterate are carried over to the next irls iterate
# unless the range of s.p.s is large enough that numerical problems might be encountered (want to avoid 
# completely flat parts of gcv/ubre score). In the latter case autoinitialization is requested.
# fixedSteps < its default causes at most fixedSteps iterations to be taken,
# without warning if convergence has not been achieved. This is useful for
# obtaining starting values for outer iteration.
{   .Deprecated(msg="Internal mgcv function gam.fit is no longer used - see gam.fit3.")
    intercept<-G$intercept
    conv <- FALSE
    n <- nobs <- NROW(G$y) ## n just there to keep codetools happy
    nvars <- NCOL(G$X) # check this needed
    y<-G$y # original data
    X<-G$X # original design matrix
    if (nvars == 0) stop("Model seems to contain no terms")
    olm <- G$am   # only need 1 iteration as it's a pure additive model.
    if (!olm)  .Deprecated(msg="performance iteration with gam is deprecated, use bam instead")
    find.theta<-FALSE # any supplied -ve binomial theta treated as known, G$sig2 is scale parameter
    if (substr(family$family[1],1,17)=="Negative Binomial")
    { Theta <- family$getTheta()
      if (length(Theta)==1) { ## Theta fixed
        find.theta <- FALSE
        G$sig2 <- 1
      } else {
        if (length(Theta)>2)
        warning("Discrete Theta search not available with performance iteration")
        Theta <- range(Theta)
        T.max <- Theta[2]          ## upper search limit
        T.min <- Theta[1]          ## lower search limit
        Theta <- sqrt(T.max*T.min) ## initial value
        find.theta <- TRUE
      }
      nb.link<-family$link # negative.binomial family, there's a choice of links
    }

    
    # obtain average element sizes for the penalties
    n.S<-length(G$S)
    if (n.S>0)
    { S.size<-0
      for (i in 1:n.S) S.size[i]<-mean(abs(G$S[[i]])) 
    }
    weights<-G$w # original weights

    n.score <- sum(weights!=0) ## n to use in GCV score (i.e. don't count points with no influence)   

    offset<-G$offset 

    variance <- family$variance;dev.resids <- family$dev.resids
    aic <- family$aic
    linkinv <- family$linkinv;linkfun <- family$linkfun;mu.eta <- family$mu.eta
    if (!is.function(variance) || !is.function(linkinv)) 
        stop("illegal `family' argument")
    valideta <- family$valideta
    if (is.null(valideta)) 
        valideta <- function(eta) TRUE
    validmu <- family$validmu
    if (is.null(validmu)) 
        validmu <- function(mu) TRUE
    if (is.null(mustart))   # new from version 1.5.0 
    { eval(family$initialize)}
    else 
    { mukeep <- mustart
      eval(family$initialize)
      mustart <- mukeep
    }
    if (NCOL(y) > 1) 
        stop("y must be univariate unless binomial")
    
    coefold <- NULL