R/mgcv.r

Defines functions onUnload onAttach onLoad set.mgcv.options print.mgcv.version magic initial.sp initial.spg single.sp magic.post.proc mroot logLik.gam influence.gam vcov.gam gam.vcomp sp.vcov cooks.distance.gam pen.edf print.anova.gam anova.gam print.summary.gam summary.gam testStat reTest recov simf liu2 residuals.gam concurvity predict.gam get.na.action model.matrix.gam gam.fit full.score mgcv.find.theta mgcv.get.scale gam.control print.gam gam variable.summary estimate.gam get.null.coef gam.outer formula.gam gam.setup gam.setup.list olid getNumericResponse parametricPenalty clone.smooth.spec gam.side augment.smX fixDependence interpret.gam interpret.gam0 all.vars1 pcls strip.offset rig slanczos Rrank

Documented in anova.gam concurvity fixDependence formula.gam full.score gam gam.control gam.fit gam.outer gam.side gam.vcomp influence.gam initial.sp interpret.gam logLik.gam magic magic.post.proc model.matrix.gam mroot pcls pen.edf predict.gam print.anova.gam print.gam print.summary.gam residuals.gam rig Rrank slanczos sp.vcov summary.gam vcov.gam

##  R routines for the package mgcv (c) Simon Wood 2000-2016
##  With contributions from Henric Nilsson

Rrank <- function(R,tol=.Machine$double.eps^.9) {
## Finds rank of upper triangular matrix R, by estimating condition
## number of upper rank by rank block, and reducing rank until this is 
## acceptably low... assumes R pivoted 
 m <- nrow(R)
 rank <- min(m,ncol(R))
 ok <- TRUE
  while (ok) {
    Rcond <- .C(C_R_cond,R=as.double(R),r=as.integer(m),c=as.integer(rank),
             work=as.double(rep(0,4*m)),Rcond=as.double(1))$Rcond
    if (Rcond*tol<1) ok <- FALSE else rank <- rank - 1  
  }
  rank
}
 
slanczos <- function(A,k=10,kl=-1,tol=.Machine$double.eps^.5,nt=1) {
## Computes truncated eigen decomposition of symmetric A by 
## Lanczos iteration. If kl < 0 then k largest magnitude
## eigenvalues returned, otherwise k highest and kl lowest.
## Eigenvectors are always returned too.
## set.seed(1);n <- 1000;A <- matrix(runif(n*n),n,n);A <- A+t(A);er <- slanczos(A,10)
## um <- eigen(A,symmetric=TRUE);ind <- c(1:5,(n-5+1):n)
## range(er$values-um$values[ind]);range(abs(er$vectors)-abs(um$vectors[,ind]))
## It seems that when k (or k+kl) is beyond 10-15% of n
## then you might as well use eigen(A,symmetric=TRUE), but the
## extra cost is the expensive accumulation of eigenvectors.
## Should re-write whole thing using LAPACK routines for eigenvectors. 
   if (tol<=0||tol>.01) stop("silly tolerance supplied")
   k <- round(k);kl <- round(kl)
   if (k<0) stop("argument k must be positive.")
   m <- k + max(0,kl) 
   n <- nrow(A) 
   if (m<1) return(list(values=rep(0,0),vectors=matrix(0,n,0),iter=0)) 
   if (n != ncol(A)) stop("A not square")
   if (m>n) stop("Can not have more eigenvalues than nrow(A)")
   oo <- .C(C_Rlanczos,A=as.double(A),U=as.double(rep(0,n*m)),D=as.double(rep(0,m)),
            n=as.integer(n),m=as.integer(k),ml=as.integer(kl),tol=as.double(tol),nt=as.integer(nt))
   list(values = oo$D,vectors = matrix(oo$U,n,m),iter=oo$n) 
}

rig <- function(n,mean,scale) {
## inverse guassian deviates generated by algorithm 5.7 of
## Gentle, 2003. scale = 1/lambda. 
  if (length(n)>1) n <- length(n)
  x <- y <- rnorm(n)^2
  mys <- mean*scale*y
  mu <- 0*y + mean ## y is there to ensure mu is a vector
  mu2 <- mu^2;
  ind <- mys < .Machine$double.eps^-.5 ## cut off for tail computation
  x[ind] <- mu[ind]*(1 + 0.5*(mys[ind] - sqrt(mys[ind]*4+mys[ind]^2)))
  x[!ind] <- mu[!ind]/mys[!ind] ## tail term (derived from Taylor of sqrt(1+eps) etc)
  #my <- mean*y; sc <- 0*y + scale
  #ind <- my > 1 ## cancellation error can be severe, without splitting
  #x[!ind] <- mu[!ind]*(1 + 0.5*sc[!ind]*(my[!ind] - sqrt(4*my[!ind]/sc[!ind] + my[!ind]^2)))
  ## really the sqrt in the next term should be expanded beyond first order and then
  ## worked on - otherwise too many exact zeros?
  #x[ind] <- pmax(0,mu[ind]*(1+my[ind]*.5*sc[ind]*(1-sqrt(1+ 4/(sc[ind]*my[ind])))))
  ind <- runif(n) > mean/(mean+x)
  x[ind] <- mu2[ind]/x[ind]
  x ## E(x) = mean; var(x) = scale*mean^3
}

strip.offset <- function(x)
# sole purpose is to take a model frame and rename any "offset(a.name)"
# columns "a.name"
{ na <- names(x)
  for (i in 1:length(na)) {
    if (substr(na[i],1,7)=="offset(") 
      na[i] <- substr(na[i],8,nchar(na[i])-1)
  }
  names(x) <- na
  x
}


pcls <- function(M)
# Function to perform penalized constrained least squares.
# Problem to be solved is:
#
#  minimise      ||W^0.5 (y - Xp)||^2 + p'Bp
#  subject to    Ain p >= b  & C p = "constant"
#
# where B = \sum_{i=1}^m \theta_i S_i and W=diag(w)
# on entry this routine requires a list M, with the following elements:
# M$X - the design matrix for this problem.
# M$p - a feasible initial parameter vector - note that this should not be set up to
#       lie exactly on all the inequality constraints - which can easily happen if M$p=0!
# M$y - response variable
# M$w - weight vector: W= diag(M$w)
# M$Ain - matrix of inequality constraints
# M$bin - b above
# M$C  - fixed constraint matrix
# M$S  - List of (minimal) penalty matrices
# M$off - used for unpacking M$S
# M$sp - array of theta_i's 
# Ain, bin and p are not in the object needed to call mgcv....
#
{ nar<-c(length(M$y),length(M$p),dim(M$Ain)[1],dim(M$C)[1])
  ## sanity checking ...
  if (nrow(M$X)!=nar[1]) stop("nrow(M$X) != length(M$y)") 
  if (ncol(M$X)!=nar[2]) stop("ncol(M$X) != length(M$p)")
  if (length(M$w)!=nar[1]) stop("length(M$w) != length(M$y)")
  if (nar[3]!=length(M$bin)) stop("nrow(M$Ain) != length(M$bin)")
  if (nrow(M$Ain)>0) {
    if (ncol(M$Ain)!=nar[2]) stop("nrow(M$Ain) != length(M$p)") 
    res <- as.numeric(M$Ain%*%M$p) - as.numeric(M$bin)
    if (sum(res<0)>0) stop("initial parameters not feasible")
    res <- abs(res)
    if (sum(res<.Machine$double.eps^.5)>0) 
      warning("initial point very close to some inequality constraints")
    res <- mean(res)
    if (res<.Machine$double.eps^.5) 
      warning("initial parameters very close to inequality constraints")
  }
  
  if (nrow(M$C)>0) if (ncol(M$C)!=nar[2]) stop("ncol(M$C) != length(M$p)")  
  if (length(M$S)!=length(M$off)) stop("M$S and M$off have different lengths")
  if (length(M$S)!=length(M$sp)) stop("M$sp has different length to M$S and M$off")
  
  
  # pack the S array for mgcv call 
  m<-length(M$S)
  Sa<-array(0,0);df<-0
  if (m>0) for (i in 1:m)
  { Sa<-c(Sa,M$S[[i]])
    df[i]<-nrow(M$S[[i]])
    if (M$off[i]+df[i]-1>nar[2]) stop(gettextf("M$S[%d] is too large given M$off[%d]", i, i))
  }
  qra.exist <- FALSE
  if (ncol(M$X)>nrow(M$X)) {
    if (m>0) stop("Penalized model matrix must have no more columns than rows") 
    else { ## absorb M$C constraints
      qra <- qr(t(M$C))
      j <- nrow(M$C);k <- ncol(M$X)
      M$X <- t(qr.qty(qra,t(M$X))[(j+1):k,])
      M$Ain <- t(qr.qty(qra,t(M$Ain))[(j+1):k,])
      M$C <- matrix(0,0,0)
      M$p <- rep(0,ncol(M$X)) 
      nar[2] <- length(M$p)
      qra.exist <- TRUE
      if  (ncol(M$X)>nrow(M$X)) stop("Model matrix not full column rank")
    }
  }
  o<-.C(C_RPCLS,as.double(M$X),as.double(M$p),as.double(M$y),as.double(M$w),as.double(M$Ain),as.double(M$bin)
        ,as.double(M$C),as.double(Sa),as.integer(M$off),as.integer(df),as.double(M$sp),
        as.integer(length(M$off)),as.integer(nar))
  p <- array(o[[2]],length(M$p))
  if (qra.exist) p <- qr.qy(qra,c(rep(0,j),p))
  p
} ## pcls

all.vars1 <- function(form) {
## version of all.vars that doesn't split up terms like x$y into x and y
  vars <- all.vars(form)
  vn <- all.names(form)
  vn <- vn[vn%in%c(vars,"$","[[")] ## actual variable related names
  if ("[["%in%vn) stop("can't handle [[ in formula")
  ii <- which(vn%in%"$") ## index of '$'
  if (length(ii)) { ## assemble variable names
    vn1 <- if (ii[1]>1) vn[1:(ii[1]-1)]
    go <- TRUE
    k <- 1
    while (go) {
      n <- 2; 
      while(k<length(ii) && ii[k]==ii[k+1]-1) { k <- k + 1;n <- n + 1 }
      vn1 <- c(vn1,paste(vn[ii[k]+1:n],collapse="$"))
      if (k==length(ii)) {
        go <- FALSE
	ind <- if (ii[k]+n<length(vn)) (ii[k]+n+1):length(vn) else rep(0,0) 
      } else {
        k <- k +  1
	ind <- if (ii[k-1]+n<ii[k]-1) (ii[k-1]+n+1):(ii[k]-1) else rep(0,0)
      }
      vn1 <- c(vn1,vn[ind])
    }
  } else vn1 <- vn
  vn1
} ## all.vars1

interpret.gam0 <- function(gf,textra=NULL,extra.special=NULL)
# interprets a gam formula of the generic form:
#   y~x0+x1+x3*x4 + s(x5)+ s(x6,x7) ....
# and returns:
# 1. a model formula for the parametric part: pf (and pfok indicating whether it has terms)
# 2. a list of descriptors for the smooths: smooth.spec
# this is function does the work, and is called by in interpret.gam
# 'textra' is optional text to add to term labels
# 'extra.special' is label of extra smooth within formula.
{ p.env <- environment(gf) # environment of formula
  tf <- terms.formula(gf,specials=c("s","te","ti","t2",extra.special)) # specials attribute indicates which terms are smooth
 
  terms <- attr(tf,"term.labels") # labels of the model terms 
  nt <- length(terms) # how many terms?
  
  if (attr(tf,"response") > 0) {  # start the replacement formulae
    response <- as.character(attr(tf,"variables")[2])
  } else { 
    response <- NULL
  }
  sp <- attr(tf,"specials")$s     # array of indices of smooth terms 
  tp <- attr(tf,"specials")$te    # indices of tensor product terms
  tip <- attr(tf,"specials")$ti   # indices of tensor product pure interaction terms
  t2p <- attr(tf,"specials")$t2   # indices of type 2 tensor product terms
  zp <- if (is.null(extra.special)) NULL else attr(tf,"specials")[[extra.special]]
  off <- attr(tf,"offset") # location of offset in formula

  ## have to translate sp, tp, tip, t2p (zp) so that they relate to terms,
  ## rather than elements of the formula...
  vtab <- attr(tf,"factors") # cross tabulation of vars to terms
  if (length(sp)>0) for (i in 1:length(sp)) {
    ind <- (1:nt)[as.logical(vtab[sp[i],])]
    sp[i] <- ind # the term that smooth relates to
  }
  if (length(tp)>0) for (i in 1:length(tp)) {
    ind <- (1:nt)[as.logical(vtab[tp[i],])]
    tp[i] <- ind # the term that smooth relates to
  } 
  if (length(tip)>0) for (i in 1:length(tip)) {
    ind <- (1:nt)[as.logical(vtab[tip[i],])]
    tip[i] <- ind # the term that smooth relates to
  } 
  if (length(t2p)>0) for (i in 1:length(t2p)) {
    ind <- (1:nt)[as.logical(vtab[t2p[i],])]
    t2p[i] <- ind # the term that smooth relates to
  }
  if (length(zp)>0) for (i in 1:length(zp)) {
    ind <- (1:nt)[as.logical(vtab[zp[i],])]
    zp[i] <- ind # the term that smooth relates to
  } ## re-referencing is complete

  k <- kt <- kti <- kt2 <- ks <- kz <- kp <- 1 # counters for terms in the 2 formulae
  len.sp <- length(sp)
  len.tp <- length(tp)
  len.tip <- length(tip)
  len.t2p <- length(t2p)
  len.zp <- length(zp)
  ns <- len.sp + len.tp + len.tip + len.t2p + len.zp# number of smooths
  pav <- av <- rep("",0)
  smooth.spec <- list()
  #mgcvat <- "package:mgcv" %in% search() ## is mgcv in search path?
  mgcvns <- loadNamespace('mgcv')
  if (nt) for (i in 1:nt) { # work through all terms
    if (k <= ns&&((ks<=len.sp&&sp[ks]==i)||(kt<=len.tp&&tp[kt]==i)||(kz<=len.zp&&zp[kz]==i)||
                  (kti<=len.tip&&tip[kti]==i)||(kt2<=len.t2p&&t2p[kt2]==i))) { # it's a smooth
      ## have to evaluate in the environment of the formula or you can't find variables 
      ## supplied as smooth arguments, e.g. k <- 5;gam(y~s(x,k=k)), fails,
      ## but if you don't specify namespace of mgcv then stuff like 
      ## loadNamespace('mgcv'); k <- 10; mgcv::interpret.gam(y~s(x,k=k)) fails (can't find s)
      ## eval(parse(text=terms[i]),envir=p.env,enclos=loadNamespace('mgcv')) fails??
      ## following may supply namespace of mgcv explicitly if not on search path...
      ## If 's' etc are masked then we can fail even if mgcv on search path, hence paste
      ## of explicit mgcv reference into first attempt...
    
      st <- try(eval(parse(text=paste("mgcv::",terms[i],sep="")),envir=p.env),silent=TRUE)
      if (inherits(st,"try-error")) st <- 
            eval(parse(text=terms[i]),enclos=p.env,envir=mgcvns)
     
      if (!is.null(textra)) { ## modify the labels on smooths with textra
        pos <- regexpr("(",st$lab,fixed=TRUE)[1]
        st$label <- paste(substr(st$label,start=1,stop=pos-1),textra,
                    substr(st$label,start=pos,stop=nchar(st$label)),sep="")
      }
      smooth.spec[[k]] <- st
      if (ks<=len.sp&&sp[ks]==i) ks <- ks + 1 else # counts s() terms
      if (kt<=len.tp&&tp[kt]==i) kt <- kt + 1 else # counts te() terms
      if (kti<=len.tip&&tip[kti]==i) kti <- kti + 1 else # counts ti() terms
      if (kt2<=len.t2p&&t2p[kt2]==i) kt2 <- kt2 + 1 # counts t2() terms
      else kz <- kz + 1
      k <- k + 1      # counts smooth terms 
    } else {          # parametric
      av[kp] <- terms[i] ## element kp on rhs of parametric
      kp <- kp+1    # counts parametric terms
    }
  }    
  if (!is.null(off)) { ## deal with offset 
    av[kp] <- as.character(attr(tf,"variables")[1+off])
    kp <- kp+1          
  }

  pf <- paste(response,"~",paste(av,collapse=" + "))
  if (attr(tf,"intercept")==0) {
    pf <- paste(pf,"-1",sep="")
    if (kp>1) pfok <- 1 else pfok <- 0
  } else { 
    pfok <- 1;if (kp==1) { 
      pf <- paste(pf,"1"); 
    }
  }

  fake.formula <- pf

  if (length(smooth.spec)>0) 
  for (i in 1:length(smooth.spec)) {
    nt <- length(smooth.spec[[i]]$term)
    ff1 <- paste(smooth.spec[[i]]$term[1:nt],collapse="+")
    fake.formula <- paste(fake.formula,"+",ff1)
    if (smooth.spec[[i]]$by!="NA") {
      fake.formula <- paste(fake.formula,"+",smooth.spec[[i]]$by)
      av <- c(av,smooth.spec[[i]]$term,smooth.spec[[i]]$by)
    } else av <- c(av,smooth.spec[[i]]$term)
  }
  fake.formula <- as.formula(fake.formula,p.env)
  if (length(av)) {
    pred.formula <- as.formula(paste("~",paste(av,collapse="+")))
    pav <- all.vars(pred.formula) ## trick to strip out 'offset(x)' etc...
    pred.formula <- reformulate(pav) 
  } else  pred.formula <- ~1
  ret <- list(pf=as.formula(pf,p.env),pfok=pfok,smooth.spec=smooth.spec,
            fake.formula=fake.formula,response=response,fake.names=av,
            pred.names=pav,pred.formula=pred.formula)
  class(ret) <- "split.gam.formula"
  ret
} ## interpret.gam0

interpret.gam <- function(gf,extra.special=NULL) {
## wrapper to allow gf to be a list of formulae or 
## a single formula. This facilitates general penalized 
## likelihood models in which several linear predictors 
## may be involved...
##
## The list syntax is as follows. The first formula must have a response on
## the lhs, rather than labels. For m linear predictors, there 
## must be m 'base formulae' in linear predictor order. lhs labels will 
## be ignored in a base formula. Empty base formulae have '-1' on rhs.
## Further formulae have labels up to m labels 1,...,m on the lhs, in a 
## syntax like this: 3 + 5 ~ s(x), which indicates that the same s(x) 
## should be added to both linear predictors 3 and 5. 
## e.g. A bivariate normal model with common expected values might be
## list(y1~-1,y2~-1,1+2~s(x)), whereas if the second component was contaminated 
## by something else we might have list(y1~-1,y2~s(v)-1,1+2~s(x)) 
## 
## For a list argument, this routine returns a list of slit.formula objects 
## with an extra field "lpi" indicating the linear predictors to which each 
## contributes...
  if (is.list(gf)) {
    d <- length(gf)

    ## make sure all formulae have a response, to avoid
    ## problems with parametric sub formulae of the form ~1
    #if (length(gf[[1]])<3) stop("first formula must specify a response")
    resp <- gf[[1]][2]
    
    ret <- list()
    pav <- av <- rep("",0)
    nlp <- 0 ## count number of linear predictors (may be different from number of formulae)
    for (i in 1:d) {
      textra <- if (i==1) NULL else paste(".",i-1,sep="") ## modify smooth labels to identify to predictor  

      lpi <- getNumericResponse(gf[[i]]) ## get linear predictors to which this applies, if explicit
      if (length(lpi)==1) warning("single linear predictor indices are ignored")
      if (length(lpi)>0) gf[[i]][[2]] <- NULL else { ## delete l.p. labels from formula response 
        nlp <- nlp + 1;lpi <- nlp ## this is base formula for l.p. number nlp       
      }
      ret[[i]] <- interpret.gam0(gf[[i]],textra,extra.special=extra.special)
      ret[[i]]$lpi <- lpi ## record of the linear predictors to which this applies
      
      ## make sure all parametric formulae have a response, to avoid
      ## problems with parametric sub formulae of the form ~1
      respi <- rep("",0) ## no extra response terms
      if (length(ret[[i]]$pf)==2) { 
        ret[[i]]$pf[3] <- ret[[i]]$pf[2];ret[[i]]$pf[2] <- resp
        respi <- rep("",0)
      } else if (i>1) respi <- ret[[i]]$response ## extra response terms
      av <- c(av,ret[[i]]$fake.names,respi) ## accumulate all required variable names 
      pav <- c(pav,ret[[i]]$pred.names) ## predictors only 
    } 
    av <- unique(av) ## strip out duplicate variable names
    pav <- unique(pav)
    ret$fake.formula <- if (length(av)>0) reformulate(av,response=ret[[1]]$response) else 
                        ret[[1]]$fake.formula ## create fake formula containing all variables
    ret$pred.formula <- if (length(pav)>0) reformulate(pav) else ~1 ## predictor only formula
    ret$response <- ret[[1]]$response 
    ret$nlp <- nlp ## number of linear predictors
    for (i in 1:d) if (max(ret[[i]]$lpi)>nlp||min(ret[[i]]$lpi)<1) stop("linear predictor labels out of range")
    class(ret) <- "split.gam.formula"
    return(ret)
  } else interpret.gam0(gf,extra.special=extra.special)  
} ## interpret.gam


fixDependence <- function(X1,X2,tol=.Machine$double.eps^.5,rank.def=0,strict=FALSE)
# model matrix X2 may be linearly dependent on X1. This 
# routine finds which columns of X2 should be zeroed to 
# fix this. If rank.def>0 then it is taken as the known degree 
# of dependence of X2 on X1 and tol is ignored.
{ qr1 <- qr(X1,LAPACK=TRUE)
  R11 <- abs(qr.R(qr1)[1,1])
  r<-ncol(X1);n<-nrow(X1)
  if (strict) { ## only delete columns of X2 individually dependent on X1
    ## Project columns of X2 into space of X1 and look at difference
    ## to orignal X2 to check for deficiency...  
    QtX2 <- qr.qty(qr1,X2)
    QtX2[-(1:r),] <- 0
    mdiff <- colMeans(abs(X2 - qr.qy(qr1,QtX2)))
    if (rank.def>0) ind <- (1:ncol(X2))[rank(mdiff) <= rank.def] else
    ind <- (1:ncol(X2))[mdiff < R11*tol]
    if (length(ind)<1) ind <- NULL
  } else { ## make X2 full rank given X1
    QtX2 <- qr.qty(qr1,X2)[(r+1):n,] # Q'X2
    qr2 <- qr(QtX2,LAPACK=TRUE)
    R <- qr.R(qr2)
    # now final diagonal block of R may be zero, indicating rank 
    # deficiency.
    r0 <- r <- nrow(R)
    if (rank.def > 0 && rank.def <= nrow(R)) r0 <- r - rank.def else ## degree of rank def known
      while (r0>0 && mean(abs(R[r0:r,r0:r]))< R11*tol) r0 <- r0 -1 ## compute rank def
    r0 <- r0 + 1
    if (r0>r) return(NULL) else
    ind <- qr2$pivot[r0:r] # the columns of X2 to zero in order to get independence
  }
  ind
} ## fixDependence


augment.smX <- function(sm,nobs,np) {
## augments a smooth model matrix with a square root penalty matrix for
## identifiability constraint purposes.
  ns <- length(sm$S) ## number of penalty matrices
  if (ns==0) { ## nothing to do
    return(rbind(sm$X,matrix(0,np,ncol(sm$X))))
  }
  ind <- colMeans(abs(sm$S[[1]]))!=0
  sqrmaX  <- mean(abs(sm$X[,ind]))^2
  alpha <- sqrmaX/mean(abs(sm$S[[1]][ind,ind]))
  St <- sm$S[[1]]*alpha
  if (ns>1) for (i in 2:ns) { 
    ind <- colMeans(abs(sm$S[[i]]))!=0
    alpha <- sqrmaX/mean(abs(sm$S[[i]][ind,ind]))
    St <- St +  sm$S[[i]]*alpha
  }
  rS <- mroot(St,rank=ncol(St)) ## get sqrt of penalty
  X <- rbind(sm$X,matrix(0,np,ncol(sm$X))) ## create augmented model matrix
  X[nobs+sm$p.ind,] <- t(rS) ## add in 
  X ## scaled augmented model matrix
} ## augment.smX

gam.side <- function(sm,Xp,tol=.Machine$double.eps^.5,with.pen=FALSE)
# works through a list of smooths, sm, aiming to identify nested or partially
# nested terms, and impose identifiability constraints on them.
# Xp is the parametric model matrix. It is needed in order to check whether
# there is a constant (or equivalent) in the model. If there is, then this needs 
# to be included when working out side constraints, otherwise dependencies can be 
# missed. 
# Note that with.pen is quite extreme, since you then pretty much only pick
# up dependencies in the null spaces
{ if (!with.pen) { ## check that's possible and reset if not!
    with.pen <- nrow(Xp) < ncol(Xp) + sum(unlist(lapply(sm,function(x) ncol(x$X))))
  }
  m <- length(sm)
  if (m==0) return(sm)
  v.names<-array("",0);maxDim<-1
  for (i in 1:m) { ## collect all term names and max smooth `dim'
    vn <- sm[[i]]$term
    ## need to include by variables in names
    if (sm[[i]]$by!="NA") vn <- paste(vn,sm[[i]]$by,sep="")
    ## need to distinguish levels of factor by variables...
    if (!is.null(sm[[i]]$by.level))  vn <- paste(vn,sm[[i]]$by.level,sep="")
    sm[[i]]$vn <- vn ## use this record to identify variables from now
    v.names <- c(v.names,vn)
    if (sm[[i]]$dim > maxDim) maxDim <- sm[[i]]$dim
  } 
  lv <- length(v.names)   
  v.names <- unique(v.names)
  if (lv == length(v.names)) return(sm) ## no repeats => no nesting

  ## Only get this far if there is nesting.
  ## Need to test for intercept or equivalent in Xp
  intercept <- FALSE
  if (ncol(Xp)) {
    ## first check columns directly...
    if (sum(apply(Xp,2,sd)<.Machine$double.eps^.75)>0) intercept <- TRUE else {
      ## no constant column, so need to check span of Xp...
      f <- rep(1,nrow(Xp))
      ff <- qr.fitted(qr(Xp),f)
      if (max(abs(ff-f))<.Machine$double.eps^.75) intercept <- TRUE 
    }
  }

  sm.id <- as.list(v.names)
  names(sm.id) <- v.names
  for (i in 1:length(sm.id)) sm.id[[i]]<-array(0,0)
  sm.dim <- sm.id
  for (d in 1:maxDim) {
    for (i in 1:m) {
      if (sm[[i]]$dim==d&&sm[[i]]$side.constrain) for (j in 1:d) { ## work through terms
        term<-sm[[i]]$vn[j]
        a <- sm.id[[term]]
        la <- length(a)+1
        sm.id[[term]][la] <- i   ## record smooth i.d. for this variable
        sm.dim[[term]][la] <- d  ## ... and smooth dim.
      }
    }
  }
  ## so now each unique variable name has an associated array of 
  ## the smooths of which it is an argument, arranged in ascending 
  ## order of dimension. Smooths for which side.constrain==FALSE are excluded.
  if (maxDim==1) warning("model has repeated 1-d smooths of same variable.")

  ## Now set things up to enable term specific model matrices to be
  ## augmented with square root penalties, on the fly...
  if (with.pen) {
    k <- 1
    for (i in 1:m) { ## create parameter indices for each term
      k1 <- k + ncol(sm[[i]]$X) - 1
      sm[[i]]$p.ind <- k:k1
      k <- k1 + 1
    }
    np <- k-1 ## number of penalized parameters
  }
  nobs <- nrow(sm[[1]]$X) ## number of observations
  
  for (d in 1:maxDim) { ## work up through dimensions 
    for (i in 1:m) { ## work through smooths
      if (sm[[i]]$dim == d&&sm[[i]]$side.constrain) { ## check for nesting
        if (with.pen) X1 <- matrix(c(rep(1,nobs),rep(0,np)),nobs+np,as.integer(intercept)) else
        X1 <- matrix(1,nobs,as.integer(intercept))
	X1comp <- rep(0,0) ## list of components of X1 to avoid duplication
        for (j in 1:d) { ## work through variables
          b <- sm.id[[sm[[i]]$vn[j]]] # list of smooths dependent on this variable
          k <- (1:length(b))[b==i] ## locate current smooth in list 
          if (k>1) for (l in 1:(k-1)) if (!b[l] %in% X1comp) { ## collect X columns
           X1comp <- c(X1comp,b[l]) ## keep track of components to avoid adding same one twice
           if (with.pen) { ## need to use augmented model matrix in testing 
              if (is.null(sm[[b[l]]]$Xa)) sm[[b[l]]]$Xa <- augment.smX(sm[[b[l]]],nobs,np)
              X1 <- cbind(X1,sm[[b[l]]]$Xa)
            } else X1 <- cbind(X1,sm[[b[l]]]$X) ## penalties not considered
          }
        } ## Now X1 contains columns for all lower dimensional terms
        if (ncol(X1)==as.integer(intercept)) ind <- NULL else {
          if (with.pen) {
             if (is.null(sm[[i]]$Xa)) sm[[i]]$Xa <- augment.smX(sm[[i]],nobs,np)
             ind <- fixDependence(X1,sm[[i]]$Xa,tol=tol) 
          } else ind <- fixDependence(X1,sm[[i]]$X,tol=tol)        
        }
        ## ... the columns to zero to ensure independence
        if (!is.null(ind)) { 
          sm[[i]]$X <- sm[[i]]$X[,-ind]
          ## work through list of penalty matrices, applying constraints...
          nsmS <- length(sm[[i]]$S)
          if (nsmS>0) for (j in nsmS:1) { ## working down so that dropping is painless
            sm[[i]]$S[[j]] <- sm[[i]]$S[[j]][-ind,-ind]
            if (sum(sm[[i]]$S[[j]]!=0)==0) rank <- 0 else
            rank <- qr(sm[[i]]$S[[j]],tol=tol,LAPACK=FALSE)$rank
            sm[[i]]$rank[j] <- rank ## replace previous rank with new rank
            if (rank == 0) { ## drop the penalty
              sm[[i]]$rank <- sm[[i]]$rank[-j]
              sm[[i]]$S[[j]] <- NULL
              sm[[i]]$S.scale <- sm[[i]]$S.scale[-j]
              if (!is.null(sm[[i]]$L)) sm[[i]]$L <- sm[[i]]$L[-j,,drop=FALSE]
            }
          } ## penalty matrices finished
          ## Now we need to establish null space rank for the term
          mi <- length(sm[[i]]$S)
          if (mi>0) {
            St <- sm[[i]]$S[[1]]/norm(sm[[i]]$S[[1]],type="F")
            if (mi>1) for (j in 1:mi) St <- St + 
                  sm[[i]]$S[[j]]/norm(sm[[i]]$S[[j]],type="F")
            es <- eigen(St,symmetric=TRUE,only.values=TRUE)
            sm[[i]]$null.space.dim <- sum(es$values<max(es$values)*.Machine$double.eps^.75) 
          } ## rank found

          if (!is.null(sm[[i]]$L)) {
            ind <- as.numeric(colSums(sm[[i]]$L!=0))!=0
            sm[[i]]$L <- sm[[i]]$L[,ind,drop=FALSE] ## retain only those sps that influence something!
          }

          sm[[i]]$df <- ncol(sm[[i]]$X)
          attr(sm[[i]],"del.index") <- ind
          ## Now deal with case in which prediction constraints differ from fit constraints
          if (!is.null(sm[[i]]$Xp)) { ## need to get deletion indices under prediction parameterization
            ## Note that: i) it doesn't matter what the identifiability con on X1 is
            ##            ii) the degree of rank deficiency can't be changed by an identifiability con
            if (with.pen) { 
              smi <- sm[[i]]  ## clone smooth 
              smi$X <- smi$Xp ## copy prediction Xp to X slot.
              smi$S <- smi$Sp ## and make sure penalty parameterization matches. 
              Xpa <- augment.smX(smi,nobs,np)
              ind <- fixDependence(X1,Xpa,rank.def=length(ind)) 
            } else ind <- fixDependence(X1,sm[[i]]$Xp,rank.def=length(ind)) 
            sm[[i]]$Xp <- sm[[i]]$Xp[,-ind,drop=FALSE]
            attr(sm[[i]],"del.index") <- ind ## over-writes original
          }
        }
        sm[[i]]$vn <- NULL
      } ## end if (sm[[i]]$dim == d)
    } ## end i in 1:m loop
  }  
  if (with.pen) for (i in 1:m) { ## remove working variables that were added
    sm[[i]]$p.ind <- NULL
    if (!is.null(sm[[i]]$Xa)) sm[[i]]$Xa <- NULL
  }
  sm
} ## gam.side

clone.smooth.spec <- function(specb,spec) {
## produces a version of base smooth.spec, `specb', but with 
## the variables relating to `spec'. Used by `gam.setup' in handling 
## of linked smooths.
 ## check dimensions same...
 if (specb$dim!=spec$dim) stop("`id' linked smooths must have same number of arguments") 
 ## Now start cloning...
 if (inherits(specb,c("tensor.smooth.spec","t2.smooth.spec"))) { ##`te' or `t2' generated base smooth.spec
    specb$term <- spec$term
    specb$label <- spec$label 
    specb$by <- spec$by
    k <- 1
    for (i in 1:length(specb$margin)) {
      if (is.null(spec$margin)) { ## sloppy user -- have to construct margin info...
         for (j in 1:length(specb$margin[[i]]$term)) {
           specb$margin[[i]]$term[j] <- spec$term[k]
           k <- k + 1
         }
         specb$margin[[i]]$label <- ""
 
      } else { ## second term was at least `te'/`t2', so margin cloning is easy
        specb$margin[[i]]$term <- spec$margin[[i]]$term
        specb$margin[[i]]$label <- spec$margin[[i]]$label
        specb$margin[[i]]$xt <- spec$margin[[i]]$xt
      }
    }

  } else { ## `s' generated case
    specb$term <- spec$term
    specb$label <- spec$label 
    specb$by <- spec$by
    specb$xt <- spec$xt ## don't generally know what's in here => don't clone
  }
  specb ## return clone
} ## clone.smooth.spec


parametricPenalty <- function(pterms,assign,paraPen,sp0) {
## routine to process any penalties on the parametric part of the model.
## paraPen is a list whose items have names corresponding to the 
## term.labels in pterms. Each list item may have named elements 
## L, rank and sp. All other elements should be penalty coefficient matrices.
  S <- list()     ## penalty matrix list
  off <- rep(0,0) ## offset array
  rank <- rep(0,0) ## rank array
  sp <- rep(0,0)    ## smoothing param array
  full.sp.names <- rep("",0) ## names for sp's multiplying penalties (not underlying)
  L <- matrix(0,0,0) 
  k <- 0
  tind <- unique(assign) ## unique term indices
  n.t <- length(tind)
  if (n.t>0) for (j in 1:n.t) if (tind[j]>0) {
    term.label <- attr(pterms[tind[j]],"term.label")
    P <- paraPen[[term.label]] ## get any penalty information for this term
    if (!is.null(P)) { ## then there is information
      ind <- (1:length(assign))[assign==tind[j]] ## index of coefs involved here
      Li <- P$L;P$L <- NULL
      spi <- P$sp;P$sp <- NULL
      ranki <- P$rank;P$rank <- NULL
      ## remaining terms should be penalty matrices...
      np <- length(P)

      if (!is.null(ranki)&&length(ranki)!=np) stop("`rank' has wrong length in `paraPen'") 
      if (np) for (i in 1:np) { ## unpack penalty matrices, offsets and ranks
        k <- k + 1
        S[[k]] <- P[[i]]
        off[k] <- min(ind) ## index of first coef penalized by this term
        if ( ncol(P[[i]])!=nrow(P[[i]])||nrow(P[[i]])!=length(ind)) stop(" a parametric penalty has wrong dimension")
        if (is.null(ranki)) {
          ev <- eigen(S[[k]],symmetric=TRUE,only.values=TRUE)$values
          rank[k] <- sum(ev>max(ev)*.Machine$double.eps*10) ## estimate rank
        } else rank[k] <- ranki[i]
      }
      ## now deal with L matrices
      if (np) { ## only do this stuff if there are any penalties!
        if (is.null(Li)) Li <- diag(np)
        if (nrow(Li)!=np) stop("L has wrong dimension in `paraPen'")
        L <- rbind(cbind(L,matrix(0,nrow(L),ncol(Li))),
                   cbind(matrix(0,nrow(Li),ncol(L)),Li))
        ind <- (length(sp)+1):(length(sp)+ncol(Li))
        ind2 <- (length(sp)+1):(length(sp)+nrow(Li)) ## used to produce names for full sp array
        if (is.null(spi)) {
          sp[ind] <- -1 ## auto-initialize
        } else {
          if (length(spi)!=ncol(Li)) stop("`sp' dimension wrong in `paraPen'")
          sp[ind] <- spi
        }
        ## add smoothing parameter names....
        if (length(ind)>1) names(sp)[ind] <- paste(term.label,ind-ind[1]+1,sep="") 
        else names(sp)[ind] <- term.label
        
        if (length(ind2)>1) full.sp.names[ind2] <- paste(term.label,ind2-ind2[1]+1,sep="") 
        else full.sp.names[ind2] <- term.label
      }
    } ## end !is.null(P)  
  } ## looped through all terms
  if (k==0) return(NULL)
  if (!is.null(sp0)) {
    if (length(sp0)<length(sp)) stop("`sp' too short")
    sp0 <- sp0[1:length(sp)]
    sp[sp<0] <- sp0[sp<0]
  }
  ## S is list of penalty matrices, off[i] is index of first coefficient penalized by each S[[i]]
  ## sp is array of underlying smoothing parameter (-ve to estimate), L is matrix mapping log
  ## underlying smoothing parameters to log smoothing parameters, rank[i] is the rank of S[[i]].
  list(S=S,off=off,sp=sp,L=L,rank=rank,full.sp.names=full.sp.names)
} ## parametricPenalty

getNumericResponse <- function(form) {
## takes a formula for which the lhs contains numbers, but no variable 
## names, and returns an array of those numbers. For example if 'form' 
## is '1+26~s(x)', this will return the numeric vector c(1,26). 
## A zero length vector is returned if the lhs contains no numbers,
## or contains variable names.  
## This is useful for setting up models in which
## multiple linear predictors share terms. The lhs numbers can then 
## indicate which linear predictors the rhs will contribute to.

  ## if the response contains variables or there is no
  ## response then return nothing...

  if (length(form)==2||length(all.vars(form[[2]]))>0) return(rep(0,0))

  ## deparse turns lhs into a string; strsplit extracts the characters 
  ## corresponding to numbers; unlist deals with the fact that deparse 
  ## will split long lines resulting in multiple list items from 
  ## strsplit; as.numeric converts the numbers; na.omit drops NAs
  ## resulting from "" elements; unique & round are obvious... 

  round(unique(na.omit(as.numeric(unlist(strsplit(deparse(form[[2]]), "[^0-9]+"))))))  

} ## getNumericResponse

olid <- function(X,nsdf,pstart,flpi,lpi) {
## X is a model matrix, made up of nf=length(nsdf) column blocks.
## The ith block starts at column pstart[i] and its first nsdf[i]
## columns are unpenalized. X is used to define nlp=length(lpi)
## linear predictors. lpi[[i]] gives the columns of X used in the 
## ith linear predictor. flpi[j] gives the linear predictor(s)
## to which the jth block of X belongs. The problem is that the 
## unpenalized blocks need not be identifiable when used in combination. 
## This function returns a vector dind of columns of X to drop for 
## identifiability, along with modified lpi, pstart and nsdf vectors.
  nlp <- length(lpi) ## number of linear predictors
  n <- nrow(X) 
  nf <- length(nsdf) ## number of formulae blocks
  Xp <- matrix(0,n*nlp,sum(nsdf))
  start <- 1
  ii <- 1:n
  tind <- rep(0,0) ## complete index of all parametric columns in X
  ## create a block matrix, Xp, with the same identifiability properties as
  ## unpenalized part of model...
  for (i in 1:nf) {
    stop <- start - 1 + nsdf[i]
    if (stop>=start) {
      ind <- pstart[i] + 1:nsdf[i] - 1
      for (k in flpi[[i]]) {
        Xp[ii+(k-1)*n,start:stop] <- X[,ind]
      }
      tind <- c(tind,ind)
      start <- start + nsdf[i]
    }
  }
  ## rank deficiency of Xp will reveal number of redundant parametric 
  ## terms, and a pivoted QR will reveal which to drop to restore
  ## full rank...
  qrx <- qr(Xp,LAPACK=TRUE,tol=0.0) ## unidentifiable columns get pivoted to final cols
  r <- Rrank(qr.R(qrx)) ## get rank from R factor of pivoted QR
  if (r==ncol(Xp)) { ## full rank, all fine, drop nothing
    dind <- rep(0,0)
  } else { ## reduced rank, drop some columns
    dind <- tind[sort(qrx$pivot[(r+1):ncol(X)],decreasing=TRUE)] ## columns to drop
    ## now we need to adjust nsdf, pstart and lpi
    for (d in dind) { ## working down through drop indices
      ## following commented out code is useful should it ever prove necessary to 
      ## adjust pstart and nsdf, but at present these are only used in prediction, 
      ## and it is cleaner to leave them unchanged, and simply drop using dind during prediction.
      #k <- if (d>=pstart[nf]) nlp else which(d >= pstart[1:(nf-1)] & d < pstart[2:nf])
      #nsdf[k] <- nsdf[k] - 1 ## one less unpenalized column in this block
      #if (k<nf) pstart[(k+1):nf] <-  pstart[(k+1):nf] - 1 ## later block starts move down 1 
      for (i in 1:nlp) { 
        k <- which(d == lpi[[i]])
        if (length(k)>0) lpi[[i]] <- lpi[[i]][-k] ## drop row
        k <- which(lpi[[i]]>d)
        if (length(k)>0) lpi[[i]][k] <- lpi[[i]][k] - 1 ## close up
      }
    } ## end of drop index loop
  }
  list(dind=dind,lpi=lpi) ##,pstart=pstart,nsdf=nsdf)
} ## olid



gam.setup.list <- function(formula,pterms,
                    data=stop("No data supplied to gam.setup"),knots=NULL,sp=NULL,
                    min.sp=NULL,H=NULL,absorb.cons=TRUE,sparse.cons=0,select=FALSE,idLinksBases=TRUE,
                    scale.penalty=TRUE,paraPen=NULL,gamm.call=FALSE,drop.intercept=NULL) {
## version of gam.setup for when gam is called with a list of formulae, 
## specifying several linear predictors...
## key difference to gam.setup is an attribute to the model matrix, "lpi", which is a list
## of column indices for each linear predictor 
  if (!is.null(paraPen)) stop("paraPen not supported for multi-formula models")
  if (!absorb.cons) stop("absorb.cons must be TRUE for multi-formula models")
  d <- length(pterms) ## number of formulae
  if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, d)
  if (length(drop.intercept) != d) stop("length(drop.intercept) should be equal to number of model formulas")

  lp.overlap <- if (formula$nlp<d) TRUE else FALSE ## predictors share terms

  G <- gam.setup(formula[[1]],pterms[[1]],
              data,knots,sp,min.sp,H,absorb.cons,sparse.cons,select,
              idLinksBases,scale.penalty,paraPen,gamm.call,drop.intercept[1],list.call=TRUE)
  G$pterms <- pterms
  
  G$offset <- list(G$offset)
  #G$contrasts <- list(G$contrasts)
  G$xlevels <- list(G$xlevels)
  G$assign <- list(G$assign)
  used.sp <- length(G$lsp0)
  if (!is.null(sp)&&used.sp>0) sp <- sp[-(1:used.sp)] ## need to strip off already used sp's
  if (!is.null(min.sp)&&nrow(G$L)>0) min.sp <- min.sp[-(1:nrow(G$L))]  

  ## formula[[1]] always relates to the base formula of the first linear predictor...

  flpi <- lpi <- list()
  for (i in 1:formula$nlp) lpi[[i]] <- rep(0,0)
  lpi[[1]] <- 1:ncol(G$X) ## lpi[[j]] is index of cols for jth linear predictor 
  flpi[[1]] <- formula[[1]]$lpi ## used in identifiability testing by olid, later  

  pof <- ncol(G$X) ## counts the model matrix columns produced so far
  pstart <- rep(0,d) ## indexes where parameteric columns start in each formula block of X
  pstart[1] <- 1
  if (d>1) for (i in 2:d) {
    if (is.null(formula[[i]]$response)) {  ## keep gam.setup happy
      formula[[i]]$response <- formula$response 
      mv.response <- FALSE
    } else mv.response <- TRUE
    #spind <- if (is.null(sp)) 1 else (length(G$S)+1):length(sp)
    formula[[i]]$pfok <- 1 ## empty formulae OK here!
    um <- gam.setup(formula[[i]],pterms[[i]],
              data,knots,sp,min.sp,#sp[spind],min.sp[spind],
	      H,absorb.cons,sparse.cons,select,
              idLinksBases,scale.penalty,paraPen,gamm.call,drop.intercept[i],list.call=TRUE)
    used.sp <- length(um$lsp0)	      
    if (!is.null(sp)&&used.sp>0) sp <- sp[-(1:used.sp)] ## need to strip off already used sp's
    if (!is.null(min.sp)&&nrow(um$L)>0) min.sp <- min.sp[-(1:nrow(um$L))]  

    flpi[[i]] <- formula[[i]]$lpi
    for (j in formula[[i]]$lpi) { ## loop through all l.p.s to which this term contributes
      lpi[[j]] <- c(lpi[[j]],pof + 1:ncol(um$X)) ## add these cols to lpi[[j]]
      ##lpi[[i]] <- pof + 1:ncol(um$X) ## old code
    }
    if (mv.response) G$y <- cbind(G$y,um$y)
    if (i>formula$nlp&&!is.null(um$offset)) {
      stop("shared offsets not allowed")
    }
    G$offset[[i]] <- um$offset
    #G$contrasts[[i]] <- um$contrasts
    if (!is.null(um$contrasts)) G$contrasts <- c(G$contrasts,um$contrasts)
    G$xlevels[[i]] <- um$xlevels
    G$assign[[i]] <- um$assign
    G$rank <- c(G$rank,um$rank)
    pstart[i] <- pof+1
    G$X <- cbind(G$X,um$X) ## extend model matrix
    ## deal with the smooths...
    k <- G$m
    if (um$m) for (j in 1:um$m) {
      um$smooth[[j]]$first.para <- um$smooth[[j]]$first.para + pof
      um$smooth[[j]]$last.para <- um$smooth[[j]]$last.para + pof
      k <- k + 1 
      G$smooth[[k]] <- um$smooth[[j]]
    }
    ## L, S and off...
    ks <- length(G$S)
    M <- length(um$S)
 
    if (!is.null(um$L)||!is.null(G$L)) {
      if (is.null(G$L)) G$L <- diag(1,nrow=ks)
      if (is.null(um$L)) um$L <- diag(1,nrow=M)
      G$L <- rbind(cbind(G$L,matrix(0,nrow(G$L),ncol(um$L))),cbind(matrix(0,nrow(um$L),ncol(G$L)),um$L))
    }

    G$off <- c(G$off,um$off+pof)
    if (M) for (j in 1:M) {
      ks <- ks + 1
      G$S[[ks]] <- um$S[[j]]
    }
 
    G$m <- G$m + um$m ## number of smooths
    ##G$nsdf <- G$nsdf + um$nsdf ## or list??
    G$nsdf[i] <- um$nsdf
    if (!is.null(um$P)||!is.null(G$P)) {
      if (is.null(G$P)) G$P <- diag(1,nrow=pof)
      k <- ncol(um$X)
      if (is.null(um$P)) um$P <- diag(1,nrow=k)
      G$P <- rbind(cbind(G$P,matrix(0,pof,k)),cbind(matrix(0,k,pof),um$P))
    }
    G$cmX <- c(G$cmX,um$cmX)
    if (um$nsdf>0) um$term.names[1:um$nsdf] <- paste(um$term.names[1:um$nsdf],i-1,sep=".")
    G$term.names <- c(G$term.names,um$term.names)
    G$lsp0 <- c(G$lsp0,um$lsp0)
    G$sp <- c(G$sp,um$sp)
    pof <- ncol(G$X)
  } ## formula loop end

  ## If there is overlap then there is a danger of lack of identifiability of the 
  ## parameteric terms, especially if there are factors present in shared components. 
  ## The following code deals with this possibility... 
  if (lp.overlap) {
    rt <- olid(G$X,G$nsdf,pstart,flpi,lpi)
    if (length(rt$dind)>0) { ## then columns have to be dropped
      warning("dropping unidentifiable parametric terms from model",call.=FALSE)
      G$X <- G$X[,-rt$dind] ## drop cols 
      G$cmX <- G$cmX[-rt$dind]
      G$term.names <- G$term.names[-rt$dind]
      ## adjust indexing in smooth list, noting that coefs of smooths 
      ## are never dropped by dind 
      for (i in 1:length(G$smooth)) {
        k <- sum(rt$dind < G$smooth[[i]]$first.para)
        G$smooth[[i]]$first.para <- G$smooth[[i]]$first.para - k
        G$smooth[[i]]$last.para <- G$smooth[[i]]$last.para - k
      }
      for (i in 1:length(G$off)) G$off[i] <- G$off[i] - sum(rt$dind < G$off[i])
      ## replace various indices with updated versions...
      # pstart <- rt$pstart; G$nsdf <- rt$nsdf ## these two only needed in predict.gam - cleaner to leave unchanged
      lpi <- rt$lpi
      attr(G$nsdf,"drop.ind") <- rt$dind ## store drop index
    } 
  }
  attr(lpi,"overlap") <- lp.overlap
  attr(G$X,"lpi") <- lpi
  attr(G$nsdf,"pstart") <- pstart ##unlist(lapply(lpi,min))
  G
} ## gam.setup.list



gam.setup <- function(formula,pterms,
                     data=stop("No data supplied to gam.setup"),knots=NULL,sp=NULL,
                    min.sp=NULL,H=NULL,absorb.cons=TRUE,sparse.cons=0,select=FALSE,idLinksBases=TRUE,
                    scale.penalty=TRUE,paraPen=NULL,gamm.call=FALSE,drop.intercept=FALSE,
                    diagonal.penalty=FALSE,apply.by=TRUE,list.call=FALSE,modCon=0) 
## set up the model matrix, penalty matrices and auxilliary information about the smoothing bases
## needed for a gam fit.
## elements of returned object:
## * m - number of smooths
## * min.sp - minimum smoothing parameters
## * H supplied H matrix
## * pearson.extra, dev.extra, n.true --- entries to hold these quantities
## * pterms - terms object for parametric terms
## * intercept TRUE if intercept present
## * offset - the model offset
## * nsdf - number of strictly parameteric coefs
## * contrasts 
## * xlevels - records levels of factors
## * assign - indexes which parametric model matrix columns map to which term in pterms
## * smooth - list of smooths
## * S - penalties (non-zero block only)
## * off - first coef penalized by each element of S
## * cmX - col mean of X
## * P - maps parameters in fit constraint parameterization to those in prediction parameterization
## * X - model matrix
## * sp
## * rank
## * n.paraPen
## * L 
## * lsp0
## * y - response
## * C - constraint matrix - only if absorb.cons==FALSE
## * n - dim(y)
## * w - weights
## * term.names
## * nP
{ # split the formula if the object being passed is a formula, otherwise it's already split

  if (inherits(formula,"split.gam.formula")) split <- formula else
  if (inherits(formula,"formula")) split <- interpret.gam(formula) 
  else stop("First argument is no sort of formula!") 
  
  if (length(split$smooth.spec)==0) {
    if (split$pfok==0) stop("You've got no model....")
    m <- 0
  } else  m <- length(split$smooth.spec) # number of smooth terms
  
  G <- list(m=m,min.sp=min.sp,H=H,pearson.extra=0,
            dev.extra=0,n.true=-1,pterms=pterms) ## dev.extra gets added to deviance if REML/ML used in gam.fit3
  
  if (is.null(attr(data,"terms"))) # then data is not a model frame
  mf <- model.frame(split$pf,data,drop.unused.levels=FALSE) # must be false or can end up with wrong prediction matrix!
  else mf <- data # data is already a model frame

  G$intercept <-  attr(attr(mf,"terms"),"intercept")>0

  ## get any model offset. Complicated by possibility of offsets in multiple formulae...
  if (list.call) {
    offi <- attr(pterms,"offset")
    if (!is.null(offi)) {
      G$offset <- mf[[names(attr(pterms,"dataClasses"))[offi]]]
    }
  } else G$offset <- model.offset(mf)   # get any model offset including from offset argument
  
  if (!is.null(G$offset))  G$offset <- as.numeric(G$offset) 

  # construct strictly parametric model matrix.... 
  if (drop.intercept) attr(pterms,"intercept") <- 1 ## ensure there is an intercept to drop
  X <- model.matrix(pterms,mf)
  if (drop.intercept) { ## some extended families require intercept to be dropped 
    xat <- attributes(X);ind <- xat$assign>0 ## index of non intercept columns 
    X <- X[,ind,drop=FALSE] ## some extended families need to drop intercept
    xat$assign <- xat$assign[ind];xat$dimnames[[2]]<-xat$dimnames[[2]][ind];
    xat$dim[2] <- xat$dim[2]-1;attributes(X) <- xat
    G$intercept <- FALSE
  } 
  rownames(X) <- NULL ## save memory
  
  G$nsdf <- ncol(X)
  G$contrasts <- attr(X,"contrasts")
  G$xlevels <- .getXlevels(pterms,mf)
  G$assign <- attr(X,"assign") # used to tell which coeffs relate to which pterms

  ## now deal with any user supplied penalties on the parametric part of the model...
  PP <- parametricPenalty(pterms,G$assign,paraPen,sp)
  if (!is.null(PP)) { ## strip out supplied sps already used
    ind <- 1:length(PP$sp)
    if (!is.null(sp)) sp <- sp[-ind]
    if (!is.null(min.sp)) { 
      PP$min.sp <- min.sp[ind]
      min.sp <- min.sp[-ind]
    } 
  }
  
  # next work through smooth terms (if any) extending model matrix.....
  
  G$smooth <- list()
  G$S <- list()
 
  if (gamm.call) { ## flag that this is a call from gamm --- some smoothers need to know!
    if (m>0) for (i in 1:m) attr(split$smooth.spec[[i]],"gamm") <- TRUE
  }

  if (m>0 && idLinksBases) { ## search smooth.spec[[]] for terms linked by common id's
    id.list <- list() ## id information list
    for (i in 1:m) if (!is.null(split$smooth.spec[[i]]$id)) {
      id <- as.character(split$smooth.spec[[i]]$id)
      if (length(id.list)&&id%in%names(id.list)) { ## it's an existing id
        ni <- length(id.list[[id]]$sm.i) ## number of terms so far with this id
        id.list[[id]]$sm.i[ni+1] <- i    ## adding smooth.spec index to this id's list
        ## clone smooth.spec from base smooth spec....
        base.i <- id.list[[id]]$sm.i[1]
         
        split$smooth.spec[[i]] <- clone.smooth.spec(split$smooth.spec[[base.i]],
                                                      split$smooth.spec[[i]])
        
        ## add data for this term to the data list for basis setup...
        temp.term <- split$smooth.spec[[i]]$term
       
        ## note cbind deliberate in next line, as construction will handle matrix argument 
        ## correctly... 
        for (j in 1:length(temp.term)) id.list[[id]]$data[[j]] <- cbind(id.list[[id]]$data[[j]],
                                                          get.var(temp.term[j],data,vecMat=FALSE))
       
       } else { ## new id
        id.list[[id]] <- list(sm.i=i) ## start the array of indices of smooths with this id
        id.list[[id]]$data <- list()
        ## need to collect together all data for which this basis will be used,
        ## for basis setup...
        term <- split$smooth.spec[[i]]$term
        for (j in 1:length(term)) id.list[[id]]$data[[j]] <- get.var(term[j],data,vecMat=FALSE)
      } ## new id finished
    }
  } ## id.list complete

  G$off<-array(0,0)
  first.para<-G$nsdf+1
  sm <- list()
  newm <- 0
  if (m>0) for (i in 1:m) {
    # idea here is that terms are set up in accordance with information given in split$smooth.spec
    # appropriate basis constructor is called depending on the class of the smooth
    # constructor returns penalty matrices model matrix and basis specific information
    ## sm[[i]] <- smoothCon(split$smooth.spec[[i]],data,knots,absorb.cons,scale.penalty=scale.penalty,sparse.cons=sparse.cons) ## old code
    id <- split$smooth.spec[[i]]$id
    if (is.null(id)||!idLinksBases) { ## regular evaluation
      sml <- smoothCon(split$smooth.spec[[i]],data,knots,absorb.cons,scale.penalty=scale.penalty,
                       null.space.penalty=select,sparse.cons=sparse.cons,
                       diagonal.penalty=diagonal.penalty,apply.by=apply.by,modCon=modCon) 
    } else { ## it's a smooth with an id, so basis setup data differs from model matrix data
      names(id.list[[id]]$data) <- split$smooth.spec[[i]]$term ## give basis data suitable names
      sml <- smoothCon(split$smooth.spec[[i]],id.list[[id]]$data,knots,
                       absorb.cons,n=nrow(data),dataX=data,scale.penalty=scale.penalty,
                       null.space.penalty=select,sparse.cons=sparse.cons,
                       diagonal.penalty=diagonal.penalty,apply.by=apply.by,modCon=modCon)
    }
    for (j in 1:length(sml)) {
      newm <- newm + 1
      sm[[newm]] <- sml[[j]]
    }
  }
  
  G$m <- m <- newm ## number of actual smooths

  ## at this stage, it is neccessary to impose any side conditions required
  ## for identifiability
  if (m>0) { 
    sm <- gam.side(sm,X,tol=.Machine$double.eps^.5)
    if (!apply.by) for (i in 1:length(sm)) { ## restore any by-free model matrices
      if (!is.null(sm[[i]]$X0)) { ## there is a by-free matrix to restore 
        ind <- attr(sm[[i]],"del.index") ## columns, if any to delete
        sm[[i]]$X <- if (is.null(ind)) sm[[i]]$X0 else sm[[i]]$X0[,-ind,drop=FALSE] 
      }
    }
  }

  ## The matrix, L, mapping the underlying log smoothing parameters to the
  ## log of the smoothing parameter multiplying the S[[i]] must be
  ## worked out...
  idx <- list() ## idx[[id]]$c contains index of first col in L relating to id
  L <- matrix(0,0,0)
  lsp.names <- sp.names <- rep("",0) ## need a list of names to identify sps in global sp array
  if (m>0) for (i in 1:m) {
    id <- sm[[i]]$id
    ## get the L matrix for this smooth...
    length.S <- length(sm[[i]]$S)
    if (is.null(sm[[i]]$L)) Li <- diag(length.S) else Li <- sm[[i]]$L 
     
    if (length.S > 0) { ## there are smoothing parameters to name
       if (length.S == 1) spn <- sm[[i]]$label else {
          Sname <- names(sm[[i]]$S)
          if (is.null(Sname)) spn <- paste(sm[[i]]$label,1:length.S,sep="") else
          spn <- paste(sm[[i]]$label,Sname,sep="")
       }
    }

    ## extend the global L matrix...
    if (is.null(id)||is.null(idx[[id]])) { ## new `id'     
      if (!is.null(id)) { ## create record in `idx'
        idx[[id]]$c <- ncol(L)+1   ## starting column in L for this `id'
        idx[[id]]$nc <- ncol(Li)   ## number of columns relating to this `id'
      }
      L <- rbind(cbind(L,matrix(0,nrow(L),ncol(Li))),
                 cbind(matrix(0,nrow(Li),ncol(L)),Li))
      if (length.S > 0) { ## there are smoothing parameters to name
        sp.names <- c(sp.names,spn) ## extend the sp name vector
        lsp.names <- c(lsp.names,spn) ## extend full.sp name vector
      }
    } else { ## it's a repeat id => shares existing sp's
      L0 <- matrix(0,nrow(Li),ncol(L))
      if (ncol(Li)>idx[[id]]$nc) {
        stop("Later terms sharing an `id' can not have more smoothing parameters than the first such term")
      }
      L0[,idx[[id]]$c:(idx[[id]]$c+ncol(Li)-1)] <- Li
      L <- rbind(L,L0)
      if (length.S > 0) { ## there are smoothing parameters to name
        lsp.names <- c(lsp.names,spn) ## extend full.sp name vector
      }
    }
  }

  ## create the model matrix...

  Xp <- NULL ## model matrix under prediction constraints, if given
  if (m>0) for (i in 1:m) {
    n.para<-ncol(sm[[i]]$X)
    # define which elements in the parameter vector this smooth relates to....
    sm[[i]]$first.para<-first.para     
    first.para<-first.para+n.para
    sm[[i]]$last.para<-first.para-1
    ## termwise offset handling ...
    Xoff <- attr(sm[[i]]$X,"offset")
    if (!is.null(Xoff)) { 
      if (is.null(G$offset)) G$offset <- Xoff
      else G$offset <- G$offset + Xoff
    }
    ## model matrix accumulation ...
    
    ## alternative version under alternative constraint first (prediction only)
    if (is.null(sm[[i]]$Xp)) {
      if (!is.null(Xp)) Xp <- cbind2(Xp,sm[[i]]$X)
    } else { 
      if (is.null(Xp)) Xp <- X
      Xp <- cbind2(Xp,sm[[i]]$Xp);sm[[i]]$Xp <- NULL
    }
    ## now version to use for fitting ...
    X <- cbind2(X,sm[[i]]$X);sm[[i]]$X<-NULL
   
    G$smooth[[i]] <- sm[[i]]   
  }

  if (is.null(Xp)) {
    G$cmX <- colMeans(X) ## useful for componentwise CI construction 
  } else {
    G$cmX <- colMeans(Xp)
    ## transform from fit params to prediction params...
    ## G$P <- qr.coef(qr(Xp),X) ## old code assumes always full rank!!
    
    qrx <- qr(Xp,LAPACK=TRUE)
    R <- qr.R(qrx)
    p <- ncol(R)
    rank <- Rrank(R) ## rank of Xp/R    
    QtX <- qr.qty(qrx,X)[1:rank,]
    if (rank<p) { ## rank deficient  
      R <- R[1:rank,]
      qrr <- qr(t(R),tol=0)
      R <- qr.R(qrr)
      G$P <- forwardsolve(t(R),QtX)
    } else {
      G$P <- backsolve(R,QtX)
    }
    if (rank<p) {
      G$P <- qr.qy(qrr,rbind(G$P,matrix(0,p-rank,p)))
    }
    G$P[qrx$pivot,] <- G$P
  }
  ## cmX relates to computation of CIs incorportating uncertainty about the mean
  ## It may make more sense to incorporate all uncertainty about the mean,
  ## rather than just the uncertainty in the fixed effects mean. This means
  ## incorporating the mean of random effects and unconstrained smooths. Hence
  ## comment out the following.
  #if (G$nsdf>0) G$cmX[-(1:G$nsdf)] <- 0 ## zero the smooth parts here 
  #else G$cmX <- G$cmX * 0
  G$X <- X;rm(X)
  n.p <- ncol(G$X) 
  # deal with penalties


  ## min.sp must be length nrow(L) to make sense
  ## sp must be length ncol(L) --- need to partition
  ## L into columns relating to free log smoothing parameters,
  ## and columns, L0, corresponding to values supplied in sp.
  ## lsp0 = L0%*%log(sp[sp>=0]) [need to fudge sp==0 case by
  ## setting log(0) to log(effective zero) computed case-by-case]

  ## following deals with supplied and estimated smoothing parameters...
  ## first process the `sp' array supplied to `gam'...
  
  if (!is.null(sp)) { # then user has supplied fixed smoothing parameters
   ok <- TRUE 
   if (length(sp) < ncol(L)) { 
      warning("Supplied smoothing parameter vector is too short - ignored.")
      ok <- FALSE
    }
    if (sum(is.na(sp))) { 
      warning("NA's in supplied smoothing parameter vector - ignoring.")
      ok <- FALSE
    }
  } else ok <- FALSE
  G$sp <- if (ok) sp[1:ncol(L)] else rep(-1,ncol(L))
  
  names(G$sp) <- sp.names

  ## now work through the smooths searching for any `sp' elements
  ## supplied in `s' or `te' terms.... This relies on `idx' created 
  ## above...
  
  k <- 1 ## current location in `sp' array
  if (m>0) for (i in 1:m) {
    id <- sm[[i]]$id
    if (is.null(sm[[i]]$L)) Li <- diag(length(sm[[i]]$S)) else Li <- sm[[i]]$L 
    if (is.null(id)) { ## it's a smooth without an id
      spi <- sm[[i]]$sp
      if (!is.null(spi)) { ## sp supplied in `s' or `te'
        if (length(spi)!=ncol(Li)) stop("incorrect number of smoothing parameters supplied for a smooth term")
        G$sp[k:(k+ncol(Li)-1)] <- spi
      }       
      k <- k + ncol(Li) 
    } else { ## smooth has an id
      spi <- sm[[i]]$sp
      if (is.null(idx[[id]]$sp.done)) { ## not already dealt with these sp's
        if (!is.null(spi)) { ## sp supplied in `s' or `te'
          if (length(spi)!=ncol(Li)) stop("incorrect number of smoothing parameters supplied for a smooth term")
          G$sp[idx[[id]]$c:(idx[[id]]$c+idx[[id]]$nc-1)] <- spi
        }
        idx[[id]]$sp.done <- TRUE ## only makes sense to use supplied `sp' from defining term
        k <- k + idx[[id]]$nc 
      }
    }
  } ## finished processing `sp' vectors supplied in `s' or `te' terms

  ## copy initial sp's back into smooth objects, so there is a record of
  ## fixed and free...
  k <- 1 
  if (length(idx)) for (i in 1:length(idx)) idx[[i]]$sp.done <- FALSE
  if (m>0) for (i in 1:m) { ## work through all smooths
    id <- sm[[i]]$id 
    if (!is.null(id)) { ## smooth with id
      if (idx[[id]]$nc>0) { ## only copy if there are sp's
        G$smooth[[i]]$sp <- G$sp[idx[[id]]$c:(idx[[id]]$c+idx[[id]]$nc-1)]
      }   
      if (!idx[[id]]$sp.done) { ## only update k on first encounter with this smooth
        idx[[id]]$sp.done <- TRUE
        k <- k + idx[[id]]$nc
      }
    
    } else { ## no id, just work through sp 
      if (is.null(sm[[i]]$L)) nc <- length(sm[[i]]$S) else nc <- ncol(sm[[i]]$L)
      if (nc>0) G$smooth[[i]]$sp <- G$sp[k:(k+nc-1)]
      k <- k + nc
    }
  } ## now all elements of G$smooth have a record of initial sp. 


  if (!is.null(min.sp)) { # then minimum s.p.'s supplied
    if (length(min.sp)<nrow(L)) stop("length of min.sp is wrong.")
    min.sp <- min.sp[1:nrow(L)]
    if (sum(is.na(min.sp))) stop("NA's in min.sp.")
    if (sum(min.sp<0)) stop("elements of min.sp must be non negative.")
  }

  k.sp <- 0 # count through sp and S
  G$rank <- array(0,0)
  if (m>0) for (i in 1:m) {
    sm<-G$smooth[[i]]
    if (length(sm$S)>0)
    for (j in 1:length(sm$S)) {  # work through penalty matrices
      k.sp <- k.sp+1
      G$off[k.sp] <- sm$first.para 
      G$S[[k.sp]] <- sm$S[[j]]
      G$rank[k.sp]<-sm$rank[j]
      if (!is.null(min.sp)) {
        if (is.null(H)) H<-matrix(0,n.p,n.p)
        H[sm$first.para:sm$last.para,sm$first.para:sm$last.para] <-
        H[sm$first.para:sm$last.para,sm$first.para:sm$last.para]+min.sp[k.sp]*sm$S[[j]] 
      }           
    } 
  }
 
  ## need to modify L, lsp.names, G$S, G$sp, G$rank and G$off to include any penalties
  ## on parametric stuff, at this point....
  if (!is.null(PP)) { ## deal with penalties on parametric terms
    L <- rbind(cbind(L,matrix(0,nrow(L),ncol(PP$L))),
                 cbind(matrix(0,nrow(PP$L),ncol(L)),PP$L))
    G$off <- c(PP$off,G$off)
    G$S <- c(PP$S,G$S)
    G$rank <- c(PP$rank,G$rank)
    G$sp <- c(PP$sp,G$sp)
    lsp.names <- c(PP$full.sp.names,lsp.names)
    G$n.paraPen <- length(PP$off)
    if (!is.null(PP$min.sp)) { ## deal with minimum sps
      if (is.null(H)) H <- matrix(0,n.p,n.p)
      for (i in 1:length(PP$S)) {
        ind <- PP$off[i]:(PP$off[i]+ncol(PP$S[[i]])-1)
        H[ind,ind] <- H[ind,ind] + PP$min.sp[i] * PP$S[[i]]
      }
    } ## min.sp stuff finished
  } else G$n.paraPen <- 0


  ## Now remove columns of L and rows of sp relating to fixed 
  ## smoothing parameters, and use removed elements to create lsp0

  fix.ind <- G$sp>=0

  if (sum(fix.ind)) {
    lsp0 <- G$sp[fix.ind]
    ind <- lsp0==0 ## find the zero s.p.s
    ef0 <- indi <- (1:length(ind))[ind]
    if (length(indi)>0) for (i in 1:length(indi)) {
      ## find "effective zero" to replace each zero s.p. with
      ii <- G$off[i]:(G$off[i]+ncol(G$S[[i]])-1) 
      ef0[i] <- norm(G$X[,ii],type="F")^2/norm(G$S[[i]],type="F")*.Machine$double.eps*.1
    }
    lsp0[!ind] <- log(lsp0[!ind])
    lsp0[ind] <- log(ef0) ##log(.Machine$double.xmin)*1000 ## zero fudge
    lsp0 <- as.numeric(L[,fix.ind,drop=FALSE]%*%lsp0)

    L <- L[,!fix.ind,drop=FALSE]  
    G$sp <- G$sp[!fix.ind]
  } else {lsp0 <- rep(0,nrow(L))}

  G$H <- H

  if (ncol(L)==nrow(L)&&!sum(L!=diag(ncol(L)))) L <- NULL ## it's just the identity

  G$L <- L;G$lsp0 <- lsp0
  names(G$lsp0) <- lsp.names ## names of all smoothing parameters (not just underlying)

  if (absorb.cons==FALSE) {  ## need to accumulate constraints 
    G$C <- matrix(0,0,n.p)
    if (m>0) {
      for (i in 1:m) {
        if (is.null(G$smooth[[i]]$C)) n.con<-0 
        else n.con<- nrow(G$smooth[[i]]$C)
        C <- matrix(0,n.con,n.p)
        C[,G$smooth[[i]]$first.para:G$smooth[[i]]$last.para]<-G$smooth[[i]]$C
        G$C <- rbind(G$C,C)
        G$smooth[[i]]$C <- NULL
      }
      rm(C)
    }
  } ## absorb.cons == FALSE
 
  G$y <- data[[split$response]]
         
  ##data[[deparse(split$full.formula[[2]],backtick=TRUE)]]
  
  G$n <- nrow(data)

  if (is.null(data$"(weights)")) G$w <- rep(1,G$n)
  else G$w <- data$"(weights)"  

  ## Create names for model coefficients... 

  if (G$nsdf > 0) term.names <- colnames(G$X)[1:G$nsdf] else term.names<-array("",0)
  n.smooth <- length(G$smooth)
  if (n.smooth)
  for (i in 1:n.smooth) { ## create coef names, if smooth has any coefs!
    k<-1
    jj <- G$smooth[[i]]$first.para:G$smooth[[i]]$last.para
    if (G$smooth[[i]]$df > 0) for (j in jj) {
      term.names[j] <- paste(G$smooth[[i]]$label,".",as.character(k),sep="")
      k <- k+1
    }
  }
  G$term.names <- term.names

  # now run some checks on the arguments
  
  ### Should check that there are enough unique covariate combinations to support model dimension

  G$pP <- PP ## return paraPen object, if present

  G
} ## gam.setup



formula.gam <- function(x, ...)
# formula.lm and formula.glm reconstruct the formula from x$terms, this is 
# problematic because of the way mgcv handles s() and te() terms 
{ x$formula
}



gam.outer <- function(lsp,fscale,family,control,method,optimizer,criterion,scale,gamma,G,start=NULL,...)
# function for smoothing parameter estimation by outer optimization. i.e.
# P-IRLS scheme iterated to convergence for each trial set of smoothing
# parameters.
# MAJOR STEPS:
#  1. Call appropriate smoothing parameter optimizer, and extract fitted model
#    `object'
#  2. Call `gam.fit3.post.proc' to get parameter covariance matrices, edf etc to
#     add to `object' 
{ if (is.na(optimizer[2])) optimizer[2] <- "newton"
  if (!optimizer[2]%in%c("newton","bfgs","nlm","optim","nlm.fd")) stop("unknown outer optimization method.")

  if (optimizer[2]%in%c("nlm.fd")) .Deprecated(msg=paste("optimizer",optimizer[2],"is deprecated, please use newton or bfgs"))

#  if (optimizer[1]=="efs" && !inherits(family,"general.family")) {
#    warning("Extended Fellner Schall only implemented for general families")
#    optimizer <- c("outer","newton")
#  }

  if (length(lsp)==0) { ## no sp estimation to do -- run a fit instead
    optimizer[2] <- "no.sps" ## will cause gam2objective to be called, below
  }
  nbGetTheta <- substr(family$family[1],1,17)=="Negative Binomial" && length(family$getTheta())>1
  if (nbGetTheta) stop("Please provide a single value for theta or use nb to estimate it")
  if (optimizer[2]=="nlm.fd") {
    #if (nbGetTheta) stop("nlm.fd not available with negative binomial Theta estimation")
    if (method%in%c("REML","ML","GACV.Cp","P-ML","P-REML")) stop("nlm.fd only available for GCV/UBRE")
    um<-nlm(full.score,lsp,typsize=lsp,fscale=fscale, stepmax = 
            control$nlm$stepmax, ndigit = control$nlm$ndigit,
	    gradtol = control$nlm$gradtol, steptol = control$nlm$steptol, 
            iterlim = control$nlm$iterlim, G=G,family=family,control=control,
            gamma=gamma,start=start,...)
    lsp<-um$estimate
    object<-attr(full.score(lsp,G,family,control,gamma=gamma,...),"full.gam.object")
    object$gcv.ubre <- um$minimum
    object$outer.info <- um
    object$sp <- exp(lsp)
    return(object)
  }
  ## some preparations for the other methods, which all use gam.fit3...
 
  family <- fix.family.link(family)
  family <- fix.family.var(family)
  if (method%in%c("REML","ML","P-REML","P-ML")) family <- fix.family.ls(family)
  
  if (optimizer[1]=="efs"&& optimizer[2] != "no.sps" ) { ## experimental extended efs
    ##warning("efs is still experimental!")
    if (inherits(family,"general.family")) {
      object <- efsud(x=G$X,y=G$y,lsp=lsp,Sl=G$Sl,weights=G$w,offset=G$offxset,family=family,
                     control=control,Mp=G$Mp,start=start)
    } else {
      family <- fix.family.ls(family)
      object <- efsudr(x=G$X,y=G$y,lsp=lsp,Eb=G$Eb,UrS=G$UrS,weights=G$w,family=family,offset=G$offset,
                       start=start,
                       U1=G$U1, intercept = TRUE,scale=scale,Mp=G$Mp,control=control,n.true=G$n.true,...)
    }
    object$gcv.ubre <- object$REML
  } else if (optimizer[2]=="newton"||optimizer[2]=="bfgs"){ ## the gam.fit3 method 
    if (optimizer[2]=="bfgs") 
    b <- bfgs(lsp=lsp,X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,L=G$L,lsp0=G$lsp0,offset=G$offset,U1=G$U1,Mp = G$Mp,
                family=family,weights=G$w,control=control,gamma=gamma,scale=scale,conv.tol=control$newton$conv.tol,
                maxNstep= control$newton$maxNstep,maxSstep=control$newton$maxSstep,maxHalf=control$newton$maxHalf, 
                printWarn=FALSE,scoreType=criterion,null.coef=G$null.coef,start=start,
                pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,...) else
    b <- newton(lsp=lsp,X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,L=G$L,lsp0=G$lsp0,offset=G$offset,U1=G$U1,Mp=G$Mp,
                family=family,weights=G$w,control=control,gamma=gamma,scale=scale,conv.tol=control$newton$conv.tol,
                maxNstep= control$newton$maxNstep,maxSstep=control$newton$maxSstep,maxHalf=control$newton$maxHalf, 
                printWarn=FALSE,scoreType=criterion,null.coef=G$null.coef,start=start,
                pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,
		edge.correct=control$edge.correct,...)                
                
    object <- b$object
    object$REML <- object$REML1 <- object$REML2 <-
    object$GACV <- object$D2 <- object$P2 <- object$UBRE2 <- object$trA2 <- 
    object$GACV1 <- object$GACV2 <- object$GCV2 <- object$D1 <- object$P1 <- NULL
    object$sp <- as.numeric(exp(b$lsp))
    object$gcv.ubre <- b$score
    b <- list(conv=b$conv,iter=b$iter,grad=b$grad,hess=b$hess,score.hist=b$score.hist) ## return info
    object$outer.info <- b   
  } else { ## methods calling gam.fit3 
    args <- list(X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,offset=G$offset,U1=G$U1,Mp=G$Mp,family=family,
             weights=G$w,control=control,scoreType=criterion,gamma=gamma,scale=scale,
             L=G$L,lsp0=G$lsp0,null.coef=G$null.coef,n.true=G$n.true,Sl=G$Sl,start=start)
  
    if (optimizer[2]=="nlm") {
       b <- nlm(gam4objective, lsp, typsize = lsp, fscale = fscale, 
            stepmax = control$nlm$stepmax, ndigit = control$nlm$ndigit,
	    gradtol = control$nlm$gradtol, steptol = control$nlm$steptol, 
            iterlim = control$nlm$iterlim,
	    check.analyticals=control$nlm$check.analyticals,
            args=args,...)
      lsp <- b$estimate
      
    } else if (optimizer[2]=="optim") {
      b<-optim(par=lsp,fn=gam2objective,gr=gam2derivative,method="L-BFGS-B",control=
         list(fnscale=fscale,factr=control$optim$factr,lmm=min(5,length(lsp))),args=args,...)
      lsp <- b$par
    } else b <- NULL
    obj <- gam2objective(lsp,args,printWarn=TRUE,...) # final model fit, with warnings 
    object <- attr(obj,"full.fit")
    object$gcv.ubre <- as.numeric(obj) 
    object$outer.info <- b
    object$sp <- exp(lsp)
  } # end of methods calling gam.fit2
  
  
  if (scale>0) { 
    object$scale.estimated <- FALSE; object$scale <- scale} else {
    object$scale <- object$scale.est;object$scale.estimated <- TRUE
  } 
  
  object$control <- control
  object$method <- method
  if (inherits(family,"general.family")) {
    mv <- gam.fit5.post.proc(object,G$Sl,G$L,G$lsp0,G$S,G$off)
    ## object$coefficients <- Sl.initial.repara(G$Sl,object$coefficients,inverse=TRUE)
  } else mv <- gam.fit3.post.proc(G$X,G$L,G$lsp0,G$S,G$off,object)
  ## note: use of the following in place of Vp appears to mess up p-values for smooths,
  ##       but doesn't change r.e. p-values of course. 
  if (!is.null(mv$Vc)) object$Vc <- mv$Vc 
  if (!is.null(mv$edf2)) object$edf2 <- mv$edf2
  object$Vp <- mv$Vb
  object$hat<-mv$hat
  object$Ve <- mv$Ve
  object$edf<-mv$edf
  object$edf1 <- mv$edf1
  ##object$F <- mv$F ## DoF matrix --- probably not needed
  object$R <- mv$R ## qr.R(sqrt(W)X)
  object$aic <- object$aic + 2*sum(mv$edf)
  object$nsdf <- G$nsdf
  object$K <-  object$D1 <-  object$D2 <-  object$P <-  object$P1 <-  object$P2 <-  
  object$GACV <-  object$GACV1 <-  object$GACV2 <-  object$REML <-  object$REML1 <-  object$REML2 <-  
  object$GCV<-object$GCV1<- object$GCV2 <- object$UBRE <-object$UBRE1 <- object$UBRE2 <- object$trA <-
  object$trA1<- object$trA2 <- object$alpha <- object$alpha1 <- object$scale.est <- NULL
  object$sig2 <- object$scale
  
  object
} ## gam.outer

get.null.coef <- function(G,start=NULL,etastart=NULL,mustart=NULL,...) {
## Get an estimate of the coefs corresponding to maximum reasonable deviance...
  y <- G$y
  weights <- G$w
  nobs <- G$n ## ignore codetools warning!!
  ##start <- etastart <- mustart <- NULL
  family <- G$family
  eval(family$initialize) ## have to do this to ensure y numeric
  y <- as.numeric(y)
  mum <- mean(y)+0*y
  etam <- family$linkfun(mum)
  null.coef <- qr.coef(qr(G$X),etam)
  null.coef[is.na(null.coef)] <- 0;
  ## get a suitable function scale for optimization routines
  null.scale <- sum(family$dev.resids(y,mum,weights))/nrow(G$X) 
  list(null.coef=null.coef,null.scale=null.scale)
}

estimate.gam <- function (G,method,optimizer,control,in.out,scale,gamma,start=NULL,...) {
## Do gam estimation and smoothness selection...
  
  if (inherits(G$family,"extended.family")) { ## then there are some restrictions...
    if (!(method%in%c("REML","ML"))) method <- "REML"
    if (optimizer[1]=="perf") optimizer <- c("outer","newton") 
    if (inherits(G$family,"general.family")) {
    
       method <- "REML" ## any method you like as long as it's REML
       G$Sl <- Sl.setup(G) ## prepare penalty sequence
       ## Xorig <- G$X ## store original X incase it is needed by family - poor option pre=proc can manipulate G$X
       G$X <- Sl.initial.repara(G$Sl,G$X,both.sides=FALSE) ## re-parameterize accordingly
 
       if (!is.null(start)) start <- Sl.initial.repara(G$Sl,start,inverse=FALSE,both.sides=FALSE)

       ## make sure optimizer appropriate for available derivatives
       if (!is.null(G$family$available.derivs)) {
         if (G$family$available.deriv==1 && optimizer[1]!="efs")  optimizer <- c("outer","bfgs")
	 if (G$family$available.derivs==0) optimizer <- "efs"
       }	 
    }
  }

  if (!optimizer[1]%in%c("perf","outer","efs")) stop("unknown optimizer")
  if (optimizer[1]=="efs") method <- "REML"
  if (!method%in%c("GCV.Cp","GACV.Cp","REML","P-REML","ML","P-ML")) stop("unknown smoothness selection criterion") 
  G$family <- fix.family(G$family)
  G$rS <- mini.roots(G$S,G$off,ncol(G$X),G$rank)

  if (method%in%c("REML","P-REML","ML","P-ML")) {
    if (optimizer[1]=="perf") {
      warning("Reset optimizer to outer/newton") 
      optimizer <- c("outer","newton")
    }
    reml <- TRUE
  } else reml <- FALSE ## experimental insert
  Ssp <- totalPenaltySpace(G$S,G$H,G$off,ncol(G$X))
  G$Eb <- Ssp$E       ## balanced penalty square root for rank determination purposes 
  G$U1 <- cbind(Ssp$Y,Ssp$Z) ## eigen space basis
  G$Mp <- ncol(Ssp$Z) ## null space dimension
  G$UrS <- list()     ## need penalty matrices in overall penalty range space...
  if (length(G$S)>0) for (i in 1:length(G$S)) G$UrS[[i]] <- t(Ssp$Y)%*%G$rS[[i]] else i <- 0
  if (!is.null(G$H)) { ## then the sqrt fixed penalty matrix H is needed for (RE)ML 
      G$UrS[[i+1]] <- t(Ssp$Y)%*%mroot(G$H)
  }


  # is outer looping needed ?
  outer.looping <- ((!G$am && (optimizer[1]!="perf"))||reml||method=="GACV.Cp") ## && length(G$S)>0 && sum(G$sp<0)!=0

  ## sort out exact sp selection criterion to use

  fam.name <- G$family$family[1]

  if (scale==0) { ## choose criterion for performance iteration
    if (fam.name == "binomial"||fam.name == "poisson") G$sig2<-1 #ubre
      else G$sig2 <- -1 #gcv
  } else {G$sig2 <- scale}


  if (reml) { ## then RE(ML) selection, but which variant?
   criterion <- method
   if (fam.name == "binomial"||fam.name == "poisson") scale <- 1
   if (inherits(G$family,"extended.family") && scale <=0) {
     scale <- if (is.null(G$family$scale)) 1 else G$family$scale
   }
  } else {
    if (scale==0) { 
      if (fam.name=="binomial"||fam.name=="poisson") scale <- 1 #ubre
      else scale <- -1 #gcv
    }
    if (scale > 0) criterion <- "UBRE"
    else {
      if (method=="GCV.Cp") criterion <- "GCV" else criterion <- "GACV"
    }
  }


  if (substr(fam.name,1,17)=="Negative Binomial") { 
    scale <- 1; ## no choice
    if (method%in%c("GCV.Cp","GACV.Cp")) criterion <- "UBRE"
  }
  ## Reset P-ML or P-REML in known scale parameter cases
  if (scale>0) {
    if (method=="P-ML") criterion <- method <- "ML" else 
    if (method=="P-REML")  criterion <- method <- "REML"
  } 

  # take only a few IRLS steps to get scale estimates for "pure" outer
  # looping...
  family <- G$family; nb.fam.reset <- FALSE
  if (outer.looping) {     
    ## how many performance iteration steps to use for initialization...
    fixedSteps <- if (inherits(G$family,"extended.family")) 0 else control$outerPIsteps  
    if (substr(G$family$family[1],1,17)=="Negative Binomial") { ## initialize sensibly
      scale <- G$sig2 <- 1
      G$family <- negbin(max(family$getTheta()),link=family$link)
      nb.fam.reset <- TRUE
    }
  } else fixedSteps <- control$maxit+2
  
  ## extended family may need to manipulate G...
    
  if (!is.null(G$family$preinitialize)) {
    if (inherits(G$family,"general.family")) eval(G$family$preinitialize) else {
      ## extended family - just initializes theta and possibly y
      pini <- G$family$preinitialize(G$y,G$family)
      if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta)
      if (!is.null(pini$y)) G$y <- pini$y
    }
  }

  if (length(G$sp)>0) lsp2 <- log(initial.spg(G$X,G$y,G$w,G$family,G$S,G$rank,G$off,
                                  offset=G$offset,L=G$L,lsp0=G$lsp0,E=G$Eb,...))
  else lsp2 <- rep(0,0)

  if (outer.looping && !is.null(in.out)) { # initial s.p.s and scale provided
    ok <- TRUE ## run a few basic checks
    if (is.null(in.out$sp)||is.null(in.out$scale)) ok <- FALSE
    if (length(in.out$sp)!=length(G$sp)) ok <- FALSE
    if (!ok) stop("in.out incorrect: see documentation")
    lsp <- log(in.out$sp) 
  } else {## do performance iteration.... 
    if (fixedSteps>0) {
    
      object <- gam.fit(G,family=G$family,control=control,gamma=gamma,fixedSteps=fixedSteps,...)
      lsp <- log(object$sp) 
    } else {
      lsp <- lsp2
    } 
  }
  if (nb.fam.reset) G$family <- family ## restore, in case manipulated for negative binomial 
    
  if (outer.looping) {
    # don't allow PI initial sp's too far from defaults, otherwise optimizers may
    # get stuck on flat portions of GCV/UBRE score....
    if (is.null(in.out)&&length(lsp)>0) { ## note no checks if supplied 
      ind <- lsp > lsp2+5;lsp[ind] <- lsp2[ind]+5
      ind <- lsp < lsp2-5;lsp[ind] <- lsp2[ind]-5 
    }
   
    ## Get an estimate of the coefs corresponding to maximum reasonable deviance,
    ## and an estimate of the function scale, suitable for optimizers that need this.
    ## Doesn't make sense for general families that have to initialize coefs directly.
  
    ## null.stuff  <- if(inherits(G$family,"general.family")) list() else get.null.coef(G,...)
    ## Matteo modification to facilitate qgam...
    null.stuff  <- if (inherits(G$family,"general.family")) list() else { 
      if (is.null(G$family$get.null.coef)) get.null.coef(G,...) else G$family$get.null.coef(G,...)
    }
    if (fixedSteps>0&&is.null(in.out)) mgcv.conv <- object$mgcv.conv else mgcv.conv <- NULL
    
    if (criterion%in%c("REML","ML")&&scale<=0) { ## log(scale) to be estimated as a smoothing parameter
      if (fixedSteps>0) {
        log.scale <-  log(sum(object$weights*object$residuals^2)/(G$n-sum(object$edf)))
      } else {
        if (is.null(in.out)) {
          log.scale <- log(null.stuff$null.scale/10)
        } else {
          log.scale <- log(in.out$scale)
        }
      }
      lsp <- c(lsp,log.scale) ## append log initial scale estimate to lsp
      ## extend G$L, if present...
      if (!is.null(G$L)) { 
        G$L <- cbind(rbind(G$L,rep(0,ncol(G$L))),c(rep(0,nrow(G$L)),1))
        #attr(G$L,"scale") <- TRUE ## indicates scale estimated as sp
      }
      if (!is.null(G$lsp0)) G$lsp0 <- c(G$lsp0,0)
    } 
     ## check if there are extra parameters to estimate
    if (inherits(G$family,"extended.family")&&!inherits(G$family,"general.family")&&G$family$n.theta>0) {
      th0 <- G$family$getTheta() ## additional (initial) parameters of likelihood 
      nth <- length(th0)
      nlsp <- length(lsp)
      ind <- 1:nlsp + nth ## only used if nlsp>0
      lsp <- c(th0,lsp) ## append to start of lsp
      ## extend G$L, G$lsp0 if present...
      if (!is.null(G$L)&&nth>0) { 
        L <- rbind(cbind(diag(nth),matrix(0,nth,ncol(G$L))),
                   cbind(matrix(0,nrow(G$L),nth),G$L))
        #sat <- attr(G$L,"scale")
        G$L <- L
        #attr(G$L,"scale") <- sat
        #attr(G$L,"not.sp") <- nth ## first not.sp params are not smoothing params
      }
      if (!is.null(G$lsp0)) G$lsp0 <- c(th0*0,G$lsp0)
    } else nth <- 0

   
    G$null.coef <- null.stuff$null.coef

    object <- gam.outer(lsp,fscale=null.stuff$null.scale, ##abs(object$gcv.ubre)+object$sig2/length(G$y),
                        family=G$family,control=control,criterion=criterion,method=method,
                        optimizer=optimizer,scale=scale,gamma=gamma,G=G,start=start,...)
    
    if (criterion%in%c("REML","ML")&&scale<=0)  object$sp <- 
                                                object$sp[-length(object$sp)] ## drop scale estimate from sp array
    
    if (inherits(G$family,"extended.family")&&nth>0) object$sp <- object$sp[-(1:nth)] ## drop theta params
 
    object$mgcv.conv <- mgcv.conv 

  } ## finished outer looping

  ## correct null deviance if there's an offset [Why not correct calc in gam.fit/3???]....

  if (!inherits(G$family,"extended.family")&&G$intercept&&any(G$offset!=0)) object$null.deviance <-
         glm(object$y~offset(G$offset),family=object$family,weights=object$prior.weights)$deviance

  object$method <- criterion

  object$smooth<-G$smooth

  names(object$edf) <- G$term.names
  names(object$edf1) <- G$term.names

  if (inherits(family,"general.family")) {
    object$coefficients <- Sl.initial.repara(G$Sl,object$coefficients,inverse=TRUE)
  }
  
  ## extended family may need to manipulate fit object. Code
  ## will need to include the following line if G$X used...
  ## G$X <- Sl.initial.repara(G$Sl,G$X,inverse=TRUE,cov=FALSE,both.sides=FALSE)
  if (!is.null(G$family$postproc)) {
    if (inherits(G$family,"general.family")) eval(G$family$postproc) else {
      posr <- G$family$postproc(family=object$family,y=G$y,prior.weights=object$prior.weights,
              fitted=object$fitted.values,linear.predictors=object$linear.predictors,offset=G$offset,
	      intercept=G$intercept)
      if (!is.null(posr$family)) object$family$family <- posr$family
      if (!is.null(posr$deviance)) object$deviance <- posr$deviance
      if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance	      
    }
  } 

  if (!is.null(G$P)) { ## matrix transforming from fit to prediction parameterization
    object$coefficients <- as.numeric(G$P %*% object$coefficients)
    object$Vp <- G$P %*% object$Vp %*% t(G$P)
    object$Ve <- G$P %*% object$Ve %*% t(G$P)
    rownames(object$Vp) <- colnames(object$Vp) <- G$term.names
    rownames(object$Ve) <- colnames(object$Ve) <- G$term.names
  }
  names(object$coefficients) <- G$term.names 
  
  object
} ## end estimate.gam

variable.summary <- function(pf,dl,n) {
## routine to summarize all the variables in dl, which is a list
## containing raw input variables to a model (i.e. no functions applied)
## pf is a formula containing the strictly parametric part of the
## model for the variables in dl. A list is returned, with names given by 
## the variables. For variables in the parametric part, then the list elements
## may be:
## * a 1 column matrix with elements set to the column medians, if variable 
##   is a matrix.
## * a 3 figure summary (min,median,max) for a numeric variable.
## * a factor variable, with the most commonly occuring factor (all levels)
## --- classes are as original data type, but anything not numeric, factor or matrix
## is coerced to numeric. 
## For non-parametric variables, any matrices are coerced to numeric, otherwise as 
## parametric.      
## medians in the above are always observed values (to deal with variables coerced to 
## factors in the model formulae in a nice way).
## variables with less than `n' entries are discarded
   v.n <- length(dl)
   ## if (v.n) for (i in 1:v.n) if (length(dl[[i]])<n) dl[[i]] <- NULL 
   
   v.name <- v.name1 <- names(dl)
   if (v.n) ## need to strip out names of any variables that are too short.
   { k <- 0 ## counter for retained variables
     for (i in 1:v.n) if (length(dl[[i]])>=n) { 
       k <- k+1
       v.name[k] <- v.name1[i] ## save names of variables of correct length
     }
     if (k>0) v.name <- v.name[1:k] else v.name <- rep("",k)
   }

   ## v.name <- names(dl)    ## the variable names
   p.name <- all.vars(pf[-2]) ## variables in parametric part (not response)
   vs <- list()
   v.n <- length(v.name)
   if (v.n>0) for (i in 1:v.n) {
     if (v.name[i]%in%p.name) para <- TRUE else para <- FALSE ## is variable in the parametric part?

     if (para&&is.matrix(dl[[v.name[i]]])&&ncol(dl[[v.name[i]]])>1) { ## parametric matrix --- a special case
       x <- matrix(apply(dl[[v.name[i]]],2,quantile,probs=0.5,type=3,na.rm=TRUE),1,ncol(dl[[v.name[i]]])) ## nearest to median entries
     } else { ## anything else
       x <- dl[[v.name[i]]]
       if (is.character(x)) x <- as.factor(x)
       if (is.factor(x)) {
         x <- x[!is.na(x)]
         lx <- levels(x)
         freq <- tabulate(x)
         ii <- min((1:length(lx))[freq==max(freq)])
         x <- factor(lx[ii],levels=lx) 
       } else {
         x <- as.numeric(x)
         x <- c(min(x,na.rm=TRUE),as.numeric(quantile(x,probs=.5,type=3,na.rm=TRUE)) ,max(x,na.rm=TRUE)) ## 3 figure summary
       }
     }
     vs[[v.name[i]]] <- x
   }
   vs
} ## end variable.summary


## don't be tempted to change to control=list(...) --- messes up passing on other stuff via ...

gam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action,offset=NULL,
                method="GCV.Cp",optimizer=c("outer","newton"),control=list(),#gam.control(),
                scale=0,select=FALSE,knots=NULL,sp=NULL,min.sp=NULL,H=NULL,gamma=1,fit=TRUE,
                paraPen=NULL,G=NULL,in.out=NULL,drop.unused.levels=TRUE,drop.intercept=NULL,...) {
## Routine to fit a GAM to some data. The model is stated in the formula, which is then 
## interpreted to figure out which bits relate to smooth terms and which to parametric terms.
## Basic steps:
## 1. Formula is split up into parametric and non-parametric parts,
##    and a fake formula constructed to be used to pick up data for
##    model frame. pterms "terms" object(s) created for parametric 
##    components, model frame created along with terms object.
## 2. 'gam.setup' called to do most of basis construction and other
##    elements of model setup.
## 3. 'estimate.gam' is called to estimate the model. This performs further 
##    pre- and post- fitting steps and calls either 'gam.fit' (performance
##    iteration) or 'gam.outer' (default method). 'gam.outer' calls the actual 
##    smoothing parameter optimizer ('newton' by default) and then any post 
##    processing. The optimizer calls 'gam.fit3/4/5' to estimate the model 
##    coefficients and obtain derivatives w.r.t. the smoothing parameters.
## 4. Finished 'gam' object assembled.
   control <- do.call("gam.control",control)
   if (is.null(G)) {
    ## create model frame..... 
    gp <- interpret.gam(formula) # interpret the formula 
    cl <- match.call() # call needed in gam object for update to work
    mf <- match.call(expand.dots=FALSE)
    mf$formula <- gp$fake.formula 
    mf$family <- mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$min.sp<-mf$H<-mf$select <- mf$drop.intercept <-
                 mf$gamma<-mf$method<-mf$fit<-mf$paraPen<-mf$G<-mf$optimizer <- mf$in.out <- mf$...<-NULL
    mf$drop.unused.levels <- drop.unused.levels
    mf[[1]] <- quote(stats::model.frame) ## as.name("model.frame")
    pmf <- mf
    mf <- eval(mf, parent.frame()) # the model frame now contains all the data 
    if (nrow(mf)<2) stop("Not enough (non-NA) data to do anything meaningful")
    terms <- attr(mf,"terms")
    
    ## summarize the *raw* input variables
    ## note can't use get_all_vars here -- buggy with matrices
    vars <- all.vars1(gp$fake.formula[-2]) ## drop response here
    inp <- parse(text = paste("list(", paste(vars, collapse = ","),")"))

    ## allow a bit of extra flexibility in what `data' is allowed to be (as model.frame actually does)
    if (!is.list(data)&&!is.data.frame(data)) data <- as.data.frame(data) 

    dl <- eval(inp, data, parent.frame())
    names(dl) <- vars ## list of all variables needed
    var.summary <- variable.summary(gp$pf,dl,nrow(mf)) ## summarize the input data
    rm(dl) ## save space    

    ## pterms are terms objects for the parametric model components used in 
    ## model setup - don't try obtaining by evaluating pf in mf - doesn't
    ## work in general (e.g. with offset)...

    if (is.list(formula)) { ## then there are several linear predictors
      environment(formula) <- environment(formula[[1]]) ## e.g. termplots needs this
      pterms <- list()
      tlab <- rep("",0)
      for (i in 1:length(formula)) {
        pmf$formula <- gp[[i]]$pf 
        pterms[[i]] <- attr(eval(pmf, parent.frame()),"terms")
        tlabi <- attr(pterms[[i]],"term.labels")
        if (i>1&&length(tlabi)>0) tlabi <- paste(tlabi,i-1,sep=".")
        tlab <- c(tlab,tlabi)
      }
      attr(pterms,"term.labels") <- tlab ## labels for all parametric terms, distinguished by predictor
    } else { ## single linear predictor case
      pmf$formula <- gp$pf
      pmf <- eval(pmf, parent.frame()) # pmf contains all data for parametric part
      pterms <- attr(pmf,"terms") ## pmf only used for this
    }

    if (is.character(family)) family <- eval(parse(text=family))
    if (is.function(family)) family <- family()
    if (is.null(family$family)) stop("family not recognized")
  
    if (family$family[1]=="gaussian" && family$link=="identity") am <- TRUE
    else am <- FALSE
    
    if (!control$keepData) rm(data) ## save space

    ## check whether family requires intercept to be dropped...
    # drop.intercept <- if (is.null(family$drop.intercept) || !family$drop.intercept) FALSE else TRUE
    # drop.intercept <- as.logical(family$drop.intercept)
    if (is.null(family$drop.intercept)) { ## family does not provide information
      lengthf <- if (is.list(formula)) length(formula) else 1
      if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, lengthf) else {
        drop.intercept <- rep(drop.intercept,length=lengthf) ## force drop.intercept to correct length
	if (sum(drop.intercept)) family$drop.intercept <- drop.intercept ## ensure prediction works
      }
    } else drop.intercept <- as.logical(family$drop.intercept) ## family overrides argument
    
    if (inherits(family,"general.family")&&!is.null(family$presetup)) eval(family$presetup)

    gsname <- if (is.list(formula)) "gam.setup.list" else "gam.setup" 

    G <- do.call(gsname,list(formula=gp,pterms=pterms,
                 data=mf,knots=knots,sp=sp,min.sp=min.sp,
                 H=H,absorb.cons=TRUE,sparse.cons=0,select=select,
                 idLinksBases=control$idLinksBases,scale.penalty=control$scalePenalty,
                 paraPen=paraPen,drop.intercept=drop.intercept))
    
    G$var.summary <- var.summary
    G$family <- family
   
    if ((is.list(formula)&&(is.null(family$nlp)||family$nlp!=gp$nlp))||
        (!is.list(formula)&&!is.null(family$npl)&&(family$npl>1))) stop("incorrect number of linear predictors for family")
       
    G$terms<-terms;
    G$mf<-mf;G$cl<-cl;
    G$am <- am

    if (is.null(G$offset)) G$offset<-rep(0,G$n)
     
    G$min.edf <- G$nsdf ## -dim(G$C)[1]
    if (G$m) for (i in 1:G$m) G$min.edf<-G$min.edf+G$smooth[[i]]$null.space.dim

    G$formula <- formula
    G$pred.formula <- gp$pred.formula
    environment(G$formula)<-environment(formula)
  }

  if (!fit) return(G)

  if (ncol(G$X)>nrow(G$X)) stop("Model has more coefficients than data") 

  G$conv.tol <- control$mgcv.tol      # tolerence for mgcv
  G$max.half <- control$mgcv.half # max step halving in Newton update mgcv

  object <- estimate.gam(G,method,optimizer,control,in.out,scale,gamma,...)

  
  if (!is.null(G$L)) { 
    object$full.sp <- as.numeric(exp(G$L%*%log(object$sp)+G$lsp0))
    names(object$full.sp) <- names(G$lsp0)
  }
  names(object$sp) <- names(G$sp)
  object$paraPen <- G$pP
  object$formula <- G$formula
  ## store any lpi attribute of G$X for use in predict.gam...
  if (is.list(object$formula)) attr(object$formula,"lpi") <- attr(G$X,"lpi")
  object$var.summary <- G$var.summary 
  object$cmX <- G$cmX ## column means of model matrix --- useful for CIs
  object$model<-G$mf # store the model frame
  object$na.action <- attr(G$mf,"na.action") # how to deal with NA's
  object$control <- control
  object$terms <- G$terms
  object$pred.formula <- G$pred.formula

  attr(object$pred.formula,"full") <- reformulate(all.vars(object$terms))
  
  object$pterms <- G$pterms
  object$assign <- G$assign # applies only to pterms
  object$contrasts <- G$contrasts
  object$xlevels <- G$xlevels
  object$offset <- G$offset
  if (!is.null(G$Xcentre)) object$Xcentre <- G$Xcentre
  if (control$keepData) object$data <- data
  object$df.residual <- nrow(G$X) - sum(object$edf)
  object$min.edf <- G$min.edf
  if (G$am&&!(method%in%c("REML","ML","P-ML","P-REML"))) object$optimizer <- "magic" else object$optimizer <- optimizer
  object$call <- G$cl # needed for update() to work
  class(object) <- c("gam","glm","lm")
  if (is.null(object$deviance)) object$deviance <- sum(residuals(object,"deviance")^2)
  names(object$gcv.ubre) <- method
  environment(object$formula) <- environment(object$pred.formula) <-
  environment(object$terms) <- environment(object$pterms) <- .GlobalEnv
  if (!is.null(object$model))  environment(attr(object$model,"terms"))  <- .GlobalEnv
  if (!is.null(attr(object$pred.formula,"full"))) environment(attr(object$pred.formula,"full")) <- .GlobalEnv
  object
} ## gam



print.gam<-function (x,...) 
# default print function for gam objects
{ print(x$family)
  cat("Formula:\n")
  if (is.list(x$formula)) for (i in 1:length(x$formula)) print(x$formula[[i]]) else
     print(x$formula)
  n.smooth<-length(x$smooth)
  if (n.smooth==0)
  cat("Total model degrees of freedom",sum(x$edf),"\n")
  else
  { edf<-0
    cat("\nEstimated degrees of freedom:\n")
    for (i in 1:n.smooth)
    edf[i]<-sum(x$edf[x$smooth[[i]]$first.para:x$smooth[[i]]$last.para])
    edf.str <- format(round(edf,digits=4),digits=3,scientific=FALSE)
    for (i in 1:n.smooth) {   
    cat(edf.str[i]," ",sep="")
      if (i%%7==0) cat("\n")
    }
    cat(" total =",round(sum(x$edf),digits=2),"\n")
  }
  if (!is.null(x$method)&&!(x$method%in%c("PQL","lme.ML","lme.REML")))  
  cat("\n",x$method," score: ",x$gcv.ubre,"     ",sep="")
  if (!is.null(x$rank) && x$rank< length(x$coefficients)) cat("rank: ",x$rank,"/",length(x$coefficients),sep="") 
  cat("\n")
  invisible(x)
}

gam.control <- function (nthreads=1,irls.reg=0.0,epsilon = 1e-7, maxit = 200,
                         mgcv.tol=1e-7,mgcv.half=15,trace =FALSE,
                         rank.tol=.Machine$double.eps^0.5,
                         nlm=list(),optim=list(),newton=list(),outerPIsteps=0,
                         idLinksBases=TRUE,scalePenalty=TRUE,efs.lspmax=15,efs.tol=.1,
                         keepData=FALSE,scale.est="fletcher",edge.correct=FALSE) 
# Control structure for a gam. 
# irls.reg is the regularization parameter to use in the GAM fitting IRLS loop.
# epsilon is the tolerance to use in the IRLS MLE loop. maxit is the number 
# of IRLS iterations to use. mgcv.tol is the tolerance to use in the mgcv call within each IRLS. mgcv.half is the 
# number of step halvings to employ in the mgcv search for the optimal GCV score, before giving up 
# on a search direction. trace turns on or off some de-bugging information.
# rank.tol is the tolerance to use for rank determination
# outerPIsteps is the number of performance iteration steps used to intialize
#                         outer iteration
{   scale.est <- match.arg(scale.est,c("fletcher","pearson","deviance"))
    if (!is.logical(edge.correct)&&(!is.numeric(edge.correct)||edge.correct<0)) stop(
        "edge.correct must be logical or a positive number")
    if (!is.numeric(nthreads) || nthreads <1) stop("nthreads must be a positive integer") 
    if (!is.numeric(irls.reg) || irls.reg <0.0) stop("IRLS regularizing parameter must be a non-negative number.")
    if (!is.numeric(epsilon) || epsilon <= 0) 
        stop("value of epsilon must be > 0")
    if (!is.numeric(maxit) || maxit <= 0) 
        stop("maximum number of iterations must be > 0")
    if (rank.tol<0||rank.tol>1) 
    { rank.tol=.Machine$double.eps^0.5
      warning("silly value supplied for rank.tol: reset to square root of machine precision.")
    }
    # work through nlm defaults
    if (is.null(nlm$ndigit)||nlm$ndigit<2) nlm$ndigit <- max(2,ceiling(-log10(epsilon)))
    nlm$ndigit <- round(nlm$ndigit)
    ndigit <- floor(-log10(.Machine$double.eps))
    if (nlm$ndigit>ndigit) nlm$ndigit <- ndigit
    if (is.null(nlm$gradtol)) nlm$gradtol <- epsilon*10
    nlm$gradtol <- abs(nlm$gradtol)
    ## note that nlm will stop after hitting stepmax 5 consecutive times
    ## hence should not be set too small ... 
    if (is.null(nlm$stepmax)||nlm$stepmax==0) nlm$stepmax <- 2
    nlm$stepmax <- abs(nlm$stepmax)
    if (is.null(nlm$steptol)) nlm$steptol <- 1e-4
    nlm$steptol <- abs(nlm$steptol)
    if (is.null(nlm$iterlim)) nlm$iterlim <- 200
    nlm$iterlim <- abs(nlm$iterlim)
    ## Should be reset for a while anytime derivative code altered...
    if (is.null(nlm$check.analyticals)) nlm$check.analyticals <- FALSE
    nlm$check.analyticals <- as.logical(nlm$check.analyticals) 

    # and newton defaults
    if (is.null(newton$conv.tol)) newton$conv.tol <- 1e-6
    if (is.null(newton$maxNstep)) newton$maxNstep <- 5
    if (is.null(newton$maxSstep)) newton$maxSstep <- 2
    if (is.null(newton$maxHalf)) newton$maxHalf <- 30
    if (is.null(newton$use.svd)) newton$use.svd <- FALSE

    # and optim defaults
    if (is.null(optim$factr)) optim$factr <- 1e7
    optim$factr <- abs(optim$factr)
    if (efs.tol<=0) efs.tol <- .1

    list(nthreads=round(nthreads),irls.reg=irls.reg,epsilon = epsilon, maxit = maxit,
         trace = trace, mgcv.tol=mgcv.tol,mgcv.half=mgcv.half,
         rank.tol=rank.tol,nlm=nlm,
         optim=optim,newton=newton,outerPIsteps=outerPIsteps,
         idLinksBases=idLinksBases,scalePenalty=scalePenalty,efs.lspmax=efs.lspmax,efs.tol=efs.tol,
         keepData=as.logical(keepData[1]),scale.est=scale.est,edge.correct=edge.correct)
    
}



mgcv.get.scale<-function(Theta,weights,good,mu,mu.eta.val,G)
# Get scale implied by current fit and trial -ve binom Theta, I've used
# mu and mu.eta.val used in fit rather than implied by it....
{ variance<- negbin(Theta)$variance
  w<-sqrt(weights[good]*mu.eta.val[good]^2/variance(mu)[good])
  wres<-w*(G$y-G$X%*%G$p)
  sum(wres^2)/(G$n-sum(G$edf))
}


mgcv.find.theta<-function(Theta,T.max,T.min,weights,good,mu,mu.eta.val,G,tol)
# searches for -ve binomial theta between given limits to get scale=1 
{ scale<-mgcv.get.scale(Theta,weights,good,mu,mu.eta.val,G)
  T.hi<-T.low<-Theta
  while (scale<1&&T.hi<T.max) 
  { T.hi<-T.hi*2
    T.hi<-min(T.hi,T.max)
    scale<-mgcv.get.scale(T.hi,weights,good,mu,mu.eta.val,G)
  } 
  if (all.equal(T.hi,T.max)==TRUE && scale<1) return(T.hi)
  T.low<-T.hi
  while (scale>=1&&T.low>T.min)
  { T.low<-T.low/2 
    T.low<-max(T.low,T.min)
    scale<-mgcv.get.scale(T.low,weights,good,mu,mu.eta.val,G)
  } 
  if (all.equal(T.low,T.min)==TRUE && scale>1) return(T.low)
  # (T.low,T.hi) now brackets scale=1. 
  while (abs(scale-1)>tol)
  { Theta<-(T.low+T.hi)/2
    scale<-mgcv.get.scale(Theta,weights,good,mu,mu.eta.val,G)
    if (scale<1) T.low<-Theta
    else T.hi<-Theta
  }
  Theta
}


full.score <- function(sp,G,family,control,gamma,...)
# function suitable for calling from nlm in order to polish gam fit
# so that actual minimum of score is found in generalized cases
{ if (is.null(G$L)) {
    G$sp<-exp(sp);
  } else {
    G$sp <- as.numeric(exp(G$L%*%sp + G$lsp0))
  }
  # set up single fixed penalty....
  q<-NCOL(G$X)
  if (is.null(G$H)) G$H<-matrix(0,q,q)
  for (i in 1:length(G$S))
  { j<-ncol(G$S[[i]])
    off1<-G$off[i];off2<-off1+j-1
    G$H[off1:off2,off1:off2]<-G$H[off1:off2,off1:off2]+G$sp[i]*G$S[[i]]
  }
  G$S<-list() # have to reset since length of this is used as number of penalties
  G$L <- NULL
  xx<-gam.fit(G,family=family,control=control,gamma=gamma,...)
  res <- xx$gcv.ubre.dev
  attr(res,"full.gam.object")<-xx
  res
}

gam.fit <- function (G, start = NULL, etastart = NULL, 
    mustart = NULL, family = gaussian(), 
    control = gam.control(),gamma=1,
    fixedSteps=(control$maxit+1),...) 
# fitting function for a gam, modified from glm.fit.
# note that smoothing parameter estimates from one irls iterate are carried over to the next irls iterate
# unless the range of s.p.s is large enough that numerical problems might be encountered (want to avoid 
# completely flat parts of gcv/ubre score). In the latter case autoinitialization is requested.
# fixedSteps < its default causes at most fixedSteps iterations to be taken,
# without warning if convergence has not been achieved. This is useful for
# obtaining starting values for outer iteration.
{   intercept<-G$intercept
    conv <- FALSE
    n <- nobs <- NROW(G$y) ## n just there to keep codetools happy
    nvars <- NCOL(G$X) # check this needed
    y<-G$y # original data
    X<-G$X # original design matrix
    if (nvars == 0) stop("Model seems to contain no terms")
    olm <- G$am   # only need 1 iteration as it's a pure additive model.
    if (!olm)  .Deprecated(msg="performance iteration with gam is deprecated, use bam instead")
    find.theta<-FALSE # any supplied -ve binomial theta treated as known, G$sig2 is scale parameter
    if (substr(family$family[1],1,17)=="Negative Binomial")
    { Theta <- family$getTheta()
      if (length(Theta)==1) { ## Theta fixed
        find.theta <- FALSE
        G$sig2 <- 1
      } else {
        if (length(Theta)>2)
        warning("Discrete Theta search not available with performance iteration")
        Theta <- range(Theta)
        T.max <- Theta[2]          ## upper search limit
        T.min <- Theta[1]          ## lower search limit
        Theta <- sqrt(T.max*T.min) ## initial value
        find.theta <- TRUE
      }
      nb.link<-family$link # negative.binomial family, there's a choise of links
    }

    
    # obtain average element sizes for the penalties
    n.S<-length(G$S)
    if (n.S>0)
    { S.size<-0
      for (i in 1:n.S) S.size[i]<-mean(abs(G$S[[i]])) 
    }
    weights<-G$w # original weights

    n.score <- sum(weights!=0) ## n to use in GCV score (i.e. don't count points with no influence)   

    offset<-G$offset 

    variance <- family$variance;dev.resids <- family$dev.resids
    aic <- family$aic
    linkinv <- family$linkinv;linkfun <- family$linkfun;mu.eta <- family$mu.eta
    if (!is.function(variance) || !is.function(linkinv)) 
        stop("illegal `family' argument")
    valideta <- family$valideta
    if (is.null(valideta)) 
        valideta <- function(eta) TRUE
    validmu <- family$validmu
    if (is.null(validmu)) 
        validmu <- function(mu) TRUE
    if (is.null(mustart))   # new from version 1.5.0 
    { eval(family$initialize)}
    else 
    { mukeep <- mustart
      eval(family$initialize)
      mustart <- mukeep
    }
    if (NCOL(y) > 1) 
        stop("y must be univariate unless binomial")
    
    coefold <- NULL                 # 1.5.0
    eta <- if (!is.null(etastart))  # 1.5.0
        etastart

    else if (!is.null(start)) 
    if (length(start) != nvars) 
    stop(gettextf("Length of start should equal %d and correspond to initial coefs.",nvars)) 
    else 
    { coefold<-start                        #1.5.0
      offset+as.vector(if (NCOL(G$X) == 1)
       G$X * start
       else G$X %*% start)
    }
    else family$linkfun(mustart)
    mu <- linkinv(eta)
    if (!(validmu(mu) && valideta(eta))) 
        stop("Can't find valid starting values: please specify some")
    devold <- sum(dev.resids(y, mu, weights))
   
    boundary <- FALSE
    scale <- G$sig2

    msp <- G$sp
    magic.control<-list(tol=G$conv.tol,step.half=G$max.half,#maxit=control$maxit+control$globit,
                          rank.tol=control$rank.tol)

    for (iter in 1:(control$maxit)) 
    {
        good <- weights > 0
        varmu <- variance(mu)[good]
        if (any(is.na(varmu))) 
            stop("NAs in V(mu)")
        if (any(varmu == 0)) 
            stop("0s in V(mu)")
        mu.eta.val <- mu.eta(eta)
        if (any(is.na(mu.eta.val[good]))) 
            stop("NAs in d(mu)/d(eta)")
        good <- (weights > 0) & (mu.eta.val != 0) # note good modified here => must re-calc each iter
        if (all(!good)) {
            conv <- FALSE
            warning(gettextf("No observations informative at iteration %d", 
                iter))
            break
        }
   
        mevg <- mu.eta.val[good];mug <- mu[good];yg <- y[good]
        weg <- weights[good];##etag <- eta[good]
        var.mug<-variance(mug)

        G$y <- z <- (eta - offset)[good] + (yg - mug)/mevg
        w <- sqrt((weg * mevg^2)/var.mug)
        
        G$w<-w
        G$X<-X[good,,drop=FALSE]  # truncated design matrix       
     
        # must set G$sig2 to scale parameter or -1 here....
        G$sig2 <- scale


        if (sum(!is.finite(G$y))+sum(!is.finite(G$w))>0) 
        stop("iterative weights or data non-finite in gam.fit - regularization may help. See ?gam.control.")

        ## solve the working weighted penalized LS problem ...

        mr <- magic(G$y,G$X,msp,G$S,G$off,L=G$L,lsp0=G$lsp0,G$rank,G$H,matrix(0,0,ncol(G$X)),  #G$C,
                    G$w,gamma=gamma,G$sig2,G$sig2<0,
                    ridge.parameter=control$irls.reg,control=magic.control,n.score=n.score,nthreads=control$nthreads)
        G$p<-mr$b;msp<-mr$sp;G$sig2<-mr$scale;G$gcv.ubre<-mr$score;

        if (find.theta) {# then family is negative binomial with unknown theta - estimate it here from G$sig2
            ##  need to get edf array
          mv <- magic.post.proc(G$X,mr,w=G$w^2)
          G$edf <- mv$edf

          Theta<-mgcv.find.theta(Theta,T.max,T.min,weights,good,mu,mu.eta.val,G,.Machine$double.eps^0.5)
          family<-do.call("negbin",list(theta=Theta,link=nb.link))
          variance <- family$variance;dev.resids <- family$dev.resids
          aic <- family$aic
          family$Theta <- Theta ## save Theta estimate in family
        }

        if (any(!is.finite(G$p))) {
            conv <- FALSE   
            warning(gettextf("Non-finite coefficients at iteration %d",iter))
            break
        }

		
        start <- G$p
        eta <- drop(X %*% start) # 1.5.0
        mu <- linkinv(eta <- eta + offset)
        eta <- linkfun(mu) # force eta/mu consistency even if linkinv truncates
        dev <- sum(dev.resids(y, mu, weights))
        if (control$trace) 
            message(gettextf("Deviance = %s Iterations - %d", dev, iter, domain = "R-mgcv"))
        boundary <- FALSE
        if (!is.finite(dev)) {
            if (is.null(coefold))
            stop("no valid set of coefficients has been found:please supply starting values",
            call. = FALSE)
            warning("Step size truncated due to divergence",call.=FALSE)
            ii <- 1
            while (!is.finite(dev)) {
                if (ii > control$maxit) 
                  stop("inner loop 1; can't correct step size")
                ii <- ii + 1
                start <- (start + coefold)/2
                eta<-drop(X %*% start)
                mu <- linkinv(eta <- eta + offset)
                eta <- linkfun(mu) 
                dev <- sum(dev.resids(y, mu, weights))
            }
            boundary <- TRUE
            if (control$trace) 
                cat("Step halved: new deviance =", dev, "\n")
        }
        if (!(valideta(eta) && validmu(mu))) {
            warning("Step size truncated: out of bounds.",call.=FALSE)
            ii <- 1
            while (!(valideta(eta) && validmu(mu))) {
                if (ii > control$maxit) 
                  stop("inner loop 2; can't correct step size")
                ii <- ii + 1
                start <- (start + coefold)/2
                eta<-drop(X %*% start)
                mu <- linkinv(eta <- eta + offset)
                eta<-linkfun(mu)
            }
            boundary <- TRUE
            dev <- sum(dev.resids(y, mu, weights))
            if (control$trace) 
                cat("Step halved: new deviance =", dev, "\n")
        }

        ## Test for convergence here ...

        if (abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon || olm ||
            iter >= fixedSteps) {
            conv <- TRUE
            coef <- start #1.5.0
            break
        }
        else {
            devold <- dev
            coefold <- coef<-start
        }
    }
    if (!conv) 
    { warning("Algorithm did not converge") 
    }
    if (boundary) 
        warning("Algorithm stopped at boundary value")
    eps <- 10 * .Machine$double.eps
    if (family$family[1] == "binomial") {
        if (any(mu > 1 - eps) || any(mu < eps)) 
            warning("fitted probabilities numerically 0 or 1 occurred")
    }
    if (family$family[1] == "poisson") {
        if (any(mu < eps)) 
            warning("fitted rates numerically 0 occurred")
    }
    
    residuals <- rep(NA, nobs)
    residuals[good] <- z - (eta - offset)[good]
    
    wt <- rep(0, nobs)
    wt[good] <- w^2
   
    wtdmu <- if (intercept) sum(weights * y)/sum(weights) else linkinv(offset)
    nulldev <- sum(dev.resids(y, wtdmu, weights))
    n.ok <- nobs - sum(weights == 0)
    nulldf <- n.ok - as.integer(intercept)
    
    ## Extract a little more information from the fit....

    mv <- magic.post.proc(G$X,mr,w=G$w^2)
    G$Vp<-mv$Vb;G$hat<-mv$hat;
    G$Ve <- mv$Ve # frequentist cov. matrix
    G$edf<-mv$edf
    G$conv<-mr$gcv.info
    G$sp<-msp
    rank<-G$conv$rank

    aic.model <- aic(y, n, mu, weights, dev) + 2 * sum(G$edf)
    if (scale < 0) { ## deviance based GCV
      gcv.ubre.dev <- n.score*dev/(n.score-gamma*sum(G$edf))^2
    } else { # deviance based UBRE, which is just AIC
      gcv.ubre.dev <- dev/n.score + 2 * gamma * sum(G$edf)/n.score - G$sig2
    }
  
	list(coefficients = as.vector(coef), residuals = residuals, fitted.values = mu, 
        family = family,linear.predictors = eta, deviance = dev,
        null.deviance = nulldev, iter = iter, weights = wt, prior.weights = weights,  
        df.null = nulldf, y = y, converged = conv,sig2=G$sig2,edf=G$edf,edf1=mv$edf1,hat=G$hat,
        ##F=mv$F,
        R=mr$R,
        boundary = boundary,sp = G$sp,nsdf=G$nsdf,Ve=G$Ve,Vp=G$Vp,rV=mr$rV,mgcv.conv=G$conv,
        gcv.ubre=G$gcv.ubre,aic=aic.model,rank=rank,gcv.ubre.dev=gcv.ubre.dev,scale.estimated = (scale < 0))
} ## gam.fit


model.matrix.gam <- function(object,...)
{ if (!inherits(object,"gam")) stop("`object' is not of class \"gam\"")
  predict(object,type="lpmatrix",...)
}

get.na.action <- function(na.action) {
## get the name of the na.action whether function or text string.
## avoids deparse(substitute(na.action)) which is easily broken by 
## nested calls.
  if (is.character(na.action)) {
    if (na.action%in%c("na.omit","na.exclude","na.pass","na.fail")) 
      return(na.action) else stop("unrecognised na.action")
  } 
  if (!is.function(na.action)) stop("na.action not character or function")
  a <- try(na.action(c(0,NA)),silent=TRUE)
  if (inherits(a,"try-error")) return("na.fail")
  if (inherits((attr(a,"na.action")),"omit")) return("na.omit")
  if (inherits((attr(a,"na.action")),"exclude")) return("na.exclude")
  return("na.pass")
} ## get.na.action


predict.gam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,exclude=NULL,
                       block.size=NULL,newdata.guaranteed=FALSE,na.action=na.pass,
                       unconditional=FALSE,iterms.type=NULL,...) {

# This function is used for predicting from a GAM. 'object' is a gam object, newdata a dataframe to
# be used in prediction......
#
# Type == "link"     - for linear predictor (may be several for extended case)
#      == "response" - for fitted values: may be several if several linear predictors,
#                      and may return something other than inverse link of l.p. for some families
#      == "terms"    - for individual terms on scale of linear predictor 
#      == "iterms"   - exactly as "terms" except that se's include uncertainty about mean  
#      == "lpmatrix" - for matrix mapping parameters to l.p. - has "lpi" attribute if multiple l.p.s
#      == "newdata"  - returns newdata after pre-processing 
# Steps are:
#  1. Set newdata to object$model if no newdata supplied
#  2. split up newdata into manageable blocks if too large
#  3. Obtain parametric model matrix (safely!)
#  4. Work through smooths calling prediction.matrix constructors for each term
#  5. Work out required quantities
# 
# The splitting into blocks enables blocks of compiled code to be called efficiently
# using smooth class specific prediction matrix constructors, without having to 
# build up potentially enormous prediction matrices.
# if newdata.guaranteed == TRUE then the data.frame is assumed complete and
# ready to go, so that only factor levels are checked for sanity.
# 
# if `terms' is non null then it should be a list of terms to be returned 
# when type=="terms" or "iterms". 
# if `object' has an attribute `para.only' then only parametric terms of order
# 1 are returned for type=="terms"/"iterms" : i.e. only what termplot can handle.
#
# if no new data is supplied then na.action does nothing, otherwise 
# if na.action == "na.pass" then NA predictors result in NA predictions (as lm
#                   or glm)
#              == "na.omit" or "na.exclude" then NA predictors result in
#                       dropping
# if GC is TRUE then gc() is called after each block is processed

  ## para acts by adding all smooths to the exclude list. 
  ## it also causes any lp matrix to be smaller than it would otherwise have been.
  #if (para) exclude <- c(exclude,unlist(lapply(object$smooth,function(x) x$label)))

  if (unconditional) {
    if (is.null(object$Vc)) warning("Smoothness uncertainty corrected covariance not available") else 
    object$Vp <- object$Vc
  }

  if (type!="link"&&type!="terms"&&type!="iterms"&&type!="response"&&type!="lpmatrix"&&type!="newdata")  { 
    warning("Unknown type, reset to terms.")
    type<-"terms"
  }
  if (!inherits(object,"gam")) stop("predict.gam can only be used to predict from gam objects")

  ## to mimic behaviour of predict.lm, some resetting is required ...
  if (missing(newdata)) na.act <- object$na.action else {
    if (is.null(na.action)) na.act <- NULL 
    else {
      na.txt <- if (is.character(na.action)||is.function(na.action)) get.na.action(na.action) else "na.pass"
      #if (is.character(na.action))
      #na.txt <- na.action else ## substitute(na.action) else
      #if (is.function(na.action)) na.txt <- deparse(substitute(na.action))
      if (na.txt=="na.pass") na.act <- "na.exclude" else
      if (na.txt=="na.exclude") na.act <- "na.omit" else
      na.act <- na.action
    }
  } ## ... done

  # get data from which to predict.....  
  nd.is.mf <- FALSE # need to flag if supplied newdata is already a model frame 
  ## get name of response...
  # yname <- all.vars(object$terms)[attr(object$terms,"response")]
  yname <- attr(attr(object$terms,"dataClasses"),"names")[attr(object$terms,"response")]
  if (newdata.guaranteed==FALSE) {
    if (missing(newdata)) { # then "fake" an object suitable for prediction 
      newdata <- object$model
      new.data.ok <- FALSE
      nd.is.mf <- TRUE
      response <- newdata[[yname]] ## ok even with "cbind(foo,bar)" as yname 
    } else {  # do an R ``standard'' evaluation to pick up data
      new.data.ok <- TRUE
      if (is.data.frame(newdata)&&!is.null(attr(newdata,"terms"))) { # it's a model frame
        if (sum(!(names(object$model)%in%names(newdata)))) stop(
        "newdata is a model.frame: it should contain all required variables\n")
         nd.is.mf <- TRUE
      } else {
        ## Following is non-standard to allow convenient splitting into blocks
        ## below, and to allow checking that all variables are in newdata ...

        ## get names of required variables, less response, but including offset variable
        ## see ?terms.object and ?terms for more information on terms objects
        # yname <- all.vars(object$terms)[attr(object$terms,"response")] ## redundant
        resp <- get.var(yname,newdata,FALSE)
        naresp <- FALSE
        #if (!is.null(object$family$predict)&&!is.null(newdata[[yname]])) {
	if (!is.null(object$family$predict)&&!is.null(resp)) {
          ## response provided, and potentially needed for prediction (e.g. Cox PH family)
          if (!is.null(object$pred.formula)) object$pred.formula <- attr(object$pred.formula,"full")
          response <- TRUE
          Terms <- terms(object)
          #resp <- newdata[[yname]]
	  if (is.matrix(resp)) {
            if (sum(is.na(rowSums(resp)))>0) stop("no NAs allowed in response data for this model")
          } else { ## vector response
            if (sum(is.na(resp))>0) {
              naresp <- TRUE ## there are NAs in supplied response
              ## replace them with a numeric code, so that rows are not dropped below
              rar <- range(resp,na.rm=TRUE)
              thresh <- rar[1]*1.01-rar[2]*.01
              resp[is.na(resp)] <- thresh
              newdata[[yname]] <- thresh 
            }
	  }  
        } else { ## response not provided
          response <- FALSE 
          Terms <- delete.response(terms(object))
        }
        allNames <- if (is.null(object$pred.formula)) all.vars(Terms) else all.vars(object$pred.formula)
        if (length(allNames) > 0) { 
          ff <- if (is.null(object$pred.formula)) reformulate(allNames) else  object$pred.formula
          if (sum(!(allNames%in%names(newdata)))) { 
            warning("not all required variables have been supplied in  newdata!\n")
          }
          ## note that `xlev' argument not used here, otherwise `as.factor' in 
          ## formula can cause a problem ... levels reset later.
          newdata <- eval(model.frame(ff,data=newdata,na.action=na.act),parent.frame())
          if (naresp) newdata[[yname]][newdata[[yname]]<=thresh] <- NA ## reinstate as NA  
        } ## otherwise it's intercept only and newdata can be left alone
        na.act <- attr(newdata,"na.action")
        #response <- if (response) newdata[[yname]] else NULL
	response <- if (response) get.var(yname,newdata,FALSE) else NULL
      }
    }
  } else { ## newdata.guaranteed == TRUE
    na.act <- NULL
    new.data.ok=TRUE ## it's guaranteed!
    if (!is.null(attr(newdata,"terms"))) nd.is.mf <- TRUE
    #response <- newdata[[yname]]
    response <- get.var(yname,newdata,FALSE)
  }


  ## now check the factor levels and split into blocks...

  if (new.data.ok) {
    ## check factor levels are right ...
    names(newdata)->nn # new data names
    colnames(object$model)->mn # original names
    for (i in 1:length(newdata)) 
    if (nn[i]%in%mn && is.factor(object$model[,nn[i]])) { # then so should newdata[[i]] be 
      levm <- levels(object$model[,nn[i]]) ## original levels
      levn <- levels(factor(newdata[[i]])) ## new levels
      if (sum(!levn%in%levm)>0) { ## check not trying to sneak in new levels 
        msg <- paste(paste(levn[!levn%in%levm],collapse=", "),"not in original fit",collapse="")
        stop(msg)
      }
      ## set prediction levels to fit levels...
      if (is.matrix(newdata[[i]])) {
        dum <- factor(newdata[[i]],levels=levm)
	dim(dum) <- dim(newdata[[i]])
	newdata[[i]] <- dum
      } else newdata[[i]] <- factor(newdata[[i]],levels=levm)
    }
    if (type=="newdata") return(newdata)

    # split prediction into blocks, to avoid running out of memory
    if (length(newdata)==1) newdata[[2]] <- newdata[[1]] # avoids data frame losing its labels and dimensions below!
    if (is.null(dim(newdata[[1]]))) np <- length(newdata[[1]]) 
    else np <- dim(newdata[[1]])[1] 
    nb <- length(object$coefficients)
    if (is.null(block.size)) block.size <- 1000
    if (block.size < 1) block.size <- np
  } else { # no new data, just use object$model
    np <- nrow(object$model)
    nb <- length(object$coefficients)
  }
  
  if (type=="lpmatrix") block.size <- NULL ## nothing gained by blocking in this case - and offset handling easier this way

  ## split prediction into blocks, to avoid running out of memory
  if (is.null(block.size)) { 
    ## use one block as predicting using model frame
    ## and no block size supplied... 
    n.blocks <- 1
    b.size <- array(np,1)
  } else {
    n.blocks <- np %/% block.size
    b.size <- rep(block.size,n.blocks)
    last.block <- np-sum(b.size)
    if (last.block>0) {
      n.blocks <- n.blocks+1  
      b.size[n.blocks] <- last.block
    }
  }


  # setup prediction arrays...
  ## in multi-linear predictor models, lpi[[i]][j] is the column of model matrix contributing the jth col to lp i 
  lpi <- if (is.list(object$formula)) attr(object$formula,"lpi") else NULL 
  n.smooth<-length(object$smooth)
  if (type=="lpmatrix") {
    H <- matrix(0,np,nb)
  } else if (type=="terms"||type=="iterms") { 
    term.labels <- attr(object$pterms,"term.labels")
    if (is.null(attr(object,"para.only"))) para.only <-FALSE else
    para.only <- TRUE  # if true then only return information on parametric part
    n.pterms <- length(term.labels)
    fit <- array(0,c(np,n.pterms+as.numeric(!para.only)*n.smooth))
    if (se.fit) se <- fit
    ColNames <- term.labels
  } else { ## "response" or "link"
    ## get number of linear predictors, in case it's more than 1...
    if (is.list(object$formula)) {
      # nf <- length(object$formula) ## number of model formulae
      nlp <- length(lpi) ## number of linear predictors
    } else nlp <- 1 ## nf <- 1
    # nlp <- if (is.list(object$formula)) length(object$formula) else 1
    fit <- if (nlp>1) matrix(0,np,nlp) else array(0,np)
    if (se.fit) se <- fit
    fit1 <- NULL ## "response" returned by fam$fv can be non-vector 
  }
  stop <- 0
  if (is.list(object$pterms)) { ## multiple linear predictors
    if (type=="iterms") {
      warning("type iterms not available for multiple predictor cases")
      type <- "terms"
    }
    pstart <- attr(object$nsdf,"pstart") ## starts of parametric blocks in coef vector
    pind <- rep(0,0) ## index of parametric coefs
    Terms <- list();pterms <- object$pterms
    for (i in 1:length(object$nsdf)) {
      Terms[[i]] <- delete.response(object$pterms[[i]])
      if (object$nsdf[i]>0) pind <- c(pind,pstart[i]-1+1:object$nsdf[i])
    }
  } else { ## normal single predictor case
    Terms <- list(delete.response(object$pterms)) ## make into a list anyway
    pterms <- list(object$pterms)
    pstart <- 1
    pind <- 1:object$nsdf ## index of parameteric coefficients
  }

  ## check if extended family required intercept to be dropped...
  #drop.intercept <- FALSE 
  #if (!is.null(object$family$drop.intercept)&&object$family$drop.intercept) {
  #  drop.intercept <- TRUE;
  #  ## make sure intercept explicitly included, so it can be cleanly dropped...
  #  for (i in 1:length(Terms)) attr(Terms[[i]],"intercept") <- 1 
  #} 
  drop.intercept <- object$family$drop.intercept
  if (is.null(drop.intercept)) {
    drop.intercept <- rep(FALSE, length(Terms))
  } else {
    ## make sure intercept explicitly included, so it can be cleanly dropped...
    for (i in 1:length(Terms)) {
      if (drop.intercept[i] == TRUE) attr(Terms[[i]],"intercept") <- 1 
    }
  }
  ## index of any parametric terms that have to be dropped
  ## this is used to help with identifiability in multi-
  ## formula models...

  drop.ind <- attr(object$nsdf,"drop.ind") 

  ####################################
  ## Actual prediction starts here...
  ####################################

  s.offset <- NULL # to accumulate any smooth term specific offset
  any.soff <- FALSE # indicator of term specific offset existence
  if (n.blocks > 0) for (b in 1:n.blocks) { # work through prediction blocks
    start <- stop+1
    stop <- start + b.size[b] - 1
    if (n.blocks==1) data <- newdata else data <- newdata[start:stop,]
    X <- matrix(0,b.size[b],nb+length(drop.ind))
    Xoff <- matrix(0,b.size[b],n.smooth) ## term specific offsets 
    offs <- list()
    for (i in 1:length(Terms)) { ## loop for parametric components (1 per lp)
      ## implements safe prediction for parametric part as described in
      ## http://developer.r-project.org/model-fitting-functions.txt
      if (new.data.ok) {
        if (nd.is.mf) mf <- model.frame(data,xlev=object$xlevels) else {
          mf <- model.frame(Terms[[i]],data,xlev=object$xlevels)
          if (!is.null(cl <- attr(pterms[[i]],"dataClasses"))) .checkMFClasses(cl,mf)
        } 
        Xp <- model.matrix(Terms[[i]],mf,contrasts=object$contrasts) 
      } else { 
        Xp <- model.matrix(Terms[[i]],object$model)
        mf <- newdata # needed in case of offset, below
      }
      offi <- attr(Terms[[i]],"offset")
      if (is.null(offi)) offs[[i]] <- 0 else { ## extract offset
        offs[[i]] <- mf[[names(attr(Terms[[i]],"dataClasses"))[offi+1]]]
      }
      if (drop.intercept[i]) { 
        xat <- attributes(Xp);ind <- xat$assign>0 
        Xp <- Xp[,xat$assign>0,drop=FALSE] ## some extended families need to drop intercept
        xat$assign <- xat$assign[ind];xat$dimnames[[2]]<-xat$dimnames[[2]][ind];
        xat$dim[2] <- xat$dim[2]-1;attributes(Xp) <- xat 
      }
      if (object$nsdf[i]>0) X[,pstart[i]-1 + 1:object$nsdf[i]] <- Xp
    } ## end of parametric loop
    if (length(offs)==1) offs <- offs[[1]]
    
    if (!is.null(drop.ind)) X <- X[,-drop.ind]

    if (n.smooth) for (k in 1:n.smooth) { ## loop through smooths
      klab <- object$smooth[[k]]$label
      if ((is.null(terms)||(klab%in%terms))&&(is.null(exclude)||!(klab%in%exclude))) {
        Xfrag <- PredictMat(object$smooth[[k]],data)		 
        X[,object$smooth[[k]]$first.para:object$smooth[[k]]$last.para] <- Xfrag
        Xfrag.off <- attr(Xfrag,"offset") ## any term specific offsets?
        if (!is.null(Xfrag.off)) { Xoff[,k] <- Xfrag.off; any.soff <- TRUE }
      }
      if (type=="terms"||type=="iterms") ColNames[n.pterms+k] <- klab
    } ## smooths done

    

    if (!is.null(object$Xcentre)) { ## Apply any column centering
      X <- sweep(X,2,object$Xcentre)
    }

    # Now have prediction matrix, X, for this block, need to do something with it...

    if (type=="lpmatrix") { 
      H[start:stop,] <- X
      if (any.soff) s.offset <- rbind(s.offset,Xoff)
    } else if (type=="terms"||type=="iterms") { ## split results into terms
      lass <- if (is.list(object$assign)) object$assign else list(object$assign)
      k <- 0
      for (j in 1:length(lass)) if (length(lass[[j]])) { ## work through assign list
        ind <- 1:length(lass[[j]]) ## index vector for coefs involved
        nptj <- max(lass[[j]]) ## number of terms involved here
        if (nptj>0) for (i in 1:nptj) { ## work through parametric part
          k <- k + 1 ## counts total number of parametric terms
          ii <- ind[lass[[j]]==i] + pstart[j] - 1 
          fit[start:stop,k] <- X[,ii,drop=FALSE]%*%object$coefficients[ii]
          if (se.fit) se[start:stop,k] <-
          sqrt(pmax(0,rowSums((X[,ii,drop=FALSE]%*%object$Vp[ii,ii])*X[,ii,drop=FALSE])))
        }
      } ## assign list done
      if (n.smooth&&!para.only) {
        for (k in 1:n.smooth) # work through the smooth terms 
        { first <- object$smooth[[k]]$first.para; last <- object$smooth[[k]]$last.para
          fit[start:stop,n.pterms+k] <- X[,first:last,drop=FALSE] %*% object$coefficients[first:last] + Xoff[,k]
          if (se.fit) { # diag(Z%*%V%*%t(Z))^0.5; Z=X[,first:last]; V is sub-matrix of Vp
            if (type=="iterms"&& attr(object$smooth[[k]],"nCons")>0) { ## termwise se to "carry the intercept
              ## some general families, add parameters after cmX created, which are irrelevant to cmX... 
              if (length(object$cmX) < ncol(X)) object$cmX <- c(object$cmX,rep(0,ncol(X)-length(object$cmX)))
              if (!is.null(iterms.type)&&iterms.type==2) object$cmX[-(1:object$nsdf)] <- 0 ## variability of fixed effects mean only
              X1 <- matrix(object$cmX,nrow(X),ncol(X),byrow=TRUE)
              meanL1 <- object$smooth[[k]]$meanL1
              if (!is.null(meanL1)) X1 <- X1 / meanL1              
              X1[,first:last] <- X[,first:last]
              se[start:stop,n.pterms+k] <- sqrt(pmax(0,rowSums((X1%*%object$Vp)*X1)))
            } else se[start:stop,n.pterms+k] <- ## terms strictly centred
            sqrt(pmax(0,rowSums((X[,first:last,drop=FALSE]%*%
                          object$Vp[first:last,first:last,drop=FALSE])*X[,first:last,drop=FALSE])))
          } ## end if (se.fit)
        }
        colnames(fit) <- ColNames
        if (se.fit) colnames(se) <- ColNames
      } else {
        if (para.only&&is.list(object$pterms)) { 
          ## have to use term labels that match original data, or termplot fails 
          ## to plot. This only applies for 'para.only' calls which are 
          ## designed for use from termplot called from plot.gam
          term.labels <- unlist(lapply(object$pterms,attr,"term.labels"))
        }
        colnames(fit) <- term.labels
        if (se.fit) colnames(se) <- term.labels
        # retain only terms of order 1 - this is to make termplot work
        order <- if (is.list(object$pterms)) unlist(lapply(object$pterms,attr,"order")) else attr(object$pterms,"order")
        term.labels <- term.labels[order==1]
        ## fit <- as.matrix(as.matrix(fit)[,order==1])
        fit <- fit[,order==1,drop=FALSE]
        colnames(fit) <- term.labels
        if (se.fit) { ## se <- as.matrix(as.matrix(se)[,order==1])
        se <- se[,order==1,drop=FALSE]
        colnames(se) <- term.labels } 
      }
    } else { ## "link" or "response" case
      fam <- object$family
      k <- attr(attr(object$model,"terms"),"offset")
      if (nlp>1) { ## multiple linear predictor case
        if (is.null(fam$predict)||type=="link") {
          ##pstart <- c(pstart,ncol(X)+1)
          ## get index of smooths with an offset...
          off.ind <- (1:n.smooth)[as.logical(colSums(abs(Xoff)))]
          for (j in 1:nlp) { ## looping over the model formulae
            ind <- lpi[[j]] ##pstart[j]:(pstart[j+1]-1)
            fit[start:stop,j] <- X[,ind,drop=FALSE]%*%object$coefficients[ind] + offs[[j]]
            if (length(off.ind)) for (i in off.ind) { ## add any term specific offsets
              if (object$smooth[[i]]$first.para%in%ind)  fit[start:stop,j] <- fit[start:stop,j] + Xoff[,i]
            }
            if (se.fit) se[start:stop,j] <- 
            sqrt(pmax(0,rowSums((X[,ind,drop=