R/bam.r

Defines functions bam.update bam terms2tensor tens2matrix AR.resid tero predict.bamd bam.fit predict.bam pabapr ar.qr.up regular.Sb mini.mf discrete.mf discrete.mf0 check.term compress.df qr.up qr.update chol2qr rwMatrix ls.size

Documented in bam bam.update ls.size predict.bam

## routines for very large dataset generalized additive modelling.
## (c) Simon N. Wood 2009-2019


ls.size <- function(x) {
## If `x' is a list, return the size of its elements, in bytes, in a named array
## otherwise return the size of the object
 if (is.list(x)==FALSE) return(object.size(x))

 xn <- names(x)
 n <- length(x)
 sz <- rep(-1,n)
 for (i in 1:n) sz[i] <- object.size(x[[i]])
 names(sz) <- xn
 sz
} ## ls.size

rwMatrix <- function(stop,row,weight,X,trans=FALSE) {
## Routine to recombine the rows of a matrix X according to info in 
## stop, row and weight. Consider the ith row of the output matrix 
## ind <- 1:stop[i] if i==1 and ind <- (stop[i-1]+1):stop[i]
## otherwise. The ith output row is then X[row[ind],]*weight[ind]
  if (is.matrix(X)) { n <- nrow(X);p<-ncol(X);ok <- TRUE} else { n<- length(X);p<-1;ok<-FALSE}
  stop <- stop - 1;row <- row - 1 ## R indices -> C indices
  oo <-.C(C_rwMatrix,as.integer(stop),as.integer(row),as.double(weight),X=as.double(X),
          as.integer(n),as.integer(p),trans=as.integer(trans),work=as.double(rep(0,n*p)))
  if (ok) return(matrix(oo$X,n,p)) else
  return(oo$X) 
} ## rwMatrix

chol2qr <- function(XX,Xy,nt=1) {
## takes X'X and X'y and returns R and f
## equivalent to qr update.  
  op <- options(warn = -1) ## otherwise warns if +ve semidef
  R <- if (nt) pchol(XX,nt=nt) else chol(XX,pivot=TRUE)
  options(op)
  p <- length(Xy)
  ipiv <- piv <- attr(R,"pivot");ipiv[piv] <- 1:p
  rank <- attr(R,"rank");ind <- 1:rank
  if (rank<p) R[(rank+1):p,] <- 0 ## chol is buggy (R 3.1.0) with pivot=TRUE
  f <- c(forwardsolve(t(R[ind,ind]),Xy[piv[ind]]),rep(0,p-rank))[ipiv]
  R <- R[ipiv,ipiv]
  list(R=R,f=f)
} ## chol2qr

qr.update <- function(Xn,yn,R=NULL,f=rep(0,0),y.norm2=0,use.chol=FALSE,nt=1)
## Let X = QR and f = Q'y. This routine updates f and R
## when Xn is appended to X and yn appended to y. If R is NULL
## then initial QR of Xn is performed. ||y||^2 is accumulated as well.
## if use.chol==TRUE then quicker but less stable accumulation of X'X and
## X'y are used. Results then need post processing, to get R =chol(X'X)
## and f= R^{-1} X'y.
## if nt>1 and use.chol=FALSE then parallel QR is used 
{ p <- ncol(Xn)  
  y.norm2 <- y.norm2+sum(yn*yn)
  if (use.chol) { 
    if (is.null(R)) { 
      R <- crossprod(Xn)
      fn <- as.numeric(t(Xn)%*%yn) 
    } else {
      R <- R + crossprod(Xn)
      fn <- f + as.numeric(t(Xn)%*%yn)
    } 
    return(list(R=R,f=fn,y.norm2=y.norm2))
  } else { ## QR update
    if (!is.null(R)) {
      Xn <- rbind(R,Xn)
      yn <- c(f,yn)
    }
    qrx <- if (nt==1) qr(Xn,tol=0,LAPACK=TRUE) else pqr2(Xn,nt)
    fn <- qr.qty(qrx,yn)[1:p]
    rp <- qrx$pivot;rp[rp] <- 1:p # reverse pivot
    return(list(R = qr.R(qrx)[,rp],f=fn,y.norm2=y.norm2))
  }
} ## qr.update


qr.up <- function(arg) {
## routine for parallel computation of the QR factorization of 
## a large gam model matrix, suitable for calling with parLapply.
  wt <- rep(0,0) 
  dev <- 0
  eta <- arg$eta
  efam <- !is.null(arg$family) ## extended family?
  for (b in 1:arg$n.block) {
    ind <- arg$start[b]:arg$stop[b]
    X <- predict(arg$G,newdata=arg$mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
    rownames(X) <- NULL
    if (is.null(arg$coef)) eta1 <- arg$eta[ind] else eta[ind] <- eta1 <- drop(X%*%arg$coef) + arg$offset[ind]
    mu <- arg$linkinv(eta1) 
    y <- arg$G$y[ind] ## arg$G$model[[arg$response]] 
    weights <- arg$G$w[ind]
    if (efam) { ## extended family case
       dd <- dDeta(y,mu,weights,theta=arg$theta,arg$family,0)
       ## note: no handling of infinities and wz case yet
       w <- dd$EDeta2 * .5 
       #w <- w
       z <- (eta1-arg$offset[ind]) - dd$Deta.EDeta2
       good <- is.finite(z)&is.finite(w)     
    } else { ## regular exp fam case
      mu.eta.val <- arg$mu.eta(eta1)
      good <- (weights > 0) & (mu.eta.val != 0)
      z <- (eta1 - arg$offset[ind]) + (y - mu)/mu.eta.val
      w <- (weights * mu.eta.val^2)/arg$variance(mu)
    }
    w[!good] <- 0 ## drop if !good
    #z[!good] <- 0 ## irrelevant
    dev <- dev + if (efam) sum(arg$family$dev.resids(y,mu,weights,arg$theta)) else sum(arg$dev.resids(y,mu,weights))
    wt <- c(wt,w)
    z <- z[good];w <- w[good]
    w <- sqrt(w)
    ## note assumption that nt=1 in following qr.update - i.e. each cluster node is strictly serial
    if (b == 1) qrx <- qr.update(w*X[good,,drop=FALSE],w*z,use.chol=arg$use.chol) 
    else qrx <- qr.update(w*X[good,,drop=FALSE],w*z,qrx$R,qrx$f,qrx$y.norm2,use.chol=arg$use.chol)
    rm(X);if(arg$gc.level>1) gc() ## X can be large: remove and reclaim
  }
  qrx$dev <- dev;qrx$wt <- wt;qrx$eta <- eta
  if (arg$gc.level>1) { rm(arg,ind,mu,y,weights,mu.eta.val,good,z,w,wt,w);gc()}
  qrx
} ## qr.up

compress.df <- function(dat,m=NULL) {
## Takes dataframe in dat and compresses it by rounding and duplicate 
## removal. Returns index array as attribute. If covariates were matrices
## then the index is a matrix.
## For metric variables we first find the unique cases.
## If there are <= m of these then these are employed, otherwise 
## rounding is used. Factors are always reduced to the number of 
## levels present in the data. Idea is that this function is called 
## with columns of dataframes corresponding to single smooths or marginals.
## Note that this uses random sampling, so random seed manipulation
## is typically used before calling to force exact repeatability. 
  d <- ncol(dat) ## number of variables to deal with
  n <- nrow(dat) ## number of data/cases
  if (is.null(m)) m <- if (d==1) 1000 else if (d==2) 100 else 25 else
  if (d>1) m <- round(m^{1/d}) + 1
  
  mf <- mm <- 1 ## total grid points for factor and metric
  for (i in 1:d) if (is.factor(dat[,i])) {  
    mf <- mf * length(unique(as.vector(dat[,i]))) 
  } else {
    mm <- mm * m 
  } 
  if (is.matrix(dat[[1]])) { ## must replace matrix terms with vec(dat[[i]])
    dat0 <- data.frame(as.vector(dat[[1]]))
    if (d>1) for (i in 2:d) dat0[[i]] <- as.vector(dat[[i]])
    names(dat0) <- names(dat)
    dat <- dat0;rm(dat0)
  }
  xu <- uniquecombs(dat,TRUE)
  if (nrow(xu)>mm*mf) { ## too many unique rows to use only unique
    for (i in 1:d) if (!is.factor(dat[,i])) { ## round the metric variables
      xl <- range(dat[,i])
      xu <- seq(xl[1],xl[2],length=m)
      dx <- xu[2]-xu[1]
      kx <- round((dat[,i]-xl[1])/dx)+1
      dat[,i] <- xu[kx] ## rounding the metric variables
    }
    xu <- uniquecombs(dat,TRUE)
  }  
  k <- attr(xu,"index")
  if (nrow(xu)==nrow(dat)) { ## might as well return original data
    k <- 1:nrow(dat)
    if (length(k)>n) k <- matrix(k,nrow=n) ## deal with matrix arguments
    attr(dat,"index") <- k 
    return(dat)
  }
  ## shuffle rows in order to avoid induced dependencies between discretized
  ## covariates (which can mess up gam.side)...
  ## Any RNG setting should be done in routine calling this one!!
  
  ii <- sample(1:nrow(xu),nrow(xu),replace=FALSE) ## shuffling index
  
  xu[ii,] <- xu  ## shuffle rows of xu
  k <- ii[k]     ## correct k index accordingly
  ## ... finished shuffle
  ## if arguments were matrices, then return matrix index
  if (length(k)>n) k <- matrix(k,nrow=n) 
  k -> attr(xu,"index")
  xu
} ## compress.df

check.term <- function(term,rec) {
## utility function for discrete.mf. Checks whether variables in "term"
## have already been discretized, and if so whether this discretization 
## can be re-used for the current "term". Stops if term already discretized
## but we can't re-use discretization. Otherwise returns index of k index
## or 0 if the term is not in the existing list.
  ii <- which(rec$vnames%in%term)
  if (length(ii)) { ## at least one variable already discretized
    if (length(term)==rec$d[min(ii)]) { ## dimensions match previous discretization
      if (sum(!(term%in%rec$vnames[ii]))) stop("bam can not discretize with this nesting structure")
      else return(rec$ki[min(ii)]) ## all names match previous - return index of previous
    } else stop("bam can not discretize with this nesting structure")
  } else return(0) ## no match
} ## check.term

discrete.mf0 <- function(gp,mf,names.pmf,m=NULL,full=TRUE) {
## more or less original version where nr and k.start are by model
## term
## discretize the covariates for the terms specified in smooth.spec
## id not allowed. names.pmf gives the names of the parametric part
## of mf, and is used to create a model frame for just the 
## parametric terms --- mini.mf is applied to this.
## if full is FALSE then
## what is returned is a list where columns can be of 
## different lengths.
## On exit... 
## * mf is a model frame containing the unique discretized covariate
##   values, in randomized order, padded to all be same length (if full=TRUE)
## * nr records the number of unique discretized covariate values
##   i.e. the number of rows before the padding starts -
##   elements are labelled, corresponding to names in mf, but
##   not in the same order.
## * k.start contains the starting column in index vector k, for
##   each variable. The final element is the column beyond the last one.
## * k is the index matrix. The ith record of the 1st column of the 
##   jth variable is in row k[i,k.start[j]] of the corresponding 
##   column of mf.
## ... there is an element of nr and k.start for each variable of 
## each smooth, but variables are only discretized and stored in mf
## once. If there are no matrix variables then k.start = 1:(ncol(k)+1) 

  ## some sub sampling here... want to set and restore RNG state used for this
  ## to ensure strict repeatability.
  rngs <- temp.seed(8547) ## keep different to tps constructor!
  #seed <- try(get(".Random.seed",envir=.GlobalEnv),silent=TRUE) ## store RNG seed
  #if (inherits(seed,"try-error")) {
  #     runif(1)
  #     seed <- get(".Random.seed",envir=.GlobalEnv)
  #}
  #kind <- RNGkind(NULL)
  #RNGkind("default", "default")
  #set.seed(8547) ## keep different to tps constructor!

  mf0 <- list()
  nk <- 0 ## count number of index vectors to avoid too much use of cbind
  nlp <- if (is.null(gp$nlp)) 1 else sum(unlist(lapply(gp,inherits,"split.gam.formula")))
  for (lp in 1:nlp) { ## loop over linear predictors
    smooth.spec <- if (is.null(gp$nlp)) gp$smooth.spec else gp[[lp]]$smooth.spec
    if (length(smooth.spec)>0) for (i in 1:length(smooth.spec)) nk <- nk + as.numeric(smooth.spec[[i]]$by!="NA") +
      if (inherits(smooth.spec[[i]],"tensor.smooth.spec")) length(smooth.spec[[i]]$margin) else 1
  }
  names.pmf <- names.pmf[names.pmf %in% names(mf)] ## drop names.pmf not in mf (usually response)
  #if (full)
  nk <- nk + length(names.pmf)
  k <- matrix(0,nrow(mf),nk) ## each column is an index vector
  k.start <- 1:(nk+1) ## record last column for each term
  ik <- 0 ## index counter i.e. counts marginal smooths and their indices
  nr <- rep(0,nk) ## number of rows for marginal term
  ## structure to record terms already processed...
  rec <- list(vnames = rep("",0), ## variable names
              ki = rep(0,0),      ## index of original index vector var relates to  
              d = rep(0,0))       ## dimension of terms involving this var
  ## loop through the terms discretizing the covariates...
 
  for (lp in 1:nlp) { ## loop over linear predictors
    smooth.spec <- if (is.null(gp$nlp)) gp$smooth.spec else gp[[lp]]$smooth.spec
    if (length(smooth.spec)>0) for (i in 1:length(smooth.spec)) {
      nmarg <- if (inherits(smooth.spec[[i]],"tensor.smooth.spec")) length(smooth.spec[[i]]$margin) else 1
      maxj <- if (smooth.spec[[i]]$by=="NA") nmarg else nmarg + 1 
      mi <- if (is.null(m)||length(m)==1) m else m[i]
      j <- 0
      for (jj in 1:maxj) { ## loop through marginals
        if (jj==1&&maxj!=nmarg) termi <- smooth.spec[[i]]$by else {
          j <- j + 1
          termi <- if (inherits(smooth.spec[[i]],"tensor.smooth.spec")) smooth.spec[[i]]$margin[[j]]$term else 
                   smooth.spec[[i]]$term          
        } 
        ik.prev <- check.term(termi,rec) ## term already discretized?
        ik <- ik + 1 ## increment index counter
        if (ik.prev==0) { ## new discretization required
	  if (termi=="(Intercept)") {
            mfd <- data.frame(1)
	    names(mfd) <- termi
	    ki <- 1:nrow(mf)
          } else {
            mfd <- compress.df(mf[termi],m=mi)
            ki <- attr(mfd,"index")
	  }  
          if (is.matrix(ki)) {
             ind <- (ik+1):length(k.start) 
             k.start[ind] <- k.start[ind] + ncol(ki)-1    ## adjust start indices
             k <- cbind(k,matrix(0,nrow(k),ncol(ki)-1)) ## extend index matrix
             ind <- k.start[ik]:(k.start[ik+1]-1) 
             k[,ind] <- ki 
           } else {
             k[,k.start[ik]] <- ki
           }
           nr[ik] <- nrow(mfd)
           mf0 <- c(mf0,mfd) 
           ## record variable discretization info, for later duplicate avoiding check...
           d <- length(termi)
           rec$vnames <- c(rec$vnames,termi)
           rec$ki <- c(rec$ki,rep(ik,d))
           rec$d <- c(rec$d,rep(d,d))
         } else { ## re-use an earlier discretization...
           ind.prev <- k.start[ik.prev]:(k.start[ik.prev+1]-1)
           ind <- (ik+1):length(k.start)
           k.start[ind] <- k.start[ind] + length(ind.prev)-1
	   if (length(ind.prev)>1) k <- cbind(k,matrix(0,nrow(k),length(ind.prev)-1)) ## extend index matrix
           ind <- k.start[ik]:(k.start[ik+1]-1)
           k[,ind] <- k[,ind.prev]
           nr[ik] <- nr[ik.prev]
         }
      } ## end marginal jj loop
    } ## term loop (i)
  } ## linear predictor, lp, loop

  ## obtain parametric terms and..
  ## pad mf0 so that all rows are the same length
  ## padding is necessary if gam.setup is to be used for setup

  if (full) {
    colnames(k) <- rep("",ncol(k))
    maxr <- max(nr)
    ## If NA's caused rows to be dropped in mf, then they should
    ## also be dropped in pmf, otherwise we can end up with factors
    ## with more levels than unique observations, for example.
    ## The next couple of lines achieve this.
    ## find indices of terms in mf but not pmf...
    #di <- sort(which(!names(mf) %in% names.pmf),decreasing=TRUE)
    ## create copy of mf with only pmf variables...
    #mfp <- mf; for (i in di) mfp[[i]] <- NULL

    mfp <- mf[names.pmf] ## retain only variables needed in parametric part

    ## now discretize parametric covariates...
    mi <- if (is.null(m)) m else max(m)
    if (length(mfp)) for (i in 1:length(mfp)) {
      ii <- which(rec$vnames %in% names.pmf[i])
      ik.prev <- if (length(ii)>0) rec$ki[ii] else 0  ## term already discretized?
      ik <- ik + 1
      if (ik.prev==0) { ## new discretization needed (no need to record - no repeat vars within para)
        mfd <- compress.df(mfp[i],m=mi)
        mf0 <- c(mf0,mfd)
        ki <- attr(mfd,"index")
        #ik <- ik + 1
        k[,k.start[ik]] <- ki
        colnames(k)[k.start[ik]] <- names(mfp[i]) ## record the variable name associated with the index
        if (maxr<nrow(mfd)) maxr <- nrow(mfd)
        nr[ik] <- nrow(mfd)
      } else { ## re-use previous discretization
        k[,k.start[ik]] <- k[,k.start[ik.prev]]
	colnames(k)[k.start[ik]] <- names(mfp[i])
	nr[ik] <- nr[ik.prev]
      }
    }

    nr0 <- rep(NA,length(mf0)) 
    for (i in 1:length(mf0)) {
      me <- length(mf0[[i]]) 
      if (me < maxr) mf0[[i]][(me+1):maxr] <- sample(mf0[[i]],maxr-me,replace=TRUE)
      nr0[i] <- me ## record original column length
    }
    names(nr0) <- names(mf0)

    ## add response so that gam.setup can do its thing - looks redundant!! 
    # mf0[[gp$response]] <- sample(mf[[gp$response]],maxr,replace=TRUE) ## redundant??
    
    ## mf0 is the discretized model frame (actually a list), padded to have equal length rows
    ## k is the index vector for each sub-matrix, only the first nr rows of which are
    ## to be retained... Use of check.names=FALSE ensures, e.g. 'offset(x)' not changed...

    ## now copy back into mf so terms unchanged
    mf <- mf[sample(1:nrow(mf),maxr,replace=TRUE),]
    for (na in names(mf0)) mf[[na]] <- mf0[[na]] 
    nr0 <- nr0[names(mf)] ## makes sure nr0 order is same as mf order
    attr(mf,"nr") <- nr0 ## record original column lengths
  } else mf <- mf0
  ## reset RNG to old state...
  temp.seed(rngs)
  #RNGkind(kind[1], kind[2])
  #assign(".Random.seed", seed, envir = .GlobalEnv)

  ## finally one more pass through, expanding k, k.start and nr to deal with replication that
  ## will occur with factor by variables...
  #ik <- ncol(k)+1 ## starting index col for this term in k.start - wrong with matrix predictors
  ik <- length(k.start) ## starting index col for this term in k.start
  for (lp in 1:nlp) { ## loop over linear predictors
    smooth.spec <- if (is.null(gp$nlp)) gp$smooth.spec else gp[[lp]]$smooth.spec
    if (length(smooth.spec)>0) for (i in length(smooth.spec):1) { ## work down through terms so insertion painless
      if (inherits(smooth.spec[[i]],"tensor.smooth.spec")) nd <-  
         length(smooth.spec[[i]]$margin) else nd <- 1 ## number of indices
      ik <- ik - nd ## starting index if no by  
      if (smooth.spec[[i]]$by!="NA") {
        ik <- ik - 1 ## first index
        nd <- nd + 1 ## number of indices
        byvar <- mf[[smooth.spec[[i]]$by]]
        if (is.factor(byvar)) { ## then need to expand nr and index matrix
          nex <- length(levels(byvar))  ## number of copies of term indices
          if (is.ordered(byvar)) nex <- nex - 1 ## first level dropped
          if (nex>0) { ## insert index copies
            ii0 <- if (ik>1) 1:(ik-1) else rep(0,0) ## earlier
            ii1 <- if (ik+nd-1 < length(nr)) (ik+nd):length(nr) else rep(0,0) ## later
            ii <- ik:(ik+nd-1) ## cols for this term    
            ## indices for columns of k... 
            kk0 <- if (ik>1) 1:(k.start[ik]-1) else rep(0,0) ## earlier
            kk1 <- if (ik+nd-1 < length(nr)) k.start[ik+nd]:ncol(k) else rep(0,0) ## later
            kk <- k.start[ik]:(k.start[ik+nd]-1) ## cols for this term
            k <- cbind(k[,kk0,drop=FALSE],k[,rep(kk,nex),drop=FALSE],k[,kk1,drop=FALSE])
            nr <- c(nr[ii0],rep(nr[ii],nex),nr[ii1])
            ## expand k.start...
            nkk <- length(kk) ## number of k columns in term to be repeated
            k.start <- c(k.start[ii0],rep(k.start[ii],nex)+rep(0:(nex-1),each=nkk)*nkk,
                       (nex-1)*nkk+c(k.start[ii1],k.start[length(k.start)]))
          }
        } ## factor by 
      } ## existing by
    } ## smooth.spec loop
  } ## lp loop  
  list(mf=mf,k=k,nr=nr,k.start=k.start)
} ## discrete.mf0


discrete.mf <- function(gp,mf,names.pmf,m=NULL,full=TRUE) {
## Attempt at a cleaner design, in which k.start and nr have an entry
## for each variable in mf. For jointly discretized covariates these
## will simply be identical for each covariate. 
## discretize the covariates for the terms specified in smooth.spec
## id not allowed. names.pmf gives the names of the parametric part
## of mf. If full is FALSE then the mf returned is a list where columns
## can be of different lengths.
## On exit... 
## * mf is a model frame containing the unique discretized covariate
##   values, in randomized order, padded to all be same length (if full=TRUE)
## * nr records the number of unique discretized covariate values
##   for each variable in mf i.e. the number of rows before the padding starts -
##   elements are labelled, corresponding to names in mf.
## * ks is a two column matrix. ks[i,1] is the first column in index
##   matrix k for the corresponding element of mf, ks[i,2]-1 is the 
##   final column in k. row names match names of mf.
## * k is the index matrix. The ith record of the 1st column of the 
##   jth variable is in row k[i,ks[j,1]] of the corresponding 
##   column of mf.
## ... there is an element of nr and ks for each variable of
## mf. Variables are only discretized and stored in mf once.
## Model terms can be matched to elements of nr and ks based on the
## 'xnames' retuned from terms2tensor, or the 'terms' component of
## smooth objects.
## ks has an extra row, k an extra column and nr an extra entry for
## any "(Intercept)"
## Note that the margins component of a tensor product smooth spec
## is used to establish which covariates of the smooth must be
## jointly discretized.

  ## some sub sampling here... want to set and restore RNG state used for this
  ## to ensure strict repeatability.
  rngs <- temp.seed(8547) ## keep different to tps constructor!

  mf0 <- list()
  nk <- 0 ## count number of index vectors to avoid too much use of cbind
  nlp <- if (is.null(gp$nlp)) 1 else sum(unlist(lapply(gp,inherits,"split.gam.formula")))
  for (lp in 1:nlp) { ## loop over linear predictors
    smooth.spec <- if (is.null(gp$nlp)) gp$smooth.spec else gp[[lp]]$smooth.spec
    if (length(smooth.spec)>0) for (i in 1:length(smooth.spec)) nk <- nk + as.numeric(smooth.spec[[i]]$by!="NA") + length(smooth.spec[[i]]$term)
      ## if (inherits(smooth.spec[[i]],"tensor.smooth.spec")) length(smooth.spec[[i]]$margin) else 1
  }
  names.pmf <- names.pmf[names.pmf %in% names(mf)] ## drop names.pmf not in mf (usually response)
 
  nk <- nk + length(names.pmf)
  k <- matrix(0,nrow(mf),nk) ## each column is an index vector
  #k.start <- 1:(nk+1) ## record last column for each term
  ks <- matrix(NA,nk,2,dimnames=list(rep("",nk)))
  ik <- 0 ## index counter i.e. counts marginal smooths and their indices
  nr <- rep(0,nk) ## number of rows for marginal term
  ## structure to record terms already processed...
  rec <- list(vnames = rep("",0), ## variable names
              ki = rep(0,0),      ## index of original index vector var relates to  
              d = rep(0,0))       ## dimension of terms involving this var
  ## loop through the terms discretizing the covariates...
 
  for (lp in 1:nlp) { ## loop over linear predictors
    smooth.spec <- if (is.null(gp$nlp)) gp$smooth.spec else gp[[lp]]$smooth.spec
    if (length(smooth.spec)>0) for (i in 1:length(smooth.spec)) {
      nmarg <- if (inherits(smooth.spec[[i]],"tensor.smooth.spec")) length(smooth.spec[[i]]$margin) else 1
      maxj <- if (smooth.spec[[i]]$by=="NA") nmarg else nmarg + 1 
      mi <- if (is.null(m)||length(m)==1) m else m[i]
      j <- 0
      for (jj in 1:maxj) { ## loop through marginals
        if (jj==1&&maxj!=nmarg) termi <- smooth.spec[[i]]$by else {
          j <- j + 1
          termi <- if (inherits(smooth.spec[[i]],"tensor.smooth.spec")) smooth.spec[[i]]$margin[[j]]$term else 
                   smooth.spec[[i]]$term          
        } 
        ik.prev <- check.term(termi,rec) ## term already discretized?
        
        if (ik.prev==0) { ## new discretization required
	  ik <- ik + 1 ## increment index counter
          mfd <- compress.df(mf[termi],m=mi)
          ki <- attr(mfd,"index")
	  ks[ik,1] <- if (ik==1) 1 else ks[ik-1,2] 
          if (is.matrix(ki)) {
	     ks[ik,2] <- ks[ik,1] + ncol(ki)
             k <- cbind(k,matrix(0,nrow(k),ncol(ki)-1)) ## extend index matrix
             ind <- ks[ik,1]:(ks[ik,2]-1) 
             k[,ind] <- ki 
           } else {
	     ks[ik,2] <- ks[ik,1] + 1
             k[,ks[ik,1]] <- ki
           }
	   names(nr)[ik] <- rownames(ks)[ik] <- names(mfd)[1]
           nr[ik] <- nrow(mfd)
	   if (length(termi)>1) for (ii in 2:length(termi)) { ## duplicate index info for each jointly discretized variable 
             ik <- ik+1
	     ks[ik,] <- ks[ik-1,]
	     nr[ik] <- nr[ik-1]
	     names(nr)[ik] <- rownames(ks)[ik] <- names(mfd)[ii]
           }
           mf0 <- c(mf0,mfd) 
           ## record variable discretization info, for later duplicate avoiding check...
           d <- length(termi)
           rec$vnames <- c(rec$vnames,termi)
           rec$ki <- c(rec$ki,rep(ik,d)) ### NOTE: OK, as ki contents essentially unused
           rec$d <- c(rec$d,rep(d,d))
         } else { ## re-use an earlier discretization...
           ## nothing to do in fact
         }
      } ## end marginal jj loop
    } ## term loop (i)
  } ## linear predictor, lp, loop

  ## obtain parametric terms and..
  ## pad mf0 so that all rows are the same length
  ## padding is necessary if gam.setup is to be used for setup

  if (full) {
   # colnames(k) <- rep("",ncol(k))
    maxr <- max(nr)
    ## If NA's caused rows to be dropped in mf, then they should
    ## also be dropped in pmf, otherwise we can end up with factors
    ## with more levels than unique observations, for example.
    ## The next couple of lines achieve this.

    mfp <- mf[names.pmf] ## retain only variables needed in parametric part

    ## now discretize parametric covariates...
    mi <- if (is.null(m)) m else max(m)
    if (length(mfp)) for (i in 1:length(mfp)) {
      ii <- which(rec$vnames %in% names.pmf[i])
      ik.prev <- if (length(ii)>0) rec$ki[ii] else 0  ## term already discretized?
      if (ik.prev==0) { ## new discretization needed (no need to record - no repeat vars within para)
        ## note that compress.df(mfp[i]) is set up to deal with matrices in the manner appropriate
	## to the smooth summation convention, but not to matrices in the parametric part of the model
	## hence the following work around...
        #mfd <- compress.df(mfp[i],m=mi);mf0 <- c(mf0,mfd)
        if (is.matrix(mfp[[i]])) {
          mfd <- compress.df(mfp[[i]],m=mi)
	  mr <- nrow(mfd)
	  if (is.matrix(mfd)&&ncol(mfd)==1) mfd <- drop(mfd)
	  mf0[[names(mfp[i])]] <- mfd
	} else {
          mfd <- compress.df(mfp[i],m=mi);mf0 <- c(mf0,mfd)
	  mr <- length(mfd[[1]])
        }
        ki <- attr(mfd,"index")
        ik <- ik + 1
        ks[ik,1] <- if (ik==1) 1 else ks[ik-1,2]
	ks[ik,2] <- ks[ik,1] + 1
        k[,ks[ik,1]] <- ki
        if (maxr<mr) maxr <- mr
        nr[ik] <- mr
	names(nr)[ik] <- rownames(ks)[ik] <- names(mfp[i])
      } else { ## re-use previous discretization
        ## nothing needed
      }
    }

    for (i in 1:length(mf0)) { ## pad out columns to be same length
      if (is.matrix(mf0[[i]])) {
        me <- nrow(mf0[[i]])
	mf0[[i]] <- rbind(mf0[[i]],matrix(sample(mf0[[i]],(maxr-me)*ncol(mf0[[i]]),replace=TRUE),maxr-me,ncol(mf0[[i]])))
      } else {
        me <- length(mf0[[i]]) 
        if (me < maxr) mf0[[i]][(me+1):maxr] <- sample(mf0[[i]],maxr-me,replace=TRUE)
      }	
    }
    
    ## mf0 is the discretized model frame (actually a list), padded to have equal length rows
    ## now copy back into mf so terms unchanged
    mf <- mf[sample(1:nrow(mf),maxr,replace=TRUE),,drop=FALSE]
    for (na in names(mf0)) mf[[na]] <- mf0[[na]] 
  } else mf <- mf0
  nr <- nr[names(mf)] ## same order for both mf and nr

  ## reset RNG to old state...
  temp.seed(rngs)

  ## k can end up with too many columns if marginals themselves have
  ## multiple arguments, so drop these here...
  ks <- ks[!is.na(ks[,1]),,drop=FALSE]
  if (ncol(k)>max(ks)-1) k <- k[,1:(max(ks)-1),drop=FALSE] 
  k <- cbind(k,1) ## add an intercept index column
  nk <- ncol(k)
  ks <- rbind(ks,c(nk,nk+1)) ## add intercept row to ks
  nr <- c(nr,1) ## and intercept entry to nr
  names(nr)[length(nr)] <- rownames(ks)[nrow(ks)] <- "(Intercept)"
  list(mf=mf,k=k,nr=nr,ks=ks)
} ## discrete.mf

mini.mf <-function(mf,chunk.size) {
## takes a model frame and produces a representative subset of it, suitable for 
## basis setup.
  ## first count the minimum number of rows required for representiveness
  mn <- 0
  for (j in 1:length(mf)) mn <- mn + if (is.factor(mf[[j]])) length(levels(mf[[j]])) else 2
  if (chunk.size < mn) chunk.size <- mn
  n <- nrow(mf)
  if (n <= chunk.size) return(mf)
  rngs <- temp.seed(66)
  #seed <- try(get(".Random.seed",envir=.GlobalEnv),silent=TRUE) ## store RNG seed
  #if (inherits(seed,"try-error")) {
  #   runif(1)
  #   seed <- get(".Random.seed",envir=.GlobalEnv)
  #}
  #kind <- RNGkind(NULL)
  #RNGkind("default", "default")
  #set.seed(66)  
  ## randomly sample from original frame...
  ind <- sample(1:n,chunk.size)
  mf0 <- mf[ind,,drop=FALSE]
  ## ... now need to ensure certain sorts of representativeness

  ## work through elements collecting the rows containing 
  ## max and min for each variable, and a random row for each 
  ## factor level....

  ind <- sample(1:n,n,replace=FALSE) ## randomized index for stratified sampling w.r.t. factor levels
  fun <- function(X,fac,ind) ind[fac[ind]==X][1] ## stratified sampler
  k <- 0 
  for (j in 1:length(mf)) if (is.numeric(mf0[[j]])) {
    if (is.matrix(mf0[[j]])) { ## find row containing minimum
      j.min <- min((1:n)[as.logical(rowSums(mf[[j]]==min(mf[[j]])))])
      j.max <- min((1:n)[as.logical(rowSums(mf[[j]]==max(mf[[j]])))])
    } else { ## vector
      j.min <- min(which(mf[[j]]==min(mf[[j]])))
      j.max <- min(which(mf[[j]]==max(mf[[j]])))
    }
    k <- k + 1; mf0[k,] <- mf[j.min,]
    k <- k + 1; mf0[k,] <- mf[j.max,] 
  } else if (is.factor(mf[[j]])) { ## factor variable...
    ## randomly sample one row from each factor level...
    find <- apply(X=as.matrix(levels(mf[[j]])),MARGIN=1,FUN=fun,fac=mf[[j]],ind=ind)
    find <- find[is.finite(find)] ## in case drop.unused.levels==FALSE, so that there ar levels without rows
    nf <- length(find)
    mf0[(k+1):(k+nf),] <- mf[find,]
    k <- k + nf
  }
  temp.seed(rngs) ## reset RNG to initial state
  #RNGkind(kind[1], kind[2])
  #assign(".Random.seed", seed, envir = .GlobalEnv)

  mf0
} ## mini.mf


bgam.fitd <- function (G, mf, gp ,scale , coef=NULL,etastart = NULL,
    mustart = NULL, offset = rep(0, nobs),rho=0, control = gam.control(), intercept = TRUE, 
    gc.level=0,nobs.extra=0,npt=c(1,1),gamma=1) {
## This is a version of bgam.fit designed for use with discretized covariates. 
## Difference to bgam.fit is that XWX, XWy and Xbeta are computed in C
## code using compressed versions of X. Parallelization of XWX formation
## is performed at the C level using openMP.
## Alternative fitting iteration using Cholesky only, including for REML.
## Basic idea is to take only one Newton step for parameters per iteration
## and to control the step length to ensure that at the end of the step we
## are not going uphill w.r.t. the REML criterion...
    
    #y <- mf[[gp$response]]
    y <- G$y
    weights <- G$w 
    conv <- FALSE
    nobs <- nrow(mf)
    offset <- G$offset 
   
    if (inherits(G$family,"extended.family")) { ## preinitialize extended family
      efam <- TRUE
      pini <- if (is.null(G$family$preinitialize)) NULL else G$family$preinitialize(y,G$family)
      if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta)
      if (!is.null(pini$y)) y <- pini$y
      if (is.null(G$family$scale)) scale <- 1 else scale <- if (G$family$scale<0) scale else G$family$scale
      scale1 <- scale
      if (scale < 0) scale <- var(y) *.1 ## initial guess
    } else efam <- FALSE


    if (rho!=0) { ## AR1 error model
      
      ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
      sd <- -rho*ld         ## sub diagonal
      N <- nobs    
      ## see rwMatrix() for how following are used...
      ar.row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)]) ## index of rows to reweight
      ar.weight <- c(1,rep(c(sd,ld),N-1))     ## row weights
      ar.stop <- c(1,1:(N-1)*2+1)    ## (stop[i-1]+1):stop[i] are the rows to reweight to get ith row
      if (!is.null(mf$"(AR.start)")) { ## need to correct the start of new AR sections...
        ii <- which(mf$"(AR.start)"==TRUE)
        if (length(ii)>0) {
          if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
          ar.weight[ii*2-2] <- 0 ## zero sub diagonal
          ar.weight[ii*2-1] <- 1 ## set leading diagonal to 1
        }
      }
    } else {## AR setup complete
      ar.row <- ar.weight <- ar.stop <- -1 ## signal no re-weighting
    }

    family <- G$family
    additive <- if (family$family=="gaussian"&&family$link=="identity") TRUE else FALSE
    linkinv <- family$linkinv;#dev.resids <- family$dev.resids
    if (!efam) {
      variance <- family$variance
      mu.eta <- family$mu.eta
      if (!is.function(variance) || !is.function(linkinv))
          stop("'family' argument seems not to be a valid family object")
    }
    valideta <- family$valideta
    if (is.null(valideta))
        valideta <- function(eta) TRUE
    validmu <- family$validmu
    if (is.null(validmu))
        validmu <- function(mu) TRUE
    if (is.null(mustart)) {
        eval(family$initialize)
    }
    else {
        mukeep <- mustart
        eval(family$initialize)
        mustart <- mukeep
    }

    if (is.matrix(y)&&ncol(y)>1) stop("This family should not have a matrix response")

    eta <- if (!is.null(etastart))
         etastart
    else family$linkfun(mustart)
    
    mu <- linkinv(eta)
    if (!(validmu(mu) && valideta(eta)))
       stop("cannot find valid starting values: please specify some")
    dev <- sum(family$dev.resids(y, mu, weights))*2 ## just to avoid converging at iter 1

    conv <- FALSE
   
    G$coefficients <- rep(0,ncol(G$X))
    class(G) <- "gam"  
    
    ## need to reset response and weights to post initialization values
    ## in particular to deal with binomial properly...
    G$y <- y
    G$w <- weights

    Sl <- Sl.setup(G) ## setup block diagonal penalty object
    rank <- 0
    if (length(Sl)>0) for (b in 1:length(Sl)) rank <- rank + Sl[[b]]$rank
    Mp <- ncol(G$X) - rank ## null space dimension
    Nstep <- 0
    if (efam) theta <- family$getTheta()
    for (iter in 1L:control$maxit) { ## main fitting loop 
      devold <- dev
      dev <- 0
     
      if (iter==1||!additive) {
        qrx <- list()

        if (iter>1) {
          ## form eta = X%*%beta
          eta <- Xbd(G$Xd,coef,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,G$drop) + offset
	  lsp.full <- G$lsp0
	  if (n.sp>0) lsp.full <- lsp.full + if (is.null(G$L)) lsp[1:n.sp] else G$L %*% lsp[1:n.sp]
	  #Sb <- Sl.Sb(Sl,lsp.full,prop$beta) ## store S beta to allow rapid step halving
	  rSb <- Sl.rSb(Sl,lsp.full,prop$beta) ## store S beta to allow rapid step halving
	  if (iter>2) {
            #Sb0 <- Sl.Sb(Sl,lsp.full,b0)
	    #bSb0 <- sum(b0*Sb0) ## penalty at start of beta step
	    rSb0 <- Sl.rSb(Sl,lsp.full,b0)
	    bSb0 <- sum(rSb0^2)
	    ## get deviance at step start, with current theta if efam
	    dev0 <- if (efam) sum(family$dev.resids(G$y,mu0,G$w,theta)) else
	                 sum(family$dev.resids(G$y,mu0,G$w))
          }
        }
	kk <- 1
	repeat {
          mu <- linkinv(eta)
	  dev <- if (efam) sum(family$dev.resids(G$y,mu,G$w,theta)) else
	                 sum(family$dev.resids(G$y,mu,G$w))
          if (iter>2) { ## coef step length control
	    #bSb <- sum(prop$beta*Sb) ## penalty at end of beta step
	    bSb <- sum(rSb^2) ## penalty at end of beta step 
            if ((!is.finite(dev) || dev0 + bSb0 < dev + bSb) && kk < 30) { ## beta step not improving current pen dev
              coef <- (coef0 + coef)/2 ## halve the step
	      #Sb <- (Sb0 + Sb)/2
	      rSb <- (rSb0 + rSb)/2
	      eta <- (eta0 + eta)/2
	      prop$beta <- (b0 + prop$beta)/2
	      kk <- kk + 1
            } else break
          } else break
        }		 

        if (iter>1) { ## save components of penalized deviance for step control
          coef0 <- coef ## original para
	  eta0 <- eta
	  mu0 <- mu
	  b0 <- prop$beta ## beta repara
	  #dev <- dev + sum(prop$beta*Sb) ## add penalty to deviance
	  dev <- dev + sum(rSb^2)
	} else reml <- dev ## for convergence checking
	
	if (efam) { ## extended family
	  if (iter>1) { ## estimate theta
	    #scale1 <- if (!is.null(family$scale)) family$scale else scale
            if (family$n.theta>0||scale1<0) theta <- estimate.theta(theta,family,y,mu,scale=scale1,wt=G$w,tol=1e-7)
            if (!is.null(family$scale) && scale1<0) {
	      scale <- exp(theta[family$n.theta+1])
	      theta <- theta[1:family$n.theta]
	    }  
            family$putTheta(theta)
          }
	  
          dd <- dDeta(y,mu,G$w,theta=theta,family,0)
	  ## note: no handling of infinities and wz case yet

          if (rho==0) {
	    w <- dd$Deta2 * .5 
            z <- (eta-offset) - dd$Deta.Deta2
          } else { ## use fisher weights
	    w <- dd$EDeta2 * .5 
            z <- (eta-offset) - dd$Deta.EDeta2
	  }
          good <- is.finite(z)&is.finite(w)
	  w[!good] <- 0 ## drop if !good
	  z[!good] <- 0 ## irrelevant
        } else { ## exponential family
          mu.eta.val <- mu.eta(eta)
          good <- mu.eta.val != 0
          mu.eta.val[!good] <- .1 ## irrelvant as weight is zero
          z <- (eta - offset) + (G$y - mu)/mu.eta.val
          w <- (G$w * mu.eta.val^2)/variance(mu)
        }
      
  
        qrx$y.norm2 <- if (rho==0) sum(w*z^2) else   ## AR mod needed
          sum(rwMatrix(ar.stop,ar.row,ar.weight,sqrt(w)*z,trans=FALSE)^2) 
       
        ## form X'WX efficiently...
        qrx$R <- XWXd(G$Xd,w,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,npt[1],G$drop,ar.stop,ar.row,ar.weight)
	##R0 <- XWXd(G$Xd,w,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,1,G$drop,ar.stop,ar.row,ar.weight) ## DEBUG compare
        ## form X'Wz efficiently...
        qrx$f <- XWyd(G$Xd,w,z,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,G$drop,ar.stop,ar.row,ar.weight)
        if(gc.level>1) gc()
     
        ## following reparameterizes X'X and f=X'y, according to initial reparameterizarion...
        qrx$XX <- Sl.initial.repara(Sl,qrx$R,inverse=FALSE,both.sides=TRUE,cov=FALSE,nt=npt[1])
        qrx$Xy <- Sl.initial.repara(Sl,qrx$f,inverse=FALSE,both.sides=TRUE,cov=FALSE,nt=npt[1])  
        
        G$n <- nobs
      } else {  ## end of if (iter==1||!additive)
        dev <- qrx$y.norm2 - sum(coef*qrx$f) ## actually penalized deviance
      }
  
      if (control$trace)
         message(gettextf("Deviance = %s Iterations - %d", dev, iter, domain = "R-mgcv"))

      if (!is.finite(dev)) stop("Non-finite deviance")

      ## preparation for working model fit is ready, but need to test for convergence first
      if (iter>2 && abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) {
          conv <- TRUE
          #coef <- start
          break
      }

      ## use fast REML code
      ## block diagonal penalty object, Sl, set up before loop


      if (iter==1) { ## need to get initial smoothing parameters 
        lambda.0 <- initial.sp(qrx$R,G$S,G$off,XX=TRUE) ## note that this uses the untransformed X'X in qrx$R
        ## convert intial s.p.s to account for L 
        lsp0 <- log(lambda.0) ## initial s.p.
        if (!is.null(G$L)) lsp0 <- 
          if (ncol(G$L)>0) as.numeric(coef(lm(lsp0 ~ G$L-1+offset(G$lsp0)))) else rep(0,0)
        n.sp <- length(lsp0) 
      }
     
      ## carry forward scale estimate if possible...
      if (scale>0) log.phi <- log(scale) else {
        if (iter==1) {
            if (is.null(coef)||qrx$y.norm2==0) lsp0[n.sp+1] <- log(var(as.numeric(G$y))*.05) else
               lsp0[n.sp+1] <- log(qrx$y.norm2/(nobs+nobs.extra))
        }
      }

      ## get beta, grad and proposed Newton step... 
      repeat { ## Take a Newton step to update log sp and phi
        lsp <- lsp0 + Nstep
        if (scale<=0) log.phi <- lsp[n.sp+1] 
        prop <- Sl.fitChol(Sl,qrx$XX,qrx$Xy,rho=lsp[1:n.sp],yy=qrx$y.norm2,L=G$L,rho0=G$lsp0,log.phi=log.phi,
                 phi.fixed=scale>0,nobs=nobs,Mp=Mp,nt=npt,tol=abs(reml)*.Machine$double.eps^.5,gamma=gamma)
        if (max(Nstep)==0) { 
          Nstep <- prop$step;lsp0 <- lsp;
          break 
        } else { ## step length control
          if (sum(prop$grad*Nstep)>dev*1e-7) Nstep <- Nstep/2 else {
            Nstep <- prop$step;lsp0 <- lsp;break;
          }
        }
      } ## end of sp update

      coef <- Sl.initial.repara(Sl,prop$beta,inverse=TRUE,both.sides=FALSE,cov=FALSE)

      if (any(!is.finite(coef))) {
          conv <- FALSE
          warning(gettextf("non-finite coefficients at iteration %d",
                  iter))
          break
      }
      reml <- (dev/(exp(log.phi)*gamma) - prop$ldetS + prop$ldetXXS)/2
    } ## end fitting iteration

    if (!conv)
       warning("algorithm did not converge")
   
    eps <- 10 * .Machine$double.eps
    if (family$family == "binomial") {
         if (any(mu > 1 - eps) || any(mu < eps))
                warning("fitted probabilities numerically 0 or 1 occurred")
    }
    if (family$family == "poisson") {
            if (any(mu < eps))
                warning("fitted rates numerically 0 occurred")
    }
  Mp <- G$nsdf
  if (length(G$smooth)>1) for (i in 1:length(G$smooth)) Mp <- Mp + G$smooth[[i]]$null.space.dim
  scale <- exp(log.phi)
  reml <- (dev/(scale*gamma) - prop$ldetS + prop$ldetXXS + (length(y)/gamma-Mp)*log(2*pi*scale)+Mp*log(gamma))/2
  if (rho!=0) { ## correct REML score for AR1 transform
    df <- if (is.null(mf$"(AR.start)")) 1 else sum(mf$"(AR.start)")
    reml <- reml - (nobs/gamma-df)*log(ld)
  }

  for (i in 1:ncol(prop$db)) prop$db[,i] <- ## d beta / d rho matrix
        Sl.initial.repara(Sl,as.numeric(prop$db[,i]),inverse=TRUE,both.sides=TRUE,cov=TRUE,nt=npt[1]) 

  object <- list(db.drho=prop$db,
                 gcv.ubre=reml,mgcv.conv=conv,rank=prop$r,
                 scale.estimated = scale<=0,outer.info=NULL,
                 optimizer=c("perf","chol"))
  object$coefficients <- coef
  object$family <- family
  ## form linear predictor efficiently...
  object$linear.predictors <- Xbd(G$Xd,coef,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,G$drop) + G$offset
  object$fitted.values <- family$linkinv(object$linear.predictors)
  if (efam) { ## deal with any post processing
     if (!is.null(family$postproc)) {
      posr <- family$postproc(family=object$family,y=G$y,prior.weights=G$w,
              fitted=object$fitted.values,linear.predictors=object$linear.predictors,offset=G$offset,
	      intercept=G$intercept)
      if (!is.null(posr$family)) object$family$family <- posr$family
      if (!is.null(posr$deviance)) object$deviance <- posr$deviance
      if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance
    }
    if (is.null(object$null.deviance)) object$null.deviance <- sum(family$dev.resids(G$y,weighted.mean(G$y,G$w),G$w,theta))   
  }

  PP <- Sl.initial.repara(Sl,prop$PP,inverse=TRUE,both.sides=TRUE,cov=TRUE,nt=npt[1])
  F <- pmmult(PP,qrx$R,FALSE,FALSE,nt=npt[1])  ##crossprod(PP,qrx$R) - qrx$R contains X'WX in this case
  object$edf <- diag(F)
  object$edf1 <- 2*object$edf - rowSums(t(F)*F)
  lsp <- if (n.sp>0) lsp[1:n.sp] else rep(0,0)
  object$sp <- exp(lsp)
  object$full.sp <- if (is.null(G$L)) object$sp else exp(drop(G$L%*%lsp + G$lsp0))
  object$sig2 <- object$scale <- scale
  object$Vp <- PP * scale
  object$Ve <- pmmult(F,object$Vp,FALSE,FALSE,nt=npt[1]) ## F%*%object$Vp
  ## sp uncertainty correction... 
  if (!is.null(G$L)) prop$db <- prop$db%*%G$L
  M <- ncol(prop$db) 
  if (M>0) {
    ev <- eigen(prop$hess,symmetric=TRUE)
    ind <- ev$values <= 0
    ev$values[ind] <- 0;ev$values[!ind] <- 1/sqrt(ev$values[!ind])
    rV <- (ev$values*t(ev$vectors))[,1:M]
    V.sp <- crossprod(rV)
    attr(V.sp,"L") <- G$L
    Vc <- pcrossprod(rV%*%t(prop$db),nt=npt[1])
  } else {
    Vc <- 0
    V.sp <- NULL
  }  
  Vc <- object$Vp + Vc  ## Bayesian cov matrix with sp uncertainty
  object$edf2 <- rowSums(Vc*qrx$R)/scale
  object$Vc <- Vc
  object$V.sp <- V.sp
  object$outer.info <- list(grad = prop$grad,hess=prop$hess)  
  object$AR1.rho <- rho
  object$R <- if (npt[2]>1) pchol(qrx$R,npt) else suppressWarnings(chol(qrx$R,pivot=TRUE)) ## latter much faster under optimized BLAS
  piv <- attr(object$R,"pivot") 
  object$R[,piv] <- object$R   
  object$iter <- iter 
  object$wt <- w
  object$y <- G$y
  object$prior.weights <- G$w
  rm(G);if (gc.level>0) gc()
  object
} ## end bgam.fitd


regular.Sb <- function(S,off,sp,beta) {
## form S %*% beta for a normal G list
  a <- beta*0
  if (length(S)>0) for (i in 1:length(S)) {
    ind <- off[i] - 1 + 1:ncol(S[[i]])
    a[ind] <- a[ind] + sp[i] * S[[i]] %*% beta[ind]
  }
  a
} ## regular.Sb


bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, coef=NULL,etastart = NULL,
    mustart = NULL, offset = rep(0, nobs), control = gam.control(), intercept = TRUE, 
    cl = NULL,gc.level=0,use.chol=FALSE,nobs.extra=0,samfrac=1,npt=1) {
    #y <- mf[[gp$response]]
    y <- G$y
    weights <- G$w
    conv <- FALSE
    nobs <- nrow(mf)
    ##nvars <- ncol(G$X)
    offset <- G$offset
    family <- G$family

    if (inherits(G$family,"extended.family")) { ## preinitialize extended family
      efam <- TRUE
      pini <- if (is.null(G$family$preinitialize)) NULL else G$family$preinitialize(y,G$family)
      if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta)
      if (!is.null(pini$y)) y <- pini$y
      if (is.null(G$family$scale)) scale <- 1 else scale <- if (G$family$scale<0) scale else G$family$scale
      scale1 <-scale
      if (scale < 0) scale <- var(y) *.1 ## initial guess
    } else efam <- FALSE

 
    G$family <- gaussian() ## needed if REML/ML used
    G$family$drop.intercept <- family$drop.intercept ## needed in predict.gam
    linkinv <- family$linkinv
    if (!efam) {
      variance <- family$variance
      mu.eta <- family$mu.eta
      if (!is.function(variance) || !is.function(linkinv))
        stop("'family' argument seems not to be a valid family object")
    }
    dev.resids <- family$dev.resids
    ## aic <- family$aic
   
    valideta <- family$valideta
    if (is.null(valideta))
        valideta <- function(eta) TRUE
    validmu <- family$validmu
    if (is.null(validmu))
        validmu <- function(mu) TRUE
    if (is.null(mustart)) {
        eval(family$initialize)
    }
    else {
        mukeep <- mustart
        eval(family$initialize)
        mustart <- mukeep
    }

    if (is.matrix(y)&&ncol(y)>1) stop("This family should not have a matrix response")

    ##coefold <- NULL
    eta <- if (!is.null(etastart))
         etastart
    else family$linkfun(mustart)
    
    mu <- linkinv(eta)
    if (!(validmu(mu) && valideta(eta)))
       stop("cannot find valid starting values: please specify some")
    dev <- sum(dev.resids(y, mu, weights))*2 ## just to avoid converging at iter 1
    conv <- FALSE
   
    G$coefficients <- rep(0,ncol(G$X))
    class(G) <- "gam"  
    
    ## need to reset response and weights to post initialization values
    ## in particular to deal with binomial properly...
    G$y <- y
    G$w <- weights

  
    ## set up cluster for parallel computation...

    if (!is.null(cl)&&inherits(cl,"cluster")) {
      n.threads <- length(cl)
      while(nobs/n.threads < ncol(G$X)) n.threads <- n.threads - 1
      if (n.threads < length(cl)) { 
        warning("Too many cluster nodes to use all efficiently")
      }
    } else n.threads <- 1

    if (n.threads>1) { ## set up thread argument lists
      ## number of obs per thread
      nt <- rep(ceiling(nobs/n.threads),n.threads)
      nt[n.threads] <- nobs - sum(nt[-n.threads])
      arg <- list()
      n1 <- 0
      for (i in 1:n.threads) {
        n0 <- n1+1;n1 <- n1+nt[i]
        ind <- n0:n1 ## this thread's data block from mf
        n.block <- nt[i]%/%chunk.size ## number of full sized blocks
        stub <- nt[i]%%chunk.size ## size of end block
        if (n.block>0) {
          start <- (0:(n.block-1))*chunk.size+1
          stop <- (1:n.block)*chunk.size
          if (stub>0) {
            start[n.block+1] <- stop[n.block]+1
            stop[n.block+1] <- nt[i]
            n.block <- n.block+1
          } 
        } else {
          n.block <- 1
          start <- 1
          stop <- nt[i]
        }
        arg[[i]] <- list(nobs= nt[i],start=start,stop=stop,n.block=n.block,
                         linkinv=linkinv,dev.resids=dev.resids,gc.level=gc.level,
                         mf = mf[ind,],
                         eta = eta[ind],offset = offset[ind],G = G,use.chol=use.chol)
        if (efam) {
          arg[[i]]$family <- family
        } else {
          arg[[i]]$mu.eta <- mu.eta
	  arg[[i]]$variance <- variance
        }
        arg[[i]]$G$w <- G$w[ind];arg[[i]]$G$model <- NULL
        arg[[i]]$G$y <- G$y[ind]
      }
    } else { ## single thread, requires single indices
      ## construct indices for splitting up model matrix construction... 
      n.block <- nobs%/%chunk.size ## number of full sized blocks
      stub <- nobs%%chunk.size ## size of end block
      if (n.block>0) {
        start <- (0:(n.block-1))*chunk.size+1
        stop <- (1:n.block)*chunk.size
        if (stub>0) {
          start[n.block+1] <- stop[n.block]+1
          stop[n.block+1] <- nobs
          n.block <- n.block+1
        } 
      } else {
        n.block <- 1
        start <- 1
        stop <- nobs
      }
   } ## single thread indices complete
 
    conv <- FALSE

    if (method=="fREML") Sl <- Sl.setup(G) ## setup block diagonal penalty object

    if (efam) theta <- family$getTheta()

    for (iter in 1L:control$maxit) { ## main fitting loop
       ## accumulate the QR decomposition of the weighted model matrix
       devold <- dev
       kk <- 0
       repeat { 
         dev <- 0;wt <- rep(0,0) 
         if (n.threads == 1) { ## use original serial update code
           wt <- G$y
           for (b in 1:n.block) {
             ind <- start[b]:stop[b]
             X <- predict(G,newdata=mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
             rownames(X) <- NULL
             if (is.null(coef)) eta1 <- eta[ind] else eta[ind] <- eta1 <- drop(X%*%coef) + offset[ind]
             mu <- linkinv(eta1) 
             y <- G$y[ind] ## G$model[[gp$response]] ## - G$offset[ind]
             weights <- G$w[ind]
             if (efam) { ## extended family case
                dd <- dDeta(y,mu,weights,theta=theta,family,0)
	        ## note: no handling of infinities and wz case yet
               
	        w <- dd$EDeta2 * .5 
	        #w <- w
                z <- (eta1-offset[ind]) - dd$Deta.EDeta2
	        good <- is.finite(z)&is.finite(w)
	       
             } else { ## regular exp fam case
               mu.eta.val <- mu.eta(eta1)
               good <- (weights > 0) & (mu.eta.val != 0)
               z <- (eta1 - offset[ind]) + (y - mu)/mu.eta.val
               w <- (weights * mu.eta.val^2)/variance(mu)
             }
             dev <- dev + if (efam) sum(dev.resids(y,mu,weights,theta)) else sum(dev.resids(y,mu,weights))
	     w[!good] <- 0 ## drop if !good
	     #z[!good] <- 0 ## irrelevant 
             wt[ind] <- w ## wt <- c(wt,w)
             w <- w[good];z <- z[good]
             w <- sqrt(w)
             ## note that QR may be parallel using npt>1, even under serial accumulation...
             if (b == 1) qrx <- qr.update(w*X[good,,drop=FALSE],w*z,use.chol=use.chol,nt=npt) 
             else qrx <- qr.update(w*X[good,,drop=FALSE],w*z,qrx$R,qrx$f,qrx$y.norm2,use.chol=use.chol,nt=npt)
             rm(X);if(gc.level>1) gc() ## X can be large: remove and reclaim
          }
          if (use.chol) { ## post proc to get R and f...
            y.norm2 <- qrx$y.norm2 
            qrx <- chol2qr(qrx$R,qrx$f,nt=npt)
            qrx$y.norm2 <- y.norm2
          }
        } else { ## use parallel accumulation
	  
          for (i in 1:length(arg)) {
	    arg[[i]]$coef <- coef
	    if (efam) arg[[i]]$theta <- theta
	  }
          res <- parallel::parLapply(cl,arg,qr.up) 
          ## single thread debugging version 
          #res <- list()
          #for (i in 1:length(arg)) {
          #  res[[i]] <- qr.up(arg[[i]])
          #}
          ## now consolidate the results from the parallel threads...
          if (use.chol) {
            R <- res[[1]]$R;f <- res[[1]]$f;dev <- res[[1]]$dev
            wt <- res[[1]]$wt;y.norm2 <- res[[1]]$y.norm2
	    eta <- res[[1]]$eta
            for (i in 2:n.threads) {
              R <- R + res[[i]]$R; f <- f + res[[i]]$f
              wt <- c(wt,res[[i]]$wt);eta <- c(eta,res[[i]]$eta);
	      dev <- dev + res[[i]]$dev
              y.norm2 <- y.norm2 + res[[i]]$y.norm2
            }         
            qrx <- chol2qr(R,f,nt=npt)
            qrx$y.norm2 <- y.norm2
          } else { ## proper QR
            R <- res[[1]]$R;f <- res[[1]]$f;dev <- res[[1]]$dev
            wt <- res[[1]]$wt;y.norm2 <- res[[1]]$y.norm2; eta <- res[[1]]$eta
            for (i in 2:n.threads) {
              R <- rbind(R,res[[i]]$R); f <- c(f,res[[i]]$f)
              wt <- c(wt,res[[i]]$wt);eta <- c(eta,res[[i]]$eta)
	      dev <- dev + res[[i]]$dev
              y.norm2 <- y.norm2 + res[[i]]$y.norm2
            }         
            ## use parallel QR here if npt>1...
            qrx <- if (npt>1) pqr2(R,npt) else qr(R,tol=0,LAPACK=TRUE) 
            f <- qr.qty(qrx,f)[1:ncol(R)]
            rp <- qrx$pivot;rp[rp] <- 1:ncol(R) # reverse pivot
            qrx <- list(R=qr.R(qrx)[,rp],f=f,y.norm2=y.norm2)
          }
        } 

        ## if the routine has been called with only a random sample of the data, then 
        ## R, f and ||y||^2 can be corrected to estimate the full versions...
 
        qrx$R <- qrx$R/sqrt(samfrac)
        qrx$f <- qrx$f/sqrt(samfrac)
        qrx$y.norm2 <- qrx$y.norm2/samfrac

        G$n <- nobs
      
        rss.extra <- qrx$y.norm2 - sum(qrx$f^2)
      
        if (control$trace)
           message(gettextf("Deviance = %s Iterations - %d", dev, iter, domain = "R-mgcv"))

        if (!is.finite(dev)) stop("Non-finite deviance")

        ## preparation for working model fit is ready, but need to test for convergence first
        if (iter>2 && abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) {
            conv <- TRUE
            coef <- start
            break
        }
        if (kk > 0) break ## already shrunk the step
        ## At this point it is worth checking that coef update actually improved the penalized
        ## deviance. If not try step halving, and redo the above once a suitable step has been
        ## found...
        if (iter>2) { ## can test divergence
          ## need to compute penalty at start and end of step
	  if (efam) {
	    dev0 <- sum(dev.resids(G$y,linkinv(eta0),G$w,theta0)) ## depends on theta, which will have changed
	    dev1 <- sum(dev.resids(G$y,linkinv(eta),G$w,theta0)) ## depends on theta, which will have changed
          } else { dev1 <- dev }
          if (method=="fREML") {
	    pcoef <- fit$beta
            Sb0 <- Sl.Sb(um$Sl,rho=log(object$full.sp),pcoef0)
	    Sb <- Sl.Sb(um$Sl,rho=log(object$full.sp),pcoef)
          } else {
	    pcoef <- coef
	    full.sp <- if (is.null(object$full.sp)) object$sp else object$full.sp
            Sb0 <- regular.Sb(G$S,G$off,full.sp,pcoef0)
	    Sb <- regular.Sb(G$S,G$off,full.sp,pcoef)
          }
	  while (dev0 + sum(pcoef0*Sb0) < dev1 + sum(pcoef * Sb) && kk < 6) {
            ## shrink step ...
            coef <- (coef0 + coef)/2
	    pcoef <- (pcoef0 + pcoef)/2
	    eta <- (eta0 + eta)/2
	    Sb <- (Sb0 + Sb)/2
	    ## recompute deviance ...
	    dev <- if (efam) sum(dev.resids(G$y,linkinv(eta),G$w,theta)) else sum(dev.resids(G$y,linkinv(eta),G$w)) 
            dev1 <- if (efam) sum(dev.resids(G$y,linkinv(eta),G$w,theta0)) else dev
            kk <- kk + 1
          }
        }
	if (kk == 0) break ## step was ok
      } ## repeat
      
      if (conv) break

      if (iter>1) { ## store coef and eta for divergence checking
        coef0 <- coef
	if (efam) theta0 <- theta ## theta used for determining step
	pcoef0 <- if (method=="fREML") fit$beta else coef
	eta0 <- eta
	dev0 <- dev
      }

      if (efam && iter>1) { ## estimate theta
	#scale1 <- if (!is.null(family$scale)) family$scale else scale
        if (family$n.theta>0||scale1<0) theta <- estimate.theta(theta,family,G$y,linkinv(eta),scale=scale1,wt=G$w,tol=1e-7)
        if (!is.null(family$scale) && scale1<0) {
	   scale <- exp(theta[family$n.theta+1])
	   theta <- theta[1:family$n.theta]
	}  
        family$putTheta(theta)
      }
	  
      if (method=="GCV.Cp") {
         fit <- magic(qrx$f,qrx$R,G$sp,G$S,G$off,L=G$L,lsp0=G$lsp0,rank=G$rank,
                      H=G$H,C=matrix(0,0,ncol(qrx$R)),     ##C=G$C,
                      gamma=gamma,scale=scale,gcv=(scale<=0),
                      extra.rss=rss.extra,n.score=nobs+nobs.extra)
 
         post <- magic.post.proc(qrx$R,fit,qrx$f*0+1) 
      } else if (method=="fREML") { ## use fast REML code
        ## block diagonal penalty object, Sl, set up before loop
        um <- Sl.Xprep(Sl,qrx$R,nt=npt)
        lambda.0 <- initial.sp(qrx$R,G$S,G$off)
        lsp0 <- log(lambda.0) ## initial s.p.
        ## carry forward scale estimate if possible...
        if (scale>0) log.phi <- log(scale) else {
          if (iter>1) log.phi <- log(object$scale) else {
            if (is.null(coef)||qrx$y.norm2==0) log.phi <- log(var(as.numeric(G$y))*.05) else
               log.phi <- log(qrx$y.norm2/(nobs+nobs.extra))
          }
        }
        fit <- fast.REML.fit(um$Sl,um$X,qrx$f,rho=lsp0,L=G$L,rho.0=G$lsp0,
                             log.phi=log.phi,phi.fixed=scale>0,rss.extra=rss.extra,
                             nobs =nobs+nobs.extra,Mp=um$Mp,nt=npt,gamma=gamma)
        res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=FALSE,L=G$L,nt=npt)
        object <- list(coefficients=res$beta,db.drho=fit$d1b,
                       gcv.ubre=fit$reml,mgcv.conv=list(iter=fit$iter,
                       message=fit$conv),rank=ncol(um$X),
                       Ve=NULL,scale.estimated = scale<=0,outer.info=fit$outer.info,
                        optimizer=c("perf","newton"))
 
        if (scale<=0) { ## get sp's and scale estimate
          nsp <- length(fit$rho)
          object$sig2 <- object$scale <- exp(fit$rho[nsp])
          object$sp <- exp(fit$rho[-nsp]) 
          nsp <- length(fit$rho.full)
          object$full.sp <- exp(fit$rho.full[-nsp])
        } else { ## get sp's
          object$sig2 <- object$scale <- scale  
          object$sp <- exp(fit$rho)
          object$full.sp <- exp(fit$rho.full)
        }
        class(object)<-c("gam")               
      } else { ## method is one of "ML", "P-REML" etc...
        y <- G$y; w <- G$w; n <- G$n;offset <- G$offset
        G$y <- qrx$f
        G$w <- G$y*0+1
        G$X <- qrx$R
        G$n <- length(G$y)
        G$offset <- G$y*0
        G$dev.extra <- rss.extra
        G$pearson.extra <- rss.extra
        G$n.true <- nobs+nobs.extra
        object <- gam(G=G,method=method,gamma=gamma,scale=scale,control=gam.control(nthreads=npt))
        y -> G$y; w -> G$w; n -> G$n;offset -> G$offset
	object$deviance <- object$family <- object$null.deviance <- object$fitted.values <- NULL
      }
     
      if (method=="GCV.Cp") { 
        object <- list()
        object$coefficients <- fit$b
        object$edf <- post$edf 
        object$edf1 <- post$edf1
        ##object$F <- post$F
        object$full.sp <- fit$sp.full
        object$gcv.ubre <- fit$score
        object$hat <- post$hat
        object$mgcv.conv <- fit$gcv.info 
        object$optimizer="magic"
        object$rank <- fit$gcv.info$rank
        object$Ve <- post$Ve
        object$Vp <- post$Vb
        object$sig2 <- object$scale <- fit$scale
        object$sp <- fit$sp
        names(object$sp) <- names(G$sp)
        class(object)<-c("gam")
      }

      coef <- object$coefficients
        
      if (any(!is.finite(coef))) {
          conv <- FALSE
          warning(gettextf("non-finite coefficients at iteration %d",
                  iter))
          break
      }
    } ## end fitting iteration

    if (method=="fREML") { ## do expensive cov matrix cal only at end
      res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=TRUE,scale=scale,L=G$L,nt=npt)
      object$edf <- res$edf
      object$edf1 <- res$edf1
      object$edf2 <- res$edf2
      object$hat <- res$hat
      object$Vp <- res$Vp
      object$Ve <- res$Ve
      object$Vc <- res$Vc
      object$V.sp <- res$V.sp
    }
    
    if (efam) { ## deal with any post processing
       if (!is.null(family$postproc)) {
         object$family <- family
         posr <- family$postproc(family=family,y=G$y,prior.weights=G$w,
              fitted=linkinv(eta),linear.predictors=eta,offset=G$offset,
	      intercept=G$intercept)
         if (!is.null(posr$family)) object$family$family <- posr$family
         if (!is.null(posr$deviance)) object$deviance <- posr$deviance
         if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance
       }
      if (is.null(object$null.deviance)) object$null.deviance <- sum(family$dev.resids(G$y,weighted.mean(G$y,G$w),G$w,theta))   
    }

    if (!conv)
       warning("algorithm did not converge")
   
    eps <- 10 * .Machine$double.eps
    if (family$family == "binomial") {
         if (any(mu > 1 - eps) || any(mu < eps))
                warning("fitted probabilities numerically 0 or 1 occurred")
    }
    if (family$family == "poisson") {
            if (any(mu < eps))
                warning("fitted rates numerically 0 occurred")
    }
  object$R <- qrx$R    
  object$iter <- iter 
  object$wt <- wt
  object$y <- G$y
  object$prior.weights <- G$w
  rm(G);if (gc.level>0) gc()
  object
} ## end bgam.fit




ar.qr.up <- function(arg) {
## function to perform QR updating with AR residuals, on one execution thread
  if (arg$rho!=0) { ## AR1 error model
     ld <- 1/sqrt(1 - arg$rho^2) ## leading diagonal of root inverse correlation
     sd <- -arg$rho * ld         ## sub diagonal
  } 
  yX.last <- NULL
  qrx <- list(R=NULL,f=array(0,0),y.norm2=0) ## initial empty qr object
  for (i in 1:arg$n.block) {
    ind <- arg$start[i]:arg$end[i] 
    if (arg$rho!=0) { ## have to find AR1 transform...
       N <- arg$end[i]-arg$start[i]+1
       ## note first row implied by this transform
       ## is always dropped, unless really at beginning of data.
       row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)])
       weight <- c(1,rep(c(sd,ld),N-1))
       stop <- c(1,1:(N-1)*2+1)
       if (!is.null(arg$mf$"(AR.start)")) { ## need to correct the start of new AR sections...
           ii <- which(arg$mf$"(AR.start)"[ind]==TRUE)
           if (length(ii)>0) {
             if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
             weight[ii*2-2] <- 0 ## zero sub diagonal
             weight[ii*2-1] <- 1 ## set leading diagonal to 1
           }
       }
     } 
     ## arg$G$model <- arg$mf[ind,]
     w <- sqrt(arg$G$w[ind])
     X <- w*predict(arg$G,newdata=arg$mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
     y <- w*(arg$mf[ind,arg$response] - arg$offset[ind]) ## w*(arg$G$model[[arg$response]] - arg$offset[ind])
     if (arg$rho!=0) {
       ## Apply transform...
       if (arg$last&&arg$end[i]==arg$nobs) yX.last <- 
           c(y[nrow(X)],X[nrow(X),]) ## store final row, in case of update
       if (arg$first&&i==1) {
          X <- rwMatrix(stop,row,weight,X)
          y <- rwMatrix(stop,row,weight,y)
       } else {
          X <- rwMatrix(stop,row,weight,X)[-1,]
          y <- rwMatrix(stop,row,weight,y)[-1]
       } 
     } ## dealt with AR1      
     qrx <- qr.update(X,y,qrx$R,qrx$f,qrx$y.norm2,use.chol=arg$use.chol)
     rm(X);if (arg$gc.level>1) {gc()} ## X can be large: remove and reclaim
  } ## all blocks dealt with
  qrx$yX.last <- yX.last
  if (arg$gc.level>1) {rm(arg,w,y,ind);gc()}
  qrx
} ## ar.qr.up

pabapr <- function(arg) {
## function for parallel calling of predict.gam
## QUERY: ... handling?
  predict.gam(arg$object,newdata=arg$newdata,type=arg$type,se.fit=arg$se.fit,terms=arg$terms,
                        block.size=arg$block.size,newdata.guaranteed=arg$newdata.guaranteed,
                        na.action=arg$na.action)
}

predict.bam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,exclude=NULL,
                        block.size=50000,newdata.guaranteed=FALSE,na.action=na.pass,
                        cluster=NULL,discrete=TRUE,n.threads=1,gc.level=0,...) {
## function for prediction from a bam object, possibly in parallel
  
  #if (is.function(na.action)) na.action <- deparse(substitute(na.action)) ## otherwise predict.gam can't detect type
  if (discrete && !is.null(object$dinfo)) {
    return(predict.bamd(object,newdata,type,se.fit,terms,exclude,
                        block.size,newdata.guaranteed,na.action,n.threads,gc.level=gc.level,...))
  }
  ## remove some un-needed stuff from object
  object$Sl <- object$qrx <- object$R <- object$F <- object$Ve <-
  object$Vc <- object$G <- object$residuals <- object$fitted.values <-
  object$linear.predictors <- NULL
  if (gc.level>0) gc()
  if (!is.null(cluster)&&inherits(cluster,"cluster")) { 
     ## require(parallel)
     n.threads <- length(cluster)
  } else n.threads <- 1
  if (missing(newdata)) n <- nrow(object$model) else {
    n <- if (is.matrix(newdata[[1]])) nrow(newdata[[1]]) else length(newdata[[1]]) 
  }
  if (n < 100*n.threads) n.threads <- 1 ## not worth the overheads
  if (n.threads==1) { ## single threaded call
    if (missing(newdata)) return(
      predict.gam(object,newdata=object$model,type=type,se.fit=se.fit,terms=terms,exclude=exclude,
                        block.size=block.size,newdata.guaranteed=newdata.guaranteed,
                        na.action=na.action,...)
    ) else return(
      predict.gam(object,newdata=newdata,type=type,se.fit=se.fit,terms=terms,exclude=exclude,
                        block.size=block.size,newdata.guaranteed=newdata.guaranteed,
                        na.action=na.action,...))
  } else { ## parallel call...
    nt <- rep(floor(n/n.threads),n.threads)
    nt[1] <- n - sum(nt[-1])
    arg <- list()
    n1 <- 0
    for (i in 1:n.threads) { 
      n0 <- n1+1;n1 <- n1+nt[i]
      ind <- n0:n1 ## this thread's data block from mf
      arg[[i]] <- list(object=object,type=type,se.fit=se.fit,terms=terms,exclude=exclude,
                        block.size=block.size,newdata.guaranteed=newdata.guaranteed,
                        na.action=na.action)
      arg[[i]]$object$model <- object$model[1:2,] ## save space
      if (missing(newdata)) {
        arg[[i]]$newdata <- object$model[ind,]
      } else {
        arg[[i]]$newdata <- newdata[ind,]
      }
    } ## finished setting up arguments
    ## newdata and object no longer needed - all info in thread lists...
    if (!missing(newdata)) rm(newdata)
    rm(object)
    if (gc.level>0) gc()
    res <- parallel::parLapply(cluster,arg,pabapr) ## perform parallel prediction
    if (gc.level>0) gc()
    ## and splice results back together...
    if (type=="lpmatrix") {
      X <- res[[1]]
      for (i in 2:length(res)) X <- rbind(X,res[[i]])
      return(X)
    } else if (se.fit==TRUE) {
      rt <- list(fit = res[[1]]$fit,se.fit = res[[1]]$se.fit)
      if (type=="terms") {
        for (i in 2:length(res)) { 
          rt$fit <- rbind(rt$fit,res[[i]]$fit)
          rt$se.fit <- rbind(rt$se.fit,res[[i]]$se.fit)
        }
      } else {
        for (i in 2:length(res)) { 
          rt$fit <- c(rt$fit,res[[i]]$fit)
          rt$se.fit <- c(rt$se.fit,res[[i]]$se.fit)
        }
      }
      return(rt)
    } else { ## no se's returned
      rt <- res[[1]]
       if (type=="terms") {
        for (i in 2:length(res)) rt <- rbind(rt,res[[i]])
      } else {
        for (i in 2:length(res)) rt <- c(rt,res[[i]])
      }
      return(rt)
    } 
  }
} ## end predict.bam 


bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,
                    cl=NULL,gc.level=0,use.chol=FALSE,npt=1) {
## function that does big additive model fit in strictly additive case
   ## first perform the QR decomposition, blockwise....
   n <- nrow(mf)
   if (rho!=0) { ## AR1 error model
     ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
     sd <- -rho*ld         ## sub diagonal
   }

   if (n>chunk.size) { ## then use QR accumulation approach
     if (!is.null(cl)&&inherits(cl,"cluster")) { 
       n.threads <- length(cl)
       while(n/n.threads < ncol(G$X)) n.threads <- n.threads - 1
       if (n.threads < length(cl)) { 
         warning("Too many cluster nodes to use all efficiently")
       }
     } else n.threads <- 1

     G$coefficients <- rep(0,ncol(G$X))
     class(G) <- "gam"

     if (n.threads>1) { ## set up thread argument lists
       ## number of obs per thread
       nt <- rep(ceiling(n/n.threads),n.threads)
       nt[n.threads] <- n - sum(nt[-n.threads])
       arg <- list()
       n1 <- 0
       for (i in 1:n.threads) { 
         n0 <- n1+1;n1 <- n1+nt[i]
         if (i>1&&rho!=0) { ## need to start from end of last block if rho!=0
           n0 <- n0-1;nt[i] <- nt[i]+1 
         }   
         ind <- n0:n1 ## this thread's data block from mf
         n.block <- nt[i]%/%chunk.size ## number of full sized blocks
         stub <- nt[i]%%chunk.size ## size of end block
         if (n.block>0) { 
           ## each block is of size 
           start <- (0:(n.block-1))*chunk.size+1
           end <- start + chunk.size - 1
           if (stub>0) {
             start[n.block+1] <- end[n.block]+1
             end[n.block+1] <- nt[i]
             n.block <- n.block+1
           } 
           if (rho!=0) { ## then blocks must overlap
             ns <- length(start)
             if (ns>1) start[2:ns] <- start[2:ns]-1 
           }
         } else {
           n.block <- 1
           start <- 1
           end <- nt[i]
         }
         arg[[i]] <- list(nobs= nt[i],start=start,end=end,n.block=n.block,
                         rho=rho,mf = mf[ind,],gc.level=gc.level,
                         offset = G$offset[ind],G = G,response=gp$response,
                         first=FALSE,last=FALSE,use.chol=use.chol)
         if (i==1) arg[[1]]$first <- TRUE
         if (i==n.threads) arg[[i]]$last <- TRUE 
         arg[[i]]$G$w <- G$w[ind];arg[[i]]$G$model <- NULL
       }
     } else { ## single thread, requires single indices 
       n.block <- n%/%chunk.size ## number of full sized blocks
       stub <- n%%chunk.size ## size of end block
       if (stub>0) n.block <- n.block + 1
       start <- 0:(n.block-1)*chunk.size    ## block starts
       end <- start + chunk.size;           ## block ends
       end[n.block] <- n
       if (rho==0) start <- start + 1  ## otherwise most blocks go to 1 before block start
       start[1] <- 1  
     } 
    
     if (n.threads==1) { ## use original single thread method...
       qrx <- list(R=NULL,f=array(0,0),y.norm2=0) ## initial empty qr object
       for (i in 1:n.block) {
         ind <- start[i]:end[i] 
         if (rho!=0) {
           N <- end[i]-start[i]+1

           row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)])
           weight <- c(1,rep(c(sd,ld),N-1))
           stop <- c(1,1:(N-1)*2+1) 
           if (!is.null(mf$"(AR.start)")) { ## need to correct the start of new AR sections...
             ii <- which(mf$"(AR.start)"[ind]==TRUE)
             if (length(ii)>0) {
               if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
               weight[ii*2-2] <- 0 ## zero sub diagonal
               weight[ii*2-1] <- 1 ## set leading diagonal to 1
             }
           }
         } 
         #G$model <- mf[ind,]
         w <- sqrt(G$w[ind])
         X <- w*predict(G,newdata=mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
         y <- w*(mf[ind,gp$response]-G$offset[ind])  ## w*(G$model[[gp$response]] - G$offset[ind])
         if (rho!=0) {
           ## Apply transform...
           if (end[i]==n) yX.last <- c(y[nrow(X)],X[nrow(X),]) ## store final row, in case of update
           if (i==1) {
             X <- rwMatrix(stop,row,weight,X)
             y <- rwMatrix(stop,row,weight,y)
           } else {
             X <- rwMatrix(stop,row,weight,X)[-1,]
             y <- rwMatrix(stop,row,weight,y)[-1]
           } 
         }      

         qrx <- qr.update(X,y,qrx$R,qrx$f,qrx$y.norm2,use.chol=use.chol,nt=npt)
         rm(X)
         if (gc.level>1) {gc()} ## X can be large: remove and reclaim
       } ## end of single thread block loop
       if (use.chol) { ## post proc to get R and f...
          y.norm2 <- qrx$y.norm2 
          qrx <- chol2qr(qrx$R,qrx$f,nt=npt)
          qrx$y.norm2 <- y.norm2
        }
     } else { ## use parallel accumulation
     
       res <- parallel::parLapply(cl,arg,ar.qr.up)
       ## Single thread de-bugging...
       # res <- list()
       # for (i in 1:length(arg)) {
       #   res[[i]] <- ar.qr.up(arg[[i]])
       # }

       ## now consolidate the results from the parallel threads...
       R <- res[[1]]$R;f <- res[[1]]$f; ## dev <- res[[1]]$dev
       y.norm2 <- res[[1]]$y.norm2
       for (i in 2:n.threads) {
         if (use.chol) {
           R <- R + res[[i]]$R; f <- f + res[[i]]$f
         } else {
           R <- rbind(R,res[[i]]$R); f <- c(f,res[[i]]$f)
         }
         y.norm2 <- y.norm2 + res[[i]]$y.norm2
       } 
       if (use.chol) {
         qrx <- chol2qr(R,f,nt=npt)
         qrx$y.norm2 <- y.norm2
       } else { ## proper QR         
         ## use parallel QR if npt>1...
         qrx <- if (npt>1) pqr2(R,npt) else qr(R,tol=0,LAPACK=TRUE) 
         f <- qr.qty(qrx,f)[1:ncol(R)]
         rp <- qrx$pivot;rp[rp] <- 1:ncol(R) # reverse pivot
         qrx <- list(R=qr.R(qrx)[,rp],f=f,y.norm2=y.norm2)
       }
       yX.last <- res[[n.threads]]$yX.last
     } 
     G$n <- n
     #G$y <- mf[[gp$response]]
   
   } else { ## n <= chunk.size
     if (rho==0) qrx <- qr.update(sqrt(G$w)*G$X,sqrt(G$w)*(G$y-G$offset),use.chol=use.chol,nt=npt) else {
       row <- c(1,rep(1:n,rep(2,n))[-c(1,2*n)])
       weight <- c(1,rep(c(sd,ld),n-1))
       stop <- c(1,1:(n-1)*2+1)
       if (!is.null(mf$"(AR.start)")) { ## need to correct the start of new AR sections...
         ii <- which(mf$"(AR.start)"==TRUE)
         if (length(ii)>0) {
           if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
           weight[ii*2-2] <- 0 ## zero sub diagonal
           weight[ii*2-1] <- 1 ## set leading diagonal to 1
         }
       }
       yX.last <- c(G$y[n],G$X[n,])  ## store final row, in case of update
       X <- rwMatrix(stop,row,weight,sqrt(G$w)*G$X)
       y <- rwMatrix(stop,row,weight,sqrt(G$w)*G$y)
       qrx <- qr.update(X,y,use.chol=use.chol,nt=npt)
   
       rm(X); if (gc.level>1) gc() ## X can be large: remove and reclaim
     } 
     if (use.chol) { ## post proc to get R and f...
        y.norm2 <- qrx$y.norm2 
        qrx <- chol2qr(qrx$R,qrx$f,nt=npt)
        qrx$y.norm2 <- y.norm2
     }
   }

   rss.extra <- qrx$y.norm2 - sum(qrx$f^2)
 
   if (method=="GCV.Cp") {
     fit <- magic(qrx$f,qrx$R,G$sp,G$S,G$off,L=G$L,lsp0=G$lsp0,rank=G$rank,
                H=G$H,C=matrix(0,0,ncol(qrx$R)),  ##C=G$C,
                gamma=gamma,scale=scale,gcv=(scale<=0),
                extra.rss=rss.extra,n.score=n)
 
     post <- magic.post.proc(qrx$R,fit,qrx$f*0+1) 
   } else if (method=="fREML"){ ## use fast REML code
     Sl <- Sl.setup(G) ## setup block diagonal penalty object
     um <- Sl.Xprep(Sl,qrx$R,nt=npt)
     lambda.0 <- initial.sp(qrx$R,G$S,G$off)
     lsp0 <- log(lambda.0) ## initial s.p.
     if (scale<=0) log.phi <- log(var(as.numeric(G$y))*.05) else ## initial phi guess
                   log.phi <- log(scale)
     fit <- fast.REML.fit(um$Sl,um$X,qrx$f,rho=lsp0,L=G$L,rho.0=G$lsp0,
            log.phi=log.phi,phi.fixed=scale>0,rss.extra=rss.extra,
            nobs =n,Mp=um$Mp,nt=npt,gamma=gamma)
     res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=TRUE,scale=scale,L=G$L,nt=npt)
     object <- list(coefficients=res$beta,edf=res$edf,edf1=res$edf1,edf2=res$edf2,##F=res$F,
                    db.drho=fit$d1b,
                    gcv.ubre=fit$reml,hat=res$hat,mgcv.conv=list(iter=fit$iter,
                    message=fit$conv),rank=ncol(um$X),
                    Ve=res$Ve,Vp=res$Vp,Vc=res$Vc,V.sp=res$V.sp,
                    scale.estimated = scale<=0,outer.info=fit$outer.info,
                    optimizer=c("perf","newton"))
     if (scale<=0) { ## get sp's and scale estimate
       nsp <- length(fit$rho)
       object$sig2 <- object$scale <- exp(fit$rho[nsp])
       object$sp <- exp(fit$rho[-nsp])
       nsp <- length(fit$rho.full)
       object$full.sp <- exp(fit$rho.full[-nsp])
     } else { ## get sp's
       object$sig2 <- object$scale <- scale  
       object$sp <- exp(fit$rho)
       object$full.sp <- exp(fit$rho.full)
     }
     
     if (rho!=0) { ## correct RE/ML score for AR1 transform
       df <- if (is.null(mf$"(AR.start)")) 1 else sum(mf$"(AR.start)")
       object$gcv.ubre <- object$gcv.ubre - (n-df)*log(ld)
     }

     G$X <- qrx$R;G$dev.extra <- rss.extra
     G$pearson.extra <- rss.extra;G$n.true <- n
     object$Sl <- Sl ## to allow for efficient update
     class(object)<-c("gam")
   } else { ## method is "ML", "P-REML" or similar
     y <- G$y; w <- G$w; n <- G$n;offset <- G$offset
     G$y <- qrx$f
     G$w <- G$y*0+1
     G$X <- qrx$R
     G$n <- length(G$y)
     G$offset <- G$y*0
     G$dev.extra <- rss.extra
     G$pearson.extra <- rss.extra
     G$n.true <- n
     object <- gam(G=G,method=method,gamma=gamma,scale=scale,control=gam.control(nthreads=npt))
     object$null.deviance <- object$fitted.values <- NULL
     y -> G$y; w -> G$w; n -> G$n;offset -> G$offset
     if (rho!=0) { ## correct RE/ML score for AR1 transform 
       df <- if (is.null(mf$"(AR.start)")) 1 else sum(mf$"(AR.start)")
       object$gcv.ubre <- object$gcv.ubre - (n-df)*log(ld)
     }
   }
   if (method=="GCV.Cp") { 
     object <- list()
     object$coefficients <- fit$b
     object$edf <- post$edf
     object$edf1 <- post$edf1
     ##object$F <- post$F
     object$full.sp <- fit$sp.full
     object$gcv.ubre <- fit$score
     object$hat <- post$hat
     object$mgcv.conv <- fit$gcv.info 
     object$optimizer="magic"
     object$rank <- fit$gcv.info$rank
     object$Ve <- post$Ve
     object$Vp <- post$Vb
     object$sig2 <- object$scale <- fit$scale
     object$sp <- fit$sp
     class(object)<-c("gam")
   } else {
    
   }
   G$smooth <- G$X <- NULL
   object$prior.weights <- G$w
   object$AR1.rho <- rho
   if (rho!=0) { ## need to store last model matrix row, to allow update
     object$yX.last <- yX.last
   }
  
   object$R <- qrx$R
   object$gamma <- gamma;object$G <- G;object$qrx <- qrx ## to allow updating of the model
   object$y <- mf[[gp$response]]
   object$iter <- 1
   object
} # end of bam.fit

predict.bamd <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,exclude=NULL,
                        block.size=50000,newdata.guaranteed=FALSE,na.action=na.pass,n.threads=1,gc.level=0,...) {
## function for prediction from a bam object, by discrete methods
## remove some un-needed stuff from object
  object$Sl <- object$qrx <- object$R <- object$F <- object$Ve <-
  object$Vc <- object$G <- object$residuals <- object$fitted.values <-
  object$linear.predictors <- NULL
  if (gc.level>0) gc()
  if (missing(newdata)) newdata <- object$model
   
  convert2mf <- is.null(attr(newdata,"terms"))

  if (type=="iterms") {
    type <- "terms"
    warning("iterms reset to terms")
  }

  lpi <- attr(object$formula,"lpi") ## lpi[[i]] indexes coefs for ith linear predoctor
  nlp <- if (is.null(lpi)) 1 else length(lpi) ## number of linear predictors
  lpid <- if (nlp>1) object$dinfo$lpid else NULL ## index of discrete terms involved in each linear predictor
 
  ## newdata has to be processed first to avoid, e.g. dropping different subsets of data
  ## for parametric and smooth components....

  newterms <- attr(newdata,"terms") ## non NULL for model frame

  newd <- predict.gam(object,newdata=newdata,type="newdata",se.fit=se.fit,terms=terms,exclude=exclude,
            block.size=block.size,newdata.guaranteed=newdata.guaranteed,
            na.action=na.action,...) 
  if (nrow(newd)>0) {
    newdata <- newd;rm(newd)
  } else { ## no non NA data, might as well call predict.gam
    return(predict.gam(object,newdata=newdata,type=type,se.fit=se.fit,terms=terms,exclude=exclude,
            block.size=block.size,newdata.guaranteed=newdata.guaranteed,
            na.action=na.action,...))
  }

  ## Next line needed to avoid treating newdata as a model frame if it was supplied not as a model frame.
  ## Otherwise names of e.g. offset are messed up (as they will also be if it was supplied as a model frame
  ## or was set to object$model and we set terms to NULL)
  
  if (is.null(newterms)) attr(newdata,"terms") <- NULL
  
  na.act <- attr(newdata,"na.action") ## save the NA action for later


  ## Parametric terms have to be dealt with safely, but without forming all terms 
  ## or a full model matrix. Strategy here is to use predict.gam, having removed
  ## key smooth related components from model object, so that it appears to be
  ## a parametric model... 
  offset <- if (nlp>1) as.list(rep(0,nlp)) else 0
  para.discrete <- !is.null(object$dinfo$para.discrete)
  if (!para.discrete&&(any(object$nsdf)||any(object$offset!=0))) { ## deal with parametric terms...
    ## save copies of smooth info...
    .Deprecated(package="mgcv",msg="prediction from discrete bam models prior to 1.8-32 is deprecated, please refit")
    smooth <- object$smooth; coef <- object$coefficients; Vp <- object$Vp
    ## remove key smooth info from object
    ## first identify coefficients (indexed by ii) to retain, and modify pstart
    ## attribute so that it's pointing to retained coefficient array.
    if (length(object$nsdf)>1) {
      pstart <- attr(object$nsdf,"pstart")
      ps <- pstart * 0
      ii <- rep(0,0); k <- 1
      for (i in 1:length(object$nsdf)) if (object$nsdf[i]) {
        ps[i] <- k;k <- k + object$nsdf[i]
        ii <- c(ii,1:object$nsdf[i]+pstart[i]-1)
      }
      attr(object$nsdf,"pstart") <- ps ## make compatible with stripping out smooths
    } else { ii <- 1:object$nsdf; pstart <- 1; ps <- 1}
    object$coefficients <-  object$coefficients[ii]
    object$Vp <- object$Vp[ii,ii]
    object$smooth <- NULL
    ## get prediction for parametric component. Always "lpmatrix", unless terms required.
    ptype <- if (type %in% c("terms","iterms")) type else "lpmatrix"
    pterms <- if (is.null(terms)) terms else terms[terms %in% row.names(attr(object$pterms,"factors"))]
    pexclude <- if (is.null(exclude)) exclude else exclude[exclude %in% row.names(attr(object$pterms,"factors"))]
    pp <- predict.gam(object,newdata=newdata,type=ptype,se.fit=se.fit,terms=pterms,exclude=pexclude,
            block.size=block.size,newdata.guaranteed=TRUE,
            na.action=na.action,...)  
    ## restore smooths to 'object'
    object$coefficients <- coef
    object$Vp <- Vp
    object$smooth <- smooth
    if (length(object$nsdf)) pstart -> attr(object$nsdf,"pstart") ## restore pstart
    if (ptype=="lpmatrix") {
      offset <- attr(pp,"model.offset")
      if (is.null(offset)) offset <- if (nlp==1) 0 else as.list(rep(0,nlp))
    }
  } else {
    pp <- if (se.fit) list(fit=rep(0,0),se.fit=rep(0,0)) else rep(0,0) ## note: needed in terms branch below
  } ## parametric component dealt with in non-discrete case

  ## now discretize covariates...
  if (convert2mf) newdata <- model.frame(object$dinfo$gp$fake.formula[-2],newdata)
  dk <- if (para.discrete) discrete.mf(object$dinfo$gp,mf=newdata,names.pmf=object$dinfo$pmf.names,full=TRUE) else
        discrete.mf0(object$dinfo$gp,mf=newdata,names.pmf=NULL,full=FALSE)
  Xd <- list() ### list of discrete model matrices...
  n <- if (is.matrix(newdata[[1]])) nrow(newdata[[1]]) else length(newdata[[1]])
  kb <- k <- 1;kd <- dk$k
  ks <- matrix(0,0,2) ## NOTE: slightly more efficient not to repeatedly extend
  if (para.discrete) { 
    for (j in 1:nlp) { ## loop over parametric components of linear predictors
      ## get discretized marginal matrices for the parametric model (if contrast warnings see predict.gam fix)
      ptens <- if (nlp==1&&!is.list(object$pterms)) terms2tensor(object$pterms,dk$mf,object$contrasts) else
	             terms2tensor(object$pterms[[j]],dk$mf,object$contrasts)
      offi <- if (nlp==1&&!is.list(object$pterms)) attr(object$pterms,"offset") else attr(object$pterms[[j]],"offset")
      if (!is.null(offi)) {
        #offname <- if (nlp==1&&!is.list(object$pterms)) attr(object$pterms,"factor") else attr(object$pterms[[j]],"factor")
	#offname <- rownames(offname)[offi]
        offname <- if (nlp==1&&!is.list(object$pterms)) names(attr(object$pterms,"dataClasses"))[offi] else
	                                                names(attr(object$pterms[[j]],"dataClasses"))[offi]
        
	if (j==1&&nlp>1) offset <- list()
	if (nlp>1) offset[[j]] <- newdata[[offname]] else  offset <- newdata[[offname]]
      }
      if (!is.null(ptens)) {
        np <-length(ptens$X);
        for (i in 1:np) {
	  ks <-  rbind(ks,dk$ks[ptens$xname[i],])
	  Xd[[k]] <- ptens$X[[i]][1:dk$nr[ptens$xname[i]],,drop=FALSE]
	  k <- k + 1
        }	
      }
      kb <- kb + length(ptens$ts)
    } ## parametric lp loop
  } ## if (para.discrete)

  if (!para.discrete && any(object$nsdf>0)) for (i in 1:length(object$nsdf)) if (object$nsdf[i]>0) {
     Xd[[k]] <- if (type%in%c("terms","iterms")) matrix(0,0,0) else pp[,ps[k]+1:object$nsdf[k]-1,drop=FALSE] 
     kd <- cbind(1:nrow(newdata),kd) ## add index for parametric part to index list
     k <- k + 1; 
     dk$k.start <- c(1,dk$k.start+1) ## and adjust k.start accordingly
     dk$nr <- c(NA,dk$nr) ## need array index to match elements of Xd
  } 
  
  ts <- object$dinfo$ts
  dt <- object$dinfo$dt

  ## remove padding from the discretized data...
  class(dk$mf) <- "list" 
  for (i in 1:length(dk$mf)) dk$mf[[i]] <- if (is.matrix(dk$mf[[i]])) dk$mf[[i]][1:dk$nr[i],] else dk$mf[[i]][1:dk$nr[i]]

  if (length(object$smooth)) for (i in 1:length(object$smooth)) { ## work through the smooth list
    ## potentially each smoother model matrix can be made up of a sequence
    ## of row-tensor products, nead to loop over such sub blocks...
    nsub <- if (!is.null(object$smooth[[i]]$ts)) length(object$smooth[[i]]$ts) else 1
    ## first deal with any by variable (as first marginal of tensor)...
    for (sb in 1:nsub) { ## loop over sub-blocks
      if (object$smooth[[i]]$by!="NA") {
        termk <- object$smooth[[i]]$by
        by.var <- dk$mf[[object$smooth[[i]]$by]][1:dk$nr[termk]]
        if (is.factor(by.var)) { 
          ## create dummy by variable...
          by.var <- as.numeric(by.var==object$smooth[[i]]$by.level)  
        }
        Xd[[k]] <- matrix(by.var,dk$nr[termk],1)
        ks <- rbind(ks,dk$ks[termk,])
        k <- k + 1
        by.present <- 1
      } else by.present <- 0
      ## ... by done
      if (inherits(object$smooth[[i]],"tensor.smooth")) { 
        nmar <- if (is.null(object$smooth[[i]]$dt)) length(object$smooth[[i]]$margin) else object$smooth[[i]]$dt[sb]
        XP <- object$smooth[[i]]$XP
	jind <- if (sb>1) object$smooth[[i]]$ts[sb] + 1:object$smooth[[i]]$dt[sb] - 1 else 1:nmar       
        for (j in jind) {
          object$smooth[[i]]$margin[[j]]$by<- "NA" ## should be no by's here (any by dealt with above)
	  termk <- object$smooth[[i]]$margin[[j]]$term[1]
          Xd[[k]] <-if (termk=="(Intercept)") matrix(1,dk$nr[termk],1) else PredictMat(object$smooth[[i]]$margin[[j]],dk$mf,n=dk$nr[termk])
	  ks <- rbind(ks,dk$ks[termk,])
          if (!is.null(XP)&&(j<=length(XP))&&!is.null(XP[[j]])) Xd[[k]] <- Xd[[k]]%*%XP[[j]]
          k <- k + 1 
        }
      } else { ## not a tensor smooth
        object$smooth[[i]]$by <- "NA" ## have to ensure by not applied here (it's dealt with as a tensor marginal)!
        termk <- object$smooth[[i]]$term[1]
        ks <- rbind(ks,dk$ks[termk,])
        Xd[[k]] <- PredictMat(object$smooth[[i]],dk$mf,n=dk$nr[termk])
        k <- k + 1
      }
      kb <- kb + 1
    } ## sub block loop  
  }

  attr(Xd,"lpip") <- object$dinfo$lpip ## list of coef indices for each term

  ## end of discrete set up
  se <- se.fit
  if (type=="terms") {
    if (para.discrete) {
      term.lab <- names(object$dinfo$ts) ## names of all terms
      termi <- rep(TRUE,length(term.lab)) ## do we want term included?
      termi[term.lab=="(Intercept)"] <- FALSE ## not if it's intercept
      if (!is.null(terms)) termi <- termi & term.lab %in% terms ## and only if it's in terms
      if (!is.null(exclude)) termi <- termi & !(term.lab %in% exclude) ## and not in exclude
      uterms <- unique(term.lab[termi])
      nterms <- length(uterms) ## sum(termi) 
      fit <- matrix(0,nrow(kd),nterms)
      if (se) se.fit <- matrix(0,nrow(kd),nterms)
      for (k in 1:nterms) {
        lt <- which(term.lab%in%uterms[k]) ## discrete terms involved in this smooth (can be more than one)
        if (termi[lt]) {
          fit[,k] <- Xbd(Xd,object$coefficients,kd,ks,                          
                      ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,lt=lt)
          if (se) se.fit[,k] <- diagXVXd(Xd,object$Vp,kd,ks,                
                       ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,nthreads=n.threads,lt=lt,rt=lt)^.5
        }	
      }
      colnames(fit) <- uterms
      if (se) {
        colnames(se.fit) <- uterms
	fit <- list(fit=fit,se.fit=se.fit)
      }
    } else { ## legacy code for prediction from old objects
      term.lab <- unlist(lapply(object$smooth,function(x) x$label))
      termi <- rep(TRUE,length(object$smooth))
      if (!is.null(terms)) termi <- termi & term.lab %in% terms
      if (!is.null(exclude)) termi <- termi & !(term.lab %in% exclude)
      if (any(object$nsdf>0)) {
        if (se) {
          fit <- cbind(pp$fit,matrix(0,nrow(kd),sum(termi)))
          se.fit <- cbind(pp$se.fit,matrix(0,nrow(kd),sum(termi))) 
        } else fit <- cbind(pp,matrix(0,nrow(kd),sum(termi)))
        #k <- 2; ## starting Xd
        kk <- ncol(fit) - sum(termi) + 1 ## starting col of fit for smooth terms
      } else {
        if (se) {
          fit <- matrix(0,nrow(kd),sum(termi))
          se.fit <- matrix(0,nrow(kd),sum(termi)) 
        } else fit <- matrix(0,nrow(kd),sum(termi))
        #k <- 1; ## starting Xd
        kk <- 1 ## starting col of fit for smooth terms
      }
      k <- min(which(!is.na(dk$nr))) ## starting Xd
      n.smooth <- length(object$smooth)
      if (n.smooth) for (i in 1:n.smooth) {
        ilab <- object$smooth[[i]]$label
        if (termi[i]) {
          ii <- ts[k]:(ts[k]+dt[k]-1) ## index components for this term
          ind <- object$smooth[[i]]$first.para:object$smooth[[i]]$last.para ## index coefs for this term
          if (!is.null(object$dinfo$drop)) { 
            drop <- object$dinfo$drop-object$smooth[[i]]$first.para+1
            drop <- drop[drop<=length(ii)]
          } else drop <- NULL
          fit[,kk] <- Xbd(Xd[ii],object$coefficients[ind],kd,ks[ii,],                           ##kd[,ii,drop=FALSE]
                      1,dt[k],object$dinfo$v[k],object$dinfo$qc[k],drop=drop)
          if (se) se.fit[,kk] <- diagXVXd(Xd[ii],object$Vp[ind,ind,drop=FALSE],kd,ks[ii,],                 #kd[,ii,drop=FALSE],
                       1,dt[k],object$dinfo$v[k],object$dinfo$qc[k],drop=drop,nthreads=n.threads)^.5
          kk <- kk + 1
        }
        k <-  k + 1;
      } 
      fit.names <- c(if (se) colnames(pp$fit) else colnames(pp), term.lab[termi])
      colnames(fit) <- fit.names
      if (se) { 
        colnames(se.fit) <- fit.names
        fit <- list(fit=fit,se.fit=se.fit)
      }
    } ## !para.discrete branch  
  } else if (type=="lpmatrix") {
    if (para.discrete) {
      lt <- 1:length(ts);names(lt) <- names(ts)
      if (!is.null(terms)) lt <- lt[names(lt)%in%terms]
      if (!is.null(exclude)) lt <- lt[!names(lt)%in%exclude]
    } else lt <- NULL 
    fit <- Xbd(Xd,diag(length(object$coefficients)),kd,ks,ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,lt=lt)
    if (nlp>1) attr(fit,"lpi") <- lpi
  } else { ## link or response 
    if (is.null(object$family$predict)||type=="link") {
      lt <- 1:length(ts);names(lt) <- names(ts)
      if (nlp>1) {
        fit <- matrix(0,nrow(kd),nlp)
	if (se) se.fit <- matrix(0,nrow(kd),nlp)
        for (i in 1:nlp) {
          if (para.discrete) {
            lt <- 1:length(ts);names(lt) <- names(ts)
	    lt <- lt[lpid[[i]]]
            if (!is.null(terms)) lt <- lt[names(lt)%in%terms]
            if (!is.null(exclude)) lt <- lt[!names(lt)%in%exclude]
          } else lt <- lpid[[i]]
          fit[,i] <- Xbd(Xd,object$coefficients,kd,ks,ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,lt=lt) + offset[[i]]
	  if (se) se.fit[,i] <- diagXVXd(Xd,object$Vp,kd,ks,ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,
	                                        lt=lt,rt=lt,nthreads=n.threads)^.5
	}  
      } else {
        if (para.discrete) {
          lt <- 1:length(ts);names(lt) <- names(ts)
          if (!is.null(terms)) lt <- lt[names(lt)%in%terms]
          if (!is.null(exclude)) lt <- lt[!names(lt)%in%exclude]
        } else lt <- NULL 
        fit <- Xbd(Xd,object$coefficients,kd,ks,ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,lt=lt) + offset
	if (se) se.fit <- diagXVXd(Xd,object$Vp,kd,ks,ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,lt=lt,rt=lt,nthreads=n.threads)^.5
      }
      if (type=="response") {
        linkinv <- object$family$linkinv
        dmu.deta <- object$family$mu.eta
      } else linkinv <- dmu.deta <- NULL
      if (se==TRUE) {
#        if (nlp>1) {
#          se.fit <- matrix(0,nrow(kd),nlp)
#          for (i in 1:nlp) se.fit[,i] <- diagXVXd(Xd,object$Vp,kd,ks,ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,
#	                                        lt=lpid[[i]],rt=lpid[[i]],nthreads=n.threads)^.5
#        } else se.fit <- diagXVXd(Xd,object$Vp,kd,ks,ts,dt,object$dinfo$v,object$dinfo$qc,drop=object$dinfo$drop,nthreads=n.threads)^.5
        if (type=="response") {
	  if (nlp>1) for (i in 1:nlp) {
	    se.fit[,i] <- se.fit[,i] * abs(object$family$linfo[[i]]$mu.eta[fit[,i]])
	    fit[,i] <- object$family$linfo[[i]]$linkinv[fit[,i]]
          } else {
            se.fit <- se.fit * abs(object$family$mu.eta(fit))
            fit <- object$family$linkinv(fit)
	  }  
        }
        fit <- list(fit=fit,se.fit=se.fit)
      } else if (type=="response") fit <- object$family$linkinv(fit)
    } else { ## family has its own response fitting code
      X <- list(Xd=Xd,kd=kd,ks=ks,ts=ts,dt=dt,v=object$dinfo$v,qc=object$dinfo$qc,drop=object$dinfo$drop,lpid=lpid)
      if (nlp>1) attr(X,"lpi") <- lpi
      ## NOTE: not set up for families needing response for prediction (e.g. cox.ph)
      fampred <- object$family$predict ## just eases debugging
      ffv <- fampred(object$family,se=se,y=NULL,X=X,beta=object$coefficients,
                             off=offset,Vb=object$Vp)  ## NOTE: offsets not handled
      fit <- ffv[[1]]
      if (se) fit <- list(fit=fit,se.fit =ffv[[2]])
    }
  }
  rn <- rownames(newdata)
  if (type=="lpmatrix") {
    colnames(fit) <- names(object$coefficients)
    rownames(fit) <- rn
    attr(fit,"model.offset") <- offset
    fit <- napredict(na.act,fit)
  } else {
     if (se) { 
      if (is.null(nrow(fit$fit))) {
        names(fit$fit) <- rn
        names(fit$se.fit) <- rn
        fit$fit <- napredict(na.act,fit$fit)
        fit$se.fit <- napredict(na.act,fit$se.fit) 
      } else { 
        rownames(fit$fit) <- rn
        rownames(fit$se.fit) <- rn
        fit$fit <- napredict(na.act,fit$fit)
        fit$se.fit <- napredict(na.act,fit$se.fit)
      }
    } else { 
      if (is.null(nrow(fit))) names(fit) <- rn else
      rownames(fit) <- rn
      fit <- napredict(na.act,fit)
    }
  }
  fit
} ## end predict.bamd 



tero <- function(sm) {
## te smooth spec re-order so that largest marginal is last.
  maxd <- 0
  ns <- length(sm$margin)
  for (i in 1:ns) if (sm$margin[[i]]$bs.dim>=maxd) {
    maxi <- i;maxd <- sm$margin[[i]]$bs.dim
  }
  if (maxi<ns) { ## re-ordering required
    ind <- 1:ns;ind[maxi] <- ns;ind[ns] <- maxi
    sm$margin <- sm$margin[ind]
    sm$fix <- sm$fix[ind]
    if (!is.null(sm$mc)) sm$mc <- sm$mc[ind]
    sm$term <- rep("",0)
    for (i in 1:ns) sm$term <- c(sm$term,sm$margin[[i]]$term)
    sm$label <- paste0(substr(sm$label,1,3),paste0(sm$term,collapse=","),")",collapse="")
  }
  sm
} ## tero

AR.resid <- function(rsd,rho=0,AR.start=NULL) {
## standardised residuals for AR1 model
  if (rho==0) return(rsd)
  ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
  sd <- -rho*ld         ## sub diagonal
  N <- length(rsd)    
  ## see rwMatrix() for how following are used...
  ar.row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)]) ## index of rows to reweight
  ar.weight <- c(1,rep(c(sd,ld),N-1))     ## row weights
  ar.stop <- c(1,1:(N-1)*2+1)    ## (stop[i-1]+1):stop[i] are the rows to reweight to get ith row
  if (!is.null(AR.start)) { ## need to correct the start of new AR sections...
    ii <- which(AR.start==TRUE)
    if (length(ii)>0) {
          if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
          ar.weight[ii*2-2] <- 0 ## zero sub diagonal
          ar.weight[ii*2-1] <- 1 ## set leading diagonal to 1
    }
  }
  rwMatrix(ar.stop,ar.row,ar.weight,rsd)
} ## AR.resid

tens2matrix <- function(X,ts,dt) {
## converts result of terms2tensor to a full model matrix. Should
## match direct model.matrix call on the original terms object.
## Purely for checking purposes (insufficiently efficient for
## general use)
  Xt <- matrix(0,nrow(X[[1]]),0)
  for (i in 1:length(ts)) {
    ind <- ts[i]:(ts[i]+dt[i]-1)
    Xt <- if (dt[i]==1) cbind(Xt,X[[ind]]) else cbind(Xt,tensor.prod.model.matrix(X[ind]))
  }
  Xt
} ## tens2matrix


terms2tensor <- function(terms,data=NULL,contrasts.arg=NULL,drop.intercept=FALSE,identify=TRUE,sparse=FALSE) {
## takes a terms object or formula and converts it into a sequence of marginal model matrices, X
## and associated indices ts and dt, using the data in 'data'. drops the intercept if needed.
## If 'identify' then identifiability constraints/contrasts imposed, otherwise not. 
## The ith block of the model matrix is the row tensor product
## of the dt[i] elements of X starting at the ts[i].
## i.e. tensor.prod.model.matrix(X[ts[i]:(ts[i]+dt[i]-1)])
## xname[i] is the name of the variable in data used to create the marginal matrix X[[i]], with
## `(Intercept)' returned for the intercept term
## If data=NULL does a dummy run, not returning X or p
  if (!inherits(terms,"formula")) stop("requires a terms or formula object as first argument")
  if (!inherits(terms,"terms")) terms <- terms(terms)
  fac <- attr(terms,"factors")
  intercept <- attr(terms,"intercept")==1 ## is there an intercept?
  nt <- if (is.matrix(fac)) ncol(fac) else 0
  nt <- nt + intercept - drop.intercept ## number of terms.
  if (nt==0) return(NULL)
  p <- ts <- dt <- rep(1,nt)
  X <- list() ## marginal model matrix list
  form <- list() ## marginal formulae list
  k <- 1
  xname <- rep("",nt)
  dummy <- is.null(data)
  if (!dummy) n <- if (is.matrix(data[[1]])) nrow(data[[1]]) else length(data[[1]])
  if (intercept && !drop.intercept) {
    ts[k] <- 1;dt[k] <- 1
    if (!dummy) X[[k]] <- matrix(1,n,1)
    xname[k] <- "(Intercept)"
    form[[k]] <- ~1
    environment(form[[k]]) <- NULL 
    k <- k + 1
    term.labels <- c("(Intercept)",attr(terms,"term.labels"))
  } else term.labels <- attr(terms,"term.labels")
  varn <- rownames(fac)
  ord <- attr(terms,"order")
  no.int <- !intercept ## indicator of whether missing intercept still unhandled
  if (drop.intercept) intercept <- 0;
  if (nt-intercept>0) for (i in 1:(nt-intercept)) { ## work through the terms
    ts[i+intercept] <- k;dt[i+intercept] <- ord[i]
    if (ord[i]==1) { ## special case due to odd intercept handling
      vn <- varn[as.logical(fac[,i])]
      if (no.int||!identify) {
        fm <- as.formula(paste("~",vn,"-1"))
        if (!dummy) X[[k]] <- if (sparse) sparse.model.matrix(fm,data,contrasts.arg) else model.matrix(fm,data,contrasts.arg)
        no.int <- FALSE
      } else {
        fm <- as.formula(paste("~",vn))
        if (!dummy) X[[k]] <- if (sparse) sparse.model.matrix(fm,data,contrasts.arg)[,-1,drop=FALSE] else model.matrix(fm,data,contrasts.arg)[,-1,drop=FALSE]
      }
      xname[k] <- if (!dummy && vn %in% names(data)) vn else all.vars(fm)
      form[[k]] <- fm; environment(form[[k]]) <- NULL
      if (!dummy) p[i+intercept] = ncol(X[[k]])
      k <- k + 1
    } else { ## interaction term
      m <- which(fac[,i]!=0) ## marginal terms involved
      for (j in ord[i]:1) { ## reverse ordering is to conform with model.matrix(terms,data) called directly
        vn <- varn[m[j]]
        if (fac[m[j],i]==2||!identify) { ## no contrast
	  fm <- as.formula(paste("~",vn,"-1"))
	  if (!dummy) X[[k]] <- if (sparse) sparse.model.matrix(fm,data,contrasts.arg) else model.matrix(fm,data,contrasts.arg)
	} else { ## with contrast
          fm <- as.formula(paste("~",vn))
	  if (!dummy) X[[k]] <- if (sparse) sparse.model.matrix(fm,data,contrasts.arg)[,-1,drop=FALSE] else model.matrix(fm,data,contrasts.arg)[,-1,drop=FALSE]
        }
	xname[k] <- if (!dummy && vn %in% names(data)) vn else all.vars(fm)
	form[[k]] <- fm; environment(form[[k]]) <- NULL
	if (!dummy) p[i+intercept] <- p[i+intercept] * ncol(X[[k]])
	k <- k + 1
      } ## j marginal loop end
    } ## finished interaction term
  } ## i term loop end
  list(X=X, ## list of marginal model matrices
       ts=ts, ## ts[i] is starting marginal matrix for ith term 
       dt=dt, ## dt[i] is number of marginal matrices for ith term
       xname=xname, ## xname[k] is name of covariate associated with X[[k]]
       form=form, ## form[[k]] is formula used to produce X[[k]]
       term.labels=term.labels, ## term.labels[i] is label of ith term
       p=p) ## p[i] is total coef count for ith term  
} # terms2tensor


bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action=na.omit,
                offset=NULL,method="fREML",control=list(),select=FALSE,scale=0,gamma=1,knots=NULL,sp=NULL,
                min.sp=NULL,paraPen=NULL,chunk.size=10000,rho=0,AR.start=NULL,discrete=FALSE,
                cluster=NULL,nthreads=1,gc.level=0,use.chol=FALSE,samfrac=1,coef=NULL,
                drop.unused.levels=TRUE,G=NULL,fit=TRUE,drop.intercept=NULL,...)

## Routine to fit an additive model to a large dataset. The model is stated in the formula, 
## which is then interpreted to figure out which bits relate to smooth terms and which to 
## parametric terms.
## This is a modification of `gam' designed to build the QR decomposition of the model matrix 
## up in chunks, to keep memory costs down.
## If cluster is a parallel package cluster uses parallel QR build on cluster. 
## 'n.threads' is number of threads to use for non-cluster computation (e.g. combining 
## results from cluster nodes). If 'NA' then is set to max(1,length(cluster)).
{ control <- do.call("gam.control",control)
  if (control$trace) t3 <- t2 <- t1 <- t0 <- proc.time()
  if (length(nthreads)==1) nthreads <- rep(nthreads,2)
  if (is.null(G)) { ## need to set up model!
    if (is.character(family))
            family <- eval(parse(text = family))
    if (is.function(family))
            family <- family()
    if (is.null(family$family))
            stop("family not recognized")
      
    if (family$family=="gaussian"&&family$link=="identity") am <- TRUE else am <- FALSE
    if (scale==0) { if (family$family%in%c("poisson","binomial")) scale <- 1 else scale <- -1} 
    if (!method%in%c("fREML","GACV.Cp","GCV.Cp","REML",
                    "ML","P-REML","P-ML")) stop("un-supported smoothness selection method")
    if (is.logical(discrete)) {
      discretize <- discrete
      discrete <- NULL ## use default discretization, if any
    } else {
      discretize <- if (is.numeric(discrete)) TRUE else FALSE
    }
    if (discretize) { 
      if (method!="fREML") { 
        discretize <- FALSE
        warning("discretization only available with fREML")
      } else {
        if (!is.null(cluster)) warning("discrete method does not use parallel cluster - use nthreads instead")
	if (all(is.finite(nthreads)) && any(nthreads>1) && !mgcv.omp()) warning("openMP not available: single threaded computation only")
      }
    }  
    if (inherits(family,"extended.family")) {
      family <- fix.family.link(family); efam <- TRUE
    } else efam <- FALSE
    
    if (method%in%c("fREML")&&!is.null(min.sp)) {
      min.sp <- NULL
      warning("min.sp not supported with fast REML computation, and ignored.")
    }
   
    gp <- interpret.gam(formula) # interpret the formula
    if (discretize && length(gp$smooth.spec)==0) {
      ok <- TRUE
      ## check it's not a list formula
      if (!is.null(gp$nlp)) for (i in 1:gp$nlp) if (length(gp[[i]]$smooth.spec)>0) ok <- FALSE
      if (ok) {
        warning("no smooths, ignoring `discrete=TRUE'")
        discretize <- FALSE
      }	
    }
    if (discretize) { 
      ## re-order the tensor terms for maximum efficiency, and 
      ## signal that "re"/"fs" terms should be constructed with marginals
      ## also for efficiency
      
      if (is.null(gp$nlp)) for (i in 1:length(gp$smooth.spec)) { 
        if (inherits(gp$smooth.spec[[i]],"tensor.smooth.spec")) gp$smooth.spec[[i]] <- tero(gp$smooth.spec[[i]])
        #if (inherits(gp$smooth.spec[[i]],c("re.smooth.spec","fs.smooth.spec"))&&gp$smooth.spec[[i]]$dim>1)
	 if (!is.null(gp$smooth.spec[[i]]$tensor.possible)&&gp$smooth.spec[[i]]$dim>1){
          class(gp$smooth.spec[[i]]) <- c(class(gp$smooth.spec[[i]]),"tensor.smooth.spec")
	  if (is.null(gp$smooth.spec[[i]]$margin)) {
            gp$smooth.spec[[i]]$margin <- list()
            ## only ok for 'fs' with univariate metric variable (caught in 'fs' construcor)...
            for (j in 1:gp$smooth.spec[[i]]$dim) gp$smooth.spec[[i]]$margin[[j]] <- list(term=gp$smooth.spec[[i]]$term[j])
	  }  
        }
      } else for (j in 1:length(formula)) if (length(gp[[j]]$smooth.spec)>0) for (i in 1:length(gp[[j]]$smooth.spec)) {
        if (inherits(gp[[j]]$smooth.spec[[i]],"tensor.smooth.spec")) gp[[j]]$smooth.spec[[i]] <- tero(gp[[j]]$smooth.spec[[i]])
        #if (inherits(gp[[j]]$smooth.spec[[i]],c("re.smooth.spec","fs.smooth.spec"))&&gp[[j]]$smooth.spec[[i]]$dim>1)
	if (!is.null(gp[[j]]$smooth.spec[[i]]$tensor.possible)&&gp[[j]]$smooth.spec[[i]]$dim>1) {
          class(gp[[j]]$smooth.spec[[i]]) <- c(class(gp[[j]]$smooth.spec[[i]]),"tensor.smooth.spec")
          if (is.null(gp[[j]]$smooth.spec[[i]]$margin)) {
	    gp[[j]]$smooth.spec[[i]]$margin <- list()
            ## only ok for 'fs' with univariate metric variable (caught in 'fs' construcor)...
            for (k in 1:gp[[j]]$smooth.spec[[i]]$dim) gp[[j]]$smooth.spec[[i]]$margin[[k]] <- list(term=gp[[j]]$smooth.spec[[i]]$term[k])
          }
        }
      }
    } ## if (discretize)
    cl <- match.call() # call needed in gam object for update to work
    mf <- match.call(expand.dots=FALSE)
    mf$formula <- gp$fake.formula 
    mf$method <-  mf$family<-mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$min.sp <- mf$gc.level <-
    mf$gamma <- mf$paraPen<- mf$chunk.size <- mf$rho  <- mf$cluster <- mf$discrete <-
    mf$use.chol <- mf$samfrac <- mf$nthreads <- mf$G <- mf$fit <- mf$select <- mf$drop.intercept <-
    mf$coef <- mf$...<-NULL
    mf$drop.unused.levels <- drop.unused.levels
    mf[[1]] <- quote(stats::model.frame) ## as.name("model.frame")

    if (is.list(formula)) { ## then there are several linear predictors
      environment(formula) <- environment(formula[[1]]) ## e.g. termplots needs this
      pterms <- list()
      tlab <- rep("",0)
      pmf.names <- rep("",0)
      for (i in 1:length(formula)) {
        pmf <- mf
        pmf$formula <- gp[[i]]$pf
	pmf <- eval(pmf, parent.frame())
	pmf.names <- c(pmf.names,names(pmf))
        pterms[[i]] <- attr(pmf,"terms")
        tlabi <- attr(pterms[[i]],"term.labels")
        if (i>1&&length(tlabi)>0) tlabi <- paste(tlabi,i-1,sep=".")
        tlab <- c(tlab,tlabi)
      }
      pmf.names <- unique(pmf.names)
      attr(pterms,"term.labels") <- tlab ## labels for all parametric terms, distinguished by predictor
      nlp <- gp$nlp
      lpid <- list() ## list of terms for each lp
      lpid[[nlp]] <- rep(0,0)
    } else { ## single linear predictor case
      nlp <- 1
      pmf <- mf
      pmf$formula <- gp$pf
      pmf <- eval(pmf, parent.frame()) # pmf contains all data for parametric part
      pterms <- attr(pmf,"terms") ## pmf only used for this and discretization, if selected.
      pmf.names <- names(pmf)
    }
    
    if (gc.level>0) gc()

    mf <- eval(mf, parent.frame()) # the model frame now contains all the data

    if (nrow(mf)<2) stop("Not enough (non-NA) data to do anything meaningful")
    terms <- attr(mf,"terms")
    if (gc.level>0) gc()  
    if (rho!=0&&!is.null(mf$"(AR.start)")) if (!is.logical(mf$"(AR.start)")) stop("AR.start must be logical")
    
    ## summarize the *raw* input variables
    ## note can't use get_all_vars here -- buggy with matrices
    vars <- all.vars1(gp$fake.formula[-2]) ## drop response here
    inp <- parse(text = paste("list(", paste(vars, collapse = ","),")"))

    ## allow a bit of extra flexibility in what `data' is allowed to be (as model.frame actually does)
    if (!is.list(data)&&!is.data.frame(data)) data <- as.data.frame(data) 

    dl <- eval(inp, data, parent.frame())
    if (!control$keepData) { rm(data);if (gc.level>0) gc()} ## save space
    names(dl) <- vars ## list of all variables needed
    var.summary <- variable.summary(gp$pf,dl,nrow(mf)) ## summarize the input data
    rm(dl); if (gc.level>0) gc() ## save space    

    ## should we force the intercept to be dropped, meaning that the constant is removed
    ## from the span of the parametric effects?
    if (is.null(family$drop.intercept)) { ## family does not provide information
      lengthf <- if (is.list(formula)) length(formula) else 1
      if (is.null(drop.intercept)) drop.intercept <- rep(FALSE,lengthf) else {
        drop.intercept <- rep(drop.intercept,length=lengthf) ## force drop.intercept to correct length
	if (sum(drop.intercept)) family$drop.intercept <- drop.intercept ## ensure prediction works
      }
    } else drop.intercept <- as.logical(family$drop.intercept) ## family overrides argument
 

    ## need mini.mf for basis setup, then accumulate full X, y, w and offset
    if (discretize) {
      ## discretize the data, creating list mf0 with discrete values
      ## and indices giving the discretized value for each element of model frame.
      ## 'discrete' can be null, or contain a discretization size, or
      ## a discretization size per smooth term.   
      dk <- discrete.mf(gp,mf,pmf.names,m=discrete)
      mf0 <- dk$mf ## padded discretized model frame
      sparse.cons <- 0 ## default constraints required for tensor terms

    } else { 
      mf0 <-