R/mvgam_setup.R

#' Generic GAM setup function
#' @importFrom stats na.fail
#' @noRd
mvgam_setup <- function(
  formula,
  knots,
  family = gaussian(),
  dat = list(),
  na.action,
  drop.unused.levels = FALSE,
  maxit = 5
) {
  if (missing(knots)) {
    out <- init_gam(formula(formula), data = dat, family = family)
    attr(out, 'knots') <- NULL
  } else {
    if (!is.list(knots)) {
      stop('all "knot" arguments must be supplied as lists', call. = FALSE)
    }
    out <- init_gam(
      formula(formula),
      data = dat,
      family = family,
      knots = knots
    )
    attr(out, 'knots') <- knots
  }
  out
}
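
# Illustrative sketch only (not run): mvgam_setup() returns an unfitted 'gam'
# skeleton whose design matrix and coefficient names are used downstream.
# The data frame and smooth term below are hypothetical.
# dat <- data.frame(y = rpois(50, lambda = 5), x = runif(50))
# skeleton <- mvgam_setup(y ~ s(x, k = 5), family = poisson(), dat = dat)
# names(skeleton$coefficients)
# #> something like "(Intercept)", "s(x).1", ..., "s(x).4"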

#' Generic JAGAM setup function
#' @noRd
jagam_setup <- function(
  ss_gam,
  formula,
  data_train,
  family,
  family_char,
  knots
) {
  # Temporarily change to a Poisson-like formula if this is a cbind() Binomial
  # model, because jagam will fail if it sees the cbind() left-hand side
  if (family$family %in% c('binomial', 'beta_binomial')) {
    resp_terms <- as.character(terms(formula(formula))[[2]])
    if (any(grepl('cbind', resp_terms))) {
      resp_terms <- resp_terms[!grepl('cbind', resp_terms)]
      out_name <- resp_terms[1]
    } else {
      stop(
        'Binomial family requires the cbind() left-hand side formula syntax',
        call. = FALSE
      )
    }
    formula <- update(formula, paste(out_name, '~ .'))
    family <- poisson()
  }

  # Set file save location in tempdir
  file_name <- tempfile(pattern = 'base_gam', fileext = '.txt')
  if (length(ss_gam$smooth) == 0) {
    smooths_included <- FALSE

    # jagam will fail if there are no smooth terms, so add a fake smooth here
    # and remove it from the model and data structures later
    data_train$fakery <- rnorm(length(data_train$y))
    form_fake <- update.formula(formula, ~ . + s(fakery, k = 3))
    fakery_names <- names(
      mvgam_setup(
        formula = form_fake,
        family = family_to_mgcvfam(family),
        dat = data_train,
        drop.unused.levels = FALSE
      )$coefficients
    )
    xcols_drop <- grep('s(fakery', fakery_names, fixed = TRUE)

    if (!missing(knots)) {
      ss_jagam <- mgcv::jagam(
        form_fake,
        data = data_train,
        family = family_to_jagamfam(family_char),
        file = file_name,
        sp.prior = 'gamma',
        diagonalize = FALSE,
        knots = knots,
        drop.unused.levels = FALSE
      )
    } else {
      ss_jagam <- mgcv::jagam(
        form_fake,
        data = data_train,
        family = family_to_jagamfam(family_char),
        file = file_name,
        sp.prior = 'gamma',
        diagonalize = FALSE,
        drop.unused.levels = FALSE
      )
    }
    data_train$fakery <- NULL
  } else {
    smooths_included <- TRUE
    xcols_drop <- NULL

    # If smooth terms are included, use the original formula
    if (!missing(knots)) {
      ss_jagam <- mgcv::jagam(
        formula,
        data = data_train,
        family = family_to_jagamfam(family_char),
        file = file_name,
        sp.prior = 'gamma',
        diagonalize = FALSE,
        knots = knots,
        drop.unused.levels = FALSE
      )
    } else {
      ss_jagam <- mgcv::jagam(
        formula,
        data = data_train,
        family = family_to_jagamfam(family_char),
        file = file_name,
        sp.prior = 'gamma',
        diagonalize = FALSE,
        drop.unused.levels = FALSE
      )
    }
  }
  return(list(
    file_name = file_name,
    ss_jagam = ss_jagam,
    smooths_included = smooths_included,
    xcols_drop = xcols_drop
  ))
}
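
# Minimal sketch (not run) of the fake-smooth workaround used above: when the
# model contains no smooths, a throwaway s(fakery) term keeps mgcv::jagam()
# from failing, and its columns are later identified by name and dropped.
# form_fake <- update.formula(y ~ x, ~ . + s(fakery, k = 3))
# form_fake
# #> y ~ x + s(fakery, k = 3)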

#' @noRd
get_offset <- function(model) {
  nm1 <- names(attributes(model$terms)$dataClasses)
  if ('(offset)' %in% nm1) {
    deparse(as.list(model$call)$offset)
  } else {
    sub("offset\\((.*)\\)$", "\\1", grep('offset', nm1, value = TRUE))
  }
}
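
# Illustrative sketch only (not run): recovering the offset variable name from
# a fitted mgcv model; the data and model here are hypothetical.
# dat <- data.frame(y = rpois(50, 5), x = runif(50), exposure = runif(50, 1, 2))
# mod <- mgcv::gam(y ~ s(x) + offset(log(exposure)),
#                  data = dat, family = poisson())
# get_offset(mod)
# #> typically returns "log(exposure)"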

#' @noRd
trim_mgcv <- function(mgcv_model) {
  mgcv_model$fitted.values <- mgcv_model$residuals <- mgcv_model$linear.predictors <-
    mgcv_model$working.weights <- mgcv_model$z <- NULL

  mgcv_model
}

#' Fill in missing observations in data_train so the size of the dataset is correct when
#' building the initial JAGS model
#' @noRd
replace_nas <- function(var) {
  if (all(is.na(var))) {
    # Sampling from Uniform(0.1, 0.99) allows all of the gam models to be set
    # up, even though the Poisson / Negative Binomial families will issue
    # warnings. This is fine because we only need to produce the linear
    # predictor matrix and store the coefficient names
    var <- runif(length(var), 0.1, 0.99)
  } else {
    # If there are some non-missing observations,
    # sample from the observed values to ensure
    # distributional assumptions are met without warnings
    var[which(is.na(var))] <-
      sample(var[which(!is.na(var))], length(which(is.na(var))), replace = TRUE)
  }
  var
}
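
# Illustrative sketch only (not run) of the NA handling above:
# replace_nas(c(1, NA, 3, NA))  # NAs resampled from the observed values 1 and 3
# replace_nas(c(NA, NA, NA))    # all-missing variables get Uniform(0.1, 0.99) draws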

#' The functions below are near-verbatim copies of functions
#' originally written by Prof Simon Wood.
#' All credit goes to Prof Wood and the mgcv development team.
#' They exist in mvgam only because CRAN restricts
#' calls to internal functions from other packages

#' @noRd
rmvn <- function(n, mu, sig) {
  L <- mgcv::mroot(sig)
  m <- ncol(L)
  t(mu + L %*% matrix(rnorm(m * n), m, n))
}
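
# Illustrative sketch only (not run): draw n multivariate normal vectors with
# mean mu and covariance sig using the pivoted square root from mgcv::mroot().
# sig <- matrix(c(1, 0.5, 0.5, 1), 2, 2)
# draws <- rmvn(n = 1000, mu = c(0, 0), sig = sig)
# dim(draws)   # 1000 x 2
# cov(draws)   # approximately equal to sig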

#' @noRd
init_gam <- function(
  formula,
  family = gaussian(),
  data = list(),
  na.action = na.omit,
  knots = NULL,
  drop.unused.levels = FALSE,
  control = mgcv::gam.control(),
  centred = TRUE,
  diagonalize = FALSE,
  sp = NULL
) {
  if (is.character(family)) family <- eval(parse(text = family))
  if (is.function(family)) family <- family()
  if (is.null(family$family)) stop("family not recognized")
  gp <- mgcv::interpret.gam(formula) # interpret the formula
  cl <- match.call() # call needed in gam object for update to work
  mf <- match.call(expand.dots = FALSE)
  mf$formula <- gp$fake.formula
  mf$family <- mf$knots <- mf$sp <- mf$file <- mf$control <-
    mf$centred <- mf$sp.prior <- mf$diagonalize <- NULL
  mf$drop.unused.levels <- drop.unused.levels
  mf[[1]] <- quote(stats::model.frame) ##as.name("model.frame")
  pmf <- mf

  pmf$formula <- gp$pf
  pmf <- eval(pmf, parent.frame())
  pterms <- attr(pmf, "terms")
  rm(pmf)

  mf <- eval(mf, parent.frame())
  if (nrow(mf) < 2) stop("Not enough (non-NA) data to do anything meaningful")
  terms <- attr(mf, "terms")

  ## summarize the *raw* input variables
  ## note can't use get_all_vars here -- buggy with matrices
  vars <- all.vars(gp$fake.formula[-2]) ## drop response here
  inp <- parse(text = paste("list(", paste(vars, collapse = ","), ")"))
  if (!is.list(data) && !is.data.frame(data)) data <- as.data.frame(data)

  dl <- eval(inp, data, parent.frame())
  if (!control$keepData) {
    rm(data)
  } ## save space
  names(dl) <- vars ## list of all variables needed
  var.summary <- variable_summary(gp$pf, dl, nrow(mf)) ## summarize the input data
  rm(dl)

  G <- gam_setup(
    gp,
    pterms = pterms,
    data = mf,
    knots = knots,
    sp = sp,
    H = NULL,
    absorb.cons = centred,
    sparse.cons = FALSE,
    select = TRUE,
    idLinksBases = TRUE,
    scale.penalty = control$scalePenalty,
    diagonal.penalty = diagonalize
  )
  G$model <- mf
  G$terms <- terms
  G$family <- family
  G$call <- cl
  G$var.summary <- var.summary

  lambda <- initial_spg(
    G$X,
    G$y,
    G$w,
    family,
    G$S,
    G$rank,
    G$off,
    offset = G$offset,
    L = G$L
  )
  jags.ini <- list()
  lam <- if (is.null(G$L)) lambda else G$L %*% lambda
  #jin <- mgcv:::jini(G,lam)
  G$formula <- formula
  G$coefficients <- rep(0, length(G$term.names))
  names(G$coefficients) <- G$term.names
  G$residuals <- rnorm(NROW(G$X))
  G$edf <- rep(1, length(G$coefficients))
  names(G$edf) <- G$term.names
  G$edf1 <- rep(1, length(G$coefficients))
  names(G$edf1) <- G$term.names
  G$sig2 <- 1
  G$rank <- ncol(G$X)
  G$Vp <- G$Ve <- diag(rep(1, length(G$coefficients)))
  G$sp <- exp(G$sp)
  G$scale.estimated <- FALSE
  G$method <- 'UBRE'
  G$pred.formula <- gp$pred.formula
  class(G) <- c('gam', 'glm', 'lm')
  G$R <- model.matrix(G)
  return(G)
}

#' @importFrom mgcv gam.side smoothCon get.var Rrank interpret.gam initial.sp
#' @importFrom stats .getXlevels model.matrix model.offset na.omit
#' @importFrom methods cbind2
#' @noRd
gam_setup <- function(
  formula,
  pterms,
  data = stop("No data supplied to gam_setup"),
  knots = NULL,
  sp = NULL,
  min.sp = NULL,
  H = NULL,
  absorb.cons = TRUE,
  sparse.cons = 0,
  select = FALSE,
  idLinksBases = TRUE,
  scale.penalty = TRUE,
  paraPen = NULL,
  gamm.call = FALSE,
  drop.intercept = FALSE,
  diagonal.penalty = FALSE,
  apply.by = TRUE,
  list.call = FALSE,
  modCon = 0
) {
  if (inherits(formula, "split.gam.formula")) {
    split <- formula
  } else if (inherits(formula, "formula")) {
    split <- mgcv::interpret.gam(formula)
  } else {
    stop("First argument is no sort of formula!")
  }
  if (length(split$smooth.spec) == 0) {
    if (split$pfok == 0) stop("You've got no model....")
    m <- 0
  } else m <- length(split$smooth.spec)
  G <- list(
    m = m,
    min.sp = min.sp,
    H = H,
    pearson.extra = 0,
    dev.extra = 0,
    n.true = -1,
    pterms = pterms
  )
  if (is.null(attr(data, "terms")))
    mf <- model.frame(split$pf, data, drop.unused.levels = FALSE) else
    mf <- data
  G$intercept <- attr(attr(mf, "terms"), "intercept") > 0
  if (list.call) {
    offi <- attr(pterms, "offset")
    if (!is.null(offi)) {
      G$offset <- mf[[names(attr(pterms, "dataClasses"))[offi]]]
    }
  } else G$offset <- model.offset(mf)
  if (!is.null(G$offset)) G$offset <- as.numeric(G$offset)
  if (drop.intercept) attr(pterms, "intercept") <- 1
  X <- model.matrix(pterms, mf)
  if (drop.intercept) {
    xat <- attributes(X)
    ind <- xat$assign > 0
    X <- X[, ind, drop = FALSE]
    xat$assign <- xat$assign[ind]
    xat$dimnames[[2]] <- xat$dimnames[[2]][ind]
    xat$dim[2] <- xat$dim[2] - 1
    attributes(X) <- xat
    G$intercept <- FALSE
  }
  rownames(X) <- NULL
  G$nsdf <- ncol(X)
  G$contrasts <- attr(X, "contrasts")
  G$xlevels <- .getXlevels(pterms, mf)
  G$assign <- attr(X, "assign")
  PP <- parametric_penalty(pterms, G$assign, paraPen, sp)
  if (!is.null(PP)) {
    ind <- 1:length(PP$sp)
    if (!is.null(sp)) sp <- sp[-ind]
    if (!is.null(min.sp)) {
      PP$min.sp <- min.sp[ind]
      min.sp <- min.sp[-ind]
    }
  }
  G$smooth <- list()
  G$S <- list()
  if (gamm.call) {
    if (m > 0) for (i in 1:m) attr(split$smooth.spec[[i]], "gamm") <- TRUE
  }
  if (m > 0 && idLinksBases) {
    id.list <- list()
    for (i in 1:m)
      if (!is.null(split$smooth.spec[[i]]$id)) {
        id <- as.character(split$smooth.spec[[i]]$id)
        if (length(id.list) && id %in% names(id.list)) {
          ni <- length(id.list[[id]]$sm.i)
          id.list[[id]]$sm.i[ni + 1] <- i
          base.i <- id.list[[id]]$sm.i[1]
          split$smooth.spec[[i]] <- clone_smooth_spec(
            split$smooth.spec[[base.i]],
            split$smooth.spec[[i]]
          )
          temp.term <- split$smooth.spec[[i]]$term
          for (j in 1:length(temp.term))
            id.list[[id]]$data[[j]] <- cbind(
              id.list[[id]]$data[[j]],
              mgcv::get.var(temp.term[j], data, vecMat = FALSE)
            )
        } else {
          id.list[[id]] <- list(sm.i = i)
          id.list[[id]]$data <- list()
          term <- split$smooth.spec[[i]]$term
          for (j in 1:length(term))
            id.list[[id]]$data[[j]] <- mgcv::get.var(
              term[j],
              data,
              vecMat = FALSE
            )
        }
      }
  }
  G$off <- array(0, 0)
  first.para <- G$nsdf + 1
  sm <- list()
  newm <- 0
  if (m > 0)
    for (i in 1:m) {
      id <- split$smooth.spec[[i]]$id
      if (is.null(id) || !idLinksBases) {
        sml <- mgcv::smoothCon(
          split$smooth.spec[[i]],
          data,
          knots,
          absorb.cons,
          scale.penalty = scale.penalty,
          null.space.penalty = select,
          sparse.cons = sparse.cons,
          diagonal.penalty = diagonal.penalty,
          apply.by = apply.by,
          modCon = modCon
        )
      } else {
        names(id.list[[id]]$data) <- split$smooth.spec[[i]]$term
        sml <- mgcv::smoothCon(
          split$smooth.spec[[i]],
          id.list[[id]]$data,
          knots,
          absorb.cons,
          n = nrow(data),
          dataX = data,
          scale.penalty = scale.penalty,
          null.space.penalty = select,
          sparse.cons = sparse.cons,
          diagonal.penalty = diagonal.penalty,
          apply.by = apply.by,
          modCon = modCon
        )
      }
      ind <- 1:length(sml)
      sm[ind + newm] <- sml[ind]
      newm <- newm + length(sml)
    }
  G$m <- m <- newm
  if (m > 0) {
    sm <- mgcv::gam.side(sm, X, tol = .Machine$double.eps^0.5)
    if (!apply.by)
      for (i in 1:length(sm)) {
        if (!is.null(sm[[i]]$X0)) {
          ind <- attr(sm[[i]], "del.index")
          sm[[i]]$X <- if (is.null(ind)) sm[[i]]$X0 else
            sm[[i]]$X0[, -ind, drop = FALSE]
        }
      }
  }
  idx <- list()
  L <- matrix(0, 0, 0)
  lsp.names <- sp.names <- rep("", 0)
  if (m > 0)
    for (i in 1:m) {
      id <- sm[[i]]$id
      length.S <- if (is.null(sm[[i]]$updateS)) length(sm[[i]]$S) else
        sm[[i]]$n.sp
      Li <- if (is.null(sm[[i]]$L)) diag(length.S) else sm[[i]]$L
      if (length.S > 0) {
        if (length.S == 1) lspn <- sm[[i]]$label else {
          Sname <- names(sm[[i]]$S)
          lspn <- if (is.null(Sname))
            paste(sm[[i]]$label, 1:length.S, sep = "") else
            paste(sm[[i]]$label, Sname, sep = "")
        }
        spn <- lspn[1:ncol(Li)]
      }
      if (is.null(id) || is.null(idx[[id]])) {
        if (!is.null(id)) {
          idx[[id]]$c <- ncol(L) + 1
          idx[[id]]$nc <- ncol(Li)
        }
        L <- rbind(
          cbind(L, matrix(0, nrow(L), ncol(Li))),
          cbind(matrix(0, nrow(Li), ncol(L)), Li)
        )
        if (length.S > 0) {
          sp.names <- c(sp.names, spn)
          lsp.names <- c(lsp.names, lspn)
        }
      } else {
        L0 <- matrix(0, nrow(Li), ncol(L))
        if (ncol(Li) > idx[[id]]$nc) {
          stop(
            "Later terms sharing an `id' can not have more smoothing parameters than the first such term"
          )
        }
        L0[, idx[[id]]$c:(idx[[id]]$c + ncol(Li) - 1)] <- Li
        L <- rbind(L, L0)
        if (length.S > 0) {
          lsp.names <- c(lsp.names, lspn)
        }
      }
    }
  Xp <- NULL
  if (m > 0)
    for (i in 1:m) {
      n.para <- ncol(sm[[i]]$X)
      sm[[i]]$first.para <- first.para
      first.para <- first.para + n.para
      sm[[i]]$last.para <- first.para - 1
      Xoff <- attr(sm[[i]]$X, "offset")
      if (!is.null(Xoff)) {
        if (is.null(G$offset)) G$offset <- Xoff else G$offset <- G$offset + Xoff
      }
      if (is.null(sm[[i]]$Xp)) {
        if (!is.null(Xp)) Xp <- cbind2(Xp, sm[[i]]$X)
      } else {
        if (is.null(Xp)) Xp <- X
        Xp <- cbind2(Xp, sm[[i]]$Xp)
        sm[[i]]$Xp <- NULL
      }
      X <- cbind2(X, sm[[i]]$X)
      sm[[i]]$X <- NULL
      G$smooth[[i]] <- sm[[i]]
    }
  if (is.null(Xp)) {
    G$cmX <- colMeans(X)
  } else {
    G$cmX <- colMeans(Xp)
    qrx <- qr(Xp, LAPACK = TRUE)
    R <- qr.R(qrx)
    p <- ncol(R)
    rank <- mgcv::Rrank(R)
    QtX <- qr.qty(qrx, X)[1:rank, ]
    if (rank < p) {
      R <- R[1:rank, ]
      qrr <- qr(t(R), tol = 0)
      R <- qr.R(qrr)
      G$P <- forwardsolve(t(R), QtX)
    } else {
      G$P <- backsolve(R, QtX)
    }
    if (rank < p) {
      G$P <- qr.qy(qrr, rbind(G$P, matrix(0, p - rank, p)))
    }
    G$P[qrx$pivot, ] <- G$P
  }
  G$X <- X
  rm(X)
  n.p <- ncol(G$X)
  if (!is.null(sp)) {
    ok <- TRUE
    if (length(sp) < ncol(L)) {
      warning("Supplied smoothing parameter vector is too short - ignored.")
      ok <- FALSE
    }
    if (sum(is.na(sp))) {
      warning("NA's in supplied smoothing parameter vector - ignoring.")
      ok <- FALSE
    }
  } else ok <- FALSE
  G$sp <- if (ok) sp[1:ncol(L)] else rep(-1, ncol(L))
  names(G$sp) <- sp.names
  k <- 1
  if (m > 0)
    for (i in 1:m) {
      id <- sm[[i]]$id
      if (is.null(sm[[i]]$L)) Li <- diag(length(sm[[i]]$S)) else Li <- sm[[i]]$L
      if (is.null(id)) {
        spi <- sm[[i]]$sp
        if (!is.null(spi)) {
          if (length(spi) != ncol(Li))
            stop(
              "incorrect number of smoothing parameters supplied for a smooth term"
            )
          G$sp[k:(k + ncol(Li) - 1)] <- spi
        }
        k <- k + ncol(Li)
      } else {
        spi <- sm[[i]]$sp
        if (is.null(idx[[id]]$sp.done)) {
          if (!is.null(spi)) {
            if (length(spi) != ncol(Li))
              stop(
                "incorrect number of smoothing parameters supplied for a smooth term"
              )
            G$sp[idx[[id]]$c:(idx[[id]]$c + idx[[id]]$nc - 1)] <- spi
          }
          idx[[id]]$sp.done <- TRUE
          k <- k + idx[[id]]$nc
        }
      }
    }
  k <- 1
  if (length(idx)) for (i in 1:length(idx)) idx[[i]]$sp.done <- FALSE
  if (m > 0)
    for (i in 1:m) {
      id <- sm[[i]]$id
      if (!is.null(id)) {
        if (idx[[id]]$nc > 0) {
          G$smooth[[i]]$sp <- G$sp[
            idx[[id]]$c:(idx[[id]]$c +
              idx[[id]]$nc -
              1)
          ]
        }
        if (!idx[[id]]$sp.done) {
          idx[[id]]$sp.done <- TRUE
          k <- k + idx[[id]]$nc
        }
      } else {
        if (is.null(sm[[i]]$L)) nc <- length(sm[[i]]$S) else
          nc <- ncol(sm[[i]]$L)
        if (nc > 0) G$smooth[[i]]$sp <- G$sp[k:(k + nc - 1)]
        k <- k + nc
      }
    }
  if (!is.null(min.sp)) {
    if (length(min.sp) < nrow(L)) stop("length of min.sp is wrong.")
    if (nrow(L) > 0) min.sp <- min.sp[1:nrow(L)]
    if (sum(is.na(min.sp))) stop("NA's in min.sp.")
    if (sum(min.sp < 0)) stop("elements of min.sp must be non negative.")
  }
  k.sp <- 0
  G$rank <- array(0, 0)
  if (m > 0)
    for (i in 1:m) {
      sm <- G$smooth[[i]]
      if (length(sm$S) > 0)
        for (j in 1:length(sm$S)) {
          k.sp <- k.sp + 1
          G$off[k.sp] <- sm$first.para
          G$S[[k.sp]] <- sm$S[[j]]
          G$rank[k.sp] <- sm$rank[j]
          if (!is.null(min.sp)) {
            if (is.null(H)) H <- matrix(0, n.p, n.p)
            H[sm$first.para:sm$last.para, sm$first.para:sm$last.para] <- H[
              sm$first.para:sm$last.para,
              sm$first.para:sm$last.para
            ] +
              min.sp[k.sp] *
                sm$S[[j]]
          }
        }
    }
  if (!is.null(PP)) {
    L <- rbind(
      cbind(L, matrix(0, nrow(L), ncol(PP$L))),
      cbind(matrix(0, nrow(PP$L), ncol(L)), PP$L)
    )
    G$off <- c(PP$off, G$off)
    G$S <- c(PP$S, G$S)
    G$rank <- c(PP$rank, G$rank)
    G$sp <- c(PP$sp, G$sp)
    lsp.names <- c(PP$full.sp.names, lsp.names)
    G$n.paraPen <- length(PP$off)
    if (!is.null(PP$min.sp)) {
      if (is.null(H)) H <- matrix(0, n.p, n.p)
      for (i in 1:length(PP$S)) {
        ind <- PP$off[i]:(PP$off[i] + ncol(PP$S[[i]]) - 1)
        H[ind, ind] <- H[ind, ind] + PP$min.sp[i] * PP$S[[i]]
      }
    }
  } else G$n.paraPen <- 0
  fix.ind <- G$sp >= 0
  if (sum(fix.ind)) {
    lsp0 <- G$sp[fix.ind]
    ind <- lsp0 == 0
    ef0 <- indi <- (1:length(ind))[ind]
    if (length(indi) > 0)
      for (i in 1:length(indi)) {
        ii <- G$off[i]:(G$off[i] + ncol(G$S[[i]]) - 1)
        ef0[i] <- norm(G$X[, ii], type = "F")^2 /
          norm(G$S[[i]], type = "F") *
          .Machine$double.eps *
          0.1
      }
    lsp0[!ind] <- log(lsp0[!ind])
    lsp0[ind] <- log(ef0)
    lsp0 <- as.numeric(L[, fix.ind, drop = FALSE] %*% lsp0)
    L <- L[, !fix.ind, drop = FALSE]
    G$sp <- G$sp[!fix.ind]
  } else {
    lsp0 <- rep(0, nrow(L))
  }
  G$H <- H
  if (ncol(L) == nrow(L) && !sum(L != diag(ncol(L)))) L <- NULL
  G$L <- L
  G$lsp0 <- lsp0
  names(G$lsp0) <- lsp.names
  if (absorb.cons == FALSE) {
    G$C <- matrix(0, 0, n.p)
    if (m > 0) {
      for (i in 1:m) {
        if (is.null(G$smooth[[i]]$C)) n.con <- 0 else
          n.con <- nrow(G$smooth[[i]]$C)
        C <- matrix(0, n.con, n.p)
        C[, G$smooth[[i]]$first.para:G$smooth[[i]]$last.para] <- G$smooth[[i]]$C
        G$C <- rbind(G$C, C)
        G$smooth[[i]]$C <- NULL
      }
      rm(C)
    }
  }
  G$y <- drop(data[[split$response]])
  ydim <- dim(G$y)
  if (!is.null(ydim) && length(ydim) < 2) dim(G$y) <- NULL
  G$n <- nrow(data)
  if (is.null(data$"(weights)")) G$w <- rep(1, G$n) else G$w <- data$"(weights)"
  if (G$nsdf > 0) term.names <- colnames(G$X)[1:G$nsdf] else
    term.names <- array("", 0)
  n.smooth <- length(G$smooth)
  n.sp0 <- 0
  if (n.smooth)
    for (i in 1:n.smooth) {
      k <- 1
      jj <- G$smooth[[i]]$first.para:G$smooth[[i]]$last.para
      if (G$smooth[[i]]$df > 0)
        for (j in jj) {
          term.names[j] <- paste(
            G$smooth[[i]]$label,
            ".",
            as.character(k),
            sep = ""
          )
          k <- k + 1
        }
      n.sp <- length(G$smooth[[i]]$S)
      if (n.sp) {
        G$smooth[[i]]$first.sp <- n.sp0 + 1
        n.sp0 <- G$smooth[[i]]$last.sp <- n.sp0 + n.sp
      }
      if (!is.null(G$smooth[[i]]$g.index)) {
        if (is.null(G$g.index)) G$g.index <- rep(FALSE, n.p)
        G$g.index[jj] <- G$smooth[[i]]$g.index
      }
    }
  G$term.names <- term.names
  G$pP <- PP
  G
}

#' @noRd
parametric_penalty <- function(pterms, assign, paraPen, sp0) {
  S <- list()
  off <- rep(0, 0)
  rank <- rep(0, 0)
  sp <- rep(0, 0)
  full.sp.names <- rep("", 0)
  L <- matrix(0, 0, 0)
  k <- 0
  tind <- unique(assign)
  n.t <- length(tind)
  if (n.t > 0)
    for (j in 1:n.t)
      if (tind[j] > 0) {
        term.label <- attr(pterms[tind[j]], "term.label")
        P <- paraPen[[term.label]]
        if (!is.null(P)) {
          ind <- (1:length(assign))[assign == tind[j]]
          Li <- P$L
          P$L <- NULL
          spi <- P$sp
          P$sp <- NULL
          ranki <- P$rank
          P$rank <- NULL
          np <- length(P)
          if (!is.null(ranki) && length(ranki) != np)
            stop("`rank' has wrong length in `paraPen'")
          if (np)
            for (i in 1:np) {
              k <- k + 1
              S[[k]] <- P[[i]]
              off[k] <- min(ind)
              if (ncol(P[[i]]) != nrow(P[[i]]) || nrow(P[[i]]) != length(ind))
                stop(" a parametric penalty has wrong dimension")
              if (is.null(ranki)) {
                ev <- eigen(S[[k]], symmetric = TRUE, only.values = TRUE)$values
                rank[k] <- sum(ev > max(ev) * .Machine$double.eps * 10)
              } else rank[k] <- ranki[i]
            }
          if (np) {
            if (is.null(Li)) Li <- diag(np)
            if (nrow(Li) != np) stop("L has wrong dimension in `paraPen'")
            L <- rbind(
              cbind(L, matrix(0, nrow(L), ncol(Li))),
              cbind(matrix(0, nrow(Li), ncol(L)), Li)
            )
            ind <- (length(sp) + 1):(length(sp) + ncol(Li))
            ind2 <- (length(sp) + 1):(length(sp) + nrow(Li))
            if (is.null(spi)) {
              sp[ind] <- -1
            } else {
              if (length(spi) != ncol(Li))
                stop("`sp' dimension wrong in `paraPen'")
              sp[ind] <- spi
            }
            if (length(ind) > 1)
              names(sp)[ind] <- paste(
                term.label,
                ind -
                  ind[1] +
                  1,
                sep = ""
              ) else names(sp)[ind] <- term.label
            if (length(ind2) > 1)
              full.sp.names[ind2] <- paste(
                term.label,
                ind2 - ind2[1] + 1,
                sep = ""
              ) else full.sp.names[ind2] <- term.label
          }
        }
      }
  if (k == 0) return(NULL)
  if (!is.null(sp0)) {
    if (length(sp0) < length(sp)) stop("`sp' too short")
    sp0 <- sp0[1:length(sp)]
    sp[sp < 0] <- sp0[sp < 0]
  }
  list(
    S = S,
    off = off,
    sp = sp,
    L = L,
    rank = rank,
    full.sp.names = full.sp.names
  )
}

#' @noRd
clone_smooth_spec <- function(specb, spec) {
  if (specb$dim != spec$dim)
    stop("`id' linked smooths must have same number of arguments")
  if (inherits(specb, c("tensor.smooth.spec", "t2.smooth.spec"))) {
    specb$term <- spec$term
    specb$label <- spec$label
    specb$by <- spec$by
    k <- 1
    for (i in 1:length(specb$margin)) {
      if (is.null(spec$margin)) {
        for (j in 1:length(specb$margin[[i]]$term)) {
          specb$margin[[i]]$term[j] <- spec$term[k]
          k <- k + 1
        }
        specb$margin[[i]]$label <- ""
      } else {
        specb$margin[[i]]$term <- spec$margin[[i]]$term
        specb$margin[[i]]$label <- spec$margin[[i]]$label
        specb$margin[[i]]$xt <- spec$margin[[i]]$xt
      }
    }
  } else {
    specb$term <- spec$term
    specb$label <- spec$label
    specb$by <- spec$by
    specb$xt <- spec$xt
  }
  specb
}

#' Summarize all the variables in a list of variables
#'
#' This function is derived from \code{mgcv:::variable.summary}
#'
#' @author Simon N Wood with modifications by Nicholas Clark
#' @noRd
variable_summary <- function(pf, dl, n) {
  v.n <- length(dl)
  v.name <- v.name1 <- names(dl)
  if (v.n) {
    k <- 0
    for (i in 1:v.n)
      if (length(dl[[i]]) >= n) {
        k <- k + 1
        v.name[k] <- v.name1[i]
      }
    if (k > 0) v.name <- v.name[1:k] else v.name <- rep("", k)
  }
  p.name <- all.vars(pf[-2])
  vs <- list()
  v.n <- length(v.name)
  if (v.n > 0)
    for (i in 1:v.n) {
      if (v.name[i] %in% p.name) para <- TRUE else para <- FALSE
      if (para && is.matrix(dl[[v.name[i]]]) && ncol(dl[[v.name[i]]]) > 1) {
        x <- matrix(
          apply(
            dl[[v.name[i]]],
            2,
            quantile,
            probs = 0.5,
            type = 3,
            na.rm = TRUE
          ),
          1,
          ncol(dl[[v.name[i]]])
        )
      } else {
        x <- dl[[v.name[i]]]
        if (is.character(x)) x <- as.factor(x)
        if (is.factor(x)) {
          x <- x[!is.na(x)]
          lx <- levels(x)
          freq <- tabulate(x)
          ii <- min((1:length(lx))[freq == max(freq)])
          x <- factor(lx[ii], levels = lx)
        } else {
          x <- as.numeric(x)
          x <- c(
            min(x, na.rm = TRUE),
            as.numeric(quantile(x, probs = 0.5, type = 3, na.rm = TRUE)),
            max(x, na.rm = TRUE)
          )
        }
      }
      vs[[v.name[i]]] <- x
    }
  vs
}
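
# Illustrative sketch only (not run): the summary keeps (min, median, max) for
# numeric covariates and the modal level for factors, mirroring
# mgcv:::variable.summary. The toy inputs here are hypothetical.
# dl <- list(x = c(0.1, 0.5, 0.9), f = factor(c("a", "a", "b")))
# variable_summary(y ~ x + f, dl, n = 3)
# #> $x is c(0.1, 0.5, 0.9); $f is the modal factor level "a"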

#' @importFrom stats lm
#' @noRd
initial_spg <- function(
  x,
  y,
  weights,
  family,
  S,
  rank,
  off,
  offset = NULL,
  L = NULL,
  lsp0 = NULL,
  type = 1,
  start = NULL,
  mustart = NULL,
  etastart = NULL,
  E = NULL,
  ...
) {
  if (length(S) == 0) return(rep(0, 0))
  nobs <- nrow(x)
  if (is.null(mustart)) mukeep <- NULL else mukeep <- mustart
  eval(family$initialize)
  if (inherits(family, "general.family")) {
    lbb <- family$ll(
      y,
      x,
      start,
      weights,
      family,
      offset = offset,
      deriv = 1
    )$lbb
    pcount <- rep(0, ncol(lbb))
    for (i in 1:length(S)) {
      ind <- off[i]:(off[i] + ncol(S[[i]]) - 1)
      dlb <- -diag(lbb[ind, ind, drop = FALSE])
      indp <- rowSums(abs(S[[i]])) > max(S[[i]]) * .Machine$double.eps^0.75 &
        dlb != 0
      ind <- ind[indp]
      pcount[ind] <- pcount[ind] + 1
    }
    lambda <- rep(0, length(S))
    for (i in 1:length(S)) {
      ind <- off[i]:(off[i] + ncol(S[[i]]) - 1)
      lami <- 1
      dlb <- abs(diag(lbb[ind, ind, drop = FALSE]))
      dS <- diag(S[[i]])
      pc <- pcount[ind]
      ind <- rowSums(abs(S[[i]])) > max(S[[i]]) * .Machine$double.eps^0.75 &
        dlb != 0
      dlb <- dlb[ind] / pc[ind]
      dS <- dS[ind]
      rm <- max(length(dS) / rank[i], 1)
      while (
        sqrt(
          mean(dlb / (dlb + lami * dS * rm)) *
            mean(dlb) /
            mean(
              dlb +
                lami * dS * rm
            )
        ) >
          0.4
      )
        lami <- lami * 5
      while (
        sqrt(
          mean(dlb / (dlb + lami * dS * rm)) *
            mean(dlb) /
            mean(
              dlb +
                lami * dS * rm
            )
        ) <
          0.4
      )
        lami <- lami / 5
      lambda[i] <- lami
    }
  } else {
    if (is.null(mukeep)) {
      if (!is.null(start)) etastart <- drop(x %*% start)
      if (!is.null(etastart)) mustart <- family$linkinv(etastart)
    } else mustart <- mukeep
    if (inherits(family, "extended.family")) {
      theta <- family$getTheta()
      Ddo <- family$Dd(y, mustart, theta, weights)
      mu.eta2 <- family$mu.eta(family$linkfun(mustart))^2
      w <- 0.5 * as.numeric(Ddo$Dmu2 * mu.eta2)
      if (any(w < 0)) w <- 0.5 * as.numeric(Ddo$EDmu2 * mu.eta2)
    } else
      w <- as.numeric(
        weights *
          family$mu.eta(family$linkfun(mustart))^2 /
          family$variance(mustart)
      )
    w <- sqrt(w)
    if (type == 1) {
      lambda <- mgcv::initial.sp(w * x, S, off)
    } else {
      csX <- colSums((w * x)^2)
      lambda <- rep(0, length(S))
      for (i in 1:length(S)) {
        ind <- off[i]:(off[i] + ncol(S[[i]]) - 1)
        lambda[i] <- sum(csX[ind]) / sqrt(sum(S[[i]]^2))
      }
    }
  }
  if (!is.null(L)) {
    lsp <- log(lambda)
    if (is.null(lsp0)) lsp0 <- rep(0, nrow(L))
    lsp <- as.numeric(coef(lm(lsp ~ L - 1 + offset(lsp0))))
    lambda <- exp(lsp)
  }
  lambda
}

#' Set up JAGS data and model file for fitting GAMs
#'
#' This function is derived from \code{mgcv:::jagam}
#'
#' @author Simon N Wood with modifications by Nicholas Clark
#' @importFrom mgcv gam.control interpret.gam
#' @noRd
jagam <- function(
  formula,
  family = gaussian,
  data = list(),
  file,
  weights = NULL,
  na.action,
  offset = NULL,
  knots = NULL,
  sp = NULL,
  drop.unused.levels = FALSE,
  control = mgcv::gam.control(),
  centred = TRUE,
  diagonalize = FALSE
) {
  ## Start the model specification
  cat("model {\n", file = file)
  sp.prior <- 'gamma'

  # Evaluate family
  if (is.character(family)) family <- eval(parse(text = family))
  if (is.function(family)) family <- family()
  if (is.null(family$family)) stop("family not recognized")

  # Interpret the formula and initialize the model.frame object
  gp <- mgcv::interpret.gam(formula)
  gp$pfok <- 1
  cl <- match.call()
  mf <- match.call(expand.dots = FALSE)
  mf$formula <- gp$fake.formula
  mf$family <- mf$knots <- mf$sp <- mf$file <- mf$control <-
    mf$centred <- mf$sp.prior <- mf$diagonalize <- NULL
  mf$drop.unused.levels <- drop.unused.levels
  mf[[1]] <- quote(stats::model.frame)
  pmf <- mf

  # Extract fixed effect terms
  # Multiple formula objects
  if (is.list(formula)) {
    environment(formula) <- environment(formula[[1]])
    pterms <- list()
    tlab <- rep("", 0)
    for (i in 1:length(formula)) {
      pmf$formula <- gp[[i]]$pf
      pterms[[i]] <- attr(eval(pmf, parent.frame()), "terms")
      tlabi <- attr(pterms[[i]], "term.labels")
      if (i > 1 && length(tlabi) > 0) tlabi <- paste(tlabi, i - 1, sep = ".")
      tlab <- c(tlab, tlabi)
    }
    attr(pterms, "term.labels") <- tlab

    # Single linear predictor case
  } else {
    pmf$formula <- gp$pf
    pmf <- eval(pmf, parent.frame())
    pterms <- attr(pmf, "terms")
  }

  mf <- eval(mf, parent.frame())
  if (nrow(mf) < 2) stop("Not enough (non-NA) data to do anything meaningful")
  terms <- attr(mf, "terms")

  # Summarize the *raw* input variables
  vars <- all.vars(gp$fake.formula[-2])
  inp <- parse(text = paste("list(", paste(vars, collapse = ","), ")"))
  if (!is.list(data) && !is.data.frame(data)) data <- as.data.frame(data)
  dl <- eval(inp, data, parent.frame())
  rm(data)
  names(dl) <- vars
  var.summary <- variable_summary(gp$pf, dl, nrow(mf))
  rm(dl)

  gsname <- if (is.list(formula)) "gam_setup.list" else "gam_setup"

  G <- do.call(
    gsname,
    list(
      formula = gp,
      pterms = pterms,
      data = mf,
      knots = knots,
      sp = sp,
      H = NULL,
      absorb.cons = TRUE,
      sparse.cons = FALSE,
      select = TRUE,
      idLinksBases = TRUE,
      scale.penalty = control$scalePenalty
    )
  )

  G$model <- mf
  G$terms <- terms
  G$family <- family
  G$call <- cl
  G$var.summary <- var.summary

  ## write JAGS code producing linear predictor and linking linear predictor to
  ## response....
  use.weights <- if (is.null(weights)) FALSE else TRUE
  use.weights <- write_jagslp(
    "y",
    family = poisson(),
    file,
    use.weights,
    !is.null(G$offset)
  )

  if (is.null(weights) && use.weights) weights <- rep(1, nrow(G$X))

  ## start the JAGS data list...

  jags.stuff <- list(y = G$y, n = length(G$y), X = G$X)
  if (!is.null(G$offset)) jags.stuff$offset <- G$offset
  if (use.weights) jags.stuff$w <- weights

  if (family$family == "binomial") {
    jags.stuff$y <- G$y * weights
  }

  ## set the fixed effect priors...
  lambda <- rep(1, length(G$S))
  jags.ini <- list()
  jags.ini$b <- rep(0, NCOL(G$X))
  prior.tau <- 10
  ptau <- 10
  if (is.list(formula)) {
    for (i in 1:length(G$nsdf)) {
      if (G$nsdf[i] > 0) {
        if (i == 1) {
          cat(
            "  ## Parametric effect priors CHECK tau=1/",
            signif(1 / sqrt(ptau), 2),
            "^2 is appropriate!\n",
            file = file,
            append = TRUE,
            sep = ""
          )
          cat(
            "  for (i in 1:",
            G$nsdf[i],
            ") { b[i] ~ dnorm(0,",
            ptau,
            ") }\n",
            file = file,
            append = TRUE,
            sep = ""
          )
        } else {
          cat(
            "  ## Parametric effect priors CHECK tau=1/",
            signif(1 / sqrt(ptau), 2),
            "^2 is appropriate!\n",
            file = file,
            append = TRUE,
            sep = ""
          )
          cat(
            "  for (i in ",
            attr(G$nsdf, 'pstart')[i],
            ':',
            attr(G$nsdf, 'pstart')[i] + G$nsdf[i],
            ") { b[i] ~ dnorm(0,",
            ptau,
            ") }\n",
            file = file,
            append = TRUE,
            sep = ""
          )
        }
      }
    }
  } else {
    if (sum(G$nsdf) > 0) {
      cat(
        "  ## Parametric effect priors CHECK tau=1/",
        signif(1 / sqrt(ptau), 2),
        "^2 is appropriate!\n",
        file = file,
        append = TRUE,
        sep = ""
      )
      cat(
        "  for (i in 1:",
        sum(G$nsdf),
        ") { b[i] ~ dnorm(0,",
        ptau,
        ") }\n",
        file = file,
        append = TRUE,
        sep = ""
      )
    }
  }

  ## Work through smooths.
  n.sp <- 0 ## count the smoothing parameters....
  for (i in 1:length(G$smooth)) {
    ## Are penalties separable...
    seperable <- FALSE
    M <- length(G$smooth[[i]]$S)
    p <- G$smooth[[i]]$last.para - G$smooth[[i]]$first.para + 1 ## number of params
    if (M <= 1) seperable <- TRUE else {
      overlap <- rowSums(G$smooth[[i]]$S[[1]])
      for (j in 2:M) overlap <- overlap & rowSums(G$smooth[[i]]$S[[j]])
      if (!sum(overlap)) seperable <- TRUE
    }
    if (seperable) {
      ## double check that they are diagonal
      if (M > 0)
        for (j in 1:M) {
          if (
            max(abs(
              G$smooth[[i]]$S[[j]] - diag(diag(G$smooth[[i]]$S[[j]]), nrow = p)
            )) >
              0
          )
            seperable <- FALSE
        }
    }
    cat(
      "  ## prior for ",
      G$smooth[[i]]$label,
      "... \n",
      file = file,
      append = TRUE,
      sep = ""
    )
    if (seperable) {
      b0 <- G$smooth[[i]]$first.para
      b1 <- G$smooth[[i]]$last.para
      if (M == 0) {
        cat(
          "  ## Note fixed vague prior, CHECK tau = 1/",
          signif(1 / sqrt(ptau), 2),
          "^2...\n",
          file = file,
          append = TRUE,
          sep = ""
        )
        #b1 <- G$smooth[[i]]$last.para
        ptau <- min(prior.tau[b0:b1])
        cat(
          "  for (i in ",
          b0,
          ":",
          b1,
          ") { b[i] ~ dnorm(0,",
          ptau,
          ") }\n",
          file = file,
          append = TRUE,
          sep = ""
        )
      } else
        for (j in 1:M) {
          D <- diag(G$smooth[[i]]$S[[j]]) > 0
          #b1 <- sum(as.numeric(D)) + b0 - 1
          n.sp <- n.sp + 1
          #cat("  for (i in ",b0,":",b1,") { b[i] ~ dnorm(0, lambda[",n.sp,"]) }\n",file=file,append=TRUE,sep="")
          #b0 <- b1 + 1
          cat(
            "  for (i in ",
            compress_iseq((b0:b1)[D]),
            ") { b[i] ~ dnorm(0, lambda[",
            n.sp,
            "]) }\n",
            file = file,
            append = TRUE,
            sep = ""
          )
        }
    } else {
      ## inseparable - requires the penalty matrices to be supplied to JAGS...
      b0 <- G$smooth[[i]]$first.para
      b1 <- G$smooth[[i]]$last.para
      Kname <- paste("K", i, sep = "") ## total penalty matrix in JAGS
      Sname <- paste("S", i, sep = "") ## components of total penalty in R & JAGS
      cat(
        "  ",
        Kname,
        " <- ",
        Sname,
        "[1:",
        p,
        ",1:",
        p,
        "] * lambda[",
        n.sp + 1,
        "] ",
        file = file,
        append = TRUE,
        sep = ""
      )
      if (M > 1) {
        ## code to form total precision matrix...
        for (j in 2:M)
          cat(
            " + ",
            Sname,
            "[1:",
            p,
            ",",
            (j - 1) * p + 1,
            ":",
            j * p,
            "] * lambda[",
            n.sp + j,
            "]",
            file = file,
            append = TRUE,
            sep = ""
          )
      }
      cat(
        "\n  b[",
        b0,
        ":",
        b1,
        "] ~ dmnorm(zero[",
        b0,
        ":",
        b1,
        "],",
        Kname,
        ") \n",
        file = file,
        append = TRUE,
        sep = ""
      )
      n.sp <- n.sp + M
      Sc <- G$smooth[[i]]$S[[1]]
      if (M > 1) for (j in 2:M) Sc <- cbind(Sc, G$smooth[[i]]$S[[j]])
      jags.stuff[[Sname]] <- Sc
      jags.stuff$zero <- rep(0, ncol(G$X))
    }
  } ## smoothing penalties finished

  ## Write the smoothing parameter prior code, using L if it exists.

  cat(
    "  ## smoothing parameter priors CHECK...\n",
    file = file,
    append = TRUE,
    sep = ""
  )
  if (is.null(G$L)) {
    if (sp.prior == "log.uniform") {
      cat("  for (i in 1:", n.sp, ") {\n", file = file, append = TRUE, sep = "")
      cat("    rho[i] ~ dunif(-12,12)\n", file = file, append = TRUE, sep = "")
      cat(
        "    lambda[i] <- exp(rho[i])\n",
        file = file,
        append = TRUE,
        sep = ""
      )
      cat("  }\n", file = file, append = TRUE, sep = "")
      jags.ini$rho <- log(lambda)
    } else {
      ## gamma priors
      cat("  for (i in 1:", n.sp, ") {\n", file = file, append = TRUE, sep = "")
      cat(
        "    lambda[i] ~ dgamma(.05,.005)\n",
        file = file,
        append = TRUE,
        sep = ""
      )
      cat(
        "    rho[i] <- log(lambda[i])\n",
        file = file,
        append = TRUE,
        sep = ""
      )
      cat("  }\n", file = file, append = TRUE, sep = "")
      jags.ini$lambda <- lambda
    }
  } else {
    jags.stuff$L <- G$L
    rho.lo <- FALSE
    if (any(G$lsp0 != 0)) {
      jags.stuff$rho.lo <- G$lsp0
      rho.lo <- TRUE
    }
    nr <- ncol(G$L)
    if (sp.prior == "log.uniform") {
      cat(
        "  for (i in 1:",
        nr,
        ") { rho0[i] ~ dunif(-12,12) }\n",
        file = file,
        append = TRUE,
        sep = ""
      )
      if (rho.lo)
        cat(
          "  rho <- rho.lo + L %*% rho0\n",
          file = file,
          append = TRUE,
          sep = ""
        ) else
        cat("  rho <- L %*% rho0\n", file = file, append = TRUE, sep = "")
      cat(
        "  for (i in 1:",
        n.sp,
        ") { lambda[i] <- exp(rho[i]) }\n",
        file = file,
        append = TRUE,
        sep = ""
      )
      jags.ini$rho0 <- log(lambda)
    } else {
      ## gamma prior
      cat("  for (i in 1:", nr, ") {\n", file = file, append = TRUE, sep = "")
      cat(
        "    lambda0[i] ~ dgamma(.05,.005)\n",
        file = file,
        append = TRUE,
        sep = ""
      )
      cat(
        "    rho0[i] <- log(lambda0[i])\n",
        file = file,
        append = TRUE,
        sep = ""
      )
      cat("  }\n", file = file, append = TRUE, sep = "")
      if (rho.lo)
        cat(
          "  rho <- rho.lo + L %*% rho0\n",
          file = file,
          append = TRUE,
          sep = ""
        ) else
        cat("  rho <- L %*% rho0\n", file = file, append = TRUE, sep = "")
      cat(
        "  for (i in 1:",
        n.sp,
        ") { lambda[i] <- exp(rho[i]) }\n",
        file = file,
        append = TRUE,
        sep = ""
      )
      jags.ini$lambda0 <- lambda
    }
  }
  cat("}", file = file, append = TRUE)

  G$formula <- formula
  G$rank <- ncol(G$X) ## force full rank so that Gibbs sampling works
  list(pregam = G, jags.data = jags.stuff, jags.ini = jags.ini)
} ## end of jagam
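
# Illustrative sketch only (not run): this internal jagam() variant writes the
# JAGS model code to `file` and returns the pre-fit GAM object together with
# the JAGS data and initial values. The data below are hypothetical.
# dat <- data.frame(y = rpois(50, 5), x = runif(50))
# jags_file <- tempfile(fileext = '.txt')
# out <- jagam(y ~ s(x, k = 5), family = poisson(), data = dat, file = jags_file)
# names(out)
# #> "pregam"    "jags.data" "jags.ini"
# readLines(jags_file)   # the generated JAGS model block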


#' Initialize a gam object using a list of formulae
#'
#' This function is derived from \code{mgcv:::gam.setup.list}
#'
#' @author Simon N Wood with modifications by Nicholas Clark
#' @noRd
gam_setup.list <- function(
  formula,
  pterms,
  data = stop("No data supplied to gam_setup.list"),
  knots = NULL,
  sp = NULL,
  min.sp = NULL,
  H = NULL,
  absorb.cons = TRUE,
  sparse.cons = 0,
  select = FALSE,
  idLinksBases = TRUE,
  scale.penalty = TRUE,
  paraPen = NULL,
  gamm.call = FALSE,
  drop.intercept = NULL,
  apply.by = TRUE,
  modCon = 0
) {
  # Version of gam_setup for when gam is called with a list of formulae
  # specifying several linear predictors. The key difference from gam_setup is
  # that the model matrix gains an "lpi" attribute: a list of column indices
  # for each linear predictor
  d <- length(pterms)
  if (is.null(drop.intercept)) drop.intercept <- rep(FALSE, d)
  if (length(drop.intercept) != d)
    stop("length(drop.intercept) should be equal to number of model formulas")

  lp.overlap <- if (formula$nlp < d) TRUE else FALSE

  G <- gam_setup(
    formula[[1]],
    pterms[[1]],
    data,
    knots,
    sp,
    min.sp,
    H,
    absorb.cons,
    sparse.cons,
    select,
    idLinksBases,
    scale.penalty,
    paraPen,
    gamm.call,
    drop.intercept[1],
    apply.by = apply.by,
    list.call = TRUE,
    modCon = modCon
  )

  G$pterms <- pterms
  G$offset <- list(G$offset)
  G$xlevels <- list(G$xlevels)
  G$assign <- list(G$assign)
  used.sp <- length(G$lsp0)

  if (!is.null(sp) && used.sp > 0) sp <- sp[-(1:used.sp)]
  if (!is.null(min.sp) && nrow(G$L) > 0) min.sp <- min.sp[-(1:nrow(G$L))]

  flpi <- lpi <- list()
  for (i in 1:formula$nlp) lpi[[i]] <- rep(0, 0)
  lpi[[1]] <- 1:ncol(G$X)
  flpi[[1]] <- formula[[1]]$lpi

  pof <- ncol(G$X)
  pstart <- rep(0, d)
  pstart[1] <- 1
  if (d > 1)
    for (i in 2:d) {
      if (is.null(formula[[i]]$response)) {
        formula[[i]]$response <- formula$response
        mv.response <- FALSE
      } else mv.response <- TRUE
      formula[[i]]$pfok <- 1
      um <- gam_setup(
        formula[[i]],
        pterms[[i]],
        data,
        knots,
        sp,
        min.sp,
        H,
        absorb.cons,
        sparse.cons,
        select,
        idLinksBases,
        scale.penalty,
        paraPen,
        gamm.call,
        drop.intercept[i],
        apply.by = apply.by,
        list.call = TRUE,
        modCon = modCon
      )
      used.sp <- length(um$lsp0)
      if (!is.null(sp) && used.sp > 0) sp <- sp[-(1:used.sp)]
      if (!is.null(min.sp) && nrow(um$L) > 0) min.sp <- min.sp[-(1:nrow(um$L))]

      flpi[[i]] <- formula[[i]]$lpi
      for (j in formula[[i]]$lpi) {
        lpi[[j]] <- c(lpi[[j]], pof + 1:ncol(um$X))
      }
      if (mv.response) G$y <- cbind(G$y, um$y)
      if (i > formula$nlp && !is.null(um$offset)) {
        stop("shared offsets not allowed")
      }
      G$offset[[i]] <- um$offset
      if (!is.null(um$contrasts)) G$contrasts <- c(G$contrasts, um$contrasts)
      G$xlevels[[i]] <- um$xlevels
      G$assign[[i]] <- um$assign
      G$rank <- c(G$rank, um$rank)
      pstart[i] <- pof + 1
      G$X <- cbind(G$X, um$X)
      k <- G$m
      if (um$m)
        for (j in 1:um$m) {
          um$smooth[[j]]$first.para <- um$smooth[[j]]$first.para + pof
          um$smooth[[j]]$last.para <- um$smooth[[j]]$last.para + pof
          k <- k + 1
          G$smooth[[k]] <- um$smooth[[j]]
        }
      ks <- length(G$S)
      M <- length(um$S)

      if (!is.null(um$L) || !is.null(G$L)) {
        if (is.null(G$L)) G$L <- diag(1, nrow = ks)
        if (is.null(um$L)) um$L <- diag(1, nrow = M)
        G$L <- rbind(
          cbind(G$L, matrix(0, nrow(G$L), ncol(um$L))),
          cbind(matrix(0, nrow(um$L), ncol(G$L)), um$L)
        )
      }

      G$off <- c(G$off, um$off + pof)
      if (M)
        for (j in 1:M) {
          ks <- ks + 1
          G$S[[ks]] <- um$S[[j]]
        }

      G$m <- G$m + um$m
      G$nsdf[i] <- um$nsdf
      if (!is.null(um$P) || !is.null(G$P)) {
        if (is.null(G$P)) G$P <- diag(1, nrow = pof)
        k <- ncol(um$X)
        if (is.null(um$P)) um$P <- diag(1, nrow = k)
        G$P <- rbind(
          cbind(G$P, matrix(0, pof, k)),
          cbind(matrix(0, k, pof), um$P)
        )
      }
      G$cmX <- c(G$cmX, um$cmX)
      if (um$nsdf > 0)
        um$term.names[1:um$nsdf] <- paste(
          um$term.names[1:um$nsdf],
          i - 1,
          sep = "."
        )
      G$term.names <- c(G$term.names, um$term.names)
      G$lsp0 <- c(G$lsp0, um$lsp0)
      G$sp <- c(G$sp, um$sp)
      pof <- ncol(G$X)
    }

  ## If there is overlap then there is a danger that the parametric terms are
  ## not identifiable, especially if factors are present in shared components.
  ## The following code deals with this possibility...
  if (lp.overlap) {
    rt <- olid(G$X, G$nsdf, pstart, flpi, lpi)
    if (length(rt$dind) > 0) {
      warning(
        "dropping unidentifiable parametric terms from model",
        call. = FALSE
      )
      G$X <- G$X[, -rt$dind]
      G$cmX <- G$cmX[-rt$dind]
      G$term.names <- G$term.names[-rt$dind]
      for (i in 1:length(G$smooth)) {
        k <- sum(rt$dind < G$smooth[[i]]$first.para)
        G$smooth[[i]]$first.para <- G$smooth[[i]]$first.para - k
        G$smooth[[i]]$last.para <- G$smooth[[i]]$last.para - k
      }
      for (i in 1:length(G$off)) G$off[i] <- G$off[i] - sum(rt$dind < G$off[i])
      attr(G$nsdf, "drop.ind") <- rt$dind ## store drop index
    }
  }
  attr(lpi, "overlap") <- lp.overlap
  attr(G$X, "lpi") <- lpi
  attr(G$nsdf, "pstart") <- pstart

  G$g.index <- rep(FALSE, ncol(G$X))
  n.sp0 <- 0
  if (length(G$smooth))
    for (i in 1:length(G$smooth)) {
      if (!is.null(G$smooth[[i]]$g.index)) {
        G$g.index[
          G$smooth[[i]]$first.para:G$smooth[[i]]$last.para
        ] <- G$smooth[[i]]$g.index
      }
      n.sp <- length(G$smooth[[i]]$S)
      if (n.sp) {
        G$smooth[[i]]$first.sp <- n.sp0 + 1
        n.sp0 <- G$smooth[[i]]$last.sp <- n.sp0 + n.sp
      }
    }
  if (!any(G$g.index)) G$g.index <- NULL

  G
}


#' Takes a set of non-negative integers and returns minimal code for generating it
#'
#' This function is derived from \code{mgcv:::compress.iseq}
#'
#' @author Simon N Wood with modifications by Nicholas Clark
#' @noRd
compress_iseq <- function(x) {
  x1 <- sort(x)
  br <- diff(x1) != 1 ## TRUE at sequence breaks
  txt <- paste(x1[c(TRUE, br)], x1[c(br, TRUE)], sep = ":") ## subsequences
  txt1 <- paste(x1[c(TRUE, br)]) ## subseq starts
  ii <- x1[c(TRUE, br)] == x1[c(br, TRUE)] ## index start and end equal
  txt[ii] <- txt1[ii] ## replace length-1 sequences with single integers
  paste("c(", paste(txt, collapse = ","), ")", sep = "")
}
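
# Worked example (not run): consecutive runs collapse to ':' ranges and
# singletons remain as plain integers, ready to paste into JAGS loop code.
# compress_iseq(c(1, 2, 3, 5, 8, 9))
# #> "c(1:3,5,8:9)"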

#' Returns a vector dind of columns of X to drop for identifiability
#'
#' This function is derived from \code{mgcv:::olid}
#'
#' @author Simon N Wood with modifications by Nicholas Clark
#' @noRd
olid <- function(X, nsdf, pstart, flpi, lpi) {
  nlp <- length(lpi) ## number of linear predictors
  n <- nrow(X)
  nf <- length(nsdf) ## number of formulae blocks
  Xp <- matrix(0, n * nlp, sum(nsdf))
  start <- 1
  ii <- 1:n
  tind <- rep(0, 0) ## complete index of all parametric columns in X
  ## create a block matrix, Xp, with the same identifiability properties as
  ## unpenalized part of model...
  for (i in 1:nf) {
    stop <- start - 1 + nsdf[i]
    if (stop >= start) {
      ind <- pstart[i] + 1:nsdf[i] - 1
      for (k in flpi[[i]]) {
        Xp[ii + (k - 1) * n, start:stop] <- X[, ind]
      }
      tind <- c(tind, ind)
      start <- start + nsdf[i]
    }
  }
  ## rank deficiency of Xp will reveal number of redundant parametric
  ## terms, and a pivoted QR will reveal which to drop to restore
  ## full rank...
  qrx <- qr(Xp, LAPACK = TRUE, tol = 0.0) ## unidentifiable columns get pivoted to final cols
  r <- mgcv::Rrank(qr.R(qrx)) ## get rank from R factor of pivoted QR
  if (r == ncol(Xp)) {
    ## full rank, all fine, drop nothing
    dind <- rep(0, 0)
  } else {
    ## reduced rank, drop some columns
    dind <- tind[sort(qrx$pivot[(r + 1):ncol(X)], decreasing = TRUE)] ## columns to drop
    ## now we need to adjust nsdf, pstart and lpi
    for (d in dind) {
      ## working down through drop indices
      ## following commented out code is useful should it ever prove necessary to
      ## adjust pstart and nsdf, but at present these are only used in prediction,
      ## and it is cleaner to leave them unchanged, and simply drop using dind during prediction.
      #k <- if (d>=pstart[nf]) nlp else which(d >= pstart[1:(nf-1)] & d < pstart[2:nf])
      #nsdf[k] <- nsdf[k] - 1 ## one less unpenalized column in this block
      #if (k<nf) pstart[(k+1):nf] <-  pstart[(k+1):nf] - 1 ## later block starts move down 1
      for (i in 1:nlp) {
        k <- which(d == lpi[[i]])
        if (length(k) > 0) lpi[[i]] <- lpi[[i]][-k] ## drop row
        k <- which(lpi[[i]] > d)
        if (length(k) > 0) lpi[[i]][k] <- lpi[[i]][k] - 1 ## close up
      }
    } ## end of drop index loop
  }
  list(dind = dind, lpi = lpi) ##,pstart=pstart,nsdf=nsdf)
}


#' Write linear predictor section of a jagam file
#'
#' This function is derived from \code{mgcv:::write.jagslp}
#'
#' @author Simon N Wood with modifications by Nicholas Clark
#' @noRd
write_jagslp <- function(resp, family, file, use.weights, offset = FALSE) {
  ## write the JAGS code for the linear predictor
  ## and response distribution.
  iltab <- ## table of inverse link functions
    c(
      "eta[i]",
      "exp(eta[i])",
      "ilogit(eta[i])",
      "phi(eta[i])",
      "1/eta[i]",
      "eta[i]^2"
    )
  names(iltab) <- c("identity", "log", "logit", "probit", "inverse", "sqrt")
  if (!family$link %in% names(iltab)) stop("sorry link not yet handled")

  ## code linear predictor and expected response...
  if (family$link == "identity") {
    if (offset)
      cat(
        "  mu <- X %*% b + offset ## expected response\n",
        file = file,
        append = TRUE
      ) else
      cat("  mu <- X %*% b ## expected response\n", file = file, append = TRUE)
  } else {
    if (offset)
      cat(
        "  eta <- X %*% b + offset ## linear predictor\n",
        file = file,
        append = TRUE
      ) else
      cat("  eta <- X %*% b ## linear predictor\n", file = file, append = TRUE)
    cat(
      "  for (i in 1:n) { mu[i] <- ",
      iltab[family$link],
      "} ## expected response\n",
      file = file,
      append = TRUE
    )
  }
  ## code the response given mu and any scale parameter prior...
  #scale <- TRUE ## is scale parameter free?
  cat("  for (i in 1:n) { ", file = file, append = TRUE)
  if (family$family == "gaussian") {
    if (use.weights)
      cat(
        resp,
        "[i] ~ dnorm(mu[i],tau*w[i]) } ## response \n",
        sep = "",
        file = file,
        append = TRUE
      ) else
      cat(
        resp,
        "[i] ~ dnorm(mu[i],tau) } ## response \n",
        sep = "",
        file = file,
        append = TRUE
      )
    cat(
      "  scale <- 1/tau ## convert tau to standard GLM scale\n",
      file = file,
      append = TRUE
    )
    cat(
      "  tau ~ dgamma(.05,.005) ## precision parameter prior \n",
      file = file,
      append = TRUE
    )
  } else if (family$family == "poisson") {
    # scale <- FALSE
    cat(
      resp,
      "[i] ~ dpois(mu[i]) } ## response \n",
      sep = "",
      file = file,
      append = TRUE
    )
    if (use.weights) warning("weights ignored")
    use.weights <- FALSE
  } else if (family$family == "binomial") {
    # scale <- FALSE
    cat(
      resp,
      "[i] ~ dbin(mu[i],w[i]) } ## response \n",
      sep = "",
      file = file,
      append = TRUE
    )
    use.weights <- TRUE
  } else if (family$family == "Gamma") {
    if (use.weights)
      cat(
        resp,
        "[i] ~ dgamma(r*w[i],r*w[i]/mu[i]) } ## response \n",
        sep = "",
        file = file,
        append = TRUE
      ) else
      cat(
        resp,
        "[i] ~ dgamma(r,r/mu[i]) } ## response \n",
        sep = "",
        file = file,
        append = TRUE
      )
    cat(
      "  r ~ dgamma(.05,.005) ## scale parameter prior \n",
      file = file,
      append = TRUE
    )
    cat(
      "  scale <- 1/r ## convert r to standard GLM scale\n",
      file = file,
      append = TRUE
    )
  } else stop("family not implemented yet")
  use.weights
}
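
# Illustrative sketch only (not run): writing the likelihood block for a
# Poisson response with a log link and no offset or weights.
# f <- tempfile(fileext = '.txt')
# cat('model {\n', file = f)
# write_jagslp('y', family = poisson(), file = f, use.weights = FALSE, offset = FALSE)
# cat('}', file = f, append = TRUE)
# The resulting file then contains lines like:
#   eta <- X %*% b ## linear predictor
#   for (i in 1:n) { mu[i] <- exp(eta[i]) } ## expected response
#   for (i in 1:n) { y[i] ~ dpois(mu[i]) } ## response
# and the function returns FALSE because weights are ignored for Poisson.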