R/hhh4.R

Defines functions fitHHH d2Sigma3 dSigma3 d2Sigma2 dSigma2 d2Sigma1 dSigma1 marFisher marScore marLogLik getSigma getSigmaInv getSigmaiInv getSigmai sqrtOf1pr2 penFisher penScore penLogLik meanHHH splitParams checkFormula ri fe isInModel

Documented in fe meanHHH ri

################################################################################
### Endemic-epidemic modelling for univariate or multivariate
### time series of infectious disease counts (data class "sts")
###
### Copyright (C) 2010-2012 Michaela Paul, 2012-2016,2019-2021 Sebastian Meyer
###
### This file is part of the R package "surveillance",
### free software under the terms of the GNU General Public License, version 2,
### a copy of which is available at https://www.R-project.org/Licenses/.
###
### $Revision: 2805 $
### $Date: 2022-02-09 15:29:47 +0100 (Wed, 09. Feb 2022) $
################################################################################

## Error message issued in loglik, score and fisher functions upon NA parameters
ADVICEONERROR <- "\n  Try different starting values, more iterations, or another optimizer.\n"


### Main function to be called by the user

hhh4 <- function (stsObj, control = list(
    ar = list(f = ~ -1,        # a formula "exp(x'lamba)*y_t-lag" (ToDo: matrix)
              offset = 1,      # multiplicative offset
              lag = 1),        # autoregression on y_i,t-lag
    ne = list(f = ~ -1,        # a formula "exp(x'phi) * sum_j w_ji * y_j,t-lag"
              offset = 1,      # multiplicative offset
              lag = 1,         # regression on y_j,t-lag
              weights = neighbourhood(stsObj) == 1,  # weights w_ji
              scale = NULL,    # such that w_ji = scale * weights
              normalize = FALSE), # w_ji -> w_ji / rowSums(w_ji), after scaling
    end = list(f = ~ 1,        # a formula "exp(x'nu) * n_it"
               offset = 1),    # optional multiplicative offset e_it
    family = c("Poisson", "NegBin1", "NegBinM"), # or a factor of length nUnit
    subset = 2:nrow(stsObj),   # epidemic components require Y_{t-lag}
    optimizer = list(stop = list(tol = 1e-5, niter = 100), # control arguments
                     regression = list(method = "nlminb"), # for optimization
                     variance = list(method = "nlminb")),  # <- or "Nelder-Mead"
    verbose = FALSE,           # level of reporting during optimization
    start = list(fixed = NULL, # list of start values, replacing initial
                 random = NULL,  # values from fe() and ri() in 'f'ormulae
                 sd.corr = NULL),
    data = list(t = stsObj@epoch - min(stsObj@epoch)), # named list of covariates
    keep.terms = FALSE  # whether to keep interpretControl(control, stsObj)
    ), check.analyticals = FALSE)
{
  ptm <- proc.time()

  ## Convert old disProg class to new sts class
  if (inherits(stsObj, "disProg")) {
      stsObj <- disProg2sts(stsObj)
  } else {
      stopifnot(inherits(stsObj, "sts"))
  }

  ## check control and set default values (for missing arguments)
  control <- setControl(control, stsObj)

  ## get model terms
  model <- interpretControl(control, stsObj)
  dimFixedEffects <- model$nFE + model$nd + model$nOverdisp
  dimRandomEffects <- model$nRE

  ## starting values
  #* -> better default values possible
  theta.start <- model$initialTheta
  Sigma.start <- model$initialSigma

  ## check if initial values are valid
  ## CAVE: there might be NA's in mu if there are missing values in Y
  mu <- meanHHH(theta.start, model, total.only=TRUE)
  if(any(mu==0, na.rm=TRUE) || any(is.infinite(mu)))
    stop("some mean is degenerate (0 or Inf) at initial values")

  ## check score vector and fisher information at starting values
  check.analyticals <- if (isTRUE(check.analyticals)) {
      if (length(theta.start) > 50) "maxLik" else "numDeriv"
  } else if (is.character(check.analyticals)) {
      match.arg(check.analyticals, c("numDeriv", "maxLik"), several.ok=TRUE)
  } else NULL
  if (length(check.analyticals) > 0L) {
      resCheck <- checkAnalyticals(model, theta.start, Sigma.start,
                                   methods=check.analyticals)
      return(resCheck)
  }

  ## maximize loglikelihood (penalized and marginal)
  myoptim <- fitHHH(theta=theta.start,sd.corr=Sigma.start, model=model,
                    cntrl.stop       = control$optimizer$stop,
                    cntrl.regression = control$optimizer$regression,
                    cntrl.variance   = control$optimizer$variance,
                    verbose=control$verbose)

  ## extract parameter estimates
  convergence <- myoptim$convergence == 0
  thetahat <- myoptim$theta
  if (dimRandomEffects>0) {
      Sigma.orig <- myoptim$sd.corr
      Sigma.trans <- getSigmai(head(Sigma.orig,model$nVar),
                               tail(Sigma.orig,model$nCorr),
                               model$nVar)
      dimnames(Sigma.trans) <-
          rep.int(list(sub("^sd\\.", "",
                           names(Sigma.orig)[seq_len(model$nVar)])), 2L)
  } else {
      Sigma.orig <- Sigma.trans <- NULL
  }

  ## compute covariance matrices of regression and variance parameters
  cov <- try(solve(myoptim$fisher), silent=TRUE)
  Sigma.cov <- if(dimRandomEffects>0) try(solve(myoptim$fisherVar), silent=TRUE)

  ## check for degenerate fisher info
  if(inherits(cov, "try-error")){ # fisher info is singular
    if (control$verbose)
        cat("WARNING: Final Fisher information matrix is singular!\n")
    convergence <- FALSE
  } else if(any(!is.finite(diag(cov))) || any(diag(cov)<0)){
    if (control$verbose)
        cat("WARNING: non-finite or negative covariance of regression parameters!\n")
    convergence <- FALSE
  }
  if (!convergence) {
      if (control$verbose) {
          cat("Penalized loglikelihood =", myoptim$loglik, "\n")
          thetastring <- paste(round(thetahat,2), collapse=", ")
          thetastring <- strwrap(thetastring, exdent=10, prefix="\n", initial="")
          cat("theta = (", thetastring, ")\n")
      }
      warning("Results are not reliable!",
              if (any(splitParams(thetahat, model)$overdisp > 10)) { # FALSE for Poisson
                  "\n  Overdispersion parameter close to zero; maybe try a Poisson model.\n"
              } else ADVICEONERROR)
  }

  ## gather results in a list -> "hhh4" object
  result <- list(coefficients=thetahat,
                 se=if (convergence) sqrt(diag(cov)), cov=cov,
                 Sigma=Sigma.trans,     # estimated covariance matrix of ri's
                 Sigma.orig=Sigma.orig, # variance parameters on original scale
                 Sigma.cov=Sigma.cov,   # covariance matrix of Sigma.orig
                 call=match.call(),
                 dim=c(fixed=dimFixedEffects,random=dimRandomEffects),
                 loglikelihood=myoptim$loglik, margll=myoptim$margll,
                 convergence=convergence,
                 fitted.values=meanHHH(thetahat, model, total.only=TRUE),
                 control=control,
                 terms=if(control$keep.terms) model else NULL,
                 stsObj=stsObj,
                 lags=sapply(control[c("ar","ne")], function (comp)
                             if (comp$inModel) comp$lag else NA_integer_),
                 nObs=sum(!model$isNA[control$subset,]),
                 nTime=length(model$subset), nUnit=ncol(stsObj),
                 ## CAVE: nTime is not nrow(stsObj) as usual!
                 runtime=proc.time()-ptm)
  if (!convergence) {
      ## add (singular) Fisher information for further investigation
      result[c("fisher","fisherVar")] <- myoptim[c("fisher","fisherVar")]
  }
  class(result) <- "hhh4"
  return(result)
}


## set default values for model specifications in control
setControl <- function (control, stsObj)
{
  stopifnot(is.list(control))
  nTime <- nrow(stsObj)
  nUnit <- ncol(stsObj)
  if(nTime <= 2) stop("too few observations")

  ## arguments in 'control' override any corresponding default arguments
  defaultControl <- eval(formals(hhh4)$control)
  environment(defaultControl$ar$f) <- environment(defaultControl$ne$f) <-
      environment(defaultControl$end$f) <- .GlobalEnv
  control <- modifyList(defaultControl, control)

  ## check that component specifications are list objects
  for (comp in c("ar", "ne", "end")) {
      if(!is.list(control[[comp]])) stop("'control$", comp, "' must be a list")
  }

  ## check lags in "ar" and "ne" components
  for (comp in c("ar", "ne")) {
      if (!isScalar(control[[comp]]$lag) || control[[comp]]$lag < (comp=="ar"))
          stop("'control$", comp, "$lag' must be a ",
               if (comp=="ar") "positive" else "non-negative", " integer")
      control[[comp]]$lag <- as.integer(control[[comp]]$lag)
  }


  ### check AutoRegressive component

  if (control$ar$isMatrix <- is.matrix(control$ar$f)) {
      ## this form is not implemented -> will stop() in interpretControl()
      if (any(dim(control$ar$f) != nUnit))
          stop("'control$ar$f' must be a square matrix of size ", nUnit)
      if (is.null(control$ar$weights)) { # use identity matrix
          control$ar$weights <- diag(nrow=nUnit)
      } else if (!is.matrix(control$ar$weights) ||
                 any(dim(control$ar$weights) != nUnit)) {
          stop("'control$ar$weights' must be a square matrix of size ", nUnit)
      }
      control$ar$inModel <- TRUE
  } else if (inherits(control$ar$f, "formula")) {
      if (!is.null(control$ar$weights)) {
          warning("argument 'control$ar$weights' is not used")
          control$ar$weights <- NULL
      }
      # check if formula is valid
      control$ar$inModel <- isInModel(control$ar$f)
  } else {
      stop("'control$ar$f' must be either a formula or a matrix")
  }


  ### check NEighbourhood component

  if (!inherits(control$ne$f, "formula"))
      stop("'control$ne$f' must be a formula")
  control$ne$inModel <- isInModel(control$ne$f)

  if (control$ne$inModel) {
      if (nUnit == 1)
          warning("\"ne\" component requires a multivariate 'stsObj'")
      ## if ar$f is a matrix it includes neighbouring units => no "ne" component
      if (control$ar$isMatrix)
          stop("there must not be an extra \"ne\" component ",
               "if 'control$ar$f' is a matrix")
      ## check ne$weights specification
      checkWeights(control$ne$weights, nUnit, nTime,
                   neighbourhood(stsObj), control$data,
                   check0diag = control$ar$inModel)
      ## check optional scaling of weights
      if (!is.null(control$ne$scale)) {
          stopifnot(is.numeric(control$ne$scale))
          if (is.vector(control$ne$scale)) {
              stopifnot(length(control$ne$scale) == 1L ||
                            length(control$ne$scale) %% nUnit == 0,
                        !is.na(control$ne$scale))
          } else {
              checkWeightsArray(control$ne$scale, nUnit, nTime)
          }
      }
  } else {
      control$ne[c("weights", "scale", "normalize")] <- list(NULL, NULL, FALSE)
  }


  ### check ENDemic component

  if (!inherits(control$end$f, "formula"))
      stop("'control$end$f' must be a formula")
  control$end$inModel <- isInModel(control$end$f)


  ### check offsets

  for (comp in c("ar", "ne", "end")) {
      if (is.matrix(control[[comp]]$offset) && is.numeric(control[[comp]]$offset)){
          if (!identical(dim(control[[comp]]$offset), dim(stsObj)))
              stop("'control$",comp,"$offset' must be a numeric matrix of size ",
                   nTime, "x", nUnit)
          if (any(is.na(control[[comp]]$offset)))
              stop("'control$",comp,"$offset' must not contain NA values")
      } else if (!identical(as.numeric(control[[comp]]$offset), 1)) {
          stop("'control$",comp,"$offset' must either be 1 or a numeric ",
               nTime, "x", nUnit, " matrix")
      }
  }


  ### stop if no component is included in the model

  if (length(comps <- componentsHHH4(list(control=control))) == 0L)
      stop("none of the components 'ar', 'ne', 'end' is included in the model")


  ### check remaining components of the control list

  if (is.factor(control$family)) {
      stopifnot(length(control$family) == nUnit)
      ## guard against misuse as family = factor("Poisson"), e.g., if taken
      ## from a data.frame of control options with "stringsAsFactors"
      if (nUnit == 1 && as.character(control$family) %in% defaultControl$family) {
          control$family <- as.character(control$family)
          warning("'family = factor(\"", control$family, "\")' is interpreted ",
                  "as 'family = \"", control$family, "\"'")
      } else {
          control$family <- droplevels(control$family)
          names(control$family) <- colnames(stsObj)
      }
  } else {
      control$family <- match.arg(control$family, defaultControl$family)
  }

  if (!is.vector(control$subset, mode="numeric") ||
      !all(control$subset %in% seq_len(nTime)))
      stop("'control$subset' must be %in% 1:", nTime)
  lags <- c(ar = control$ar$lag, ne = control$ne$lag)
  maxlag <- suppressWarnings(max(lags[names(lags) %in% comps])) # could be -Inf
  if (control$subset[1L] <= maxlag) {
      warning("'control$subset' should be > ", maxlag, " due to epidemic lags")
  }

  if (!is.list(control$optimizer) ||
      any(! sapply(c("stop", "regression", "variance"),
                   function(x) is.list(control$optimizer[[x]]))))
      stop("'control$optimizer' must be a list of lists")

  control$verbose <- as.integer(control$verbose)
  if (length(control$verbose) != 1L || control$verbose < 0)
      stop("'control$verbose' must be a logical or non-negative numeric value")

  stopifnot(is.list(control$start))
  control$start <- local({
      defaultControl$start[] <- control$start[names(defaultControl$start)]
      defaultControl$start
  })
  if (!all(vapply(X = control$start,
                  FUN = function(x) is.null(x) || is.vector(x, mode="numeric"),
                  FUN.VALUE = TRUE, USE.NAMES = FALSE)))
      stop("'control$start' must be a list of numeric start values")

  stopifnot(length(control$keep.terms) == 1L, is.logical(control$keep.terms))

  ## Done
  return(control)
}


# check whether or not one of the three components is included in the model
isInModel <- function(formula, name=deparse(substitute(formula)))
{
  term <- terms.formula(formula)
  if(attr(term,"response") > 0) stop(name, " cannot contain a response")
  attr(term, "intercept") + length(attr(term, "term.labels")) > 0
}

# used to incorporate covariates and unit-specific effects
fe <- function(x,          # covariate
               unitSpecific = FALSE, # TRUE means which = rep.int(TRUE, nUnits)
               which=NULL, # NULL = overall, vector with booleans = unit-specific
               initial=NULL) # vector of inital values for parameters
{
  stsObj <- get("stsObj", envir=parent.frame(1), inherits=TRUE) #checkFormula()
  nTime <- nrow(stsObj)
  nUnits <- ncol(stsObj)

  if(!is.numeric(x)){
    stop("Covariate \'",deparse(substitute(x)),"\' is not numeric\n")
  }
  lengthX <- length(x)
  if(lengthX == 1){
    terms <- matrix(x, nTime, nUnits, byrow=FALSE)
    mult <- "*"
  } else if(lengthX == nTime){
    terms <- matrix(x, nTime, nUnits, byrow=FALSE)
    mult <- "*"
  } else if(lengthX == nTime*nUnits){
    if(!is.matrix(x)){
     stop("Covariate \'",deparse(substitute(x)),"\' is not a matrix\n")
    }
    # check dimensions of covariate
    if((ncol(x) != nUnits) | (nrow(x) != nTime)){
     stop("Dimension of covariate \'",deparse(substitute(x)),"\' is not suitably specified\n")
    }
    terms <- x
    mult <- "*"
  } else {
    stop("Covariate \'",deparse(substitute(x)),"\' is not suitably specified\n")
  }

  intercept <- all(terms==1)

  # overall or unit-specific effect?
  unitSpecific <- unitSpecific || !is.null(which)
  if (unitSpecific) {
      if (is.null(which)) {
          which <- rep.int(TRUE, nUnits)
      } else {
          stopifnot(is.vector(which, mode="logical"), length(which) == nUnits)
      }

      terms[,!which] <- 0
  }

  # get dimension of parameter
  dim.fe <- if (unitSpecific) sum(which) else 1

  # check length of initial values + set default values
  if (is.null(initial)) {
    initial <- rep.int(0,dim.fe)
  } else if (length(initial) != dim.fe) {
    stop("initial values for '",deparse(substitute(x)),"' must be of length ",dim.fe)
  }

  name <- deparse(substitute(x))
  if (unitSpecific)
      name <- paste(name, colnames(stsObj)[which], sep=".")

  result <- list(terms=terms,
                name=name,
                Z.intercept=NULL,
                which=which,
                dim.fe=dim.fe,
                initial.fe=initial,
                dim.re=0,
                dim.var=0,
                initial.var=NULL,
                initial.re=NULL,
                intercept=intercept,
                unitSpecific=unitSpecific,
                random=FALSE,
                corr=FALSE,
                mult=mult
                )
  return(result)
}

# random intercepts
ri <- function(type=c("iid","car"),
               corr=c("none","all"),
               initial.fe=0,
               initial.var=-.5,
               initial.re=NULL)
{
  stsObj <- get("stsObj", envir=parent.frame(1), inherits=TRUE) #checkFormula()
  if (ncol(stsObj) == 1)
      stop("random intercepts require a multivariate 'stsObj'")
  type <- match.arg(type)
  corr <- match.arg(corr)
  corr <- switch(corr,
                  "none"=FALSE,
                  "all"=TRUE)

  if(type=="iid"){
    Z <- 1
    dim.re <- ncol(stsObj)
    mult <- "*"
  } else if(type=="car"){ # construct penalty matrix K
    K <- neighbourhood(stsObj)
    checkNeighbourhood(K)
    K <- K == 1                         # indicate first-order neighbours
    ne <- colSums(K)                    # number of first-order neighbours
    K <- -1*K
    diag(K) <- ne

    dimK <- nrow(K)

    # check rank of the nhood, only connected neighbourhoods are allowed
    if(qr(K)$rank != dimK-1) stop("neighbourhood matrix contains islands")
    # singular-value decomposition of K
    svdK <- svd(K)
    # just use the positive eigenvalues  of K in descending order
    # for a the factorisation of the penalty matrix K = LL'
    L <- svdK$u[,-dimK] %*% diag(sqrt(svdK$d[-dimK]))            #* only use non-zero eigenvalues

    # Z = L(L'L)^-1, which can't be simplified to Z=(L')^-1 as L is not square
    Z <- L %*% solve(t(L)%*%L)

    dim.re <- dimK - 1L
    mult <- "%*%"
  }

  # check length of initial values + set default values
  stopifnot(length(initial.fe) == 1, length(initial.var) == 1)
  if (is.null(initial.re)) {
    initial.re <- rnorm(dim.re,0,sd=sqrt(0.001))
  } else if (length(initial.re) != dim.re) {
    stop("'initial.re' must be of length ", dim.re)
  }

  result <- list(terms=1,
                name=paste("ri(",type,")",sep=""),
                Z.intercept=Z,
                which=NULL,
                dim.fe=1,
                initial.fe=initial.fe,
                dim.re=dim.re,
                dim.var=1,
                initial.var=initial.var,
                initial.re=initial.re,
                intercept=TRUE,
                unitSpecific=FALSE,
                random=TRUE,
                corr=corr,
                mult=mult
                )
  return(result)
}

### check specification of formula
## f: one of the component formulae (ar$f, ne$f, or end$f)
## component: 1, 2, or 3, corresponding to the ar/ne/end component, respectively
## data: the data-argument of hhh4()
## stsObj: the stsObj is not used directly in checkFormula, but in fe() and ri()
checkFormula <- function(f, component, data, stsObj)
{
  term <- terms.formula(f, specials=c("fe","ri"))

  # check if there is an overall intercept
  intercept.all <- attr(term, "intercept") == 1

  # list of variables in the component
  vars <- as.list(attr(term,"variables"))[-1] # first element is "list"
  nVars <- length(vars)

  # begin with intercept
  res <- if (intercept.all) {
      c(fe(1), list(offsetComp=component))
  } else {
      if (nVars==0)
          stop("formula ", deparse(substitute(f)), " contains no variables")
      NULL
  }

  # find out fixed effects without "fe()" specification
  # (only if there are variables in addition to an intercept "1")
  fe.raw <- setdiff(seq_len(nVars), unlist(attr(term, "specials")))

  # evaluate covariates
  for(i in fe.raw)
      res <- cbind(res, c(
          eval(substitute(fe(x), list(x=vars[[i]])), envir=data),
          list(offsetComp=component)
          ))

  # fixed effects
  for(i in attr(term, "specials")$fe)
      res <- cbind(res, c(
          eval(vars[[i]], envir=data),
          list(offsetComp=component)
          ))

  res <- cbind(res, deparse.level=0) # ensure res has matrix dimensions

  # random intercepts
  RI <- attr(term, "specials")$ri
  if (sum(unlist(res["intercept",])) + length(RI) > 1)
      stop("There can only be one intercept in the formula ",
           deparse(substitute(f)))
  for(i in RI)
      res <- cbind(res, c(
          eval(vars[[i]], envir=data),
          list(offsetComp=component)
          ))

  return(res)
}


## Create function (pars, type = "response") which
## returns the weighted sum of time-lagged counts of neighbours
## (or its derivates, if type = "gradient" or type = "hessian").
## For type="reponse", this is a nTime x nUnits matrix (like Y),
## otherwise a list of such matrices,
## which for the gradient has length length(pars) and
## length(pars)*(length(pars)+1)/2 for the hessian.
## If neweights=NULL (i.e. no NE component in model), the result is always 0.
## offset is a multiplicative offset for \phi_{it}, e.g., the population.
## scale is a nUnit-vector or a nUnit x nUnit matrix scaling neweights.
neOffsetFUN <- function (Y, neweights, scale, normalize,
                         nbmat, data, lag = 1, offset = 1)
{
    if (is.null(neweights)) { # no neighbourhood component
        as.function(alist(...=, 0), envir=.GlobalEnv)
        ## dimY <- dim(Y)
        ## as.function(c(alist(...=),
        ##               substitute(matrix(0, r, c), list(r=dimY[1], c=dimY[2]))),
        ##             envir=.GlobalEnv)
    } else if (is.list(neweights)) { # parametric weights
        wFUN <- scaleNEweights.list(neweights, scale, normalize)
        function (pars, type = "response") {
            name <- switch(type, response="w", gradient="dw", hessian="d2w")
            weights <- wFUN[[name]](pars, nbmat, data)
            ## gradient and hessian are lists if length(pars$d) > 1L
            ## but can be single matrices/arrays if == 1 => _c_onditional lapply
            res <- clapply(weights, function (W)
                           offset * weightedSumNE(Y, W, lag))
            ##<- clapply always returns a list (possibly of length 1)
            if (type=="response") res[[1L]] else res
        }
    } else { # fixed (known) weight structure (0-length pars)
        weights <- scaleNEweights.default(neweights, scale, normalize)
        env <- new.env(hash = FALSE, parent = emptyenv())  # small -> no hash
        env$initoffset <- offset * weightedSumNE(Y, weights, lag)
        as.function(c(alist(...=), quote(initoffset)), envir=env)
    }
}


# interpret and check the specifications of each component
# control must contain all arguments, i.e. setControl was used
interpretControl <- function (control, stsObj)
{
  nTime <- nrow(stsObj)
  nUnits <- ncol(stsObj)

  Y <- observed(stsObj)


  ##########################################################################
  ##  get the model specifications for each of the three components
  ##########################################################################
  ar <- control$ar
  ne <- control$ne
  end <- control$end

  ## for backwards compatibility with surveillance < 1.8-0, where the ar and ne
  ## components of the control object did not have an offset
  if (is.null(ar$offset)) ar$offset <- 1
  if (is.null(ne$offset)) ne$offset <- 1
  ## for backward compatibility with surveillance < 1.9-0
  if (is.null(ne$normalize)) ne$normalize <- FALSE

  ## create list of offsets of the three components
  Ym1 <- rbind(matrix(NA_integer_, ar$lag, nUnits), head(Y, nTime-ar$lag))
  Ym1.ne <- neOffsetFUN(Y, ne$weights, ne$scale, ne$normalize,
                        neighbourhood(stsObj), control$data, ne$lag, ne$offset)
  offsets <- list(ar=ar$offset*Ym1, ne=Ym1.ne, end=end$offset)
  ## -> offset$ne is a function of the parameter vector 'd', which returns a
  ##    nTime x nUnits matrix -- or 0 (scalar) if there is no NE component
  ## -> offset$end might just be 1 (scalar)

  ## Initial parameter vector 'd' of the neighbourhood weight function
  initial.d <- if (is.list(ne$weights)) ne$weights$initial else numeric(0L)
  dim.d <- length(initial.d)
  names.d <- if (dim.d == 0L) character(0L) else {
      paste0("neweights.", if (is.null(names(initial.d))) {
          if (dim.d==1L) "d" else paste0("d", seq_len(dim.d))
      } else names(initial.d))
  }

  ## determine all NA's
  isNA <- is.na(Y)
  if (ar$inModel)
      isNA <- isNA | is.na(offsets[[1L]])
  if (ne$inModel)
      isNA <- isNA | is.na(offsets[[2L]](initial.d))

  ## get terms for all components
  all.term <- NULL
  if(ar$isMatrix) stop("matrix-form of 'control$ar$f' is not implemented")
  if(ar$inModel) # ar$f is a formula
      all.term <- cbind(all.term, checkFormula(ar$f, 1, control$data, stsObj))
  if(ne$inModel)
      all.term <- cbind(all.term, checkFormula(ne$f, 2, control$data, stsObj))
  if(end$inModel)
      all.term <- cbind(all.term, checkFormula(end$f,3, control$data, stsObj))

  dim.fe <- sum(unlist(all.term["dim.fe",]))
  dim.re.group <- unlist(all.term["dim.re",], use.names=FALSE)
  dim.re <- sum(dim.re.group)
  dim.var <- sum(unlist(all.term["dim.var",]))
  dim.corr <- sum(unlist(all.term["corr",]))

  if(dim.corr>0){
    if(dim.var!=dim.corr) stop("Use corr=\'all\' or corr=\'none\' ")
    dim.corr <- switch(dim.corr,0,1,3)
  }

  # the vector with dims of the random effects must be equal if they are correlated
  if(length(unique(dim.re.group[dim.re.group>0]))!=1 & dim.corr>0){
    stop("Correlated effects must have same penalty")
  }

  n <- c("ar","ne","end")[unlist(all.term["offsetComp",])]
  names.fe <- names.var <- names.re <- character(0L)
  for(i in seq_along(n)){
    .name <- all.term["name",i][[1]]
    names.fe <- c(names.fe, paste(n[i], .name, sep="."))
    if(all.term["random",i][[1]]) {
        names.var <- c(names.var, paste("sd", n[i], .name, sep="."))
        names.re <- c(names.re, paste(n[i], .name, if (.name == "ri(iid)") {
            colnames(stsObj)
        } else {
            seq_len(all.term["dim.re",i][[1]])
        }, sep = "."))
    }
  }
  index.fe <- rep(1:ncol(all.term), times=unlist(all.term["dim.fe",]))
  index.re <- rep(1:ncol(all.term), times=unlist(all.term["dim.re",]))

  # poisson or negbin model
  if(identical(control$family, "Poisson")){
    ddistr <- function(y,mu,size){
        dpois(y, lambda=mu, log=TRUE)
    }
    dim.overdisp <- 0L
    index.overdisp <- names.overdisp <- NULL
  } else { # NegBin
    ddistr <- function(y,mu,size){
        dnbinom(y, mu=mu, size=size, log=TRUE)
    }
    ## version that can handle size = Inf (i.e. the Poisson special case):
    ## ddistr <- function (y,mu,size) {
    ##     poisidx <- is.infinite(size)
    ##     res <- y
    ##     res[poisidx] <- dpois(y[poisidx], lambda=mu[poisidx], log=TRUE)
    ##     res[!poisidx] <- dnbinom(y[!poisidx], mu=mu[!poisidx],
    ##                              size=size[!poisidx], log=TRUE)
    ##     res
    ## }
    index.overdisp <- if (is.factor(control$family)) {
        control$family
    } else if (control$family == "NegBinM") {
        factor(colnames(stsObj), levels = colnames(stsObj))
        ## do not sort levels (for consistency with unitSpecific effects)
    } else { # "NegBin1"
        factor(character(nUnits))
    }
    names(index.overdisp) <- colnames(stsObj)
    dim.overdisp <- nlevels(index.overdisp)
    names.overdisp <- if (dim.overdisp == 1L) {
        "-log(overdisp)"
    } else {
        paste0("-log(", paste("overdisp", levels(index.overdisp), sep = "."), ")")
    }
  }
  environment(ddistr) <- getNamespace("stats")  # function is self-contained

  # parameter start values from fe() and ri() calls via checkFormula()
  initial <- list(
      fixed = c(unlist(all.term["initial.fe",]),
                initial.d,
                rep.int(2, dim.overdisp)),
      random = as.numeric(unlist(all.term["initial.re",])), # NULL -> numeric(0)
      sd.corr = c(unlist(all.term["initial.var",]),
                  rep.int(0, dim.corr))
  )
  # set names of parameter vectors
  names(initial$fixed) <- c(names.fe, names.d, names.overdisp)
  names(initial$random) <- names.re
  names(initial$sd.corr) <- c(names.var, head(paste("corr",1:3,sep="."), dim.corr))

  # modify initial values according to the supplied 'start' values
  initial[] <- mapply(
      FUN = function (initial, start, name) {
          if (is.null(start))
              return(initial)
          if (is.null(names(initial)) || is.null(names(start))) {
              if (length(start) == length(initial)) {
                  initial[] <- start
              } else {
                  stop("initial values in 'control$start$", name,
                       "' must be of length ", length(initial))
              }
          } else {
              ## we match by name and silently ignore additional start values
              start <- start[names(start) %in% names(initial)]
              initial[names(start)] <- start
          }
          return(initial)
      },
      initial, control$start[names(initial)], names(initial),
      SIMPLIFY = FALSE, USE.NAMES = FALSE
  )

  # Done
  result <- list(response = Y,
                 terms = all.term,
                 nTime = nTime,
                 nUnits = nUnits,
                 nFE = dim.fe,
                 nd = dim.d,
                 nOverdisp = dim.overdisp,
                 nRE = dim.re,
                 rankRE = dim.re.group,
                 nVar = dim.var,
                 nCorr = dim.corr,
                 nSigma = dim.var+dim.corr,
                 nGroups = ncol(all.term),
                 namesFE = names.fe,
                 indexFE = index.fe,
                 indexRE = index.re,
                 initialTheta = c(initial$fixed, initial$random),
                 initialSigma = initial$sd.corr,
                 offset = offsets,
                 family = ddistr,
                 indexPsi = index.overdisp,
                 subset = control$subset,
                 isNA = isNA
                 )
  return(result)
}


splitParams <- function(theta, model){
  fixed <- theta[seq_len(model$nFE)]
  d <- theta[model$nFE + seq_len(model$nd)]
  overdisp <- theta[model$nFE + model$nd + seq_len(model$nOverdisp)]
  random <- theta[seq.int(to=length(theta), length.out=model$nRE)]
  list(fixed=fixed, random=random, overdisp=overdisp, d=d)
}


### compute predictor

meanHHH <- function(theta, model, subset=model$subset, total.only=FALSE)
{
  ## unpack theta
  pars <- splitParams(theta, model)
  fixed <- pars$fixed
  random <- pars$random

  ## unpack model
  term <- model$terms
  offsets <- model$offset
  offsets[[2L]] <- offsets[[2L]](pars$d) # evaluate at current parameter value
  nGroups <- model$nGroups
  comp <- unlist(term["offsetComp",])
  idxFE <- model$indexFE
  idxRE <- model$indexRE

  toMatrix <- function (x, r=model$nTime, c=model$nUnits)
      matrix(x, r, c, byrow=TRUE)

  unitNames <- dimnames(model$response)[[2L]]
  setColnames <- if (is.null(unitNames)) identity else
    function(x) "dimnames<-"(x, list(NULL, unitNames))

  ## go through groups of parameters and compute predictor of each component,
  ## i.e. lambda_it, phi_it, nu_it, EXCLUDING the multiplicative offset terms,
  ## as well as the resulting component mean (=exppred * offset)
  computePartMean <- function (component)
  {
    pred <- nullMatrix <- toMatrix(0)

    if(!any(comp==component)) { # component not in model -> return 0-matrix
        zeroes <- setColnames(pred[subset,,drop=FALSE])
        return(list(exppred = zeroes, mean = zeroes))
    }

    for(i in seq_len(nGroups)[comp==component]){
      fe <- fixed[idxFE==i]
      if(term["unitSpecific",i][[1]]){
        fe <- nullMatrix
        which <- term["which",i][[1]]
        fe[,which] <- toMatrix(fixed[idxFE==i],c=sum(which))
      }
      if(term["random",i][[1]]){
        re <- random[idxRE==i]
        "%m%" <- get(term["mult",i][[1]])
        Z.re <- toMatrix(term["Z.intercept",i][[1]] %m% re)
      } else {
        Z.re <- 0
      }
      X <- term["terms",i][[1]]
      pred <- pred + X*fe + Z.re
    }

    exppred <- setColnames(exp(pred[subset,,drop=FALSE]))
    offset <- offsets[[component]]
    if (length(offset) > 1) offset <- offset[subset,,drop=FALSE]
    ##<- no subsetting if offset is scalar (time- and unit-independent)
    list(exppred = exppred, mean = exppred * offset)
  }

  ## compute component means
  ar <- computePartMean(1)
  ne <- computePartMean(2)
  end <- computePartMean(3)

  ## Done
  epidemic <- ar$mean + ne$mean
  endemic <- end$mean
  if (total.only) epidemic + endemic else
  list(mean=epidemic+endemic, epidemic=epidemic, endemic=endemic,
       epi.own=ar$mean, epi.neighbours=ne$mean,
       ar.exppred=ar$exppred, ne.exppred=ne$exppred, end.exppred=end$exppred)
}


### compute dispersion in dnbinom (mu, size) parametrization

sizeHHH <- function (theta, model, subset = model$subset)
{
    if (model$nOverdisp == 0L) # Poisson case
        return(NULL)

    ## extract dispersion in dnbinom() parametrization
    pars <- splitParams(theta, model)
    size <- exp(pars$overdisp)             # = 1/psi, pars$overdisp = -log(psi)

    ## return either a vector or a time x unit matrix of dispersion parameters
    if (is.null(subset)) {
        unname(size)  # no longer is "-log(overdisp)"
    } else {
        matrix(data = size[model$indexPsi],
               nrow = length(subset), ncol = model$nUnits, byrow = TRUE,
               dimnames = list(NULL, names(model$indexPsi)))
    }
}

## auxiliary function used in penScore and penFisher
## it sums colSums(x) within the groups defined by f (of length ncol(x))
## and returns these sums in the order of levels(f)
.colSumsGrouped <- function (x, f, na.rm = TRUE)
{
    nlev <- nlevels(f)
    if (nlev == 1L) { # all columns belong to the same group ("NegBin1")
        sum(x, na.rm = na.rm)
    } else {
        dimx <- dim(x)
        colsums <- .colSums(x, dimx[1L], dimx[2L], na.rm = na.rm)
        if (nlev == dimx[2L]) { # each column separately ("NegBinM" or factor)
            colsums[order(f)] # for NegBinM, order(f)==1:nlev, not in general
        } else { # sum colsums within groups
            unlist(lapply(
                X = split.default(colsums, f, drop = FALSE),
                FUN = sum
                ), recursive = FALSE, use.names = FALSE)
        }
    }
}


############################################


penLogLik <- function(theta, sd.corr, model, attributes=FALSE)
{
  if(anyNA(theta)) stop("NAs in regression parameters.", ADVICEONERROR)

  ## unpack model
  subset <- model$subset
  Y <- model$response[subset,,drop=FALSE]
  dimPsi <- model$nOverdisp
  dimRE <- model$nRE

  ## unpack random effects
  if (dimRE > 0) {
      pars <- splitParams(theta, model)
      randomEffects <- pars$random
      sd   <- head(sd.corr, model$nVar)
      corr <- tail(sd.corr, model$nCorr)
      dimBlock <- model$rankRE[model$rankRE>0]
      Sigma.inv <- getSigmaInv(sd, corr, model$nVar, dimBlock)
  }

  ############################################################

  ## evaluate dispersion
  psi <- sizeHHH(theta, model,
                 subset = if (dimPsi > 1L) subset) # else scalar or NULL

  #psi might be numerically equal to 0 or Inf in which cases dnbinom (in meanHHH)
  #would return NaN (with a warning). The case size=Inf rarely happens and
  #corresponds to a Poisson distribution. Currently this case is not handled
  #in order to have the usual non-degenerate case operate faster.
  #For size=0, log(dnbinom) equals -Inf for positive x or if (x=0 and mu=0), and
  #zero if x=0 and mu>0 and mu<Inf. Thus, if there is at least one case in Y
  #(x>0, which is always true), we have that sum(ll.units) = -Inf, hence:
  if (any(psi == 0)) return(-Inf)

  ## evaluate mean
  mu <- meanHHH(theta, model, total.only=TRUE)
  # if, numerically, mu=Inf, log(dnbinom) or log(dpois) both equal -Inf, hence:
  #if (any(is.infinite(mu))) return(-Inf)
  # however, since mu=Inf does not produce warnings below and this is a rare
  # case, it is faster to not include this conditional expression

  ## penalization term for random effects
  lpen <- if (dimRE==0) 0 else { # there are random effects
    ##-.5*(t(randomEffects)%*%Sigma.inv%*%randomEffects)
    ## the following implementation takes ~85% less computing time !
    -0.5 * c(crossprod(randomEffects, Sigma.inv) %*% randomEffects)
  }

  ## log-likelihood
  ll.units <- .colSums(model$family(Y,mu,psi),
                       length(subset), model$nUnits, na.rm=TRUE)

  ## penalized log-likelihood
  ll <- sum(ll.units) + lpen

  ## Done
  if (attributes) {
      attr(ll, "loglik") <- ll.units
      attr(ll, "logpen") <- lpen
  }
  ll
}


penScore <- function(theta, sd.corr, model)
{
  if(anyNA(theta)) stop("NAs in regression parameters.", ADVICEONERROR)

  ## unpack model
  subset <- model$subset
  Y <- model$response[subset,,drop=FALSE]
  isNA <- model$isNA[subset,,drop=FALSE]
  dimPsi <- model$nOverdisp
  dimRE <- model$nRE
  term <- model$terms
  nGroups <- model$nGroups
  dimd <- model$nd

  ## unpack parameters
  pars <- splitParams(theta, model)
  if (dimRE > 0) {
      randomEffects <- pars$random
      sd   <- head(sd.corr, model$nVar)
      corr <- tail(sd.corr, model$nCorr)
      dimBlock <- model$rankRE[model$rankRE>0]
      Sigma.inv <- getSigmaInv(sd, corr, model$nVar, dimBlock)
  }

  ## evaluate dispersion
  psi <- sizeHHH(theta, model,
                 subset = if (dimPsi > 1L) subset) # else scalar or NULL

  ## evaluate mean
  mu <- meanHHH(theta, model)
  meanTotal <- mu$mean

  ############################################################

  ## helper function for derivatives
  derivHHH.factor <- if(dimPsi > 0L){ # NegBin
      psiPlusMu <- psi + meanTotal    # also used below for calculation of grPsi
      psiYpsiMu <- (psi+Y) / psiPlusMu
      Y/meanTotal - psiYpsiMu
  } else { # Poisson
      Y/meanTotal - 1
  }
  derivHHH <- function (dmu) derivHHH.factor * dmu

  ## go through groups of parameters and compute the gradient of each component
  computeGrad <- function(mean.comp){

    grad.fe <- numeric(0L)
    grad.re <- numeric(0L)

    for(i in seq_len(nGroups)){
      comp <- term["offsetComp",i][[1]]
      Xit<- term["terms",i][[1]] # eiter 1 or a matrix with values
      if(is.matrix(Xit)){
        Xit <- Xit[subset,,drop=FALSE]
      }
      dTheta <- derivHHH(mean.comp[[comp]]*Xit)
      dTheta[isNA] <- 0   # dTheta must not contain NA's (set NA's to 0)

      if(term["unitSpecific",i][[1]]){
        which <- term["which",i][[1]]
        dimi <- sum(which)
        if(dimi < model$nUnits)
          dTheta <- dTheta[,which,drop=FALSE]
        dTheta <- .colSums(dTheta, length(subset), dimi)
        grad.fe <- c(grad.fe,dTheta)

      } else if(term["random",i][[1]]){
        Z <- term["Z.intercept",i][[1]]
        "%m%" <- get(term["mult",i][[1]])
        dRTheta <- .colSums(dTheta %m% Z, length(subset), term["dim.re",i][[1]])
        grad.re <- c(grad.re, dRTheta)
        grad.fe <- c(grad.fe, sum(dTheta))
      } else{
        grad.fe <- c(grad.fe, sum(dTheta))
      }
    }

    list(fe=grad.fe, re=grad.re)
  }

  gradients <- computeGrad(mu[c("epi.own","epi.neighbours","endemic")])

  ## gradient for parameter vector of the neighbourhood weights
  grd <- if (dimd > 0L) {
      dneOffset <- model$offset[[2L]](pars$d, type="gradient")
      ##<- this is always a list (of length dimd) of matrices
      onescore.d <- function (dneoff) {
          dmudd <- mu$ne.exppred * dneoff[subset,,drop=FALSE]
          grd.terms <- derivHHH(dmudd)
          sum(grd.terms, na.rm=TRUE)
      }
      unlist(clapply(dneOffset, onescore.d), recursive=FALSE, use.names=FALSE)
  } else numeric(0L)

  ## gradient for overdispersion parameter psi
  grPsi <- if(dimPsi > 0L){
      dPsiMat <- psi * (digamma(Y+psi) - digamma(psi) + log(psi) + 1
                        - log(psiPlusMu) - psiYpsiMu)
      .colSumsGrouped(dPsiMat, model$indexPsi)
  } else numeric(0L)

  ## add penalty to random effects gradient
  s.pen <- if(dimRE > 0) c(Sigma.inv %*% randomEffects) else numeric(0L)
  if(length(gradients$re) != length(s.pen))
      stop("oops... lengths of s(b) and Sigma.inv %*% b do not match")
  grRandom <- c(gradients$re - s.pen)

  ## Done
  res <- c(gradients$fe, grd, grPsi, grRandom)
  res
}


penFisher <- function(theta, sd.corr, model, attributes=FALSE)
{
  if(anyNA(theta)) stop("NAs in regression parameters.", ADVICEONERROR)

  ## unpack model
  subset <- model$subset
  Y <- model$response[subset,,drop=FALSE]
  isNA <- model$isNA[subset,,drop=FALSE]
  dimPsi <- model$nOverdisp
  dimRE <- model$nRE
  term <- model$terms
  nGroups <- model$nGroups
  dimd  <- model$nd
  dimFE <- model$nFE
  idxFE <- model$indexFE
  idxRE <- model$indexRE
  indexPsi <- model$indexPsi

  ## unpack parameters
  pars <- splitParams(theta, model)
  if (dimRE > 0) {
      randomEffects <- pars$random
      sd   <- head(sd.corr, model$nVar)
      corr <- tail(sd.corr, model$nCorr)
      dimBlock <- model$rankRE[model$rankRE>0]
      Sigma.inv <- getSigmaInv(sd, corr, model$nVar, dimBlock)
  }

  ## evaluate dispersion
  psi <- sizeHHH(theta, model,
                 subset = if (dimPsi > 1L) subset) # else scalar or NULL

  ## evaluate mean
  mu <- meanHHH(theta, model)
  meanTotal <- mu$mean

  ############################################################

  ## helper functions for derivatives:
  if (dimPsi > 0L) { # negbin
    psiPlusY <- psi + Y
    psiPlusMu <- psi + meanTotal
    psiPlusMu2 <- psiPlusMu^2
    psiYpsiMu <- psiPlusY / psiPlusMu
    psiYpsiMu2 <- psiPlusY / psiPlusMu2
    deriv2HHH.fac1 <- psiYpsiMu2 - Y / (meanTotal^2)
    deriv2HHH.fac2 <- Y / meanTotal - psiYpsiMu
    ## psi-related derivatives
    dThetadPsi.fac <- psi * (psiYpsiMu2 - 1/psiPlusMu)
    dThetadPsi <- function(dTheta){
        dThetadPsi.fac * dTheta
    }
    dPsiMat <- psi * (digamma(psiPlusY) - digamma(psi) + log(psi) + 1
                      - log(psiPlusMu) - psiYpsiMu)  # as in penScore()
    dPsidPsiMat <- psi^2 * (
        trigamma(psiPlusY) - trigamma(psi) + 1/psi - 1/psiPlusMu -
            (meanTotal-Y)/psiPlusMu2) + dPsiMat
  } else { # poisson
    deriv2HHH.fac1 <- -Y / (meanTotal^2)
    deriv2HHH.fac2 <- Y / meanTotal - 1
  }
  deriv2HHH <- function(dTheta_l, dTheta_k, dTheta_lk){
      dTheta_l * dTheta_k * deriv2HHH.fac1 + dTheta_lk * deriv2HHH.fac2
  }

  ## go through groups of parameters and compute the hessian of each component
  computeFisher <- function(mean.comp){
    # initialize hessian
    hessian.FE.FE <- matrix(0,dimFE,dimFE)
    hessian.FE.RE <- matrix(0,dimFE,dimRE)
    hessian.RE.RE <- matrix(0,dimRE,dimRE)

    hessian.FE.Psi <- matrix(0,dimFE,dimPsi)
    hessian.Psi.RE <- matrix(0,dimPsi,dimPsi+dimRE) # CAVE: contains PsiPsi and PsiRE

    hessian.FE.d <- matrix(0,dimFE,dimd)
    hessian.d.d <- matrix(0,dimd,dimd)
    hessian.d.Psi <- matrix(0,dimd,dimPsi)
    hessian.d.RE <- matrix(0,dimd,dimRE)

    ## derivatives wrt neighbourhood weight parameters d
    if (dimd > 0L) {
        phi.doff <- function (dneoff) {
            mu$ne.exppred * dneoff[subset,,drop=FALSE]
        }
        ## for type %in% c("gradient", "hessian"), model$offset[[2L]] always
        ## returns a list of matrices. It has length(pars$d) elements for the
        ## gradient and length(pars$d)*(length(pars$d)+1)/2 for the hessian.
        dneOffset <- model$offset[[2L]](pars$d, type="gradient")
        dmudd <- lapply(dneOffset, phi.doff)
        d2neOffset <- model$offset[[2L]](pars$d, type="hessian")
        d2mudddd <- lapply(d2neOffset, phi.doff)
        ## d l(theta,x) /dd dd (fill only upper triangle, BY ROW)
        ij <- 0L
        for (i in seq_len(dimd)) {
            for (j in i:dimd) {
                ij <- ij + 1L  #= dimd*(i-1) + j - (i-1)*i/2  # for j >= i
                ## d2mudddd contains upper triangle by row (=lowertri by column)
                d2ij <- deriv2HHH(dmudd[[i]], dmudd[[j]], d2mudddd[[ij]])
                hessian.d.d[i,j] <- sum(d2ij, na.rm=TRUE)
            }
        }
    }

    if (dimPsi > 0L) {
        ## d l(theta,x) /dpsi dpsi
        dPsidPsi <- .colSumsGrouped(dPsidPsiMat, indexPsi)
        hessian.Psi.RE[,seq_len(dimPsi)] <- if (dimPsi == 1L) {
            dPsidPsi
        } else {
            diag(dPsidPsi)
        }
        ## d l(theta) / dd dpsi
        for (i in seq_len(dimd)) {      # will not be run if dimd==0
            ## dPsi.i <- colSums(dThetadPsi(dmudd[[i]]),na.rm=TRUE)
            ## hessian.d.Psi[i,] <- if(dimPsi==1L) sum(dPsi.i) else dPsi.i[order(indexPsi)]
            hessian.d.Psi[i,] <- .colSumsGrouped(dThetadPsi(dmudd[[i]]), indexPsi)
        }
    }

    ##
    i.fixed <- function(){
      if(random.j){
        Z.j <- term["Z.intercept",j][[1]]
        "%mj%" <- get(term["mult",j][[1]])
        hessian.FE.RE[idxFE==i,idxRE==j] <<- colSums(didj %mj% Z.j)
        ##<- didj must not contain NA's (all NA's set to 0)
        dIJ <- sum(didj,na.rm=TRUE)     # fixed on 24/09/2012
      } else if(unitSpecific.j){
        dIJ <- colSums(didj,na.rm=TRUE)[ which.j ]
      } else {
        dIJ <- sum(didj,na.rm=TRUE)
      }
      hessian.FE.FE[idxFE==i,idxFE==j] <<- dIJ
    }
    ##
    i.unit <- function(){
      if(random.j){
        Z.j <- term["Z.intercept",j][[1]]
        "%mj%" <- get(term["mult",j][[1]])
        dIJ <- colSums(didj %mj% Z.j)   # didj must not contain NA's (all NA's set to 0)
        hessian.FE.RE[idxFE==i,idxRE==j] <<- diag(dIJ)[ which.i, ] # FIXME: does not work if type="car"
        dIJ <- dIJ[ which.i ]           # added which.i subsetting in r432
      } else if(unitSpecific.j){
        dIJ <- diag(colSums(didj))[ which.i, which.j ]
      } else {
        dIJ <- colSums(didj)[ which.i ]
      }
      hessian.FE.FE[idxFE==i,idxFE==j] <<- dIJ
    }
    ##
    i.random <- function(){
      if(random.j){
        Z.j <- term["Z.intercept",j][[1]]
        "%mj%" <- get(term["mult",j][[1]])
        hessian.FE.RE[idxFE==i,idxRE==j] <<- colSums(didj %mj% Z.j)
        if (j != i)  # otherwise redundant (duplicate)
          hessian.FE.RE[idxFE==j,idxRE==i] <<- colSums(didj %m% Z.i)

        if(length(Z.j)==1 & length(Z.i)==1){ # both iid
          Z <- Z.i*Z.j
          hessian.RE.RE[which(idxRE==i),idxRE==j] <<- diag(colSums( didj %m% Z))
        } else if(length(Z.j)==1 & length(Z.i)>1){         #*
          Z.j <- diag(nrow=model$nUnits)
          for(k in seq_len(ncol(Z.j))){
            Z <- Z.i*Z.j[,k]
            hessian.RE.RE[idxRE==i,which(idxRE==j)[k]] <<- colSums( didj %m% Z)
          }
        } else if(length(Z.j)>1 & length(Z.i)==1){         #*
          Z.i <- diag(nrow=model$nUnits)
          for(k in seq_len(ncol(Z.i))){
            Z <- Z.i[,k]*Z.j
            hessian.RE.RE[which(idxRE==i)[k],idxRE==j] <<- colSums( didj %mj% Z)
          }
        } else { # both CAR
          for(k in seq_len(ncol(Z.j))){
            Z <- Z.i*Z.j[,k]
            hessian.RE.RE[which(idxRE==i)[k],idxRE==j] <<- colSums( didj %m% Z)
          }
        }
        dIJ <- sum(didj)
      } else if(unitSpecific.j){
        dIJ <- colSums(didj %m% Z.i)
        hessian.FE.RE[idxFE==j,idxRE==i] <<- diag(dIJ)[ which.j, ]
        dIJ <- dIJ[ which.j ]
      } else {
        hessian.FE.RE[idxFE==j,idxRE==i] <<- colSums(didj %m% Z.i)
        dIJ <- sum(didj)
      }
      hessian.FE.FE[idxFE==i,idxFE==j] <<- dIJ
    }
  ##----------------------------------------------

    for(i in seq_len(nGroups)){ #go through rows of hessian
      # parameter group belongs to which components
      comp.i <- term["offsetComp",i][[1]]
      # get covariate value
      Xit <- term["terms",i][[1]] # eiter 1 or a matrix with values
      if(is.matrix(Xit)){
        Xit <- Xit[subset,,drop=FALSE]
      }
      m.Xit <- mean.comp[[comp.i]] * Xit

      random.i <- term["random",i][[1]]
      unitSpecific.i <- term["unitSpecific",i][[1]]

      ## fill psi-related entries and select fillHess function
      if (random.i) {
        Z.i <- term["Z.intercept",i][[1]]   # Z.i and %m% (of i) determined here
        "%m%" <- get(term["mult",i][[1]])   # will also be used in j's for loop
        fillHess <- i.random
        if (dimPsi > 0L) {
          dThetadPsiMat <- dThetadPsi(m.Xit)
          hessian.FE.Psi[idxFE==i,] <- .colSumsGrouped(dThetadPsiMat, indexPsi)
          dThetadPsi.i <- .colSums(dThetadPsiMat %m% Z.i, length(subset), term["dim.re",i][[1]], na.rm=TRUE)
          if (dimPsi==1L) {
              hessian.Psi.RE[,dimPsi + which(idxRE==i)] <- dThetadPsi.i
          } else {
              hessian.Psi.RE[cbind(indexPsi,dimPsi + which(idxRE==i))] <- dThetadPsi.i
              ## FIXME: does not work with type="car"
          }
        }
      } else if (unitSpecific.i) {
        which.i <- term["which",i][[1]]
        fillHess <- i.unit
        if (dimPsi > 0L) {
          dThetadPsi.i <- .colSums(dThetadPsi(m.Xit), length(subset), model$nUnits, na.rm=TRUE)
          if (dimPsi==1L) {
              hessian.FE.Psi[idxFE==i,] <- dThetadPsi.i[which.i]
          } else {
              hessian.FE.Psi[cbind(which(idxFE==i),indexPsi[which.i])] <-
                  dThetadPsi.i[which.i]
          }
        }
      } else {
        fillHess <- i.fixed
        if (dimPsi > 0L) {
          ## dPsi <- colSums(dThetadPsi(m.Xit),na.rm=TRUE)
          ## hessian.FE.Psi[idxFE==i,] <- if (dimPsi==1L) sum(dPsi) else dPsi[order(indexPsi)]
          hessian.FE.Psi[idxFE==i,] <- .colSumsGrouped(dThetadPsi(m.Xit), indexPsi)
        }
      }

      ## fill pars$d-related entries
      for (j in seq_len(dimd)) {      # will not be run if dimd==0
          didd <- deriv2HHH(dTheta_l = m.Xit, dTheta_k = dmudd[[j]],
                            dTheta_lk = if (comp.i == 2) dmudd[[j]] * Xit else 0)
          didd[isNA] <- 0
          hessian.FE.d[idxFE==i,j] <- if (unitSpecific.i) {
              colSums(didd,na.rm=TRUE)[which.i]
          } else sum(didd)
          if (random.i) hessian.d.RE[j,idxRE==i] <- colSums(didd %m% Z.i)
      }

      ## fill other (non-psi, non-d) entries (only upper triangle, j >= i!)
      for(j in i:nGroups){
        comp.j <- term["offsetComp",j][[1]]

        Xjt <- term["terms",j][[1]] # eiter 1 or a matrix with values
        if(is.matrix(Xjt)){
          Xjt <- Xjt[subset,,drop=FALSE]
        }
        # if param i and j do not belong to the same component, d(i)d(j)=0
        m.Xit.Xjt <- if (comp.i != comp.j) 0 else m.Xit * Xjt

        didj <- deriv2HHH(dTheta_l = m.Xit, dTheta_k = mean.comp[[comp.j]]*Xjt,
                          dTheta_lk = m.Xit.Xjt)
        didj[isNA]<-0

        random.j <- term["random",j][[1]]
        unitSpecific.j <- term["unitSpecific",j][[1]]
        which.j <- term["which",j][[1]]

        fillHess()
      }

    }


    #########################################################
    ## fill lower triangle of hessians and combine them
    ########################################################
    hessian <- rbind(cbind(hessian.FE.FE,hessian.FE.d,hessian.FE.Psi,hessian.FE.RE),
                     cbind(matrix(0,dimd,dimFE),hessian.d.d,hessian.d.Psi,hessian.d.RE),
                     cbind(matrix(0,dimPsi,dimFE+dimd),hessian.Psi.RE),
                     cbind(matrix(0,dimRE,dimFE+dimd+dimPsi),hessian.RE.RE))

    hessian[lower.tri(hessian)] <- 0  # CAR blocks in hessian.RE.RE were fully filled
    diagHessian <- diag(hessian)
    fisher <- -(hessian + t(hessian))
    diag(fisher) <- -diagHessian

    return(fisher)
  }

  fisher <- computeFisher(mu[c("epi.own","epi.neighbours","endemic")])

  ## add penalty for random effects
  pen <- matrix(0, length(theta), length(theta))
  Fpen <- if(dimRE > 0){
    thetaIdxRE <- seq.int(to=length(theta), length.out=dimRE)
    pen[thetaIdxRE,thetaIdxRE] <- Sigma.inv
    fisher + pen
  } else fisher

  ## Done
  if(attributes){
    attr(Fpen, "fisher") <- fisher
    attr(Fpen, "pen") <- pen
  }
  Fpen
}


#################################################

sqrtOf1pr2 <- function(r){ sqrt(1+r^2) }

getSigmai <- function(sd,    # vector of length dim with log-stdev's
                      correlation, # vector of length dim with correlation
                                   # parameters, 0-length if uncorrelated
                      dim
                      ){
  if(dim==0) return(NULL)

  Sigma.i <- if (length(correlation) == 0L) diag(exp(2*sd), dim) else {
      D <- diag(exp(sd), dim)
      L <- diag(nrow=dim)
      L[2,1:2] <- c(correlation[1],1)/sqrtOf1pr2(correlation[1])
      if (dim==3) {
          L[3,] <- c(correlation[2:3],1)/sqrtOf1pr2(correlation[2])
          L[3,2:3] <- L[3,2:3]/sqrtOf1pr2(correlation[3])
      }
      D %*% tcrossprod(L) %*% D  # ~75% quicker than D %*% L %*% t(L) %*% D
  }
  return(Sigma.i)
}

getSigmaiInv <- function(sd,    # vector of length dim with log-stdev's
                         correlation, # vector of length dim with correlation
                                      # parameters, 0-length if uncorrelated
                         dim
                         ){

  if(dim==0) return(NULL)

  Sigma.i.inv <- if (length(correlation) == 0L) diag(exp(-2*sd), dim) else {
      r <- correlation
      Dinv <- diag(exp(-sd), dim)
      L <- diag(nrow=dim)
      L[2,1:2] <- c(-r[1],sqrtOf1pr2(r[1]))
      if(dim==3){
          L[3,1] <- r[1]*r[3]-r[2]*sqrtOf1pr2(r[3])
          L[3,2] <- -L[2,2]*r[3]
          L[3,3] <- sqrtOf1pr2(r[2])*sqrtOf1pr2(r[3])
      }
      Dinv %*% crossprod(L) %*% Dinv  # ~75% quicker than Dinv %*% t(L) %*% L %*% Dinv
  }

  return(Sigma.i.inv)
}

#* allow blockdiagonal matrix blockdiag(A,B), with A=kronecker product, B=diagonal matrix?
getSigmaInv <- function(sd, correlation, dimSigma, dimBlocks, SigmaInvi=NULL){
  if(is.null(SigmaInvi)){
    SigmaInvi <- getSigmaiInv(sd,correlation,dimSigma)
  }
  if(length(unique(dimBlocks))==1){  # kronecker product formulation possible
    kronecker(SigmaInvi,diag(nrow=dimBlocks[1]))
    # the result is a symmetric matrix if SigmaInvi is symmetric
  } else { # kronecker product not possible -> correlation=0
    diag(rep.int(diag(SigmaInvi),dimBlocks))
  }
}

getSigma <- function(sd, correlation, dimSigma, dimBlocks, Sigmai=NULL){
  if(is.null(Sigmai)){
    Sigmai <- getSigmai(sd,correlation,dimSigma)
  }
  if(length(unique(dimBlocks))==1){  # kronecker product formulation possible
    kronecker(Sigmai,diag(nrow=dimBlocks[1]))
    # the result is a symmetric matrix if Sigmai is symmetric
  } else { # kronecker product not possible -> correlation=0
    diag(rep.int(diag(Sigmai),dimBlocks))
  }
}


## Approximate marginal likelihood for variance components
## Parameter and model unpacking at the beginning (up to the ###...-line) is
## identical in marScore() and marFisher()
marLogLik <- function(sd.corr, theta, model, fisher.unpen=NULL, verbose=FALSE){

  dimSigma <- model$nSigma
  if(dimSigma == 0){
    return(-Inf)
  }
  if(anyNA(sd.corr)) stop("NAs in variance parameters.", ADVICEONERROR)

  dimVar <- model$nVar
  dimCorr <- model$nCorr
  sd   <- head(sd.corr,dimVar)
  corr <- tail(sd.corr,dimCorr)

  pars <- splitParams(theta,model)
  randomEffects <- pars$random
  dimRE <- model$nRE

  dimBlocks <- model$rankRE[model$rankRE>0]
  Sigma.inv <- getSigmaInv(sd, corr, dimVar, dimBlocks)

  # if not given, calculate unpenalized part of fisher info
  if(is.null(fisher.unpen)){
    fisher.unpen <- attr(penFisher(theta, sd.corr, model,attributes=TRUE), "fisher")
  }

  # add penalty to fisher
  fisher <- fisher.unpen
  thetaIdxRE <- seq.int(to=length(theta), length.out=dimRE)
  fisher[thetaIdxRE,thetaIdxRE] <- fisher[thetaIdxRE,thetaIdxRE] + Sigma.inv

  ############################################################

  # penalized part of likelihood
  # compute -0.5*log(|Sigma|) - 0.5*RE' %*% Sigma.inv %*% RE
  # where -0.5*log(|Sigma|) = -dim(RE_i)*[Sum(sd_i) -0.5*log(1+corr_i^2)]
  ##lpen <- -0.5*(t(randomEffects)%*%Sigma.inv%*%randomEffects)
  ## the following implementation takes ~85% less computing time !
  lpen <- -0.5 * c(crossprod(randomEffects, Sigma.inv) %*% randomEffects)
  loglik.pen <- sum(-dimBlocks*sd) + lpen
  if(dimCorr >0){
    loglik.pen <- loglik.pen + 0.5*dimBlocks[1]*sum(log(1+corr^2))
  }

  ## approximate marginal likelihood
  logdetfisher <- determinant(fisher,logarithm=TRUE)$modulus
  lmarg <- loglik.pen -0.5*c(logdetfisher)

  return(lmarg)
}


marScore <- function(sd.corr, theta, model, fisher.unpen=NULL, verbose=FALSE){

  dimSigma <- model$nSigma
  if(dimSigma == 0){
    return(numeric(0L))
  }
  if(anyNA(sd.corr)) stop("NAs in variance parameters.", ADVICEONERROR)

  dimVar <- model$nVar
  dimCorr <- model$nCorr
  sd   <- head(sd.corr,dimVar)
  corr <- tail(sd.corr,dimCorr)

  pars <- splitParams(theta,model)
  randomEffects <- pars$random
  dimRE <- model$nRE

  dimBlocks <- model$rankRE[model$rankRE>0]
  Sigma.inv <- getSigmaInv(sd, corr, dimVar, dimBlocks)

  # if not given, calculate unpenalized part of fisher info
  if(is.null(fisher.unpen)){
    fisher.unpen <- attr(penFisher(theta, sd.corr, model,attributes=TRUE), "fisher")
  }

  # add penalty to fisher
  fisher <- fisher.unpen
  thetaIdxRE <- seq.int(to=length(theta), length.out=dimRE)
  fisher[thetaIdxRE,thetaIdxRE] <- fisher[thetaIdxRE,thetaIdxRE] + Sigma.inv

  # inverse of penalized fisher info
  F.inv <- try(solve(fisher),silent=TRUE)
  if(inherits(F.inv,"try-error")){
    if(verbose) cat("  WARNING (in marScore): penalized Fisher is singular!\n")
    #return(rep.int(0,dimSigma))
    ## continuing with the generalized inverse often works, otherwise we would
    ## have to stop() here, because nlminb() cannot deal with NA's
    F.inv <- ginv(fisher)
  }
  F.inv.RE <- F.inv[thetaIdxRE,thetaIdxRE]

  ############################################################

  ## compute marginal score and fisher for each variance component
  # initialize score and fisher info
  marg.score <- rep.int(NA_real_,dimSigma)

  ## specify functions for derivatives
  deriv1 <- switch(dimVar, dSigma1, dSigma2, dSigma3)

  d1Sigma <- deriv1(sd, corr)
  Sigmai.inv <- getSigmaiInv(sd, corr, dimVar)


  # derivation of log determinant
  # -.5*tr(Sigma^-1 %*% dSigma/ds) = -R (for sd.i)
  #                                =  R*corr.i/(corr.i^2+1) (for corr.i)
  d1logDet <- c(-dimBlocks,dimBlocks[1]*corr/(corr^2+1))

  # go through all variance parameters
  for(i in seq_len(dimSigma)){
    dSi <- -Sigmai.inv %*% d1Sigma[,,i] %*% Sigmai.inv # CAVE: sign
    dS.i <- getSigma(dimSigma=dimVar,dimBlocks=dimBlocks,Sigmai=dSi)
    #dlpen.i <- -0.5* t(randomEffects) %*% dS.i %*% randomEffects
    # ~85% faster implementation using crossprod() avoiding "slow" t():
    dlpen.i <- -0.5 * c(crossprod(randomEffects, dS.i) %*% randomEffects)
    #tr.d1logDetF <- sum(diag(F.inv.RE %*% dS.i))
    tr.d1logDetF <- sum(F.inv.RE * dS.i)   # since dS.i is symmetric
    #<- needs 1/100 (!) of the computation time of sum(diag(F.inv.RE %*% dS.i))
    marg.score[i] <- d1logDet[i] + dlpen.i - 0.5 * tr.d1logDetF
  }

  return(marg.score)
}


marFisher <- function(sd.corr, theta, model, fisher.unpen=NULL, verbose=FALSE){

  dimSigma <- model$nSigma
  if(dimSigma == 0){
    return(matrix(numeric(0L),0L,0L))
  }
  if(anyNA(sd.corr)) stop("NAs in variance parameters.", ADVICEONERROR)

  dimVar <- model$nVar
  dimCorr <- model$nCorr
  sd   <- head(sd.corr,dimVar)
  corr <- tail(sd.corr,dimCorr)

  pars <- splitParams(theta,model)
  randomEffects <- pars$random
  dimRE <- model$nRE

  dimBlocks <- model$rankRE[model$rankRE>0]
  Sigma.inv <- getSigmaInv(sd, corr, dimVar, dimBlocks)

  # if not given, calculate unpenalized part of fisher info
  if(is.null(fisher.unpen)){
    fisher.unpen <- attr(penFisher(theta, sd.corr, model,attributes=TRUE), "fisher")
  }

  # add penalty to fisher
  fisher <- fisher.unpen
  thetaIdxRE <- seq.int(to=length(theta), length.out=dimRE)
  fisher[thetaIdxRE,thetaIdxRE] <- fisher[thetaIdxRE,thetaIdxRE] + Sigma.inv

  # inverse of penalized fisher info
  F.inv <- try(solve(fisher),silent=TRUE)
  if(inherits(F.inv,"try-error")){
    if(verbose) cat("  WARNING (in marFisher): penalized Fisher is singular!\n")
    #return(matrix(Inf,dimSigma,dimSigma))
    ## continuing with the generalized inverse often works, otherwise we would
    ## have to stop() here, because nlminb() cannot deal with NA's
    F.inv <- ginv(fisher)
  }
  F.inv.RE <- F.inv[thetaIdxRE,thetaIdxRE]
  ## declare F.inv.RE as a symmetric matrix?
  ##F.inv.RE <- new("dsyMatrix", Dim = dim(F.inv.RE), x = c(F.inv.RE))
  ## -> no, F.inv.RE %*% dS.i becomes actually slower (dS.i is a "sparseMatrix")

  ############################################################

  marg.hesse <- matrix(NA_real_,dimSigma,dimSigma)

  ## specify functions for derivatives
  deriv1 <- switch(dimVar,dSigma1, dSigma2, dSigma3)
  deriv2 <- switch(dimVar,d2Sigma1, d2Sigma2, d2Sigma3)

  d1Sigma <- deriv1(sd, corr)
  d2Sigma <- deriv2(sd, corr, d1Sigma)
  Sigmai.inv <- getSigmaiInv(sd, corr, dimVar)


  # 2nd derivatives of log determinant
  d2logDet <- diag(c(rep.int(0,dimVar),-dimBlocks[1]*(corr^2-1)/(corr^2+1)^2),dimSigma)

  # function to convert dS.i and dS.j matrices to sparse matrix objects
  dS2sparse <- if (dimCorr > 0) function (x) {
      forceSymmetric(as(x, "sparseMatrix")) # dS.i & dS.j are symmetric
  } else function (x) { #as(x, "diagonalMatrix")
      new("ddiMatrix", Dim = dim(x), diag = "N", x = diag(x))
  }

  # go through all variance parameters
  for(i in seq_len(dimSigma)){
    # compute first derivative of the penalized Fisher info (-> of Sigma^-1)
    # with respect to the i-th element of Sigma (= kronecker prod. of Sigmai and identity matrix)
    # Harville Ch15, Eq. 8.15: (d/d i)S^-1 = - S^-1 * (d/d i) S * S^-1
    SigmaiInv.d1i <- Sigmai.inv %*% d1Sigma[,,i]
    dSi <- -SigmaiInv.d1i %*% Sigmai.inv
    dS.i <- getSigma(dimSigma=dimVar,dimBlocks=dimBlocks,Sigmai=dSi)
    dS.i <- dS2sparse(dS.i)

    # compute second derivatives
    for(j in i:dimSigma){
      # compute (d/d j) S^-1
      SigmaiInv.d1j <- Sigmai.inv %*% d1Sigma[,,j]
      dSj <- -SigmaiInv.d1j %*% Sigmai.inv
      dS.j <- getSigma(dimSigma=dimVar,dimBlocks=dimBlocks,Sigmai=dSj)
      dS.j <- dS2sparse(dS.j)
      # compute (d/di dj) S^-1
      #dS.ij <- getSigma(dimSigma=dimVar,dimBlocks=dimBlocks,
      #                  Sigmai=d2Sigma[[i]][,,j])

      # compute second derivatives of Sigma^-1 (Harville Ch15, Eq 9.2)
      d2S <- (- Sigmai.inv %*% d2Sigma[[i]][,,j] +
               SigmaiInv.d1i %*% SigmaiInv.d1j +
               SigmaiInv.d1j %*% SigmaiInv.d1i) %*% Sigmai.inv

      dSij <- getSigma(dimSigma=dimVar,dimBlocks=dimBlocks,Sigmai=d2S)

      #d2lpen.i <- -0.5* t(randomEffects) %*% dSij %*% randomEffects
      # ~85% faster implementation using crossprod() avoiding "slow" t():
      d2lpen.i <- -0.5 * c(crossprod(randomEffects, dSij) %*% randomEffects)

      # compute second derivative of log-determinant of penFisher
      mpart1 <- dS.j %*% F.inv.RE  # 3 times as fast as the other way round
      mpart2 <- dS.i %*% F.inv.RE
      mpart <- mpart1 %*% mpart2
      ## speed-ups: - tr(F.inv.RE %*% dSij) simply equals sum(F.inv.RE * dSij)
      ##            - accelerate matrix product by sparse matrices dS.i and dS.j
      ##            - use cyclic permutation of trace:
      ##              tr(F.inv.RE %*% dS.j %*% F.inv.RE %*% dS.i) =
      ##              tr(dS.j %*% F.inv.RE %*% dS.i %*% F.inv.RE)
      tr.d2logDetF <- -sum(Matrix::diag(mpart)) + sum(F.inv.RE * dSij)

      marg.hesse[i,j] <- marg.hesse[j,i] <-
          d2logDet[i,j] + d2lpen.i - 0.5 * tr.d2logDetF
    }

  }

  marg.Fisher <- as.matrix(-marg.hesse)

  return(marg.Fisher)
}


## first and second derivatives of the covariance matrix
dSigma1 <- function(sd,corr){
  derivs <- array(2*exp(2*sd), c(1,1,1))
  return(derivs)
}

#d1: result of dSigma1
d2Sigma1 <- function(sd,corr,d1){
  return(list(dsd1=2*d1))
}

dSigma2 <- function(sd,corr){
  derivs <- array(0,c(2,2,3))

  dSigma <- diag(2*exp(2*sd))
  if(length(corr)>0){
    dSigma[1,2] <- dSigma[2,1] <- exp(sum(sd[1:2]))*corr[1]/sqrtOf1pr2(corr[1])

    # derivative of corr_1
    derivs[2,1,3] <- derivs[1,2,3] <- exp(sum(sd[1:2]))/(sqrtOf1pr2(corr[1])^3)
  }

  derivs[,,1:2] <- dSigma
  # derivative of sd_1
  derivs[2,2,1] <- 0
  # derivative of sd_2
  derivs[1,1,2] <- 0

  return(derivs)
}

d2Sigma2 <- function(sd,corr, d1){

  derivs <- array(0,c(2,2,3))
  result <- list(dsd1=d1, dsd2=derivs, dcorr1=derivs)
  result$dsd1[1,1,1] <- 2*d1[1,1,1]
  result$dsd1[2,2,2] <- 0

  result$dsd2[,,2:3]<- d1[,,2:3]
  result$dsd2[2,2,2] <- 2*d1[2,2,2]


  if(length(corr)>0){
    result$dcorr1[2,1,3] <- result$dcorr1[1,2,3] <-
        -(3*corr[1]*exp(sum(sd[1:2])))/(sqrtOf1pr2(corr[1])^5)
  }

  return(result)
}


dSigma3 <- function(sd,corr){
  derivs <- array(0,c(3,3,6))

  dSigma <- diag(2*exp(2*sd)) #
  if(length(corr)>0){
    dSigma[1,2] <- dSigma[2,1] <- exp(sum(sd[1:2]))*corr[1]/sqrtOf1pr2(corr[1]) #
    dSigma[1,3] <- dSigma[3,1] <- exp(sum(sd[c(1,3)]))*corr[2]/sqrtOf1pr2(corr[2]) #
    dSigma[2,3] <- dSigma[3,2] <- exp(sum(sd[c(2,3)]))*(corr[1]*corr[2]*sqrtOf1pr2(corr[3])+corr[3])/prod(sqrtOf1pr2(corr[1:3]))#

    # derivative of corr_1
    derivs[2,1,4] <- derivs[1,2,4] <- exp(sum(sd[1:2]))/(sqrtOf1pr2(corr[1])^3)
    derivs[3,2,4] <- derivs[2,3,4] <-(exp(sum(sd[2:3]))*(corr[2]*sqrtOf1pr2(corr[3])-prod(corr[c(1,3)])))/ (prod(sqrtOf1pr2(corr[2:3]))*(sqrtOf1pr2(corr[1])^3))#

    # derivative of corr_2
    derivs[3,1,5] <- derivs[1,3,5] <- exp(sum(sd[c(3,1)]))/(sqrtOf1pr2(corr[2])^3)#
    derivs[3,2,5] <- derivs[2,3,5] <- (exp(sum(sd[2:3]))*(corr[1]*sqrtOf1pr2(corr[3])-prod(corr[c(2,3)])))/ (prod(sqrtOf1pr2(corr[c(1,3)]))*(sqrtOf1pr2(corr[2])^3)) #

    # derivative of corr_3
    derivs[3,2,6] <- derivs[2,3,6] <- exp(sum(sd[2:3]))/ (prod(sqrtOf1pr2(corr[c(1,2)]))*(sqrtOf1pr2(corr[3])^3))
  }

  derivs[,,1:3] <- dSigma
  # derivative of sd_1
  derivs[2:3,2:3,1] <- 0
  # derivative of sd_2
  derivs[1,c(1,3),2] <- derivs[3,c(1,3),2] <- 0
  # derivative of sd_3
  derivs[1:2,1:2,3] <- 0

  return(derivs)

}

d2Sigma3 <- function(sd,corr, d1)
{
  derivs <- array(0,c(3,3,6))
  result <- list(dsd1=d1, dsd2=derivs, dsd3=derivs, dcorr1=derivs, dcorr2=derivs, dcorr3=derivs)

  result$dsd1[1,1,1] <- 2*d1[1,1,1]
  result$dsd1[2,2:3,2] <- result$dsd1[3,2,2] <- 0
  result$dsd1[2:3,2:3,3] <-  0 #

  result$dsd2[,,2]<- d1[,,2]
  result$dsd2[2,2,2] <- 2*d1[2,2,2]
  result$dsd2[3,2,3] <- result$dsd2[2,3,3] <- d1[3,2,3]#

  result$dsd3[,,3]<- d1[,,3]
  result$dsd3[3,3,3] <- 2*d1[3,3,3]#

  if (length(corr)>0)
  {
    result$dsd1[2:3,2:3,4] <-  0
    result$dsd1[2:3,2:3,5] <-  0
    result$dsd1[,,6] <-  0

    result$dsd2[,,c(4,6)] <- d1[,,c(4,6)]
    result$dsd2[3,2,5] <- result$dsd2[2,3,5] <- d1[3,2,5]

    result$dsd3[3,2,4] <- result$dsd3[2,3,4] <- d1[3,2,4]
    result$dsd3[,,c(5,6)] <- d1[,,c(5,6)]

    # derivative of corr_1
    result$dcorr1[2,1,4] <- result$dcorr1[1,2,4] <- -(exp(sum(sd[1:2]))*3*corr[1])/(sqrtOf1pr2(corr[1])^5) #
    result$dcorr1[3,2,4] <- result$dcorr1[2,3,4] <- -(exp(sum(sd[2:3]))*(corr[1]*(3*corr[2]*sqrtOf1pr2(corr[3])-2*prod(corr[c(1,3)])) + corr[3]) )/ (prod(sqrtOf1pr2(corr[2:3]))*(sqrtOf1pr2(corr[1])^5)) #

    result$dcorr1[3,2,5] <- result$dcorr1[2,3,5] <- (exp(sum(sd[2:3]))*(sqrtOf1pr2(corr[3])+prod(corr[1:3])))/ (prod(sqrtOf1pr2(corr[c(1,2)])^3)*sqrtOf1pr2(corr[3]))

    result$dcorr1[3,2,6] <- result$dcorr1[2,3,6] <- -(exp(sum(sd[2:3]))*corr[1])/ (prod(sqrtOf1pr2(corr[c(1,3)])^3)*sqrtOf1pr2(corr[2]))

    # derivative of corr_2
    result$dcorr2[3,1,5] <- result$dcorr2[1,3,5] <- -(exp(sum(sd[c(3,1)]))*3*corr[2])/(sqrtOf1pr2(corr[2])^5)
    result$dcorr2[3,2,5] <- result$dcorr2[2,3,5] <- -(exp(sum(sd[2:3]))*(corr[2]*(3*corr[1]*sqrtOf1pr2(corr[3])-2*prod(corr[c(2,3)])) + corr[3]) )/ (prod(sqrtOf1pr2(corr[c(1,3)]))*(sqrtOf1pr2(corr[2])^5))

    result$dcorr2[3,2,6] <- result$dcorr2[2,3,6] <-
        -exp(sum(sd[2:3]))*corr[2] / # SM @ 14/05/13: formula fixed, marFisher()
                                     # and hhh4()$Sigma.cov[5,6] are now correct
            (prod(sqrtOf1pr2(corr[c(2,3)])^3)*sqrtOf1pr2(corr[1]))

    # derivative of corr_3
    result$dcorr3[3,2,6] <- result$dcorr3[2,3,6] <- -(exp(sum(sd[2:3]))*3*corr[3])/ (prod(sqrtOf1pr2(corr[c(1,2)]))*sqrtOf1pr2(corr[3])^5)
  }

  return(result)
}


### Various optimizers

updateParams_nlminb <- function (start, ll, sc, fi, ..., control)
{
    lower <- control[["lower"]]; control$lower <- NULL
    upper <- control[["upper"]]; control$upper <- NULL
    scale <- control[["scale"]]; control$scale <- NULL
    negll <- function (x, ...) -ll(x, ...)
    negsc <- function (x, ...) -sc(x, ...)
    ## run the optimization
    res <- nlminb(start, negll, gradient=negsc, hessian=fi, ...,
                  scale=scale, control=control, lower=lower, upper=upper)
    if (any(is.finite(c(lower, upper)))) checkParBounds(res$par, lower, upper)
    ## Done
    list(par=res$par, ll=-res$objective,
         rel.tol=getRelDiff(res$par, start),
         convergence=res$convergence, message=res$message)
}

updateParams_nr <- function (start, ll, sc, fi, ..., control)
{
    ## objective function
    llscfi <- function (x, ...) {
        loglik <- ll(x, ...)
        attr(loglik, "score") <- sc(x, ...)
        attr(loglik, "fisher") <- fi(x, ...)
        loglik
    }
    ## run the optimization
    res <- newtonRaphson(start, llscfi, ...,
                         control=control, verbose=control$verbose)
    ## Done
    list(par=res$coefficients, ll=res$loglikelihood,
         rel.tol=getRelDiff(res$coefficients, start),
         convergence=res$convergence, message=res$message)
}

updateParams_nlm <- function (start, ll, sc, fi, ..., control)
{
    ## objective function
    negllscfi <- function (x, ...) {
        negloglik <- -ll(x, ...)
        attr(negloglik, "gradient") <- -sc(x, ...)
        attr(negloglik, "hessian") <- fi(x, ...)
        negloglik
    }
    ## run the optimization
    res <- do.call("nlm", args=c(alist(p=start, f=negllscfi, ...), control))
    ## Done
    list(par=res$estimate, ll=-res$minimum,
         rel.tol=getRelDiff(res$estimate, start),
         convergence=as.numeric(res$code>2), message=res$message)
    ## nlm returns convergence status in $code, 1-2 indicate convergence,
    ## 3-5 indicate non-convergence
}

updateParams_optim <- function (start, ll, sc, fi, ..., control)
{
    ## Note: "fi" is not used in optim
    method <- control[["method"]]; control$method <- NULL
    lower <- control[["lower"]]; control$lower <- NULL
    upper <- control[["upper"]]; control$upper <- NULL
    res <- optim(start, ll, sc, ...,    # Note: control$fnscale is negative
                 method=method, lower=lower, upper=upper, control=control)
    if (any(is.finite(c(lower, upper)))) checkParBounds(res$par, lower, upper)
    ## Done
    list(par=res$par, ll=res$value,
         rel.tol=getRelDiff(res$par, start),
         convergence=res$convergence, message=res$message)
}


## Calculate relative parameter change criterion.
## We use a weaker criterion than the maximum relative parameter change
## max(abs(sd.corr.new/sd.corr - 1))
getRelDiff <- function (final, start)
    max(abs(final - start)) / max(abs(start))

checkParBounds <- function (par, lower, upper)
{
    if (is.null(names(par))) names(par) <- seq_along(par)
    if (any(atl <- par <= lower))
        cat("  WARNING: parameters reached lower bounds:",
            paste(names(par)[atl], par[atl], sep="=", collapse=", "), "\n")
    if (any(atu <- par >= upper))
        cat("  WARNING: parameters reached upper bounds:",
            paste(names(par)[atu], par[atu], sep="=", collapse=", "), "\n")
}


## default control arguments for updates
defaultOptimControl <- function (method = "nlminb", lower = -Inf, upper = Inf,
                                 iter.max = NULL, verbose = 0)
{
    if (is.null(iter.max)) iter.max <- 20 + 480*(method=="Nelder-Mead")
    lowVerbose <- verbose %in% 0:2
    luOptimMethod <- method %in% c("Brent", "L-BFGS-B")
    defaults.nr <- list(scoreTol=1e-5, paramTol=1e-7, F.inc=0.01, stepFrac=0.5,
                        niter=iter.max, verbose=verbose)
    defaults.nlminb <- list(iter.max=iter.max, scale=1, lower=lower, upper=upper,
                            trace=if(lowVerbose) c(0,0,5)[verbose+1] else 1)
    defaults.nlm <- list(iterlim=iter.max, check.analyticals=FALSE,
                         print.level=if(lowVerbose) c(0,0,1)[verbose+1] else 2)
    defaults.optim <- list(maxit=iter.max, fnscale=-1, trace=max(0,verbose-1),
                           lower=if (luOptimMethod) lower else -Inf,
                           upper=if (luOptimMethod) upper else Inf)
    switch(method, "nr" = defaults.nr, "nlm" = defaults.nlm,
           "nlminb" = defaults.nlminb, defaults.optim)
}

setOptimControl <- function (method, control, ...)
{
    defaults <- defaultOptimControl(method, ...)
    cntrl <- modifyList(defaults, control)
    ## ensure fnscale < 0 (optim performs minimization)
    if (!is.null(cntrl$fnscale)) {      # i.e., using optim()
        cntrl$method <- method          # append method to control list
        if (cntrl$fnscale > 0) cntrl$fnscale <- -cntrl$fnscale
    }
    cntrl
}


## fitHHH is the main workhorse where the iterative optimization is performed
fitHHH <- function(theta, sd.corr, model,
                   cntrl.stop=list(tol=1e-5, niter=100),
                   cntrl.regression=list(method="nlminb"),
                   cntrl.variance=list(method="nlminb"),
                   verbose=0, shrinkage=FALSE)
{
  dimFE.d.O <- model$nFE + model$nd + model$nOverdisp
  dimRE <- model$nRE

  getUpdater <- function (cntrl, start, ...) {
      method <- cntrl$method; cntrl$method <- NULL
      if (length(start) == 1 && method == "Nelder-Mead") {
          method <- "Brent"
          message("Switched optimizer from \"Nelder-Mead\" to \"Brent\"",
                  " (dim(", deparse(substitute(start)), ")=1)")
      }
      list(paste("updateParams",
                 if (method %in% c("nlminb", "nlm", "nr"))
                 method else "optim", sep="_"),
           control = setOptimControl(method, cntrl, ...))
  }

  ## ## artificial lower bound on intercepts of epidemic components
  ## reg.lower <- rep.int(-Inf, length(theta))
  ## reg.lower[grep("^(ar|ne)\\.(1|ri)", model$namesFE)] <- -20

  ## set optimizer for regression parameters
  updateRegressionControl <- getUpdater(cntrl.regression, theta,
                                        ## lower=reg.lower,
                                        iter.max=if(dimRE==0) 100,
                                        verbose=verbose+(dimRE==0))
  updateRegression <- function (theta, sd.corr)
      do.call(updateRegressionControl[[1]],
              alist(theta, penLogLik, penScore, penFisher,
                    sd.corr=sd.corr, model=model,
                    control=updateRegressionControl[[2]]))

  ## set optimizer for variance parameters
  updateVarianceControl <- getUpdater(cntrl.variance, sd.corr,
                                      lower=-5, upper=5, verbose=verbose)
  updateVariance <- function (sd.corr, theta, fisher.unpen)
      do.call(updateVarianceControl[[1]],
              alist(sd.corr, marLogLik, marScore, marFisher,
                    theta=theta, model=model,
                    fisher.unpen=fisher.unpen, verbose=verbose>1,
                    control=updateVarianceControl[[2]]))

  ## Let's go
  if (verbose>0) {
      cat(as.character(Sys.time()), ":",
          if (dimRE == 0) "Optimization of regression parameters" else
          "Iterative optimization of regression & variance parameters", "\n")
  }

  if (dimRE == 0) { # optimization of regression coefficients only
      parReg <- updateRegression(theta, sd.corr)
      theta <- parReg$par
      if ((convergence <- parReg$convergence) != 0 && !is.null(parReg$message))
          cat("! Non-convergence message from optimizer:", parReg$message, "\n")
  } else { # swing between updateRegression & updateVariance
      convergence <- 99
      i <- 0
      while(convergence != 0 && (i < cntrl.stop$niter)){
          i <- i+1
          if (verbose>0) cat("\n")

          ## update regression coefficients
          parReg <- updateRegression(theta, sd.corr)
          theta <- parReg$par
          fisher.unpen <- attr(penFisher(theta, sd.corr, model, attributes=TRUE),
                               "fisher")

          if(verbose>0)
              cat("Update of regression parameters: ",
                  "max|x_0 - x_1| / max|x_0| =", parReg$rel.tol, "\n")

          if(parReg$convergence != 0) {
              if (!is.null(parReg$message))
                  cat("! Non-convergence message from optimizer:",
                      parReg$message, "\n")
              cat("Update of regression coefficients in iteration ",
                  i, " unreliable\n")
          }

          if(parReg$convergence > 20 && shrinkage){
              cat("\n\n***************************************\nshrinkage",
                  0.1*theta[abs(theta)>10],"\n")
              theta[abs(theta)>10] <- 0.1*theta[abs(theta)>10]
              diag(fisher.unpen) <- diag(fisher.unpen)+1e-2
          }

          ## update variance parameters
          parVar <- updateVariance(sd.corr, theta, fisher.unpen)

          if(verbose>0)
              cat("Update of variance parameters:  max|x_0 - x_1| / max|x_0| =",
                  parVar$rel.tol, "\n")

          if(parVar$convergence!=0) {
              if (!is.null(parVar$message))
                  cat("! Non-convergence message from optimizer:",
                      parVar$message, "\n")
              cat("Update of variance parameters in iteration ", i, " unreliable\n")
          }

          sd.corr <- parVar$par

          ## overall convergence ?
          if( (parReg$rel.tol < cntrl.stop$tol) && (parVar$rel.tol < cntrl.stop$tol)
             && (parReg$convergence==0) && (parVar$convergence==0) )
              convergence <- 0

          ## exit loop if no more change in parameters (maybe false convergence)
          if (parReg$rel.tol == 0 && parVar$rel.tol == 0)
              break
      }
  }

  if(verbose > 0) {
    cat("\n")
    cat(as.character(Sys.time()), ":", if (convergence==0)
        "Optimization converged" else "Optimization DID NOT CONVERGE", "\n\n")
  }

  ll     <- penLogLik(theta, sd.corr, model)
  fisher <- penFisher(theta, sd.corr, model, attributes = TRUE)
  dimnames(fisher) <- list(names(theta), names(theta))
  margll     <- marLogLik(sd.corr, theta, model, attr(fisher, "fisher"))
  fisher.var <- marFisher(sd.corr, theta, model, attr(fisher, "fisher"))
  dimnames(fisher.var) <- list(names(sd.corr), names(sd.corr))

  list(theta=theta, sd.corr=sd.corr,
       loglik=ll, margll=margll,
       fisher=fisher, fisherVar=fisher.var,
       convergence=convergence, dim=c(fixed=dimFE.d.O,random=dimRE))
}



## check analytical score functions and Fisher informations for
## a given model (the result of interpretControl(control, stsObj))
## and given parameters theta (regression par.) and sd.corr (variance par.).
## This is a wrapper around functionality of the numDeriv and maxLik packages.
checkAnalyticals <- function (model,
                              theta = model$initialTheta,
                              sd.corr = model$initialSigma,
                              methods = c("numDeriv","maxLik"))
{
    cat("\nPenalized log-likelihood:\n")
    resCheckPen <- sapply(methods, function(derivMethod) {
        if (requireNamespace(derivMethod)) {
            do.call(paste("checkDerivatives", derivMethod, sep="."),
                    args=alist(penLogLik, penScore, penFisher, theta,
                    sd.corr=sd.corr, model=model))
        }
    }, simplify=FALSE, USE.NAMES=TRUE)
    if (length(resCheckPen) == 1L) resCheckPen <- resCheckPen[[1L]]

    resCheckMar <- if (length(sd.corr) == 0L) list() else {
        cat("\nMarginal log-likelihood:\n")
        fisher.unpen <- attr(penFisher(theta, sd.corr, model, attributes=TRUE),
                             "fisher")
        resCheckMar <- sapply(methods, function(derivMethod) {
            if (requireNamespace(derivMethod)) {
                do.call(paste("checkDerivatives", derivMethod, sep="."),
                        args=alist(marLogLik, marScore, marFisher, sd.corr,
                        theta=theta, model=model, fisher.unpen=fisher.unpen))
            }
        }, simplify=FALSE, USE.NAMES=TRUE)
        if (length(resCheckMar) == 1L) resCheckMar[[1L]] else resCheckMar
    }

    list(pen = resCheckPen, mar = resCheckMar)
}

Try the surveillance package in your browser

Any scripts or data that you put into this service are public.

surveillance documentation built on July 20, 2022, 1:06 a.m.