## (stray token "Nothing" commented out: not valid top-level R code)
################################################################################
### Endemic-epidemic modelling for univariate or multivariate
### time series of infectious disease counts (data class "sts")
###
### Copyright (C) 2010-2012 Michaela Paul, 2012-2016,2019-2023 Sebastian Meyer
###
### This file is part of the R package "surveillance",
### free software under the terms of the GNU General Public License, version 2,
### a copy of which is available at https://www.R-project.org/Licenses/.
################################################################################
## Error message issued in loglik, score and fisher functions upon NA parameters
## (appended to stop() messages to give the user actionable advice)
ADVICEONERROR <- "\n Try different starting values, more iterations, or another optimizer.\n"
### Main function to be called by the user
##
## Fit an endemic-epidemic "hhh4" model to a (multivariate) time series of
## counts by (penalized) maximum likelihood and return a list of class "hhh4".
##
## Arguments:
##   stsObj: an "sts" object (a legacy "disProg" object is converted first)
##   control: list of model specifications; missing elements are completed
##     with the defaults below via setControl()
##   check.analyticals: if TRUE or a character vector (subset of
##     c("numDeriv", "maxLik")), do not fit but compare analytical score and
##     Fisher information against numerical derivatives at the start values
##     and return the result of checkAnalyticals()
hhh4 <- function (stsObj, control = list(
    ar = list(f = ~ -1,           # a formula "exp(x'lambda)*y_t-lag" (ToDo: matrix)
              offset = 1,         # multiplicative offset
              lag = 1),           # autoregression on y_i,t-lag
    ne = list(f = ~ -1,           # a formula "exp(x'phi) * sum_j w_ji * y_j,t-lag"
              offset = 1,         # multiplicative offset
              lag = 1,            # regression on y_j,t-lag
              weights = neighbourhood(stsObj) == 1, # weights w_ji
              scale = NULL,       # such that w_ji = scale * weights
              normalize = FALSE), # w_ji -> w_ji / rowSums(w_ji), after scaling
    end = list(f = ~ 1,           # a formula "exp(x'nu) * n_it"
               offset = 1),       # optional multiplicative offset e_it
    family = c("Poisson", "NegBin1", "NegBinM"), # or a factor of length nUnit
    subset = 2:nrow(stsObj),      # epidemic components require Y_{t-lag}
    optimizer = list(stop = list(tol = 1e-5, niter = 100), # control arguments
                     regression = list(method = "nlminb"), # for optimization
                     variance = list(method = "nlminb")),  # <- or "Nelder-Mead"
    verbose = FALSE,              # level of reporting during optimization
    start = list(fixed = NULL,    # list of start values, replacing initial
                 random = NULL,   # values from fe() and ri() in 'f'ormulae
                 sd.corr = NULL),
    data = list(t = stsObj@epoch - min(stsObj@epoch)), # named list of covariates
    keep.terms = FALSE  # whether to keep interpretControl(control, stsObj)
), check.analyticals = FALSE)
{
    ptm <- proc.time()  # to report the total runtime of the fit

    ## Convert old disProg class to new sts class
    if (inherits(stsObj, "disProg")) {
        stsObj <- disProg2sts(stsObj)
    } else {
        stopifnot(inherits(stsObj, "sts"))
    }

    ## check control and set default values (for missing arguments)
    control <- setControl(control, stsObj)

    ## get model terms
    model <- interpretControl(control, stsObj)
    dimFixedEffects <- model$nFE + model$nd + model$nOverdisp
    dimRandomEffects <- model$nRE

    ## starting values
    #* -> better default values possible
    theta.start <- model$initialTheta
    Sigma.start <- model$initialSigma

    ## check if initial values are valid
    ## CAVE: there might be NA's in mu if there are missing values in Y
    mu <- meanHHH(theta.start, model, total.only=TRUE)
    if(any(mu==0, na.rm=TRUE) || any(is.infinite(mu)))
        stop("some mean is degenerate (0 or Inf) at initial values")

    ## check score vector and fisher information at starting values
    ## (debugging aid: returns the check results instead of fitting)
    check.analyticals <- if (isTRUE(check.analyticals)) {
        ## maxLik's compareDerivatives is cheaper for high-dimensional theta
        if (length(theta.start) > 50) "maxLik" else "numDeriv"
    } else if (is.character(check.analyticals)) {
        match.arg(check.analyticals, c("numDeriv", "maxLik"), several.ok=TRUE)
    } else NULL
    if (length(check.analyticals) > 0L) {
        resCheck <- checkAnalyticals(model, theta.start, Sigma.start,
                                     methods=check.analyticals)
        return(resCheck)
    }

    ## maximize loglikelihood (penalized and marginal)
    myoptim <- fitHHH(theta=theta.start,sd.corr=Sigma.start, model=model,
                      cntrl.stop       = control$optimizer$stop,
                      cntrl.regression = control$optimizer$regression,
                      cntrl.variance   = control$optimizer$variance,
                      verbose=control$verbose)

    ## extract parameter estimates
    convergence <- myoptim$convergence == 0
    thetahat <- myoptim$theta
    if (dimRandomEffects>0) {
        Sigma.orig <- myoptim$sd.corr
        ## back-transform variance parameters to the covariance matrix of ri's
        Sigma.trans <- getSigmai(head(Sigma.orig,model$nVar),
                                 tail(Sigma.orig,model$nCorr),
                                 model$nVar)
        dimnames(Sigma.trans) <-
            rep.int(list(sub("^sd\\.", "",
                             names(Sigma.orig)[seq_len(model$nVar)])), 2L)
    } else {
        Sigma.orig <- Sigma.trans <- NULL
    }

    ## compute covariance matrices of regression and variance parameters
    cov <- try(solve(myoptim$fisher), silent=TRUE)
    Sigma.cov <- if(dimRandomEffects>0) try(solve(myoptim$fisherVar), silent=TRUE)

    ## check for degenerate fisher info
    if(inherits(cov, "try-error")){ # fisher info is singular
        if (control$verbose)
            cat("WARNING: Final Fisher information matrix is singular!\n")
        convergence <- FALSE
    } else if(any(!is.finite(diag(cov))) || any(diag(cov)<0)){
        if (control$verbose)
            cat("WARNING: non-finite or negative covariance of regression parameters!\n")
        convergence <- FALSE
    }
    if (!convergence) {
        if (control$verbose) {
            cat("Penalized loglikelihood =", myoptim$loglik, "\n")
            thetastring <- paste(round(thetahat,2), collapse=", ")
            thetastring <- strwrap(thetastring, exdent=10, prefix="\n", initial="")
            cat("theta = (", thetastring, ")\n")
        }
        ## a very large overdispersion parameter (-log(psi) > 10 on the
        ## internal scale) suggests psi ~ 0, i.e. a quasi-Poisson fit
        warning("Results are not reliable!",
                if (any(splitParams(thetahat, model)$overdisp > 10)) { # FALSE for Poisson
                    "\n Overdispersion parameter close to zero; maybe try a Poisson model.\n"
                } else ADVICEONERROR)
    }

    ## gather results in a list -> "hhh4" object
    result <- list(coefficients=thetahat,
                   se=if (convergence) sqrt(diag(cov)), cov=cov,
                   Sigma=Sigma.trans,     # estimated covariance matrix of ri's
                   Sigma.orig=Sigma.orig, # variance parameters on original scale
                   Sigma.cov=Sigma.cov,   # covariance matrix of Sigma.orig
                   call=match.call(),
                   dim=c(fixed=dimFixedEffects,random=dimRandomEffects),
                   loglikelihood=myoptim$loglik, margll=myoptim$margll,
                   convergence=convergence,
                   fitted.values=meanHHH(thetahat, model, total.only=TRUE),
                   control=control,
                   terms=if(control$keep.terms) model else NULL,
                   stsObj=stsObj,
                   lags=sapply(control[c("ar","ne")], function (comp)
                       if (comp$inModel) comp$lag else NA_integer_),
                   nObs=sum(!model$isNA[control$subset,]),
                   nTime=length(model$subset), nUnit=ncol(stsObj),
                   ## CAVE: nTime is not nrow(stsObj) as usual!
                   runtime=proc.time()-ptm)
    if (!convergence) {
        ## add (singular) Fisher information for further investigation
        result[c("fisher","fisherVar")] <- myoptim[c("fisher","fisherVar")]
    }
    class(result) <- "hhh4"
    return(result)
}
## set default values for model specifications in control
##
## Validates the user-supplied 'control' list against 'stsObj' and completes
## it with the default arguments taken from the formals of hhh4().
## Returns the checked and completed control list (with derived elements
## such as $ar$inModel, $ne$inModel, $end$inModel and $ar$isMatrix added);
## stops with an informative error message on invalid specifications.
setControl <- function (control, stsObj)
{
    stopifnot(is.list(control))
    nTime <- nrow(stsObj)
    nUnit <- ncol(stsObj)
    if(nTime <= 2) stop("too few observations")

    ## arguments in 'control' override any corresponding default arguments
    defaultControl <- eval(formals(hhh4)$control)
    ## detach default formulas from this function's evaluation environment
    environment(defaultControl$ar$f) <- environment(defaultControl$ne$f) <-
        environment(defaultControl$end$f) <- .GlobalEnv
    control <- modifyList(defaultControl, control, keep.null = TRUE)

    ## check that component specifications are list objects
    for (comp in c("ar", "ne", "end")) {
        if(!is.list(control[[comp]])) stop("'control$", comp, "' must be a list")
    }

    ## check lags in "ar" and "ne" components
    for (comp in c("ar", "ne")) {
        ## (comp=="ar") coerces to 1 for "ar" (lag >= 1) and 0 for "ne" (lag >= 0)
        if (!isScalar(control[[comp]]$lag) || control[[comp]]$lag < (comp=="ar"))
            stop("'control$", comp, "$lag' must be a ",
                 if (comp=="ar") "positive" else "non-negative", " integer")
        control[[comp]]$lag <- as.integer(control[[comp]]$lag)
    }

    ### check AutoRegressive component
    if (control$ar$isMatrix <- is.matrix(control$ar$f)) {
        ## this form is not implemented -> will stop() in interpretControl()
        if (any(dim(control$ar$f) != nUnit))
            stop("'control$ar$f' must be a square matrix of size ", nUnit)
        if (is.null(control$ar$weights)) { # use identity matrix
            control$ar$weights <- diag(nrow=nUnit)
        } else if (!is.matrix(control$ar$weights) ||
                   any(dim(control$ar$weights) != nUnit)) {
            stop("'control$ar$weights' must be a square matrix of size ", nUnit)
        }
        control$ar$inModel <- TRUE
    } else if (inherits(control$ar$f, "formula")) {
        if (!is.null(control$ar$weights)) {
            warning("argument 'control$ar$weights' is not used")
            control$ar$weights <- NULL
        }
        # check if formula is valid
        control$ar$inModel <- isInModel(control$ar$f)
    } else {
        stop("'control$ar$f' must be either a formula or a matrix")
    }

    ### check NEighbourhood component
    if (!inherits(control$ne$f, "formula"))
        stop("'control$ne$f' must be a formula")
    control$ne$inModel <- isInModel(control$ne$f)
    if (control$ne$inModel) {
        if (nUnit == 1)
            warning("\"ne\" component requires a multivariate 'stsObj'")
        ## if ar$f is a matrix it includes neighbouring units => no "ne" component
        if (control$ar$isMatrix)
            stop("there must not be an extra \"ne\" component ",
                 "if 'control$ar$f' is a matrix")
        ## check ne$weights specification
        checkWeights(control$ne$weights, nUnit, nTime,
                     neighbourhood(stsObj), control$data,
                     check0diag = control$ar$inModel)
        ## check optional scaling of weights
        if (!is.null(control$ne$scale)) {
            stopifnot(is.numeric(control$ne$scale))
            if (is.vector(control$ne$scale)) {
                ## a scale vector must recycle cleanly over the nUnit columns
                stopifnot(length(control$ne$scale) == 1L ||
                          length(control$ne$scale) %% nUnit == 0,
                          !is.na(control$ne$scale))
            } else {
                checkWeightsArray(control$ne$scale, nUnit, nTime)
            }
        }
    } else {
        ## drop unused elements if there is no "ne" component
        control$ne[c("weights", "scale", "normalize")] <- list(NULL, NULL, FALSE)
    }

    ### check ENDemic component
    if (!inherits(control$end$f, "formula"))
        stop("'control$end$f' must be a formula")
    control$end$inModel <- isInModel(control$end$f)

    ### check offsets
    for (comp in c("ar", "ne", "end")) {
        if (is.matrix(control[[comp]]$offset) && is.numeric(control[[comp]]$offset)){
            if (!identical(dim(control[[comp]]$offset), dim(stsObj)))
                stop("'control$",comp,"$offset' must be a numeric matrix of size ",
                     nTime, "x", nUnit)
            if (anyNA(control[[comp]]$offset))
                stop("'control$",comp,"$offset' must not contain NA values")
        } else if (!identical(as.numeric(control[[comp]]$offset), 1)) {
            stop("'control$",comp,"$offset' must either be 1 or a numeric ",
                 nTime, "x", nUnit, " matrix")
        }
    }

    ### stop if no component is included in the model
    if (length(comps <- componentsHHH4(list(control=control))) == 0L)
        stop("none of the components 'ar', 'ne', 'end' is included in the model")

    ### check remaining components of the control list
    if (is.factor(control$family)) {
        stopifnot(length(control$family) == nUnit)
        ## guard against misuse as family = factor("Poisson"), e.g., if taken
        ## from a data.frame of control options with "stringsAsFactors"
        if (nUnit == 1 && as.character(control$family) %in% defaultControl$family) {
            control$family <- as.character(control$family)
            warning("'family = factor(\"", control$family, "\")' is interpreted ",
                    "as 'family = \"", control$family, "\"'")
        } else {
            control$family <- droplevels(control$family)
            names(control$family) <- colnames(stsObj)
        }
    } else {
        control$family <- match.arg(control$family, defaultControl$family)
    }

    if (!is.vector(control$subset, mode="numeric") ||
        !all(control$subset %in% seq_len(nTime)))
        stop("'control$subset' must be %in% 1:", nTime)
    lags <- c(ar = control$ar$lag, ne = control$ne$lag)
    maxlag <- suppressWarnings(max(lags[names(lags) %in% comps])) # could be -Inf
    if (control$subset[1L] <= maxlag) {
        warning("'control$subset' should be > ", maxlag, " due to epidemic lags")
    }

    if (!is.list(control$optimizer) ||
        any(! sapply(c("stop", "regression", "variance"),
                     function(x) is.list(control$optimizer[[x]]))))
        stop("'control$optimizer' must be a list of lists")

    ## 'verbose' is used as an integer level of verbosity downstream
    control$verbose <- as.integer(control$verbose)
    if (length(control$verbose) != 1L || control$verbose < 0)
        stop("'control$verbose' must be a logical or non-negative numeric value")

    stopifnot(is.list(control$start))
    ## keep only the recognized elements "fixed", "random", "sd.corr"
    control$start <- local({
        defaultControl$start[] <- control$start[names(defaultControl$start)]
        defaultControl$start
    })
    if (!all(vapply(X = control$start,
                    FUN = function(x) is.null(x) || is.vector(x, mode="numeric"),
                    FUN.VALUE = TRUE, USE.NAMES = FALSE)))
        stop("'control$start' must be a list of numeric start values")

    stopifnot(length(control$keep.terms) == 1L, is.logical(control$keep.terms))

    ## Done
    return(control)
}
# check whether or not one of the three components is included in the model
#
# A component formula (ar$f, ne$f, or end$f) is "in the model" iff it
# specifies an intercept or at least one covariate term; ~ -1 means "absent".
# The formula must not contain a response (LHS).
isInModel <- function(formula, name=deparse(substitute(formula)))
{
    tt <- terms.formula(formula)
    if (attr(tt, "response") > 0)
        stop(name, " cannot contain a response")
    nterms <- length(attr(tt, "term.labels"))
    ## TRUE iff intercept present or any term labels
    attr(tt, "intercept") + nterms > 0
}
# used to incorporate covariates and unit-specific effects
#
# fe() is intended to be called inside the component formulae of hhh4(),
# where checkFormula() evaluates it; it fetches 'stsObj' from the calling
# frame (set up by checkFormula). Returns a list describing one group of
# fixed-effect parameters (design "terms" matrix, parameter dimension,
# initial values, and bookkeeping flags consumed by interpretControl()).
fe <- function(x,                    # covariate
               unitSpecific = FALSE, # TRUE means which = rep.int(TRUE, nUnits)
               which=NULL,   # NULL = overall, vector with booleans = unit-specific
               initial=NULL) # vector of initial values for parameters
{
    ## 'stsObj' is found in the evaluation frame set up by checkFormula()
    stsObj <- get("stsObj", envir=parent.frame(1), inherits=TRUE) #checkFormula()
    nTime <- nrow(stsObj)
    nUnits <- ncol(stsObj)

    if(!is.numeric(x)){
        stop("Covariate \'",deparse(substitute(x)),"\' is not numeric\n")
    }

    ## expand the covariate to a nTime x nUnits "terms" matrix;
    ## accepted inputs: scalar, time vector (length nTime), or full matrix
    lengthX <- length(x)
    if(lengthX == 1){
        terms <- matrix(x, nTime, nUnits, byrow=FALSE)
        mult <- "*"
    } else if(lengthX == nTime){
        ## time-varying covariate, recycled over units column-wise
        terms <- matrix(x, nTime, nUnits, byrow=FALSE)
        mult <- "*"
    } else if(lengthX == nTime*nUnits){
        if(!is.matrix(x)){
            stop("Covariate \'",deparse(substitute(x)),"\' is not a matrix\n")
        }
        # check dimensions of covariate
        if((ncol(x) != nUnits) | (nrow(x) != nTime)){
            stop("Dimension of covariate \'",deparse(substitute(x)),"\' is not suitably specified\n")
        }
        terms <- x
        mult <- "*"
    } else {
        stop("Covariate \'",deparse(substitute(x)),"\' is not suitably specified\n")
    }

    ## an all-ones terms matrix corresponds to an intercept
    intercept <- all(terms==1)

    # overall or unit-specific effect?
    unitSpecific <- unitSpecific || !is.null(which)
    if (unitSpecific) {
        if (is.null(which)) {
            which <- rep.int(TRUE, nUnits)
        } else {
            stopifnot(is.vector(which, mode="logical"), length(which) == nUnits)
        }
        ## zero out columns of units without this effect
        terms[,!which] <- 0
    }

    # get dimension of parameter
    dim.fe <- if (unitSpecific) sum(which) else 1

    # check length of initial values + set default values
    if (is.null(initial)) {
        initial <- rep.int(0,dim.fe)
    } else if (length(initial) != dim.fe) {
        stop("initial values for '",deparse(substitute(x)),"' must be of length ",dim.fe)
    }

    ## parameter names: covariate name, suffixed by unit name if unit-specific
    name <- deparse(substitute(x))
    if (unitSpecific)
        name <- paste(name, colnames(stsObj)[which], sep=".")

    result <- list(terms=terms,
                   name=name,
                   Z.intercept=NULL,  # only used for random intercepts, cf. ri()
                   which=which,
                   dim.fe=dim.fe,
                   initial.fe=initial,
                   dim.re=0,
                   dim.var=0,
                   initial.var=NULL,
                   initial.re=NULL,
                   intercept=intercept,
                   unitSpecific=unitSpecific,
                   random=FALSE,
                   corr=FALSE,
                   mult=mult
                   )
    return(result)
}
# random intercepts
#
# ri() is intended to be called inside the component formulae of hhh4(),
# where checkFormula() evaluates it; like fe(), it fetches 'stsObj' from the
# calling frame. Returns a list describing one group of random intercepts:
# type "iid" gives one independent intercept per unit; type "car" gives
# intraregional correlation via the neighbourhood structure (CAR model).
# corr = "all" requests correlation with the other random intercepts.
ri <- function(type=c("iid","car"),
               corr=c("none","all"),
               initial.fe=0,
               initial.var=-.5,
               initial.re=NULL)
{
    ## 'stsObj' is found in the evaluation frame set up by checkFormula()
    stsObj <- get("stsObj", envir=parent.frame(1), inherits=TRUE) #checkFormula()
    if (ncol(stsObj) == 1)
        stop("random intercepts require a multivariate 'stsObj'")
    type <- match.arg(type)
    corr <- match.arg(corr)
    corr <- switch(corr,
                   "none"=FALSE,
                   "all"=TRUE)

    if(type=="iid"){
        ## identity design: one independent random effect per unit
        Z <- 1
        dim.re <- ncol(stsObj)
        mult <- "*"
    } else if(type=="car"){ # construct penalty matrix K
        K <- neighbourhood(stsObj)
        checkNeighbourhood(K)
        K <- K == 1      # indicate first-order neighbours
        ne <- colSums(K) # number of first-order neighbours
        ## K = diag(#neighbours) - adjacency, the usual CAR precision structure
        K <- -1*K
        diag(K) <- ne
        dimK <- nrow(K)

        # check rank of the nhood, only connected neighbourhoods are allowed
        if(qr(K)$rank != dimK-1) stop("neighbourhood matrix contains islands")

        # singular-value decomposition of K
        svdK <- svd(K)
        # just use the positive eigenvalues of K in descending order
        # for a the factorisation of the penalty matrix K = LL'
        L <- svdK$u[,-dimK] %*% diag(sqrt(svdK$d[-dimK])) #* only use non-zero eigenvalues
        # Z = L(L'L)^-1, which can't be simplified to Z=(L')^-1 as L is not square
        Z <- L %*% solve(t(L)%*%L)
        dim.re <- dimK - 1L  # rank deficiency of 1 due to the sum-to-zero constraint
        mult <- "%*%"
    }

    # check length of initial values + set default values
    stopifnot(length(initial.fe) == 1, length(initial.var) == 1)
    if (is.null(initial.re)) {
        ## default: small random start values (note: uses the RNG)
        initial.re <- rnorm(dim.re,0,sd=sqrt(0.001))
    } else if (length(initial.re) != dim.re) {
        stop("'initial.re' must be of length ", dim.re)
    }

    result <- list(terms=1,
                   name=paste("ri(",type,")",sep=""),
                   Z.intercept=Z,  # design matrix mapping random effects to units
                   which=NULL,
                   dim.fe=1,
                   initial.fe=initial.fe,
                   dim.re=dim.re,
                   dim.var=1,
                   initial.var=initial.var,
                   initial.re=initial.re,
                   intercept=TRUE,
                   unitSpecific=FALSE,
                   random=TRUE,
                   corr=corr,
                   mult=mult
                   )
    return(result)
}
### check specification of formula
## f: one of the component formulae (ar$f, ne$f, or end$f)
## component: 1, 2, or 3, corresponding to the ar/ne/end component, respectively
## data: the data-argument of hhh4()
## stsObj: the stsObj is not used directly in checkFormula, but in fe() and ri()
##
## Returns a matrix-like structure (columns = parameter groups) obtained by
## cbind()ing the lists produced by fe()/ri() for each term of the formula,
## each augmented with an "offsetComp" element identifying the component.
checkFormula <- function(f, component, data, stsObj)
{
    term <- terms.formula(f, specials=c("fe","ri"))

    # check if there is an overall intercept
    intercept.all <- attr(term, "intercept") == 1

    # list of variables in the component
    vars <- as.list(attr(term,"variables"))[-1] # first element is "list"
    nVars <- length(vars)

    if (nVars > 0 && any(attr(term, "order") > 1))
        warning("interaction terms are not implemented")

    # begin with intercept
    res <- if (intercept.all) {
        c(fe(1), list(offsetComp=component))
    } else {
        if (nVars==0)
            stop("formula ", deparse(substitute(f)), " contains no variables")
        NULL
    }

    # find out fixed effects without "fe()" specification
    # (only if there are variables in addition to an intercept "1")
    fe.raw <- setdiff(seq_len(nVars), unlist(attr(term, "specials")))

    # evaluate covariates
    ## raw variables are wrapped into fe(x) before evaluation in 'data'
    for(i in fe.raw)
        res <- cbind(res, c(
            eval(substitute(fe(x), list(x=vars[[i]])), envir=data),
            list(offsetComp=component)
        ))

    # fixed effects
    ## explicit fe(...) calls are evaluated as-is in 'data'
    for(i in attr(term, "specials")$fe)
        res <- cbind(res, c(
            eval(vars[[i]], envir=data),
            list(offsetComp=component)
        ))

    res <- cbind(res, deparse.level=0) # ensure res has matrix dimensions

    # random intercepts
    RI <- attr(term, "specials")$ri
    ## ri() always implies an intercept, so it must not coexist with another one
    if (sum(unlist(res["intercept",])) + length(RI) > 1)
        stop("There can only be one intercept in the formula ",
             deparse(substitute(f)))
    for(i in RI)
        res <- cbind(res, c(
            eval(vars[[i]], envir=data),
            list(offsetComp=component)
        ))

    return(res)
}
## Create function (pars, type = "response") which
## returns the weighted sum of time-lagged counts of neighbours
## (or its derivates, if type = "gradient" or type = "hessian").
## For type="response", this is a nTime x nUnits matrix (like Y),
## otherwise a list of such matrices,
## which for the gradient has length length(pars) and
## length(pars)*(length(pars)+1)/2 for the hessian.
## If neweights=NULL (i.e. no NE component in model), the result is always 0.
## offset is a multiplicative offset for \phi_{it}, e.g., the population.
## scale is a nUnit-vector or a nUnit x nUnit matrix scaling neweights.
neOffsetFUN <- function (Y, neweights, scale, normalize,
                         nbmat, data, lag = 1, offset = 1)
{
    if (is.null(neweights)) { # no neighbourhood component
        ## constant 0 function ignoring all arguments
        as.function(alist(...=, 0), envir=.GlobalEnv)
        ## dimY <- dim(Y)
        ## as.function(c(alist(...=),
        ##               substitute(matrix(0, r, c), list(r=dimY[1], c=dimY[2]))),
        ##             envir=.GlobalEnv)
    } else if (is.list(neweights)) { # parametric weights
        ## wFUN holds the (scaled/normalized) weight function and derivatives
        wFUN <- scaleNEweights.list(neweights, scale, normalize)
        function (pars, type = "response") {
            name <- switch(type, response="w", gradient="dw", hessian="d2w")
            weights <- wFUN[[name]](pars, nbmat, data)
            ## gradient and hessian are lists if length(pars$d) > 1L
            ## but can be single matrices/arrays if == 1 => _c_onditional lapply
            res <- clapply(weights, function (W)
                offset * weightedSumNE(Y, W, lag))
            ##<- clapply always returns a list (possibly of length 1)
            if (type=="response") res[[1L]] else res
        }
    } else { # fixed (known) weight structure (0-length pars)
        ## precompute the offset once and return it from a constant function
        weights <- scaleNEweights.default(neweights, scale, normalize)
        env <- new.env(hash = FALSE, parent = emptyenv()) # small -> no hash
        env$initoffset <- offset * weightedSumNE(Y, weights, lag)
        as.function(c(alist(...=), quote(initoffset)), envir=env)
    }
}
# interpret and check the specifications of each component
# control must contain all arguments, i.e. setControl was used
#
# Translates the validated control list into the internal "model" list used
# by the likelihood machinery: response matrix, term groups (from
# checkFormula), component offsets, parameter dimensions/names/indexes,
# initial parameter values, and the log-density function of the family.
interpretControl <- function (control, stsObj)
{
    nTime <- nrow(stsObj)
    nUnits <- ncol(stsObj)

    Y <- observed(stsObj)

    ##########################################################################
    ## get the model specifications for each of the three components
    ##########################################################################
    ar <- control$ar
    ne <- control$ne
    end <- control$end

    ## for backwards compatibility with surveillance < 1.8-0, where the ar and ne
    ## components of the control object did not have an offset
    if (is.null(ar$offset)) ar$offset <- 1
    if (is.null(ne$offset)) ne$offset <- 1
    ## for backward compatibility with surveillance < 1.9-0
    if (is.null(ne$normalize)) ne$normalize <- FALSE

    ## create list of offsets of the three components
    ## Ym1 holds the ar$lag-lagged counts (padded with NA at the start)
    Ym1 <- rbind(matrix(NA_integer_, ar$lag, nUnits), head(Y, nTime-ar$lag))
    Ym1.ne <- neOffsetFUN(Y, ne$weights, ne$scale, ne$normalize,
                          neighbourhood(stsObj), control$data, ne$lag, ne$offset)
    offsets <- list(ar=ar$offset*Ym1, ne=Ym1.ne, end=end$offset)
    ## -> offset$ne is a function of the parameter vector 'd', which returns a
    ##    nTime x nUnits matrix -- or 0 (scalar) if there is no NE component
    ## -> offset$end might just be 1 (scalar)

    ## Initial parameter vector 'd' of the neighbourhood weight function
    initial.d <- if (is.list(ne$weights)) ne$weights$initial else numeric(0L)
    dim.d <- length(initial.d)
    names.d <- if (dim.d == 0L) character(0L) else {
        paste0("neweights.", if (is.null(names(initial.d))) {
            if (dim.d==1L) "d" else paste0("d", seq_len(dim.d))
        } else names(initial.d))
    }

    ## determine all NA's
    ## an observation is "NA" if Y or any required (lagged) offset is missing
    isNA <- is.na(Y)
    if (ar$inModel)
        isNA <- isNA | is.na(offsets[[1L]])
    if (ne$inModel)
        isNA <- isNA | is.na(offsets[[2L]](initial.d))

    ## get terms for all components
    all.term <- NULL
    if(ar$isMatrix) stop("matrix-form of 'control$ar$f' is not implemented")
    if(ar$inModel) # ar$f is a formula
        all.term <- cbind(all.term, checkFormula(ar$f, 1, control$data, stsObj))
    if(ne$inModel)
        all.term <- cbind(all.term, checkFormula(ne$f, 2, control$data, stsObj))
    if(end$inModel)
        all.term <- cbind(all.term, checkFormula(end$f,3, control$data, stsObj))

    ## parameter dimensions, summed over all term groups (columns of all.term)
    dim.fe <- sum(unlist(all.term["dim.fe",]))
    dim.re.group <- unlist(all.term["dim.re",], use.names=FALSE)
    dim.re <- sum(dim.re.group)
    dim.var <- sum(unlist(all.term["dim.var",]))
    dim.corr <- sum(unlist(all.term["corr",]))

    if(dim.corr>0){
        if(dim.var!=dim.corr) stop("Use corr=\'all\' or corr=\'none\' ")
        ## number of free correlation parameters for 1, 2, or 3 random effects
        dim.corr <- switch(dim.corr,0,1,3)
    }

    # the vector with dims of the random effects must be equal if they are correlated
    if(length(unique(dim.re.group[dim.re.group>0]))!=1 & dim.corr>0){
        stop("Correlated effects must have same penalty")
    }

    ## derive parameter names, prefixed by the component ("ar", "ne", "end")
    n <- c("ar","ne","end")[unlist(all.term["offsetComp",])]
    names.fe <- names.var <- names.re <- character(0L)
    for(i in seq_along(n)){
        .name <- all.term["name",i][[1]]
        names.fe <- c(names.fe, paste(n[i], .name, sep="."))
        if(all.term["random",i][[1]]) {
            names.var <- c(names.var, paste("sd", n[i], .name, sep="."))
            ## iid random intercepts are labelled by unit, CAR ones by index
            names.re <- c(names.re, paste(n[i], .name, if (.name == "ri(iid)") {
                colnames(stsObj)
            } else {
                seq_len(all.term["dim.re",i][[1]])
            }, sep = "."))
        }
    }
    ## map each parameter back to its term group (column of all.term)
    index.fe <- rep(1:ncol(all.term), times=unlist(all.term["dim.fe",]))
    index.re <- rep(1:ncol(all.term), times=unlist(all.term["dim.re",]))

    # poisson or negbin model
    if(identical(control$family, "Poisson")){
        ddistr <- function(y,mu,size){
            dpois(y, lambda=mu, log=TRUE)
        }
        dim.overdisp <- 0L
        index.overdisp <- names.overdisp <- NULL
    } else { # NegBin
        ddistr <- function(y,mu,size){
            dnbinom(y, mu=mu, size=size, log=TRUE)
        }
        ## version that can handle size = Inf (i.e. the Poisson special case):
        ## ddistr <- function (y,mu,size) {
        ##     poisidx <- is.infinite(size)
        ##     res <- y
        ##     res[poisidx] <- dpois(y[poisidx], lambda=mu[poisidx], log=TRUE)
        ##     res[!poisidx] <- dnbinom(y[!poisidx], mu=mu[!poisidx],
        ##                              size=size[!poisidx], log=TRUE)
        ##     res
        ## }
        ## index.overdisp maps each unit to its overdispersion parameter
        index.overdisp <- if (is.factor(control$family)) {
            control$family
        } else if (control$family == "NegBinM") {
            factor(colnames(stsObj), levels = colnames(stsObj))
            ## do not sort levels (for consistency with unitSpecific effects)
        } else { # "NegBin1"
            factor(character(nUnits))
        }
        names(index.overdisp) <- colnames(stsObj)
        dim.overdisp <- nlevels(index.overdisp)
        names.overdisp <- if (dim.overdisp == 1L) {
            "-log(overdisp)"
        } else {
            paste0("-log(", paste("overdisp", levels(index.overdisp), sep = "."), ")")
        }
    }
    environment(ddistr) <- getNamespace("stats")  # function is self-contained

    # parameter start values from fe() and ri() calls via checkFormula()
    initial <- list(
        fixed = c(unlist(all.term["initial.fe",]),
                  initial.d,
                  rep.int(2, dim.overdisp)),
        random = as.numeric(unlist(all.term["initial.re",])), # NULL -> numeric(0)
        sd.corr = c(unlist(all.term["initial.var",]),
                    rep.int(0, dim.corr))
    )
    # set names of parameter vectors
    names(initial$fixed) <- c(names.fe, names.d, names.overdisp)
    names(initial$random) <- names.re
    names(initial$sd.corr) <- c(names.var, head(paste("corr",1:3,sep="."), dim.corr))

    # modify initial values according to the supplied 'start' values
    initial[] <- mapply(
        FUN = function (initial, start, name) {
            if (is.null(start))
                return(initial)
            if (is.null(names(initial)) || is.null(names(start))) {
                ## no names available -> match by position (lengths must agree)
                if (length(start) == length(initial)) {
                    initial[] <- start
                } else {
                    stop("initial values in 'control$start$", name,
                         "' must be of length ", length(initial))
                }
            } else {
                ## we match by name and silently ignore additional start values
                start <- start[names(start) %in% names(initial)]
                initial[names(start)] <- start
            }
            return(initial)
        },
        initial, control$start[names(initial)], names(initial),
        SIMPLIFY = FALSE, USE.NAMES = FALSE
    )

    # Done
    result <- list(response = Y,
                   terms = all.term,
                   nTime = nTime,
                   nUnits = nUnits,
                   nFE = dim.fe,
                   nd = dim.d,
                   nOverdisp = dim.overdisp,
                   nRE = dim.re,
                   rankRE = dim.re.group,
                   nVar = dim.var,
                   nCorr = dim.corr,
                   nSigma = dim.var+dim.corr,
                   nGroups = ncol(all.term),
                   namesFE = names.fe,
                   indexFE = index.fe,
                   indexRE = index.re,
                   initialTheta = c(initial$fixed, initial$random),
                   initialSigma = initial$sd.corr,
                   offset = offsets,
                   family = ddistr,
                   indexPsi = index.overdisp,
                   subset = control$subset,
                   isNA = isNA
                   )
    return(result)
}
## Split the full parameter vector 'theta' into its consecutive groups:
## fixed effects, neighbourhood-weight parameters 'd', overdispersion
## parameters, and (trailing) random effects. Group sizes are taken from
## the corresponding dimension counts in 'model'.
splitParams <- function(theta, model){
    nFE <- model$nFE
    nd <- model$nd
    nOD <- model$nOverdisp
    ## random effects occupy the tail of 'theta'
    list(fixed    = theta[seq_len(nFE)],
         random   = theta[seq.int(to = length(theta), length.out = model$nRE)],
         overdisp = theta[nFE + nd + seq_len(nOD)],
         d        = theta[nFE + seq_len(nd)])
}
### compute predictor
##
## Evaluates the component means lambda_it*y_{i,t-lag}, phi_it*sum_j(...),
## nu_it*e_it at parameter vector 'theta' for the rows in 'subset'.
## If total.only=TRUE, only the overall mean (epidemic + endemic) matrix is
## returned; otherwise a list with the component means and the exp-predictors
## (component means excluding the multiplicative offsets).
meanHHH <- function(theta, model, subset=model$subset, total.only=FALSE)
{
    ## unpack theta
    pars <- splitParams(theta, model)
    fixed <- pars$fixed
    random <- pars$random

    ## unpack model
    term <- model$terms
    offsets <- model$offset
    offsets[[2L]] <- offsets[[2L]](pars$d) # evaluate at current parameter value
    nGroups <- model$nGroups
    comp <- unlist(term["offsetComp",])
    idxFE <- model$indexFE
    idxRE <- model$indexRE

    ## expand a vector to a nTime x nUnits matrix, filled by row
    toMatrix <- function (x, r=model$nTime, c=model$nUnits)
        matrix(x, r, c, byrow=TRUE)

    unitNames <- dimnames(model$response)[[2L]]
    setColnames <- if (is.null(unitNames)) identity else
        function(x) "dimnames<-"(x, list(NULL, unitNames))

    ## go through groups of parameters and compute predictor of each component,
    ## i.e. lambda_it, phi_it, nu_it, EXCLUDING the multiplicative offset terms,
    ## as well as the resulting component mean (=exppred * offset)
    computePartMean <- function (component)
    {
        pred <- nullMatrix <- toMatrix(0)

        if(!any(comp==component)) { # component not in model -> return 0-matrix
            zeroes <- setColnames(pred[subset,,drop=FALSE])
            return(list(exppred = zeroes, mean = zeroes))
        }

        ## accumulate the linear predictor over all term groups of this component
        for(i in seq_len(nGroups)[comp==component]){
            fe <- fixed[idxFE==i]
            if(term["unitSpecific",i][[1]]){
                ## unit-specific effect: spread parameters over their units
                fe <- nullMatrix
                which <- term["which",i][[1]]
                fe[,which] <- toMatrix(fixed[idxFE==i],c=sum(which))
            }
            if(term["random",i][[1]]){
                re <- random[idxRE==i]
                ## "mult" is "*" (iid) or "%*%" (car), cf. ri()
                "%m%" <- get(term["mult",i][[1]])
                Z.re <- toMatrix(term["Z.intercept",i][[1]] %m% re)
            } else {
                Z.re <- 0
            }
            X <- term["terms",i][[1]]
            pred <- pred + X*fe + Z.re
        }

        exppred <- setColnames(exp(pred[subset,,drop=FALSE]))
        offset <- offsets[[component]]
        if (length(offset) > 1) offset <- offset[subset,,drop=FALSE]
        ##<- no subsetting if offset is scalar (time- and unit-independent)
        list(exppred = exppred, mean = exppred * offset)
    }

    ## compute component means
    ar <- computePartMean(1)
    ne <- computePartMean(2)
    end <- computePartMean(3)

    ## Done
    epidemic <- ar$mean + ne$mean
    endemic <- end$mean
    if (total.only) epidemic + endemic else
        list(mean=epidemic+endemic, epidemic=epidemic, endemic=endemic,
             epi.own=ar$mean, epi.neighbours=ne$mean,
             ar.exppred=ar$exppred, ne.exppred=ne$exppred, end.exppred=end$exppred)
}
### compute dispersion in dnbinom (mu, size) parametrization
##
## Returns NULL for Poisson models. Otherwise, if 'subset' is NULL, the
## plain vector of size parameters; else a length(subset) x nUnits matrix
## mapping each unit to its size parameter via model$indexPsi.
sizeHHH <- function (theta, model, subset = model$subset)
{
    ## Poisson models carry no overdispersion parameters
    if (model$nOverdisp == 0L)
        return(NULL)

    ## parameters are stored as -log(psi); dnbinom's size equals 1/psi
    size <- exp(splitParams(theta, model)$overdisp)

    if (is.null(subset)) {
        ## plain vector, stripped of the "-log(overdisp)" names
        return(unname(size))
    }

    ## expand to a time x unit matrix using the unit-to-parameter mapping
    matrix(data = size[model$indexPsi],
           nrow = length(subset), ncol = model$nUnits, byrow = TRUE,
           dimnames = list(NULL, names(model$indexPsi)))
}
## auxiliary function used in penScore and penFisher
## it sums colSums(x) within the groups defined by f (of length ncol(x))
## and returns these sums in the order of levels(f)
.colSumsGrouped <- function (x, f, na.rm = TRUE)
{
    ngroups <- nlevels(f)
    if (ngroups == 1L) {
        ## all columns belong to the same group ("NegBin1"): grand total
        return(sum(x, na.rm = na.rm))
    }
    d <- dim(x)
    cs <- .colSums(x, d[1L], d[2L], na.rm = na.rm)
    if (ngroups == d[2L]) {
        ## each column is its own group ("NegBinM" or factor):
        ## reorder into levels(f) order (order(f)==1:ngroups for NegBinM,
        ## but not in general)
        cs[order(f)]
    } else {
        ## aggregate the column sums within groups, in levels(f) order
        vapply(X = split.default(cs, f, drop = FALSE),
               FUN = sum, FUN.VALUE = numeric(1L), USE.NAMES = FALSE)
    }
}
############################################
## Penalized log-likelihood of an hhh4 model
##
## theta: regression parameters (fixed effects, d, overdispersion, random)
## sd.corr: variance/correlation parameters of the random effects
## model: the list produced by interpretControl()
## attributes: if TRUE, attach the per-unit log-likelihood ("loglik") and the
##             penalty term ("logpen") as attributes of the returned value
penLogLik <- function(theta, sd.corr, model, attributes=FALSE)
{
    if(anyNA(theta)) stop("NAs in regression parameters.", ADVICEONERROR)

    ## unpack model
    subset <- model$subset
    Y <- model$response[subset,,drop=FALSE]
    dimPsi <- model$nOverdisp
    dimRE <- model$nRE

    ## unpack random effects
    if (dimRE > 0) {
        pars <- splitParams(theta, model)
        randomEffects <- pars$random
        sd <- head(sd.corr, model$nVar)
        corr <- tail(sd.corr, model$nCorr)
        dimBlock <- model$rankRE[model$rankRE>0]
        Sigma.inv <- getSigmaInv(sd, corr, model$nVar, dimBlock)
    }

    ############################################################

    ## evaluate dispersion
    psi <- sizeHHH(theta, model,
                   subset = if (dimPsi > 1L) subset) # else scalar or NULL
    #psi might be numerically equal to 0 or Inf in which cases dnbinom (in meanHHH)
    #would return NaN (with a warning). The case size=Inf rarely happens and
    #corresponds to a Poisson distribution. Currently this case is not handled
    #in order to have the usual non-degenerate case operate faster.
    #For size=0, log(dnbinom) equals -Inf for positive x or if (x=0 and mu=0), and
    #zero if x=0 and mu>0 and mu<Inf. Thus, if there is at least one case in Y
    #(x>0, which is always true), we have that sum(ll.units) = -Inf, hence:
    if (any(psi == 0)) return(-Inf)

    ## evaluate mean
    mu <- meanHHH(theta, model, total.only=TRUE)
    # if, numerically, mu=Inf, log(dnbinom) or log(dpois) both equal -Inf, hence:
    #if (any(is.infinite(mu))) return(-Inf)
    # however, since mu=Inf does not produce warnings below and this is a rare
    # case, it is faster to not include this conditional expression

    ## penalization term for random effects
    lpen <- if (dimRE==0) 0 else { # there are random effects
        ##-.5*(t(randomEffects)%*%Sigma.inv%*%randomEffects)
        ## the following implementation takes ~85% less computing time !
        -0.5 * c(crossprod(randomEffects, Sigma.inv) %*% randomEffects)
    }

    ## log-likelihood (summed per unit, NA observations dropped)
    ll.units <- .colSums(model$family(Y,mu,psi),
                         length(subset), model$nUnits, na.rm=TRUE)

    ## penalized log-likelihood
    ll <- sum(ll.units) + lpen

    ## Done
    if (attributes) {
        attr(ll, "loglik") <- ll.units
        attr(ll, "logpen") <- lpen
    }
    ll
}
## Score function (gradient) of penLogLik() with respect to 'theta'.
## Returns the gradient vector in the parameter order
## c(fixed effects, weight parameters d, overdispersion psi, random effects).
penScore <- function(theta, sd.corr, model)
{
    if(anyNA(theta)) stop("NAs in regression parameters.", ADVICEONERROR)
    ## unpack model
    subset <- model$subset
    Y <- model$response[subset,,drop=FALSE]
    isNA <- model$isNA[subset,,drop=FALSE]
    dimPsi <- model$nOverdisp
    dimRE <- model$nRE
    term <- model$terms
    nGroups <- model$nGroups
    dimd <- model$nd
    ## unpack parameters
    pars <- splitParams(theta, model)
    if (dimRE > 0) {
        randomEffects <- pars$random
        sd <- head(sd.corr, model$nVar)
        corr <- tail(sd.corr, model$nCorr)
        dimBlock <- model$rankRE[model$rankRE>0]
        Sigma.inv <- getSigmaInv(sd, corr, model$nVar, dimBlock)
    }
    ## evaluate dispersion
    psi <- sizeHHH(theta, model,
                   subset = if (dimPsi > 1L) subset) # else scalar or NULL
    ## evaluate mean
    mu <- meanHHH(theta, model)
    meanTotal <- mu$mean
    ############################################################
    ## helper function for derivatives:
    ## d l / d mu, the common factor of all dl/dtheta terms
    derivHHH.factor <- if(dimPsi > 0L){ # NegBin
        psiPlusMu <- psi + meanTotal # also used below for calculation of grPsi
        psiYpsiMu <- (psi+Y) / psiPlusMu
        Y/meanTotal - psiYpsiMu
    } else { # Poisson
        Y/meanTotal - 1
    }
    derivHHH <- function (dmu) derivHHH.factor * dmu
    ## go through groups of parameters and compute the gradient of each component
    computeGrad <- function(mean.comp){
        grad.fe <- numeric(0L)
        grad.re <- numeric(0L)
        for(i in seq_len(nGroups)){
            comp <- term["offsetComp",i][[1]]
            Xit<- term["terms",i][[1]] # either 1 or a matrix with values
            if(is.matrix(Xit)){
                Xit <- Xit[subset,,drop=FALSE]
            }
            ## chain rule: dl/dbeta_i = dl/dmu * dmu/dbeta_i
            dTheta <- derivHHH(mean.comp[[comp]]*Xit)
            dTheta[isNA] <- 0 # dTheta must not contain NA's (set NA's to 0)
            if(term["unitSpecific",i][[1]]){
                which <- term["which",i][[1]]
                dimi <- sum(which)
                if(dimi < model$nUnits)
                    dTheta <- dTheta[,which,drop=FALSE]
                dTheta <- .colSums(dTheta, length(subset), dimi)
                grad.fe <- c(grad.fe,dTheta)
            } else if(term["random",i][[1]]){
                Z <- term["Z.intercept",i][[1]]
                "%m%" <- get(term["mult",i][[1]])
                dRTheta <- .colSums(dTheta %m% Z, length(subset), term["dim.re",i][[1]])
                grad.re <- c(grad.re, dRTheta)
                grad.fe <- c(grad.fe, sum(dTheta))
            } else{
                grad.fe <- c(grad.fe, sum(dTheta))
            }
        }
        list(fe=grad.fe, re=grad.re)
    }
    gradients <- computeGrad(mu[c("epi.own","epi.neighbours","endemic")])
    ## gradient for parameter vector of the neighbourhood weights
    grd <- if (dimd > 0L) {
        dneOffset <- model$offset[[2L]](pars$d, type="gradient")
        ##<- this is always a list (of length dimd) of matrices
        onescore.d <- function (dneoff) {
            dmudd <- mu$ne.exppred * dneoff[subset,,drop=FALSE]
            grd.terms <- derivHHH(dmudd)
            sum(grd.terms, na.rm=TRUE)
        }
        unlist(clapply(dneOffset, onescore.d), recursive=FALSE, use.names=FALSE)
    } else numeric(0L)
    ## gradient for overdispersion parameter psi
    ## (dl/d log(psi), grouped via the psi factor structure)
    grPsi <- if(dimPsi > 0L){
        dPsiMat <- psi * (digamma(Y+psi) - digamma(psi) + log(psi) + 1
                          - log(psiPlusMu) - psiYpsiMu)
        .colSumsGrouped(dPsiMat, model$indexPsi)
    } else numeric(0L)
    ## add penalty to random effects gradient: -Sigma^-1 b
    s.pen <- if(dimRE > 0) c(Sigma.inv %*% randomEffects) else numeric(0L)
    if(length(gradients$re) != length(s.pen))
        stop("oops... lengths of s(b) and Sigma.inv %*% b do not match")
    grRandom <- c(gradients$re - s.pen)
    ## Done
    res <- c(gradients$fe, grd, grPsi, grRandom)
    res
}
## Penalized Fisher information (negative Hessian of penLogLik) with
## respect to the regression parameters 'theta', given the variance
## parameters 'sd.corr'.
## attributes : if TRUE, attach the unpenalized Fisher matrix ("fisher")
##              and the random-effects penalty matrix ("pen") as attributes.
## Only the upper triangle is computed; symmetry is restored at the end.
penFisher <- function(theta, sd.corr, model, attributes=FALSE)
{
    if(anyNA(theta)) stop("NAs in regression parameters.", ADVICEONERROR)
    ## unpack model
    subset <- model$subset
    Y <- model$response[subset,,drop=FALSE]
    isNA <- model$isNA[subset,,drop=FALSE]
    dimPsi <- model$nOverdisp
    dimRE <- model$nRE
    term <- model$terms
    nGroups <- model$nGroups
    dimd <- model$nd
    dimFE <- model$nFE
    idxFE <- model$indexFE
    idxRE <- model$indexRE
    indexPsi <- model$indexPsi
    ## unpack parameters
    pars <- splitParams(theta, model)
    if (dimRE > 0) {
        randomEffects <- pars$random
        sd <- head(sd.corr, model$nVar)
        corr <- tail(sd.corr, model$nCorr)
        dimBlock <- model$rankRE[model$rankRE>0]
        Sigma.inv <- getSigmaInv(sd, corr, model$nVar, dimBlock)
    }
    ## evaluate dispersion
    psi <- sizeHHH(theta, model,
                   subset = if (dimPsi > 1L) subset) # else scalar or NULL
    ## evaluate mean
    mu <- meanHHH(theta, model)
    meanTotal <- mu$mean
    ############################################################
    ## helper functions for derivatives:
    if (dimPsi > 0L) { # negbin
        psiPlusY <- psi + Y
        psiPlusMu <- psi + meanTotal
        psiPlusMu2 <- psiPlusMu^2
        psiYpsiMu <- psiPlusY / psiPlusMu
        psiYpsiMu2 <- psiPlusY / psiPlusMu2
        ## factors of the generic second derivative, see deriv2HHH() below
        deriv2HHH.fac1 <- psiYpsiMu2 - Y / (meanTotal^2)
        deriv2HHH.fac2 <- Y / meanTotal - psiYpsiMu
        ## psi-related derivatives
        dThetadPsi.fac <- psi * (psiYpsiMu2 - 1/psiPlusMu)
        ## mixed derivative d2l / dtheta dpsi given dmu/dtheta
        dThetadPsi <- function(dTheta){
            dThetadPsi.fac * dTheta
        }
        dPsiMat <- psi * (digamma(psiPlusY) - digamma(psi) + log(psi) + 1
                          - log(psiPlusMu) - psiYpsiMu) # as in penScore()
        dPsidPsiMat <- psi^2 * (
            trigamma(psiPlusY) - trigamma(psi) + 1/psi - 1/psiPlusMu -
            (meanTotal-Y)/psiPlusMu2) + dPsiMat
    } else { # poisson
        deriv2HHH.fac1 <- -Y / (meanTotal^2)
        deriv2HHH.fac2 <- Y / meanTotal - 1
    }
    ## generic second derivative d2l / dtheta_l dtheta_k from the partial
    ## derivatives of mu (chain + product rule)
    deriv2HHH <- function(dTheta_l, dTheta_k, dTheta_lk){
        dTheta_l * dTheta_k * deriv2HHH.fac1 + dTheta_lk * deriv2HHH.fac2
    }
    ## go through groups of parameters and compute the hessian of each component
    computeFisher <- function(mean.comp){
        # initialize hessian
        hessian.FE.FE <- matrix(0,dimFE,dimFE)
        hessian.FE.RE <- matrix(0,dimFE,dimRE)
        hessian.RE.RE <- matrix(0,dimRE,dimRE)
        hessian.FE.Psi <- matrix(0,dimFE,dimPsi)
        hessian.Psi.RE <- matrix(0,dimPsi,dimPsi+dimRE) # CAVE: contains PsiPsi and PsiRE
        hessian.FE.d <- matrix(0,dimFE,dimd)
        hessian.d.d <- matrix(0,dimd,dimd)
        hessian.d.Psi <- matrix(0,dimd,dimPsi)
        hessian.d.RE <- matrix(0,dimd,dimRE)
        ## derivatives wrt neighbourhood weight parameters d
        if (dimd > 0L) {
            phi.doff <- function (dneoff) {
                mu$ne.exppred * dneoff[subset,,drop=FALSE]
            }
            ## for type %in% c("gradient", "hessian"), model$offset[[2L]] always
            ## returns a list of matrices. It has length(pars$d) elements for the
            ## gradient and length(pars$d)*(length(pars$d)+1)/2 for the hessian.
            dneOffset <- model$offset[[2L]](pars$d, type="gradient")
            dmudd <- lapply(dneOffset, phi.doff)
            d2neOffset <- model$offset[[2L]](pars$d, type="hessian")
            d2mudddd <- lapply(d2neOffset, phi.doff)
            ## d l(theta,x) /dd dd (fill only upper triangle, BY ROW)
            ij <- 0L
            for (i in seq_len(dimd)) {
                for (j in i:dimd) {
                    ij <- ij + 1L #= dimd*(i-1) + j - (i-1)*i/2 # for j >= i
                    ## d2mudddd contains upper triangle by row (=lowertri by column)
                    d2ij <- deriv2HHH(dmudd[[i]], dmudd[[j]], d2mudddd[[ij]])
                    hessian.d.d[i,j] <- sum(d2ij, na.rm=TRUE)
                }
            }
        }
        if (dimPsi > 0L) {
            ## d l(theta,x) /dpsi dpsi
            dPsidPsi <- .colSumsGrouped(dPsidPsiMat, indexPsi)
            hessian.Psi.RE[,seq_len(dimPsi)] <- if (dimPsi == 1L) {
                dPsidPsi
            } else {
                diag(dPsidPsi)
            }
            ## d l(theta) / dd dpsi
            for (i in seq_len(dimd)) { # will not be run if dimd==0
                ## dPsi.i <- colSums(dThetadPsi(dmudd[[i]]),na.rm=TRUE)
                ## hessian.d.Psi[i,] <- if(dimPsi==1L) sum(dPsi.i) else dPsi.i[order(indexPsi)]
                hessian.d.Psi[i,] <- .colSumsGrouped(dThetadPsi(dmudd[[i]]), indexPsi)
            }
        }
        ## fillHess variant: row-parameter i is a plain fixed effect.
        ## These closures read i, j, didj, which.j etc. from the enclosing
        ## loop environment and update the hessian matrices via <<-.
        i.fixed <- function(){
            if(random.j){
                Z.j <- term["Z.intercept",j][[1]]
                "%mj%" <- get(term["mult",j][[1]])
                hessian.FE.RE[idxFE==i,idxRE==j] <<- colSums(didj %mj% Z.j)
                ##<- didj must not contain NA's (all NA's set to 0)
                dIJ <- sum(didj,na.rm=TRUE) # fixed on 24/09/2012
            } else if(unitSpecific.j){
                dIJ <- colSums(didj,na.rm=TRUE)[ which.j ]
            } else {
                dIJ <- sum(didj,na.rm=TRUE)
            }
            hessian.FE.FE[idxFE==i,idxFE==j] <<- dIJ
        }
        ## fillHess variant: row-parameter i is unit-specific
        i.unit <- function(){
            if(random.j){
                Z.j <- term["Z.intercept",j][[1]]
                "%mj%" <- get(term["mult",j][[1]])
                dIJ <- colSums(didj %mj% Z.j) # didj must not contain NA's (all NA's set to 0)
                hessian.FE.RE[idxFE==i,idxRE==j] <<- diag(dIJ)[ which.i, ] # FIXME: does not work if type="car"
                dIJ <- dIJ[ which.i ] # added which.i subsetting in r432
            } else if(unitSpecific.j){
                dIJ <- diag(colSums(didj))[ which.i, which.j ]
            } else {
                dIJ <- colSums(didj)[ which.i ]
            }
            hessian.FE.FE[idxFE==i,idxFE==j] <<- dIJ
        }
        ## fillHess variant: row-parameter i has a random effect
        i.random <- function(){
            if(random.j){
                Z.j <- term["Z.intercept",j][[1]]
                "%mj%" <- get(term["mult",j][[1]])
                hessian.FE.RE[idxFE==i,idxRE==j] <<- colSums(didj %mj% Z.j)
                if (j != i) # otherwise redundant (duplicate)
                    hessian.FE.RE[idxFE==j,idxRE==i] <<- colSums(didj %m% Z.i)
                if(length(Z.j)==1 & length(Z.i)==1){ # both iid
                    Z <- Z.i*Z.j
                    hessian.RE.RE[which(idxRE==i),idxRE==j] <<- diag(colSums( didj %m% Z))
                } else if(length(Z.j)==1 & length(Z.i)>1){ #*
                    Z.j <- diag(nrow=model$nUnits)
                    for(k in seq_len(ncol(Z.j))){
                        Z <- Z.i*Z.j[,k]
                        hessian.RE.RE[idxRE==i,which(idxRE==j)[k]] <<- colSums( didj %m% Z)
                    }
                } else if(length(Z.j)>1 & length(Z.i)==1){ #*
                    Z.i <- diag(nrow=model$nUnits)
                    for(k in seq_len(ncol(Z.i))){
                        Z <- Z.i[,k]*Z.j
                        hessian.RE.RE[which(idxRE==i)[k],idxRE==j] <<- colSums( didj %mj% Z)
                    }
                } else { # both CAR
                    for(k in seq_len(ncol(Z.j))){
                        Z <- Z.i*Z.j[,k]
                        hessian.RE.RE[which(idxRE==i)[k],idxRE==j] <<- colSums( didj %m% Z)
                    }
                }
                dIJ <- sum(didj)
            } else if(unitSpecific.j){
                dIJ <- colSums(didj %m% Z.i)
                hessian.FE.RE[idxFE==j,idxRE==i] <<- diag(dIJ)[ which.j, ]
                dIJ <- dIJ[ which.j ]
            } else {
                hessian.FE.RE[idxFE==j,idxRE==i] <<- colSums(didj %m% Z.i)
                dIJ <- sum(didj)
            }
            hessian.FE.FE[idxFE==i,idxFE==j] <<- dIJ
        }
        ##----------------------------------------------
        for(i in seq_len(nGroups)){ #go through rows of hessian
            # parameter group belongs to which components
            comp.i <- term["offsetComp",i][[1]]
            # get covariate value
            Xit <- term["terms",i][[1]] # either 1 or a matrix with values
            if(is.matrix(Xit)){
                Xit <- Xit[subset,,drop=FALSE]
            }
            m.Xit <- mean.comp[[comp.i]] * Xit
            random.i <- term["random",i][[1]]
            unitSpecific.i <- term["unitSpecific",i][[1]]
            ## fill psi-related entries and select fillHess function
            if (random.i) {
                Z.i <- term["Z.intercept",i][[1]] # Z.i and %m% (of i) determined here
                "%m%" <- get(term["mult",i][[1]]) # will also be used in j's for loop
                fillHess <- i.random
                if (dimPsi > 0L) {
                    dThetadPsiMat <- dThetadPsi(m.Xit)
                    hessian.FE.Psi[idxFE==i,] <- .colSumsGrouped(dThetadPsiMat, indexPsi)
                    dThetadPsi.i <- .colSums(dThetadPsiMat %m% Z.i, length(subset), term["dim.re",i][[1]], na.rm=TRUE)
                    if (dimPsi==1L) {
                        hessian.Psi.RE[,dimPsi + which(idxRE==i)] <- dThetadPsi.i
                    } else {
                        hessian.Psi.RE[cbind(indexPsi,dimPsi + which(idxRE==i))] <- dThetadPsi.i
                        ## FIXME: does not work with type="car"
                    }
                }
            } else if (unitSpecific.i) {
                which.i <- term["which",i][[1]]
                fillHess <- i.unit
                if (dimPsi > 0L) {
                    dThetadPsi.i <- .colSums(dThetadPsi(m.Xit), length(subset), model$nUnits, na.rm=TRUE)
                    if (dimPsi==1L) {
                        hessian.FE.Psi[idxFE==i,] <- dThetadPsi.i[which.i]
                    } else {
                        hessian.FE.Psi[cbind(which(idxFE==i),indexPsi[which.i])] <-
                            dThetadPsi.i[which.i]
                    }
                }
            } else {
                fillHess <- i.fixed
                if (dimPsi > 0L) {
                    ## dPsi <- colSums(dThetadPsi(m.Xit),na.rm=TRUE)
                    ## hessian.FE.Psi[idxFE==i,] <- if (dimPsi==1L) sum(dPsi) else dPsi[order(indexPsi)]
                    hessian.FE.Psi[idxFE==i,] <- .colSumsGrouped(dThetadPsi(m.Xit), indexPsi)
                }
            }
            ## fill pars$d-related entries
            for (j in seq_len(dimd)) { # will not be run if dimd==0
                didd <- deriv2HHH(dTheta_l = m.Xit, dTheta_k = dmudd[[j]],
                                  dTheta_lk = if (comp.i == 2) dmudd[[j]] * Xit else 0)
                didd[isNA] <- 0
                hessian.FE.d[idxFE==i,j] <- if (unitSpecific.i) {
                    colSums(didd,na.rm=TRUE)[which.i]
                } else sum(didd)
                if (random.i) hessian.d.RE[j,idxRE==i] <- colSums(didd %m% Z.i)
            }
            ## fill other (non-psi, non-d) entries (only upper triangle, j >= i!)
            for(j in i:nGroups){
                comp.j <- term["offsetComp",j][[1]]
                Xjt <- term["terms",j][[1]] # either 1 or a matrix with values
                if(is.matrix(Xjt)){
                    Xjt <- Xjt[subset,,drop=FALSE]
                }
                # if param i and j do not belong to the same component, d(i)d(j)=0
                m.Xit.Xjt <- if (comp.i != comp.j) 0 else m.Xit * Xjt
                didj <- deriv2HHH(dTheta_l = m.Xit, dTheta_k = mean.comp[[comp.j]]*Xjt,
                                  dTheta_lk = m.Xit.Xjt)
                didj[isNA]<-0
                random.j <- term["random",j][[1]]
                unitSpecific.j <- term["unitSpecific",j][[1]]
                which.j <- term["which",j][[1]]
                fillHess()
            }
        }
        #########################################################
        ## fill lower triangle of hessians and combine them
        ########################################################
        hessian <- rbind(cbind(hessian.FE.FE,hessian.FE.d,hessian.FE.Psi,hessian.FE.RE),
                         cbind(matrix(0,dimd,dimFE),hessian.d.d,hessian.d.Psi,hessian.d.RE),
                         cbind(matrix(0,dimPsi,dimFE+dimd),hessian.Psi.RE),
                         cbind(matrix(0,dimRE,dimFE+dimd+dimPsi),hessian.RE.RE))
        hessian[lower.tri(hessian)] <- 0 # CAR blocks in hessian.RE.RE were fully filled
        diagHessian <- diag(hessian)
        ## symmetrize: fisher = -(H + H') with the diagonal counted once
        fisher <- -(hessian + t(hessian))
        diag(fisher) <- -diagHessian
        return(fisher)
    }
    fisher <- computeFisher(mu[c("epi.own","epi.neighbours","endemic")])
    ## add penalty for random effects (Sigma^-1 in the RE block)
    pen <- matrix(0, length(theta), length(theta))
    Fpen <- if(dimRE > 0){
        thetaIdxRE <- seq.int(to=length(theta), length.out=dimRE)
        pen[thetaIdxRE,thetaIdxRE] <- Sigma.inv
        fisher + pen
    } else fisher
    ## Done
    if(attributes){
        attr(Fpen, "fisher") <- fisher
        attr(Fpen, "pen") <- pen
    }
    Fpen
}
#################################################
## sqrt(1 + r^2): recurring normalization factor in the Cholesky-type
## parametrization of the random-effects correlation matrix
sqrtOf1pr2 <- function (r) {
    sqrt(r^2 + 1)
}
## Covariance matrix Sigma_i of one set of random effects.
## sd          : vector of length 'dim' with log-standard deviations
## correlation : vector of correlation parameters (0-length if uncorrelated)
## dim         : dimension (0, 1, 2 or 3); returns NULL if dim == 0
getSigmai <- function(sd, correlation, dim)
{
    if (dim == 0) return(NULL)
    if (length(correlation) == 0L)  # uncorrelated: diagonal covariance
        return(diag(exp(2*sd), dim))
    ## correlated case: Sigma_i = D L L' D with unit-row-norm lower-tri L
    D <- diag(exp(sd), dim)
    L <- diag(nrow = dim)
    L[2, 1:2] <- c(correlation[1], 1) / sqrtOf1pr2(correlation[1])
    if (dim == 3) {
        L[3, ] <- c(correlation[2:3], 1) / sqrtOf1pr2(correlation[2])
        L[3, 2:3] <- L[3, 2:3] / sqrtOf1pr2(correlation[3])
    }
    D %*% tcrossprod(L) %*% D  # ~75% quicker than D %*% L %*% t(L) %*% D
}
## Inverse (precision matrix) of getSigmai(), computed analytically.
## sd          : vector of length 'dim' with log-standard deviations
## correlation : vector of correlation parameters (0-length if uncorrelated)
## dim         : dimension (0, 1, 2 or 3); returns NULL if dim == 0
getSigmaiInv <- function(sd, correlation, dim)
{
    if (dim == 0) return(NULL)
    if (length(correlation) == 0L)  # uncorrelated: diagonal precision
        return(diag(exp(-2*sd), dim))
    ## correlated case: Sigma_i^-1 = D^-1 L'L D^-1 with analytic L
    r <- correlation
    Dinv <- diag(exp(-sd), dim)
    L <- diag(nrow = dim)
    L[2, 1:2] <- c(-r[1], sqrtOf1pr2(r[1]))
    if (dim == 3) {
        L[3, 1] <- r[1]*r[3] - r[2]*sqrtOf1pr2(r[3])
        L[3, 2] <- -L[2, 2]*r[3]
        L[3, 3] <- sqrtOf1pr2(r[2])*sqrtOf1pr2(r[3])
    }
    Dinv %*% crossprod(L) %*% Dinv  # ~75% quicker than Dinv %*% t(L) %*% L %*% Dinv
}
#* allow blockdiagonal matrix blockdiag(A,B), with A=kronecker product, B=diagonal matrix?
## Precision matrix of ALL random effects: Sigma_i^-1 expanded over the
## per-variance-component block sizes 'dimBlocks'. A precomputed
## 'SigmaInvi' (= getSigmaiInv result) may be supplied to avoid recomputation.
getSigmaInv <- function(sd, correlation, dimSigma, dimBlocks, SigmaInvi=NULL){
    if (is.null(SigmaInvi))
        SigmaInvi <- getSigmaiInv(sd, correlation, dimSigma)
    if (length(unique(dimBlocks)) == 1) {
        ## equal block sizes -> Kronecker product formulation possible;
        ## the result is a symmetric matrix if SigmaInvi is symmetric
        kronecker(SigmaInvi, diag(nrow=dimBlocks[1]))
    } else {
        ## unequal block sizes: only possible with correlation=0,
        ## so expand the diagonal entries block-wise
        diag(rep.int(diag(SigmaInvi), dimBlocks))
    }
}
## Covariance matrix of ALL random effects: Sigma_i expanded over the
## per-variance-component block sizes 'dimBlocks'. A precomputed
## 'Sigmai' (= getSigmai result) may be supplied to avoid recomputation.
getSigma <- function(sd, correlation, dimSigma, dimBlocks, Sigmai=NULL){
    if (is.null(Sigmai))
        Sigmai <- getSigmai(sd, correlation, dimSigma)
    if (length(unique(dimBlocks)) == 1) {
        ## equal block sizes -> Kronecker product formulation possible;
        ## the result is a symmetric matrix if Sigmai is symmetric
        kronecker(Sigmai, diag(nrow=dimBlocks[1]))
    } else {
        ## unequal block sizes: only possible with correlation=0,
        ## so expand the diagonal entries block-wise
        diag(rep.int(diag(Sigmai), dimBlocks))
    }
}
## Approximate marginal likelihood for variance components
## Parameter and model unpacking at the beginning (up to the ###...-line) is
## identical in marScore() and marFisher()
## Approximate marginal log-likelihood of the variance parameters
## 'sd.corr' (Laplace approximation), given regression parameters 'theta'.
## fisher.unpen : optional unpenalized Fisher information at theta
##                (recomputed via penFisher() if NULL)
## Returns the scalar approximate marginal log-likelihood (-Inf if the
## model has no variance parameters).
marLogLik <- function(sd.corr, theta, model, fisher.unpen=NULL, verbose=FALSE){
    dimSigma <- model$nSigma
    if(dimSigma == 0){
        return(-Inf)
    }
    if(anyNA(sd.corr)) stop("NAs in variance parameters.", ADVICEONERROR)
    ## unpack parameters
    dimVar <- model$nVar
    dimCorr <- model$nCorr
    sd <- head(sd.corr,dimVar)
    corr <- tail(sd.corr,dimCorr)
    pars <- splitParams(theta,model)
    randomEffects <- pars$random
    dimRE <- model$nRE
    dimBlocks <- model$rankRE[model$rankRE>0]
    Sigma.inv <- getSigmaInv(sd, corr, dimVar, dimBlocks)
    # if not given, calculate unpenalized part of fisher info
    if(is.null(fisher.unpen)){
        fisher.unpen <- attr(penFisher(theta, sd.corr, model,attributes=TRUE), "fisher")
    }
    # add penalty to fisher (random effects occupy the last dimRE positions)
    fisher <- fisher.unpen
    thetaIdxRE <- seq.int(to=length(theta), length.out=dimRE)
    fisher[thetaIdxRE,thetaIdxRE] <- fisher[thetaIdxRE,thetaIdxRE] + Sigma.inv
    ############################################################
    # penalized part of likelihood
    # compute -0.5*log(|Sigma|) - 0.5*RE' %*% Sigma.inv %*% RE
    # where -0.5*log(|Sigma|) = -dim(RE_i)*[Sum(sd_i) -0.5*log(1+corr_i^2)]
    ##lpen <- -0.5*(t(randomEffects)%*%Sigma.inv%*%randomEffects)
    ## the following implementation takes ~85% less computing time !
    lpen <- -0.5 * c(crossprod(randomEffects, Sigma.inv) %*% randomEffects)
    loglik.pen <- sum(-dimBlocks*sd) + lpen
    if(dimCorr >0){
        loglik.pen <- loglik.pen + 0.5*dimBlocks[1]*sum(log(1+corr^2))
    }
    ## approximate marginal likelihood (Laplace):
    ## penalized part minus half the log-determinant of the penalized Fisher
    logdetfisher <- determinant(fisher,logarithm=TRUE)$modulus
    lmarg <- loglik.pen -0.5*c(logdetfisher)
    return(lmarg)
}
## Score function (gradient) of marLogLik() with respect to 'sd.corr'.
## Returns a numeric vector of length model$nSigma (0-length if the
## model has no variance parameters).
## Parameter and model unpacking at the beginning is identical to
## marLogLik() and marFisher().
marScore <- function(sd.corr, theta, model, fisher.unpen=NULL, verbose=FALSE){
    dimSigma <- model$nSigma
    if(dimSigma == 0){
        return(numeric(0L))
    }
    if(anyNA(sd.corr)) stop("NAs in variance parameters.", ADVICEONERROR)
    ## unpack parameters
    dimVar <- model$nVar
    dimCorr <- model$nCorr
    sd <- head(sd.corr,dimVar)
    corr <- tail(sd.corr,dimCorr)
    pars <- splitParams(theta,model)
    randomEffects <- pars$random
    dimRE <- model$nRE
    dimBlocks <- model$rankRE[model$rankRE>0]
    Sigma.inv <- getSigmaInv(sd, corr, dimVar, dimBlocks)
    # if not given, calculate unpenalized part of fisher info
    if(is.null(fisher.unpen)){
        fisher.unpen <- attr(penFisher(theta, sd.corr, model,attributes=TRUE), "fisher")
    }
    # add penalty to fisher
    fisher <- fisher.unpen
    thetaIdxRE <- seq.int(to=length(theta), length.out=dimRE)
    fisher[thetaIdxRE,thetaIdxRE] <- fisher[thetaIdxRE,thetaIdxRE] + Sigma.inv
    # inverse of penalized fisher info
    F.inv <- try(solve(fisher),silent=TRUE)
    if(inherits(F.inv,"try-error")){
        if(verbose) cat("  WARNING (in marScore): penalized Fisher is singular!\n")
        #return(rep.int(0,dimSigma))
        ## continuing with the generalized inverse often works, otherwise we would
        ## have to stop() here, because nlminb() cannot deal with NA's
        F.inv <- ginv(fisher)
    }
    F.inv.RE <- F.inv[thetaIdxRE,thetaIdxRE]
    ############################################################
    ## compute marginal score and fisher for each variance component
    # initialize score and fisher info
    marg.score <- rep.int(NA_real_,dimSigma)
    ## specify functions for derivatives (by dimension of Sigma_i)
    deriv1 <- switch(dimVar, dSigma1, dSigma2, dSigma3)
    d1Sigma <- deriv1(sd, corr)
    Sigmai.inv <- getSigmaiInv(sd, corr, dimVar)
    # derivation of log determinant
    # -.5*tr(Sigma^-1 %*% dSigma/ds) = -R  (for sd.i)
    #                                =  R*corr.i/(corr.i^2+1)  (for corr.i)
    d1logDet <- c(-dimBlocks,dimBlocks[1]*corr/(corr^2+1))
    # go through all variance parameters
    for(i in seq_len(dimSigma)){
        ## d Sigma^-1 / d par_i = -Sigma^-1 (d Sigma/d par_i) Sigma^-1
        dSi <- -Sigmai.inv %*% d1Sigma[,,i] %*% Sigmai.inv    # CAVE: sign
        dS.i <- getSigma(dimSigma=dimVar,dimBlocks=dimBlocks,Sigmai=dSi)
        #dlpen.i <- -0.5* t(randomEffects) %*% dS.i %*% randomEffects
        # ~85% faster implementation using crossprod() avoiding "slow" t():
        dlpen.i <- -0.5 * c(crossprod(randomEffects, dS.i) %*% randomEffects)
        #tr.d1logDetF <- sum(diag(F.inv.RE %*% dS.i))
        tr.d1logDetF <- sum(F.inv.RE * dS.i) # since dS.i is symmetric
        #<- needs 1/100 (!) of the computation time of sum(diag(F.inv.RE %*% dS.i))
        marg.score[i] <- d1logDet[i] + dlpen.i - 0.5 * tr.d1logDetF
    }
    return(marg.score)
}
## Negative Hessian of marLogLik() with respect to 'sd.corr'
## (approximate marginal Fisher information of the variance parameters).
## Returns a dimSigma x dimSigma matrix (0x0 if no variance parameters).
## Parameter and model unpacking at the beginning is identical to
## marLogLik() and marScore().
marFisher <- function(sd.corr, theta, model, fisher.unpen=NULL, verbose=FALSE){
    dimSigma <- model$nSigma
    if(dimSigma == 0){
        return(matrix(numeric(0L),0L,0L))
    }
    if(anyNA(sd.corr)) stop("NAs in variance parameters.", ADVICEONERROR)
    ## unpack parameters
    dimVar <- model$nVar
    dimCorr <- model$nCorr
    sd <- head(sd.corr,dimVar)
    corr <- tail(sd.corr,dimCorr)
    pars <- splitParams(theta,model)
    randomEffects <- pars$random
    dimRE <- model$nRE
    dimBlocks <- model$rankRE[model$rankRE>0]
    Sigma.inv <- getSigmaInv(sd, corr, dimVar, dimBlocks)
    # if not given, calculate unpenalized part of fisher info
    if(is.null(fisher.unpen)){
        fisher.unpen <- attr(penFisher(theta, sd.corr, model,attributes=TRUE), "fisher")
    }
    # add penalty to fisher
    fisher <- fisher.unpen
    thetaIdxRE <- seq.int(to=length(theta), length.out=dimRE)
    fisher[thetaIdxRE,thetaIdxRE] <- fisher[thetaIdxRE,thetaIdxRE] + Sigma.inv
    # inverse of penalized fisher info
    F.inv <- try(solve(fisher),silent=TRUE)
    if(inherits(F.inv,"try-error")){
        if(verbose) cat("  WARNING (in marFisher): penalized Fisher is singular!\n")
        #return(matrix(Inf,dimSigma,dimSigma))
        ## continuing with the generalized inverse often works, otherwise we would
        ## have to stop() here, because nlminb() cannot deal with NA's
        F.inv <- ginv(fisher)
    }
    F.inv.RE <- F.inv[thetaIdxRE,thetaIdxRE]
    ## declare F.inv.RE as a symmetric matrix?
    ##F.inv.RE <- new("dsyMatrix", Dim = dim(F.inv.RE), x = c(F.inv.RE))
    ## -> no, F.inv.RE %*% dS.i becomes actually slower (dS.i is a "sparseMatrix")
    ############################################################
    marg.hesse <- matrix(NA_real_,dimSigma,dimSigma)
    ## specify functions for derivatives (by dimension of Sigma_i)
    deriv1 <- switch(dimVar,dSigma1, dSigma2, dSigma3)
    deriv2 <- switch(dimVar,d2Sigma1, d2Sigma2, d2Sigma3)
    d1Sigma <- deriv1(sd, corr)
    d2Sigma <- deriv2(sd, corr, d1Sigma)
    Sigmai.inv <- getSigmaiInv(sd, corr, dimVar)
    # 2nd derivatives of log determinant
    d2logDet <- diag(c(rep.int(0,dimVar),-dimBlocks[1]*(corr^2-1)/(corr^2+1)^2),dimSigma)
    # function to convert dS.i and dS.j matrices to sparse matrix objects
    dS2sparse <- if (dimCorr > 0) function (x) {
        forceSymmetric(as(x, "sparseMatrix")) # dS.i & dS.j are symmetric
    } else function (x) { #as(x, "diagonalMatrix")
        new("ddiMatrix", Dim = dim(x), diag = "N", x = diag(x))
    }
    # go through all variance parameters
    for(i in seq_len(dimSigma)){
        # compute first derivative of the penalized Fisher info (-> of Sigma^-1)
        # with respect to the i-th element of Sigma (= kronecker prod. of Sigmai and identity matrix)
        # Harville Ch15, Eq. 8.15: (d/d i)S^-1 = - S^-1 * (d/d i) S * S^-1
        SigmaiInv.d1i <- Sigmai.inv %*% d1Sigma[,,i]
        dSi <- -SigmaiInv.d1i %*% Sigmai.inv
        dS.i <- getSigma(dimSigma=dimVar,dimBlocks=dimBlocks,Sigmai=dSi)
        dS.i <- dS2sparse(dS.i)
        # compute second derivatives (fill upper triangle and mirror)
        for(j in i:dimSigma){
            # compute (d/d j) S^-1
            SigmaiInv.d1j <- Sigmai.inv %*% d1Sigma[,,j]
            dSj <- -SigmaiInv.d1j %*% Sigmai.inv
            dS.j <- getSigma(dimSigma=dimVar,dimBlocks=dimBlocks,Sigmai=dSj)
            dS.j <- dS2sparse(dS.j)
            # compute (d/di dj) S^-1
            #dS.ij <- getSigma(dimSigma=dimVar,dimBlocks=dimBlocks,
            #                  Sigmai=d2Sigma[[i]][,,j])
            # compute second derivatives of Sigma^-1 (Harville Ch15, Eq 9.2)
            d2S <- (- Sigmai.inv %*% d2Sigma[[i]][,,j] +
                    SigmaiInv.d1i %*% SigmaiInv.d1j +
                    SigmaiInv.d1j %*% SigmaiInv.d1i) %*% Sigmai.inv
            dSij <- getSigma(dimSigma=dimVar,dimBlocks=dimBlocks,Sigmai=d2S)
            #d2lpen.i <- -0.5* t(randomEffects) %*% dSij %*% randomEffects
            # ~85% faster implementation using crossprod() avoiding "slow" t():
            d2lpen.i <- -0.5 * c(crossprod(randomEffects, dSij) %*% randomEffects)
            # compute second derivative of log-determinant of penFisher
            mpart1 <- dS.j %*% F.inv.RE # 3 times as fast as the other way round
            mpart2 <- dS.i %*% F.inv.RE
            mpart <- mpart1 %*% mpart2
            ## speed-ups: - tr(F.inv.RE %*% dSij) simply equals sum(F.inv.RE * dSij)
            ##            - accelerate matrix product by sparse matrices dS.i and dS.j
            ##            - use cyclic permutation of trace:
            ##              tr(F.inv.RE %*% dS.j %*% F.inv.RE %*% dS.i) =
            ##              tr(dS.j %*% F.inv.RE %*% dS.i %*% F.inv.RE)
            tr.d2logDetF <- -sum(Matrix::diag(mpart)) + sum(F.inv.RE * dSij)
            marg.hesse[i,j] <- marg.hesse[j,i] <-
                d2logDet[i,j] + d2lpen.i - 0.5 * tr.d2logDetF
        }
    }
    marg.Fisher <- as.matrix(-marg.hesse)
    return(marg.Fisher)
}
## first and second derivatives of the covariance matrix
## First derivative of the 1x1 covariance "matrix" exp(2*sd) with
## respect to the single log-sd; 'corr' is accepted for a uniform
## interface but unused. Returns a 1x1x1 array.
dSigma1 <- function(sd, corr){
    array(2 * exp(2 * sd), dim = c(1, 1, 1))
}
#d1: result of dSigma1
## Second derivative in the 1-dimensional case: simply twice the first
## derivative 'd1' (the result of dSigma1).
d2Sigma1 <- function(sd, corr, d1){
    list(dsd1 = 2 * d1)
}
## First derivatives of the 2x2 covariance matrix Sigma_i with respect
## to the parameters (sd_1, sd_2, corr_1). Returns a 2x2x3 array;
## slice [,,k] is dSigma/dparam_k.
dSigma2 <- function(sd, corr){
    res <- array(0, c(2, 2, 3))
    dSig <- diag(2 * exp(2 * sd))
    if (length(corr) > 0) {
        dSig[1,2] <- dSig[2,1] <- exp(sum(sd[1:2])) * corr[1] / sqrtOf1pr2(corr[1])
        ## derivative wrt corr_1 (off-diagonal only)
        res[2,1,3] <- res[1,2,3] <- exp(sum(sd[1:2])) / (sqrtOf1pr2(corr[1])^3)
    }
    res[,,1:2] <- dSig
    res[2,2,1] <- 0  # derivative wrt sd_1: zero out the sd_2 entry
    res[1,1,2] <- 0  # derivative wrt sd_2: zero out the sd_1 entry
    res
}
## Second derivatives of the 2x2 covariance matrix, given the first
## derivatives 'd1' (result of dSigma2). Returns a list of 2x2x3 arrays,
## one per differentiation variable (sd_1, sd_2, corr_1).
d2Sigma2 <- function(sd, corr, d1){
    zeros <- array(0, c(2, 2, 3))
    out <- list(dsd1 = d1, dsd2 = zeros, dcorr1 = zeros)
    out$dsd1[1,1,1] <- 2*d1[1,1,1]
    out$dsd1[2,2,2] <- 0
    out$dsd2[,,2:3] <- d1[,,2:3]
    out$dsd2[2,2,2] <- 2*d1[2,2,2]
    if (length(corr) > 0) {
        out$dcorr1[2,1,3] <- out$dcorr1[1,2,3] <-
            -(3*corr[1]*exp(sum(sd[1:2])))/(sqrtOf1pr2(corr[1])^5)
    }
    out
}
## First derivatives of the 3x3 covariance matrix Sigma_i with respect
## to the 6 parameters (sd_1..sd_3, corr_1..corr_3).
## Returns a 3x3x6 array; slice [,,k] is dSigma/dparam_k.
dSigma3 <- function(sd,corr){
    derivs <- array(0,c(3,3,6))
    dSigma <- diag(2*exp(2*sd))
    if(length(corr)>0){
        ## off-diagonal elements of Sigma (needed for the sd derivatives)
        dSigma[1,2] <- dSigma[2,1] <- exp(sum(sd[1:2]))*corr[1]/sqrtOf1pr2(corr[1])
        dSigma[1,3] <- dSigma[3,1] <- exp(sum(sd[c(1,3)]))*corr[2]/sqrtOf1pr2(corr[2])
        dSigma[2,3] <- dSigma[3,2] <- exp(sum(sd[c(2,3)]))*(corr[1]*corr[2]*sqrtOf1pr2(corr[3])+corr[3])/prod(sqrtOf1pr2(corr[1:3]))
        # derivative of corr_1
        derivs[2,1,4] <- derivs[1,2,4] <- exp(sum(sd[1:2]))/(sqrtOf1pr2(corr[1])^3)
        derivs[3,2,4] <- derivs[2,3,4] <-(exp(sum(sd[2:3]))*(corr[2]*sqrtOf1pr2(corr[3])-prod(corr[c(1,3)])))/ (prod(sqrtOf1pr2(corr[2:3]))*(sqrtOf1pr2(corr[1])^3))
        # derivative of corr_2
        derivs[3,1,5] <- derivs[1,3,5] <- exp(sum(sd[c(3,1)]))/(sqrtOf1pr2(corr[2])^3)
        derivs[3,2,5] <- derivs[2,3,5] <- (exp(sum(sd[2:3]))*(corr[1]*sqrtOf1pr2(corr[3])-prod(corr[c(2,3)])))/ (prod(sqrtOf1pr2(corr[c(1,3)]))*(sqrtOf1pr2(corr[2])^3))
        # derivative of corr_3
        derivs[3,2,6] <- derivs[2,3,6] <- exp(sum(sd[2:3]))/ (prod(sqrtOf1pr2(corr[c(1,2)]))*(sqrtOf1pr2(corr[3])^3))
    }
    derivs[,,1:3] <- dSigma
    ## each sd_k derivative only keeps the entries involving sd_k:
    # derivative of sd_1
    derivs[2:3,2:3,1] <- 0
    # derivative of sd_2
    derivs[1,c(1,3),2] <- derivs[3,c(1,3),2] <- 0
    # derivative of sd_3
    derivs[1:2,1:2,3] <- 0
    return(derivs)
}
## Second derivatives of the 3x3 covariance matrix, given the first
## derivatives 'd1' (result of dSigma3). Returns a list of 3x3x6 arrays,
## one per differentiation variable (sd_1..sd_3, corr_1..corr_3); only
## the upper "triangle" of mixed derivatives is needed by marFisher().
d2Sigma3 <- function(sd,corr, d1)
{
    derivs <- array(0,c(3,3,6))
    result <- list(dsd1=d1, dsd2=derivs, dsd3=derivs, dcorr1=derivs, dcorr2=derivs, dcorr3=derivs)
    result$dsd1[1,1,1] <- 2*d1[1,1,1]
    result$dsd1[2,2:3,2] <- result$dsd1[3,2,2] <- 0
    result$dsd1[2:3,2:3,3] <- 0
    result$dsd2[,,2]<- d1[,,2]
    result$dsd2[2,2,2] <- 2*d1[2,2,2]
    result$dsd2[3,2,3] <- result$dsd2[2,3,3] <- d1[3,2,3]
    result$dsd3[,,3]<- d1[,,3]
    result$dsd3[3,3,3] <- 2*d1[3,3,3]
    if (length(corr)>0)
    {
        ## mixed sd x corr derivatives
        result$dsd1[2:3,2:3,4] <- 0
        result$dsd1[2:3,2:3,5] <- 0
        result$dsd1[,,6] <- 0
        result$dsd2[,,c(4,6)] <- d1[,,c(4,6)]
        result$dsd2[3,2,5] <- result$dsd2[2,3,5] <- d1[3,2,5]
        result$dsd3[3,2,4] <- result$dsd3[2,3,4] <- d1[3,2,4]
        result$dsd3[,,c(5,6)] <- d1[,,c(5,6)]
        # derivative of corr_1
        result$dcorr1[2,1,4] <- result$dcorr1[1,2,4] <- -(exp(sum(sd[1:2]))*3*corr[1])/(sqrtOf1pr2(corr[1])^5)
        result$dcorr1[3,2,4] <- result$dcorr1[2,3,4] <- -(exp(sum(sd[2:3]))*(corr[1]*(3*corr[2]*sqrtOf1pr2(corr[3])-2*prod(corr[c(1,3)])) + corr[3]) )/ (prod(sqrtOf1pr2(corr[2:3]))*(sqrtOf1pr2(corr[1])^5))
        result$dcorr1[3,2,5] <- result$dcorr1[2,3,5] <- (exp(sum(sd[2:3]))*(sqrtOf1pr2(corr[3])+prod(corr[1:3])))/ (prod(sqrtOf1pr2(corr[c(1,2)])^3)*sqrtOf1pr2(corr[3]))
        result$dcorr1[3,2,6] <- result$dcorr1[2,3,6] <- -(exp(sum(sd[2:3]))*corr[1])/ (prod(sqrtOf1pr2(corr[c(1,3)])^3)*sqrtOf1pr2(corr[2]))
        # derivative of corr_2
        result$dcorr2[3,1,5] <- result$dcorr2[1,3,5] <- -(exp(sum(sd[c(3,1)]))*3*corr[2])/(sqrtOf1pr2(corr[2])^5)
        result$dcorr2[3,2,5] <- result$dcorr2[2,3,5] <- -(exp(sum(sd[2:3]))*(corr[2]*(3*corr[1]*sqrtOf1pr2(corr[3])-2*prod(corr[c(2,3)])) + corr[3]) )/ (prod(sqrtOf1pr2(corr[c(1,3)]))*(sqrtOf1pr2(corr[2])^5))
        result$dcorr2[3,2,6] <- result$dcorr2[2,3,6] <-
            -exp(sum(sd[2:3]))*corr[2] / # SM @ 14/05/13: formula fixed, marFisher()
                                         # and hhh4()$Sigma.cov[5,6] are now correct
            (prod(sqrtOf1pr2(corr[c(2,3)])^3)*sqrtOf1pr2(corr[1]))
        # derivative of corr_3
        result$dcorr3[3,2,6] <- result$dcorr3[2,3,6] <- -(exp(sum(sd[2:3]))*3*corr[3])/ (prod(sqrtOf1pr2(corr[c(1,2)]))*sqrtOf1pr2(corr[3])^5)
    }
    return(result)
}
### Various optimizers
## One update of the parameters via stats::nlminb() (minimization of -ll
## with analytic gradient -sc and hessian fi).
## Returns a list with elements par, ll, rel.tol, convergence, message.
updateParams_nlminb <- function (start, ll, sc, fi, ..., control)
{
    ## pull box constraints and scaling out of 'control'; the remainder
    ## is passed on as nlminb's control list
    lower <- control[["lower"]]
    upper <- control[["upper"]]
    scale <- control[["scale"]]
    control[c("lower", "upper", "scale")] <- NULL
    ## nlminb minimizes, so negate objective and gradient
    nll <- function (x, ...) -ll(x, ...)
    nsc <- function (x, ...) -sc(x, ...)
    res <- nlminb(start, nll, gradient = nsc, hessian = fi, ...,
                  scale = scale, control = control, lower = lower, upper = upper)
    if (any(is.finite(c(lower, upper))))
        checkParBounds(res$par, lower, upper)
    list(par = res$par, ll = -res$objective,
         rel.tol = getRelDiff(res$par, start),
         convergence = res$convergence, message = res$message)
}
## One update of the parameters via stats::nlm(), supplying the negative
## log-likelihood with gradient and hessian attributes.
## Returns a list with elements par, ll, rel.tol, convergence, message.
## nlm's $code values 1-2 indicate convergence, 3-5 non-convergence.
updateParams_nlm <- function (start, ll, sc, fi, ..., control)
{
    ## objective: -ll with "gradient" (-sc) and "hessian" (fi) attached
    fobj <- function (x, ...) {
        val <- -ll(x, ...)
        attr(val, "gradient") <- -sc(x, ...)
        attr(val, "hessian") <- fi(x, ...)
        val
    }
    res <- do.call("nlm", args = c(alist(p = start, f = fobj, ...), control))
    list(par = setNames(res$estimate, names(start)), ll = -res$minimum,
         rel.tol = getRelDiff(res$estimate, start),
         convergence = as.numeric(res$code > 2), message = res$message)
}
## One update of the parameters via stats::optim().
## 'fi' is accepted for a uniform updater interface but not used here.
## Returns a list with elements par, ll, rel.tol, convergence, message.
updateParams_optim <- function (start, ll, sc, fi, ..., control)
{
    ## pull optim-specific settings out of 'control'
    method <- control[["method"]]
    lower <- control[["lower"]]
    upper <- control[["upper"]]
    control[c("method", "lower", "upper")] <- NULL
    ## control$fnscale is negative, so optim() maximizes ll directly
    res <- optim(start, ll, sc, ...,
                 method = method, lower = lower, upper = upper, control = control)
    if (any(is.finite(c(lower, upper))))
        checkParBounds(res$par, lower, upper)
    list(par = res$par, ll = res$value,
         rel.tol = getRelDiff(res$par, start),
         convergence = res$convergence, message = res$message)
}
## Relative parameter change criterion between two parameter vectors.
## This is deliberately weaker than the maximum relative parameter
## change max(abs(final/start - 1)): the change is scaled by the largest
## absolute starting value instead of element-wise.
getRelDiff <- function (final, start)
{
    max(abs(final - start)) / max(abs(start))
}
## Print a warning via cat() for every parameter sitting on one of its
## box constraints (named "name=value" pairs); unnamed parameters are
## labelled by their index.
checkParBounds <- function (par, lower, upper)
{
    if (is.null(names(par)))
        names(par) <- seq_along(par)
    report <- function (hit, side)
        cat(" WARNING: parameters reached", side, "bounds:",
            paste(names(par)[hit], par[hit], sep="=", collapse=", "), "\n")
    atLower <- par <= lower
    if (any(atLower)) report(atLower, "lower")
    atUpper <- par >= upper
    if (any(atUpper)) report(atUpper, "upper")
}
## default control arguments for updates
## Default control arguments for the parameter updaters, depending on
## the chosen optimization 'method' ("nlminb", "nlm", or an optim method).
## Only the optim methods "Brent" and "L-BFGS-B" support box constraints;
## for the others, lower/upper are reset to -Inf/Inf.
defaultOptimControl <- function (method = "nlminb", lower = -Inf, upper = Inf,
                                 iter.max = NULL, verbose = 0)
{
    if (is.null(iter.max))  # Nelder-Mead needs many more iterations
        iter.max <- if (method == "Nelder-Mead") 500 else 20
    lowVerbose <- verbose %in% 0:2
    switch(method,
        "nlm" = list(iterlim = iter.max, check.analyticals = FALSE,
                     print.level = if (lowVerbose) c(0,0,1)[verbose+1] else 2),
        "nlminb" = list(iter.max = iter.max, scale = 1,
                        lower = lower, upper = upper,
                        trace = if (lowVerbose) c(0,0,5)[verbose+1] else 1),
        { ## default: optim()-based methods (fnscale = -1 -> maximization)
            hasBox <- method %in% c("Brent", "L-BFGS-B")
            list(maxit = iter.max, fnscale = -1, trace = max(0, verbose-1),
                 lower = if (hasBox) lower else -Inf,
                 upper = if (hasBox) upper else Inf)
        })
}
## Merge user-specified 'control' settings into the defaults for the
## given 'method' (extra arguments '...' are passed to
## defaultOptimControl). For optim()-based methods (identified by the
## presence of fnscale), the method is appended to the control list and
## fnscale is forced negative since optim() performs minimization.
setOptimControl <- function (method, control, ...)
{
    cntrl <- modifyList(defaultOptimControl(method, ...), control)
    if (!is.null(cntrl$fnscale)) {  # i.e., using optim()
        cntrl$method <- method
        if (cntrl$fnscale > 0)
            cntrl$fnscale <- -cntrl$fnscale
    }
    cntrl
}
## fitHHH is the main workhorse where the iterative optimization is performed
##
## theta:   start values of the regression parameters
## sd.corr: start values of the variance (and correlation) parameters
## model:   the model environment/list as produced elsewhere in this file
##          (provides nFE, nd, nOverdisp, nRE and is passed to the
##          penLogLik/penScore/penFisher and marLogLik/marScore/marFisher
##          functions)
## cntrl.stop/cntrl.regression/cntrl.variance: control lists as documented
##          for the 'optimizer' argument of hhh4()
## verbose: level of progress reporting; shrinkage: see the rescue step below
fitHHH <- function(theta, sd.corr, model,
                   cntrl.stop=list(tol=1e-5, niter=100),
                   cntrl.regression=list(method="nlminb"),
                   cntrl.variance=list(method="nlminb"),
                   verbose=0, shrinkage=FALSE)
{
    ## number of fixed-effect, weight and overdispersion parameters vs.
    ## number of random effects (dimRE == 0 <=> no random effects at all)
    dimFE.d.O <- model$nFE + model$nd + model$nOverdisp
    dimRE <- model$nRE

    ## Build an updater spec: a list of (1) the name of the updateParams_*
    ## wrapper matching the chosen optimizer and (2) its control list.
    getUpdater <- function (cntrl, start, ...) {
        method <- cntrl$method; cntrl$method <- NULL
        ## Nelder-Mead is not useful in one dimension -> use Brent instead
        if (length(start) == 1 && method == "Nelder-Mead") {
            method <- "Brent"
            message("Switched optimizer from \"Nelder-Mead\" to \"Brent\"",
                    " (dim(", deparse(substitute(start)), ")=1)")
        }
        list(paste("updateParams",
                   if (method %in% c("nlminb", "nlm"))
                       method else "optim", sep="_"),
             control = setOptimControl(method, cntrl, ...))
    }

    ## ## artificial lower bound on intercepts of epidemic components
    ## reg.lower <- rep.int(-Inf, length(theta))
    ## reg.lower[grep("^(ar|ne)\\.(1|ri)", model$namesFE)] <- -20

    ## set optimizer for regression parameters; without random effects a
    ## single regression update does the whole job, so it gets more
    ## iterations (100) and one extra verbosity level
    updateRegressionControl <- getUpdater(cntrl.regression, theta,
                                          ## lower=reg.lower,
                                          iter.max=if(dimRE==0) 100,
                                          verbose=verbose+(dimRE==0))
    ## maximize the penalized log-likelihood in theta (sd.corr fixed)
    updateRegression <- function (theta, sd.corr)
        do.call(updateRegressionControl[[1]],
                alist(theta, penLogLik, penScore, penFisher,
                      sd.corr=sd.corr, model=model,
                      control=updateRegressionControl[[2]]))

    ## set optimizer for variance parameters (box-constrained to [-5, 5])
    updateVarianceControl <- getUpdater(cntrl.variance, sd.corr,
                                        lower=-5, upper=5, verbose=verbose)
    ## maximize the marginal log-likelihood in sd.corr (theta fixed),
    ## using the unpenalized Fisher information from the regression step
    updateVariance <- function (sd.corr, theta, fisher.unpen)
        do.call(updateVarianceControl[[1]],
                alist(sd.corr, marLogLik, marScore, marFisher,
                      theta=theta, model=model,
                      fisher.unpen=fisher.unpen, verbose=verbose>1,
                      control=updateVarianceControl[[2]]))

    ## Let's go
    if (verbose>0) {
        if (verbose > 1) utils::timestamp()
        cat(if (dimRE == 0) "Optimization of regression parameters"
            else "Iterative optimization of regression & variance parameters",
            "\n", sep = "")
    }
    if (dimRE == 0) { # optimization of regression coefficients only
        parReg <- updateRegression(theta, sd.corr)
        theta <- parReg$par
        if ((convergence <- parReg$convergence) != 0 && !is.null(parReg$message))
            cat("! Non-convergence message from optimizer:", parReg$message, "\n")
    } else { # swing between updateRegression & updateVariance
        convergence <- 99  # overall flag, set to 0 once both updates converge
        i <- 0
        while(convergence != 0 && (i < cntrl.stop$niter)){
            i <- i+1
            if (verbose>0) cat("\n")

            ## update regression coefficients given current sd.corr
            parReg <- updateRegression(theta, sd.corr)
            theta <- parReg$par
            ## unpenalized Fisher information at the new theta, required by
            ## the marginal-likelihood functions in the variance update
            fisher.unpen <- attr(penFisher(theta, sd.corr, model, attributes=TRUE),
                                 "fisher")
            if(verbose>0)
                cat("Update of regression parameters: ",
                    "max|x_0 - x_1| / max|x_0| =", parReg$rel.tol, "\n")
            if(parReg$convergence != 0) {
                if (!is.null(parReg$message))
                    cat("! Non-convergence message from optimizer:",
                        parReg$message, "\n")
                cat("Update of regression coefficients in iteration ",
                    i, " unreliable\n")
            }
            ## rescue step (only if shrinkage=TRUE): on a badly failed
            ## regression update, shrink coefficients with |theta| > 10 by
            ## factor 0.1 and regularize the Fisher diagonal
            if(parReg$convergence > 20 && shrinkage){
                cat("\n\n***************************************\nshrinkage",
                    0.1*theta[abs(theta)>10],"\n")
                theta[abs(theta)>10] <- 0.1*theta[abs(theta)>10]
                diag(fisher.unpen) <- diag(fisher.unpen)+1e-2
            }

            ## update variance parameters given the new theta
            parVar <- updateVariance(sd.corr, theta, fisher.unpen)
            if(verbose>0)
                cat("Update of variance parameters: max|x_0 - x_1| / max|x_0| =",
                    parVar$rel.tol, "\n")
            if(parVar$convergence!=0) {
                if (!is.null(parVar$message))
                    cat("! Non-convergence message from optimizer:",
                        parVar$message, "\n")
                cat("Update of variance parameters in iteration ", i, " unreliable\n")
            }
            sd.corr <- parVar$par

            ## overall convergence ?
            ## both updates must have converged AND both relative parameter
            ## changes must fall below the tolerance
            if( (parReg$rel.tol < cntrl.stop$tol) && (parVar$rel.tol < cntrl.stop$tol)
                && (parReg$convergence==0) && (parVar$convergence==0) )
                convergence <- 0

            ## exit loop if no more change in parameters (maybe false convergence)
            if (parReg$rel.tol == 0 && parVar$rel.tol == 0)
                break
        }
    }
    if(verbose > 0) {
        cat("\n", if (convergence==0) "Optimization converged"
            else "Optimization DID NOT CONVERGE", "\n", sep = "")
        if (verbose > 1) utils::timestamp()
    }

    ## evaluate likelihoods and Fisher informations at the final estimates
    ll <- penLogLik(theta, sd.corr, model)
    fisher <- penFisher(theta, sd.corr, model, attributes = TRUE)
    dimnames(fisher) <- list(names(theta), names(theta))
    margll <- marLogLik(sd.corr, theta, model, attr(fisher, "fisher"))
    fisher.var <- marFisher(sd.corr, theta, model, attr(fisher, "fisher"))
    dimnames(fisher.var) <- list(names(sd.corr), names(sd.corr))

    ## return estimates, log-likelihood values, Fisher informations,
    ## the overall convergence code and the parameter dimensions
    list(theta=theta, sd.corr=sd.corr,
         loglik=ll, margll=margll,
         fisher=fisher, fisherVar=fisher.var,
         convergence=convergence, dim=c(fixed=dimFE.d.O,random=dimRE))
}
## Check the analytical score functions and Fisher informations against
## numerical derivatives for a given model (the result of
## interpretControl(control, stsObj)) and given parameters theta
## (regression par.) and sd.corr (variance par.).
## Dispatches to checkDerivatives.numDeriv / checkDerivatives.maxLik,
## i.e. wraps functionality of the numDeriv and maxLik packages;
## methods whose package is not installed are skipped (NULL result).
checkAnalyticals <- function (model,
                              theta = model$initialTheta,
                              sd.corr = model$initialSigma,
                              methods = c("numDeriv","maxLik"))
{
    cat("\nPenalized log-likelihood:\n")
    resCheckPen <- stats::setNames(lapply(methods, function (derivMethod) {
        if (requireNamespace(derivMethod)) {
            ## alist() keeps the function arguments unevaluated
            do.call(paste("checkDerivatives", derivMethod, sep="."),
                    args=alist(penLogLik, penScore, penFisher, theta,
                               sd.corr=sd.corr, model=model))
        }
    }), methods)
    ## drop the list layer if only a single method was checked
    if (length(resCheckPen) == 1L)
        resCheckPen <- resCheckPen[[1L]]

    resCheckMar <- if (length(sd.corr) == 0L) {
        list()  # no variance parameters -> nothing to check marginally
    } else {
        cat("\nMarginal log-likelihood:\n")
        fisher.unpen <- attr(penFisher(theta, sd.corr, model, attributes=TRUE),
                             "fisher")
        marChecks <- stats::setNames(lapply(methods, function (derivMethod) {
            if (requireNamespace(derivMethod)) {
                do.call(paste("checkDerivatives", derivMethod, sep="."),
                        args=alist(marLogLik, marScore, marFisher, sd.corr,
                                   theta=theta, model=model,
                                   fisher.unpen=fisher.unpen))
            }
        }), methods)
        if (length(marChecks) == 1L) marChecks[[1L]] else marChecks
    }

    list(pen = resCheckPen, mar = resCheckMar)
}
## NOTE(review): the following lines were website boilerplate (documentation
## hosting footer text) accidentally captured with the source; they are not
## R code and have been commented out so the file parses.
## Any scripts or data that you put into this service are public.
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.