\newcommand{\bm}[1]{\boldsymbol{#1}} \newcommand{\tx}[1]{\mathrm{#1}} \newcommand{\N}{\mathcal{N}} \newcommand\invchi{\mathop{\mbox{inv-$\chi^2$}}} \newcommand\NIX{\tx{NIX}} \newcommand{\mNIX}{\tx{mNIX}} \newcommand\CI{\tx{CI}} \newcommand{\iid}{\stackrel{\tx{iid}}{\sim}} \newcommand{\ind}{\stackrel{\tx{ind}}{\sim}} \newcommand{\s}{\sigma} \newcommand{\ig}{\tx{InvGamma}} \newcommand{\SSi}{{\bm{\Sigma}}} \newcommand{\OOm}{{\bm{\Omega}}} \newcommand{\Mu}{{\bm{\mu}}} \newcommand{\rv}[3]{#2_{#1},\ldots,#2_{#3}} \newcommand{\sumi}[3][i]{\sum_{#1 = #2}^{#3}} \newcommand{\yy}{{\bm y}} \newcommand{\xx}{{\bm x}} \newcommand{\zz}{{\bm z}} \newcommand{\YY}{{\bm Y}} \newcommand{\XX}{{\bm X}} \newcommand{\UU}{{\bm U}} \newcommand{\tth}{{\bm \theta}} \newcommand{\pph}{{\bm \phi}} \newcommand{\pps}{{\bm \psi}} \newcommand{\TTh}{{\bm \Theta}} \newcommand{\lla}{{\bm \lambda}} \newcommand{\aal}{{\bm \alpha}} \newcommand{\bbe}{{\bm \beta}} \newcommand{\oom}{{\bm \omega}} \newcommand{\bind}{\hat \beta^{\tx{std}}} \newcommand{\bpool}{\hat \beta^{\tx{pool}}} \newcommand{\ud}{\tx{d}} \newcommand{\argmax}{\operatorname{arg\,max}} \newcommand{\argmin}{\operatorname{arg\,min}} \newcommand{\var}{\operatorname{var}} \newcommand{\logchol}{\operatorname{log-Chol}} \newcommand{\VV}{{\bm V}} \newcommand{\gg}{{\bm g}} \newcommand{\HH}{{\bm H}} \newcommand{\eet}{{\bm \eta}}
```r
suppressMessages({
  require(losmix)
  require(TMB)
})
source("format.R")
knitr::opts_chunk$set(comment = NA)
```
r section("Modeling Framework")
Let $y_{it}$ and $\xx_{it} = (\rv {i1} x {ip})$ denote the response variable and covariate vector of individual $i$ at time $t$. Let $\yy_i = (\rv {i1} y {iN_i})$ and $\XX_i = (\rv {i1} {\xx} {iN_i})$ denote the collection of $N_i$ measurements for individual $i$, and $\YY = (\rv 1 {\yy} M)$ and $\XX = (\rv 1 {\XX} M)$, the collection of all measurements for the $M$ observed individuals.
The basic linear random-effects location-scale model we consider here is
\begin{equation}
\begin{aligned}
y_{it} & \ind \N(\xx_{it}'\bbe_i, \sigma_i^2) \\
(\bbe_i, \sigma_i^2) & \iid \mNIX(\lla, \OOm, \nu, \tau),
\end{aligned}
\label{eq:hlr}
\end{equation}
where the multivariate normal-inverse-chi-square (mNIX) distribution is defined as
\begin{equation}
(\bbe, \sigma^2) \sim \mNIX(\lla, \OOm, \nu, \tau) \qquad \iff \qquad
\begin{aligned}
\sigma^2 & \sim \ig(\tfrac 1 2 \nu, \tfrac 1 2 \nu \tau) \\
\bbe \mid \sigma & \sim \N(\lla, \sigma^2 \OOm^{-1}).
\end{aligned}
\label{eq:mnix}
\end{equation}
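For intuition, the two-stage definition \eqref{eq:mnix} can be simulated directly in base R. The following is a minimal sketch only (the helper name `rmnix1` is ours; the supported interface is `losmix::mnix_sim()`, used later in this vignette), and it assumes the MASS package is available for multivariate normal draws:

```r
# one draw from mNIX(lambda, Omega, nu, tau) via its two-stage definition;
# minimal sketch -- use losmix::mnix_sim() in practice
rmnix1 <- function(lambda, Omega, nu, tau) {
  # sigma^2 ~ InvGamma(nu/2, nu*tau/2), i.e., reciprocal of a Gamma draw
  sigma2 <- 1 / rgamma(1, shape = nu/2, rate = nu*tau/2)
  # beta | sigma^2 ~ N(lambda, sigma^2 * Omega^{-1})
  beta <- MASS::mvrnorm(1, mu = lambda, Sigma = sigma2 * solve(Omega))
  list(beta = beta, sigma = sqrt(sigma2))
}
```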
The advantage of using the mNIX random-effects distribution \eqref{eq:mnix} is that it is the conjugate prior for $\tth_i = (\bbe_i, \s_i^2)$:
\begin{equation}
\tth_i \mid \yy_i, \XX_i \ind \mNIX(\hat \lla_i, \hat \OOm_i, \hat \nu_i, \hat \tau_i),
\end{equation}
where
\begin{equation}
\begin{aligned}
\hat \OOm_i & = \XX_i'\XX_i + \OOm
&
\hat \nu_i & = N_i + \nu
\\
\hat \lla_i & = \hat \OOm_i^{-1}(\OOm\lla + \XX_i'\yy_i)
&
\hat \tau_i & = \hat \nu_i^{-1}\big(\yy_i'\yy_i - \hat \lla_i' \hat \OOm_i \hat \lla_i + \lla'\OOm\lla + \nu\tau\big).
\end{aligned}
\label{eq:phihat}
\end{equation}
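To make the update \eqref{eq:phihat} concrete, here is a pure-R sketch of the conjugate posterior calculation for a single individual. The name `mnix_post_r` is ours; losmix exports `mnix_post()`, used later in this vignette, for the same computation:

```r
# posterior mNIX hyperparameters for one individual (eq. phihat);
# minimal sketch of what losmix's mnix_post() computes
mnix_post_r <- function(y, X, lambda, Omega, nu, tau) {
  Omega_hat <- crossprod(X) + Omega # X'X + Omega
  nu_hat <- length(y) + nu          # N_i + nu
  lambda_hat <- solve(Omega_hat, Omega %*% lambda + crossprod(X, y))
  tau_hat <- drop(sum(y^2) - crossprod(lambda_hat, Omega_hat %*% lambda_hat) +
                  crossprod(lambda, Omega %*% lambda) + nu*tau) / nu_hat
  list(lambda = drop(lambda_hat), Omega = Omega_hat,
       nu = nu_hat, tau = tau_hat)
}
```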
Moreover, the marginal likelihood of the hyperparameters $\pph = (\lla, \OOm, \nu, \tau)$ is available in closed form:
\begin{equation}
\ell(\pph \mid \YY, \XX) = - M \zeta(\pph) + \sum_{i=1}^M \zeta(\hat \pph_i),
\end{equation}
where
\begin{equation}
\zeta(\pph) = \tfrac 1 2 \big[2\log \Gamma(\nu/2) - \log |\OOm| - \nu \log(\nu\tau/2)\big],
\end{equation}
and $\hat \pph_i = (\hat \lla_i, \hat \OOm_i, \hat \nu_i, \hat \tau_i)$ is given by \eqref{eq:phihat}.
r subsection("Covariate-Free Case")
A case of particular interest is when $\xx_{it} \equiv 1$. In this case we typically relabel $\mu_i = \beta_{i1}$ and $\kappa = \OOm_{1\times 1}$, and the mNIX distribution reduces to a normal-inverse-chi-square (NIX). That is, the random-effects distribution is $(\mu_i, \s_i^2) \iid \NIX(\lambda, \kappa, \nu, \tau)$, and the posterior distribution is also NIX with parameters
\begin{equation}
\begin{aligned}
\hat \kappa_i & = N_i + \kappa
&
\hat \nu_i & = N_i + \nu
\\
\hat \lambda_i & = \hat \kappa_i^{-1}(\kappa\lambda + N_i \bar y_i)
&
\hat \tau_i & = \hat \nu_i^{-1}\big(\nu\tau + S_i + \tfrac{N_i\kappa}{\hat \kappa_i}(\bar y_i - \lambda)^2\big),
\end{aligned}
\label{eq:repostsimple}
\end{equation}
where $\bar y_i = \frac 1 {N_i} \sum_{t=1}^{N_i} y_{it}$ and $S_i = \sum_{t=1}^{N_i}(y_{it} - \bar y_i)^2$.
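As a sketch, the covariate-free update \eqref{eq:repostsimple} takes only a few lines of R (the function name `nix_post_r` is ours, for illustration only):

```r
# posterior NIX hyperparameters for one subject (eq. repostsimple);
# minimal illustrative sketch
nix_post_r <- function(y, lambda, kappa, nu, tau) {
  Ni <- length(y)
  ybar <- mean(y)         # subject-level sample mean
  Si <- sum((y - ybar)^2) # subject-level sum of squares
  kappa_hat <- Ni + kappa
  nu_hat <- Ni + nu
  lambda_hat <- (kappa*lambda + Ni*ybar) / kappa_hat
  tau_hat <- (nu*tau + Si + Ni*kappa/kappa_hat * (ybar - lambda)^2) / nu_hat
  list(lambda = lambda_hat, kappa = kappa_hat, nu = nu_hat, tau = tau_hat)
}
```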
r section("Bayesian Inference", "sec:bayes_inf")
Given a hyperparameter prior $\pi(\pph)$, let $q(\pph \mid \YY, \XX) = \ell(\pph \mid \YY, \XX) + \log \pi(\pph)$, and calculate
\begin{equation}
\hat \pph = \argmax_{\pph} q(\pph \mid \YY, \XX), \qquad \hat \VV = - \left[\frac{\partial^2}{\partial \pph^2} q(\hat \pph \mid \YY, \XX)\right]^{-1}.
\label{eq:hyperopt}
\end{equation}
Then for $M$ sufficiently large, the hyperparameter posterior distribution may be approximated as
\begin{equation}
\pph \mid \YY, \XX \sim \N(\hat \pph, \hat \VV).
\end{equation}
In order to estimate the random effect $\tth_i = (\bbe_i, \s_i^2)$ for a given individual $i$, simulate iid draws $\tth_i^{(1)}, \ldots, \tth_i^{(B)}$ via
\begin{equation}
\begin{aligned}
\pph^{(b)} & \iid \N(\hat \pph, \hat \VV) \\
\tth_i^{(b)} \mid \pph^{(b)} & \ind \mNIX(\hat \lla_i^{(b)}, \hat \OOm_i^{(b)}, \hat \nu_i^{(b)}, \hat \tau_i^{(b)}),
\end{aligned}
\label{eq:repost}
\end{equation}
where $\hat \pph_i^{(b)} = (\hat \lla_i^{(b)}, \hat \OOm_i^{(b)}, \hat \nu_i^{(b)}, \hat \tau_i^{(b)})$ is calculated from \eqref{eq:phihat} using $\yy_i$, $\XX_i$, and $\pph^{(b)}$. Then the $B$ draws from \eqref{eq:repost} are approximately iid draws from the random-effects posterior distribution
\begin{equation}
p(\tth_i \mid \YY, \XX) = \int p(\tth_i \mid \yy_i, \XX_i, \pph) p(\pph \mid \YY, \XX) \ud \pph.
\end{equation}
Thus, the crux of the computational challenge lies in the optimization problem \eqref{eq:hyperopt}. To do this, losmix uses the R package r cran_link("TMB") [@kristensen.etal16] to leverage the power of automatic differentiation; both the (approximate) log-posterior $q(\pph \mid \YY, \XX)$ and its gradient are computed in C++, thus allowing very fast quasi-Newton optimization algorithms to calculate $\hat \pph$.
r subsection("Choice of Prior")
In order to specify the hyperparameter prior $\pi(\pph)$, we first switch to an unconstrained parametrization, namely
\begin{equation}
\pps = (\lla, \oom, \log \nu, \log \tau),
\label{eq:utrans}
\end{equation}
where $\oom$ is a vector of length $p(p+1)/2$ corresponding to the upper Cholesky factor of $\OOm_{p \times p}$, but with the log of its diagonal elements. In other words, if
$$
\UU_{p\times p} = \begin{bmatrix}
\exp(\omega_1) & \omega_2 & \cdots & \omega_{p(p-1)/2 + 1} \\
0 & \exp(\omega_3) & \cdots & \omega_{p(p-1)/2 + 2} \\
\vdots & & \ddots & \vdots \\
0 & 0 & \cdots & \exp(\omega_{p(p-1)/2+p})
\end{bmatrix},
$$
then $\UU$ is the unique upper-triangular matrix with positive diagonal elements such that $\OOm = \UU'\UU$.
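As an illustration, the transformation between $\OOm$ and $\oom$ can be written in a few lines of R. This is a sketch assuming the column-major packing displayed above; losmix exports `log_chol()` and `ilog_chol()` (used later in this vignette), and the helper names below are ours:

```r
# Omega -> omega: upper Cholesky factor, packed column-wise,
# with logged diagonal (sketch of losmix::log_chol())
log_chol_r <- function(Omega) {
  U <- chol(Omega)             # upper-triangular, positive diagonal
  diag(U) <- log(diag(U))
  U[upper.tri(U, diag = TRUE)] # column-major packing
}
# omega -> Omega: invert the transformation (sketch of losmix::ilog_chol())
ilog_chol_r <- function(omega) {
  p <- (sqrt(8*length(omega) + 1) - 1) / 2
  U <- matrix(0, p, p)
  U[upper.tri(U, diag = TRUE)] <- omega
  diag(U) <- exp(diag(U))
  crossprod(U)                 # Omega = U'U
}
```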
A straightforward choice of default prior is the standard uninformative prior $\pi(\pps) \propto 1$. However, this prior is not invariant to permutations of the elements of $\OOm$ corresponding to relabeling the elements of $\bbe$. In other words, if $\xx_{it} = (\tx{gender}_i, \tx{age}_i)$ for subject $i$, then inference with the prior $\pi(\pps) \propto 1$ will be different than if instead we had $\xx_{it} = (\tx{age}_i, \tx{gender}_i)$. In order to avoid this, consider instead the prior
\begin{equation}
\pi(\pps) \propto \prod_{j=1}^p U_{jj}^{p-j},
\label{eq:scaleprior}
\end{equation}
where the $U_{jj}$ are easily obtained from $\oom$. Then Bayesian inference is invariant not only to relabeling of $\xx_{it}$, but in fact to any transformation $\tilde \xx_{it} = \bm{A} \xx_{it}$ for invertible $\bm A_{p \times p}$.
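In the packed parametrization, $\log \pi(\pps)$ is a simple linear combination of the logged diagonal elements. A sketch (the helper name is ours, and it assumes the same packing as above; losmix's `lchol_prior()`, used later, provides this prior):

```r
# log of the scale-invariant prior (eq. scaleprior) as a function of the
# packed log-Cholesky vector omega; illustrative sketch
lchol_prior_r <- function(omega) {
  p <- (sqrt(8*length(omega) + 1) - 1) / 2
  idiag <- cumsum(1:p)          # positions of log(U_jj) in the packing
  sum((p - 1:p) * omega[idiag]) # sum_j (p-j) * log(U_jj)
}
```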
With this choice of prior and unconstrained parametrization, the TMB optimization problem becomes finding
\begin{equation}
\hat \pps = \argmin_{\pps} r(\pps \mid \YY, \XX), \qquad \hat \VV = \left[\frac{\partial^2}{\partial \pps^2} r(\hat \pps \mid \YY, \XX)\right]^{-1},
\label{eq:tmbopt}
\end{equation}
where $r(\pps \mid \YY, \XX) = -\ell(\pps \mid \YY, \XX) - \log \pi(\pps)$ is the negative marginal log-posterior (the desired input for TMB). Note that since $r = -q$, the Hessian of $r$ at the mode is positive definite, so unlike in \eqref{eq:hyperopt} no sign change is needed when inverting it to obtain $\hat \VV$.
r section("Example")
The following example provides a quick overview of the main losmix R functions:
`mnix_sim()`
: Generate random draws from an mNIX distribution.

`mnix_marg()`
: Create a `TMB::MakeADFun()` model object for the marginal hyperparameter posterior distribution (more on this momentarily).

r subsection("Simulate Data")
Let's start by generating an arbitrary dataset. Note the use of `mnix_sim()` to generate $(\bbe_i, \s_i^2)$ from the mNIX distribution.
```r
require(losmix)
set.seed(1234) # reproducible results
p <- 3         # number of covariates
nSub <- 50     # number of subjects
N <- sample(20:50, nSub, replace = TRUE) # number of observations per subject
# hyperparameters
lambda <- rnorm(p)
Omega <- diag(p)
nu <- 5 * runif(1) + 3
tau <- rexp(1)
# parameters: generate them from an mNIX distribution
Theta <- mnix_sim(nSub, lambda = lambda, Omega = Omega, nu = nu, tau = tau)
# data: generate vector/matrix for each subject,
# then merge into single vector/matrix
# covariate matrices
X <- lapply(N, function(n) matrix(rnorm(n*p), n, p))
# response vectors
y <- lapply(1:nSub, function(ii) {
  rnorm(N[ii], mean = c(X[[ii]] %*% Theta$beta[ii,]), sd = Theta$sigma[ii])
})
# convert lists to single matrix/vector
X <- do.call(rbind, X)
y <- do.call(c, y)
# subject identifiers
id <- rep(1:nSub, times = N)
```
r subsection("Marginal Posterior Inference", "sec:mpost")
Now that we have simulated data, we can estimate the approximate marginal posterior distribution $p(\pph \mid \YY, \XX)$. To do this, we first use `TMB::MakeADFun()` to create a model object. This object contains, among other things, the negative log marginal posterior $r(\pps \mid \YY, \XX) = -\log p(\pps \mid \YY, \XX)$ on the unconstrained scale \eqref{eq:utrans} with the scale-invariant prior \eqref{eq:scaleprior}, along with its gradient, $\gg(\pps) = \frac{\partial}{\partial \pps} r(\pps \mid \YY, \XX)$.
```r
# marginal posterior distribution
nlp <- mnix_marg(id = id, y = y, X = X)
names(nlp)
```
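The returned object behaves like any other `TMB::MakeADFun()` model: in particular, `nlp$fn()` and `nlp$gr()` evaluate $r(\pps \mid \YY, \XX)$ and its gradient. A quick sanity check at the default starting value:

```r
nlp$fn(nlp$par) # negative log marginal posterior at the starting value
nlp$gr(nlp$par) # its gradient, computed by automatic differentiation
```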
To conduct the Bayesian inference scheme described in Section r ref_label("sec:bayes_inf"), we proceed with the following steps:

1. Numerically solve the optimization problem \eqref{eq:tmbopt} to obtain the mode $\hat \pps$ and variance estimate $\hat \VV$.

2. Simulate draws from the approximate posterior distribution
   $$ \hat p(\pps \mid \YY, \XX) \qquad \iff \qquad \pps \mid \YY, \XX \sim \N(\hat \pps, \hat \VV). $$

3. If we are interested in parameters on the original scale, apply the inverse transformation of \eqref{eq:utrans} to every sample obtained in Step 2.
In order to accomplish Step 1, R provides a couple of built-in functions for numerical optimization:

`stats::optim()`
: The default algorithm `method = "Nelder-Mead"` is derivative-free, but `method = "BFGS"` is a gradient-based quasi-Newton algorithm.

`stats::nlm()`
: This provides the option of including both gradient and Hessian information. TMB can in fact use AD to compute the Hessian $\HH(\pps) = \frac{\partial^2}{\partial \pps^2} r(\pps \mid \YY, \XX)$.

`stats::nlminb()`
: Another optimizer with optional gradient and Hessian information.

Each of the algorithms above offers a different trade-off between speed, stability, and accuracy. As this trade-off is typically problem-dependent, losmix currently does not provide a default routine, but using any of the functions above is relatively straightforward; a sketch with `stats::optim()` follows this list. For high-dimensional optimization with large $p$, the user is invited to explore the r cran_link("nloptr") suite of optimizers if the options above are insufficient.
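For instance, a gradient-based fit with `stats::optim()` might look as follows (a sketch only; `nlp` is the `mnix_marg()` object created above):

```r
# sketch: quasi-Newton optimization with optim(), supplying the
# AD gradient from TMB
fit <- optim(par = nlp$par, fn = nlp$fn, gr = nlp$gr, method = "BFGS")
fit$convergence # 0 indicates that optim() thinks it converged
```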
The R code below uses `stats::nlm()` to perform the optimization. Note the peculiar way in which function and gradient information are supplied simultaneously, instead of as separate arguments. This increases computational efficiency when the function and gradient share certain calculations, as is the case for TMB functions: calling `nlp$gr()` without arguments reuses calculations from the last call to `nlp$fn()`.
```r
# objective function
ofun <- function(par) {
  out <- nlp$fn(par)
  # include gradient information via 'attribute'
  attr(out, "gradient") <- nlp$gr()
  out
}
# optimization
opt <- nlm(p = nlp$par, # starting value (losmix picks a reasonable default)
           f = ofun)    # objective function
opt$code # code == 1 means that 'nlm' thinks it converged
```
To check whether the optimization was successful, the first thing to verify is the built-in convergence diagnostic of `stats::nlm()`, in this case `opt$code == 1` as above. Another check is the size of the gradient at the potential solution, on both the absolute and relative scale:
```r
disp <- rbind(est = opt$estimate,  # potential solution
              grad = opt$gradient, # gradient at the potential solution
              rel = opt$gradient/abs(opt$estimate)) # relative size
signif(disp, 2)
```
In this case the maximum gradient magnitude relative to the potential solution is r max(abs(disp["rel",])), so we can be reasonably confident of having arrived at a local minimum of $r(\pps \mid \YY, \XX)$. Thus we may use TMB to calculate $\hat \VV = \HH(\hat \pps)^{-1}$ in order to complete Step 1. Note the use of `solveV()` to efficiently and stably compute the inverse of the symmetric positive-definite matrix $\HH(\hat \pps)$.
```r
psi_mean <- opt$estimate                # (approximate) posterior mean of p(psi | Y, X)
psi_var <- solveV(nlp$he(opt$estimate)) # (approximate) posterior variance
```
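Conceptually, `solveV()` amounts to inverting via a Cholesky factorization, e.g. (a sketch of what it computes; the packaged version may differ in details such as argument checking):

```r
# invert a symmetric positive-definite matrix via its Cholesky factor
solveV_r <- function(V) chol2inv(chol(V))
```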
Figure r ref_label("fig:ex1_mpost") displays the approximate posterior distribution $\hat p(\phi_j \mid \YY, \XX)$ for each hyperparameter on its original scale, after completing Steps 2 and 3.
```r
# Step 2: sample from p_hat(psi | Y, X)
npost <- 1e4 # number of posterior draws
Psi_post <- rmvn(n = npost, mu = psi_mean, Sigma = psi_var)
# Step 3: convert to sample from p_hat(phi | Y, X)
Phi_post <- ivec_phi(Psi_post)
# histograms of posterior distributions
# true hyperparameter values are vertical lines in red
# format data for plotting
iOmega <- cbind(rbind(1:p, 1:p), combn(1:p, 2)) # unique elements of Omega
Phi_plot <- cbind(Phi_post$lambda, # lambda
                  apply(iOmega, 2,
                        function(ii) Phi_post$Omega[ii[1],ii[2],]), # Omega
                  Phi_post$nu,  # nu
                  Phi_post$tau) # tau
# hyperparameter names
phi_names <- c(paste0("lambda[", 1:p, "]"),
               paste0("Omega[", iOmega[1,], iOmega[2,], "]"),
               "nu", "tau")
# true values
phi_true <- c(lambda, Omega[t(iOmega)], nu, tau)
# create plot
par(mfrow = c(3,4), mar = c(2,2,4,.5))
for(ii in 1:ncol(Phi_plot)) {
  # approximate posterior
  hist(Phi_plot[,ii], breaks = 40, xlab = "", ylab = "",
       main = parse(text = paste0("hat(p)(", phi_names[ii],
                                  "*\" | \"*bold(Y),bold(X))")))
  # true parameter value
  abline(v = phi_true[ii], col = "red", lwd = 2)
}
# legend
plot(0, type = "n", xlim = c(0,1), ylim = c(0,1),
     xlab = "", ylab = "", axes = FALSE)
legend("bottom", inset = .05,
       legend = c("Posterior Distribution", "True Hyperparameter Value"),
       lwd = c(NA, 2), pch = c(22, NA), seg.len = 1.5,
       col = c("black", "red"), bg = c("white", NA), cex = .85)
```
r fig_label("Approximate posterior distribution for each hyperparameter on the original scale.", "fig:ex1_mpost")
r subsection("Inference for Random Effects", "sec:rx1")
The following R code shows how to estimate the random effects $\tth_i = (\bbe_i, \s_i^2)$ for an individual $i$ using the approximate posterior distribution \eqref{eq:repost}. The approximate posterior $\hat p(\tth_i \mid \YY, \XX)$ and the true parameter value are shown in Figure r ref_label("fig:ex1_rxpost").
```r
# inference for random effects
iSub <- sample(nSub, 1) # pick a subject at random
# data for subject i
Xi <- X[id == iSub,]
yi <- y[id == iSub]
# sample from p(theta_i | Y, X)
Thetai_post <- mnix_sim(npost,
                        lambda = Phi_post$lambda, Omega = Phi_post$Omega,
                        nu = Phi_post$nu, tau = Phi_post$tau,
                        y = yi, X = Xi)
# plot (approximate) posterior distributions
# true parameter values are plotted in red
# format data for plotting
Thetai_plot <- cbind(Thetai_post$beta,  # beta
                     Thetai_post$sigma) # sigma
# parameter names
thetai_names <- c(paste0("beta[i", 1:p, "]"), "sigma[i]")
# true parameter values for subject i
thetai_true <- c(Theta$beta[iSub,], sigma = Theta$sigma[iSub])
# create plot
par(mfrow = c(2,2), mar = c(2,2,4,.5))
for(ii in 1:ncol(Thetai_plot)) {
  # approximate posterior
  hist(Thetai_plot[,ii], breaks = 40, xlab = "", ylab = "",
       main = parse(text = paste0("hat(p)(", thetai_names[ii],
                                  "*\" | \"*bold(Y),bold(X))")))
  # true parameter value
  abline(v = thetai_true[ii], col = "red", lwd = 2)
}
```
r fig_label("Approximate posterior distribution for the random effects of a given subject $i$.", "fig:ex1_rxpost")
r section("Model Extensions")
Model \eqref{eq:hlr} can easily be extended by letting $\XX = \XX(\eet)$ and $\pph = \pph(\eet)$ each depend on hyperparameters $\eet$. For example, consider the model
\begin{equation}
\begin{aligned}
y_{it} & \ind \N(\beta_{i1} + \beta_{i2} \exp(-\gamma t), \s_i^2) \\
(\bbe_i, \s_i^2) & \ind \mNIX(\lla, \OOm, \nu \cdot x_i, \tau),
\end{aligned}
\label{eq:hlrex}
\end{equation}
such that the covariates of the model are $\xx_{it} = (t, x_i)$ and the hyperparameters are $\eet = (\gamma, \lla, \OOm, \nu, \tau)$.
Model \eqref{eq:hlrex} is implemented in C++ using the TMB interface to R, along with the losmix C++ library, in the code block below. The code can also be found in the file `ModelExt.cpp` located in the directory returned by the following call:
```r
mxt_file <- system.file("include", "losmix", "ModelExt.cpp", package = "losmix")
mxt_file
```
A useful resource for following the C++ code below is the documentation of r cran_link("TMB").
````r
cat("```cpp", readLines(mxt_file), "```", sep = "\n")
````
r subsection("Compiling and Testing")
`ModelExt.cpp` is compiled in an R session as follows:
```r
require(TMB)
model_name <- "ModelExt"
# instruct TMB where to find the losmix library
include_path <- system.file("include", package = "losmix")
# compile and load model
TMB::compile(paste0(model_name, ".cpp"),
             PKG_CXXFLAGS = paste0('-I"', include_path, '"'))
dyn.load(TMB::dynlib(model_name))
```
The following R code shows how to conduct approximate Bayesian inference for model \eqref{eq:hlrex}, after first testing that the C++ implementation is correct. So let's begin by simulating some data from model \eqref{eq:hlrex}:
```r
nSub <- 10 # number of subjects
N <- sample(20:50, nSub, replace = TRUE) # observations per subject
# hyperparameters
gamma <- runif(1) * .2
lambda <- rnorm(2)
Omega <- crossprod(matrix(rnorm(4), 2, 2))/10
nu <- runif(1, 1, 2)*50
tau <- rexp(1)/100
# covariates
X <- lapply(N, function(N) {
  cbind(t = 1:N, x = runif(1, 1, 5))
})
# parameters
Theta <- mnix_sim(nSub, lambda = lambda, Omega = Omega,
                  nu = nu * sapply(X, function(x) x[1,2]), tau = tau)
# responses
# mean vectors
Mu <- lapply(1:nSub, function(ii) {
  Theta$beta[ii,1] + Theta$beta[ii,2] * exp(-gamma * X[[ii]][,1])
})
# observation vectors
y <- lapply(1:nSub, function(ii) {
  rnorm(N[ii], mean = Mu[[ii]], sd = Theta$sigma[ii])
})
# convert data to regular format
X <- do.call(rbind, X)
y <- do.call(c, y)
id <- rep(1:nSub, times = N)
# plot data
par(mfrow = c(1,1), mar = c(4,4,.5,.5))
clrs <- rep(c("black", "blue", "red", "orange", "green4", "brown"),
            len = nSub)
plot(0, type = "n", ylab = expression(y[it]), xlab = expression(t),
     xlim = c(0, max(N)), ylim = range(sapply(Mu, range), y))
invisible(sapply(1:nSub, function(ii) {
  lines(x = 1:N[ii], y = Mu[[ii]], col = clrs[ii], lwd = 2)
  points(x = 1:N[ii], y = y[id == ii], pch = 16, cex = .8, col = clrs[ii])
}))
```
r fig_label("Mean and actual response (solid line and points) as a function of $t$ for each subject $i$.", "fig:ex2_data")
Now, let's implement the (negative) marginal log-posterior $r(\pps \mid \YY, \XX)$ in R and check that it returns exactly the same value as `ModelExt.cpp`. First, a few helper functions:
```r
# simulate hyperparameters on the transformed scale
sim_psi <- function() {
  gamma <- runif(1)
  lambda <- rnorm(2)
  Omega <- crossprod(matrix(rnorm(4), 2, 2))
  nu <- runif(1, 1, 2)
  tau <- rexp(1)/5
  list(gamma = gamma, lambda = lambda,
       logC_Omega = log_chol(Omega),
       log_nu = log(nu), log_tau = log(tau))
}

# _negative_ marginal log-posterior for ModelExt:
# R implementation
mxt_r <- function(psi, id, y, X) {
  nSub <- length(unique(id))
  lm <- sapply(1:nSub, function(ii) {
    # individual covariates and responses
    Xi <- X[id == ii,]
    yi <- y[id == ii]
    # hyperparameters on the regular scale
    gamma <- psi$gamma
    lambda <- psi$lambda
    Omega <- ilog_chol(psi$logC_Omega)
    nu <- exp(psi$log_nu) * Xi[1,2] # covariate-dependent
    tau <- exp(psi$log_tau)
    # covariate matrix
    Xi <- cbind(1, exp(-gamma * Xi[,1])) # hyperparameter-dependent
    # posterior mNIX hyperparameters
    phi_post <- mnix_post(y = yi, X = Xi,
                          lambda = lambda, Omega = Omega,
                          nu = nu, tau = tau)
    # marginal likelihood per individual
    mnix_zeta(Omega = phi_post$Omega, nu = phi_post$nu, tau = phi_post$tau) -
      mnix_zeta(Omega = Omega, nu = nu, tau = tau)
  })
  # default prior
  lpi <- lchol_prior(ilog_chol(psi$logC_Omega))
  -(sum(lm) + lpi) # negative log-posterior
}
```
Here's how to create an instance of the TMB (negative) marginal log-posterior:
model_name <- "ModelExt" p <- 2 mxt_tmb <- TMB::MakeADFun(data = c(list(model = model_name), format_data(id = id, X = X, y = y)), parameters = list(gamma = 1, lambda = rep(0,p), logC_Omega = log_chol(diag(p)), log_nu = 0, log_tau = 0), silent = TRUE, DLL = "losmix_TMBExports")
```r
# _negative_ marginal log-posterior for ModelExt:
# TMB implementation
mxt_data <- format_data(id = id, y = y, X = X) # TMB format
mxt_pars <- sim_psi() # initialize with arbitrary values (placeholders)
mxt_tmb <- TMB::MakeADFun(data = mxt_data,
                          parameters = mxt_pars,
                          DLL = model_name, silent = TRUE)
```
Now let's check that the R and TMB implementations return exactly the same results:
```r
replicate(10, expr = {
  psi <- sim_psi()
  mxt_r(psi = psi, id = id, y = y, X = X) - mxt_tmb$fn(unlist(psi))
})
```
And finally, we can sample from $\hat p(\pps \mid \YY, \XX)$ as before:
```r
# objective function
ofun <- function(par) {
  out <- mxt_tmb$fn(par)
  attr(out, "gradient") <- mxt_tmb$gr()
  out
}
# optimization
opt <- nlm(p = mxt_tmb$par, f = ofun)
opt$code # code == 1 means that 'nlm' thinks it converged
# approximate posterior inference
npost <- 1e4 # number of posterior draws
psi_mean <- opt$estimate # (approximate) posterior mean
psi_var <- solveV(mxt_tmb$he(opt$estimate)) # (approximate) posterior variance
Psi_post <- rmvn(n = npost, mu = psi_mean, Sigma = psi_var)
```
r subsection("Inference for Random Effects", "sec:rx2")
For simplicity, the simulation of random effects suggested here is slightly different from that of the built-in model `mnix_marg()` using `mnix_suff()`. That is, the TMB model has been set up such that `mxt_tmb$simulate(psi)` returns one draw from $p(\tth_i \mid \yy_i, \XX_i, \pps)$ for each subject $i$. Therefore, the following R code produces samples from the approximate random-effects distribution $\hat p(\tth_i \mid \YY, \XX)$ \eqref{eq:repost}:
```r
iSub <- sample(nSub, 1) # pick a subject at random
# TMB implementation for a single subject
mxt1_data <- format_data(y = y[id == iSub], X = X[id == iSub,]) # TMB format
mxt1_tmb <- TMB::MakeADFun(data = mxt1_data,
                           parameters = sim_psi(), # placeholder
                           DLL = model_name, silent = TRUE)
```
```r
# simulate from random-effects distribution
# note: there is no need to convert to the original parametrization first
Thetai_post <- apply(Psi_post, 1, function(psi) mxt1_tmb$simulate(psi))
# convert from list of lists to list with elements beta and sigma
Thetai_post <- unlist_bind(Thetai_post, name = c("beta", "sigma"),
                           bind = c(cbind, c))
Thetai_post$beta <- t(Thetai_post$beta) # beta needs to be transposed
thetai_true <- c(Theta$beta[iSub,], Theta$sigma[iSub]) # true parameter values
# plot
Thetai_plot <- cbind(Thetai_post$beta, Thetai_post$sigma)
thetai_names <- c("beta[i1]", "beta[i2]", "sigma[i]")
par(mfrow = c(1,3), mar = c(2,2,4,.5))
for(ii in 1:ncol(Thetai_plot)) {
  # approximate posterior
  hist(Thetai_plot[,ii], breaks = 40, xlab = "", ylab = "",
       main = parse(text = paste0("hat(p)(", thetai_names[ii],
                                  "*\" | \"*bold(Y),bold(X))")))
  # true parameter value
  abline(v = thetai_true[ii], col = "red", lwd = 2)
}
```
r fig_label("Approximate posterior distribution for the random effects of a given subject $i$.", "fig:ex2_rxpost")
r section("References")