LMN.R
In LMN: Inference for Linear Models with Nuisance Parameters

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(out.width = "\\textwidth")
suppressMessages({
  require(LMN)
  require(kableExtra)
})
cran_link <- function(pkg) paste0("[**", pkg, "**](https://CRAN.R-project.org/package=", pkg, ")")
build_cache <- FALSE
oldpar <- par()

## ----toy_sim_calc, include = FALSE--------------------------------------------
require(LMN)

# problem dimensions
qq <- 2 # number of responses
n <- 200 # number of observations

# parameters
Beta <- matrix(c(.3, .5, .7, .2), 2, qq)
Sigma <- matrix(c(.005, -.001, -.001, .002), qq, qq)
alpha <- .4
lambda <- .1

# simulate data
xseq <- seq(0, 10, len = n) # x vector
X <- cbind(1, xseq^alpha) # covariate matrix
V <- exp(-(outer(xseq, xseq, "-")/lambda)^2) # between-row variance
Mu <- X %*% Beta # mean matrix
Y <- mniw::rMNorm(n = 1, Lambda = Mu,
                  SigmaR = V, SigmaC = Sigma) # response matrix
## # response matrix
## Y <- matrix(rnorm(n*qq), n, qq)
## Y <- crossprod(chol(V), Y) %*% chol(Sigma) + Mu

# plot data
par(mfrow = c(1,1), mar = c(4,4,.5,.5)+.1)
plot(x = 0, type = "n", xlim = range(xseq), ylim = range(Mu, Y),
     xlab = "x", ylab = "y")
lines(x = xseq, y = Mu[,1], col = "red")
lines(x = xseq, y = Mu[,2], col = "blue")
points(x = xseq, y = Y[,1], pch = 16, col = "red")
points(x = xseq, y = Y[,2], pch = 16, col = "blue")
legend("bottomright",
       legend = c("Response 1", "Response 2", "Expected", "Observed"),
       lty = c(NA, NA, 1, NA), pch = c(22, 22, NA, 16),
       pt.bg = c("red", "blue", "black", "black"), seg.len = 1)

## ----toysim, ref.label = "toy_sim_calc", fig.height = 4, fig.width = 6.5, fig.cap = paste0("Simulated data from the toy model \\@ref(eq:toy) with $n = ", n, "$ and $q = ", qq, "$.")----
require(LMN)

# problem dimensions
qq <- 2 # number of responses
n <- 200 # number of observations

# parameters
Beta <- matrix(c(.3, .5, .7, .2), 2, qq)
Sigma <- matrix(c(.005, -.001, -.001, .002), qq, qq)
alpha <- .4
lambda <- .1

# simulate data
xseq <- seq(0, 10, len = n) # x vector
X <- cbind(1, xseq^alpha) # covariate matrix
V <- exp(-(outer(xseq, xseq, "-")/lambda)^2) # between-row variance
Mu <- X %*% Beta # mean matrix
Y <- mniw::rMNorm(n = 1, Lambda = Mu,
                  SigmaR = V, SigmaC = Sigma) # response matrix
## # response matrix
## Y <- matrix(rnorm(n*qq), n, qq)
## Y <- crossprod(chol(V), Y) %*% chol(Sigma) + Mu

# plot data
par(mfrow = c(1,1), mar = c(4,4,.5,.5)+.1)
plot(x = 0, type = "n", xlim = range(xseq), ylim = range(Mu, Y),
     xlab = "x", ylab = "y")
lines(x = xseq, y = Mu[,1], col = "red")
lines(x = xseq, y = Mu[,2], col = "blue")
points(x = xseq, y = Y[,1], pch = 16, col = "red")
points(x = xseq, y = Y[,2], pch = 16, col = "blue")
legend("bottomright",
       legend = c("Response 1", "Response 2", "Expected", "Observed"),
       lty = c(NA, NA, 1, NA), pch = c(22, 22, NA, 16),
       pt.bg = c("red", "blue", "black", "black"), seg.len = 1)

## -----------------------------------------------------------------------------
suff <- lmn_suff(Y = Y, X = X, V = V, Vtype = "full")
sapply(suff, function(x) paste0(dim(as.array(x)), collapse = "x"))

## ---- results = "hold"--------------------------------------------------------
# check than dense matrix and Toeplitz matrix calculations are the same

# autocorrelation function, or first row of V_theta
toy_acf <- function(lambda) exp(-((xseq-xseq[1])/lambda)^2)

# check that calculation of suff is the same
all.equal(suff,
          lmn_suff(Y = Y, X = X, V = toy_acf(lambda), Vtype = "acf"))

# check that it's much faster to use Vtype = "acf"
system.time({
  # using dense variance matrix
  replicate(100, lmn_suff(Y = Y, X = X, V = V, Vtype = "full"))
})
system.time({
  # using Toeplitz variance matrix
  replicate(100, lmn_suff(Y = Y, X = X, V = toy_acf(lambda), Vtype = "acf"))
})

## -----------------------------------------------------------------------------
# pre-allocate memory for Toeplitz matrix calcuations
Tz <- SuperGauss::Toeplitz$new(N = n)

# sufficient statistics for the toy model
# sufficient statistics for toy model
toy_suff <- function(theta) {
  X <- cbind(1, xseq^theta[1])
  Tz$set_acf(acf = toy_acf(theta[2]))
  lmn_suff(Y = Y, X = X, V = Tz, Vtype = "acf")
}

# _negative_ profile likelihood for theta
toy_prof <- function(theta) {
  if(theta[2] < 0) return(-Inf) # range restriction lambda > 0
  suff <- toy_suff(theta)
  -lmn_prof(suff = suff)
}

# MLE of theta
opt <- optim(par = c(alpha, lambda), # starting value
             fn = toy_prof) # objective function
theta_mle <- opt$par

# MLE of (Beta, Sigma)
suff <- toy_suff(theta_mle)
Beta_mle <- suff$Bhat
Sigma_mle <- suff$S/suff$n

# display:
# convert variance matrix to vector of standard deviations and correlations.
cov2sigrho <- function(Sigma) {
  sig <- sqrt(diag(Sigma))
  n <- length(sig) # dimensions of Sigma
  names(sig) <- paste0("sigma",1:n)
  # indices of upper triangular elements
  iupper <- matrix(1:(n^2),n,n)[upper.tri(Sigma, diag = FALSE)]
  rho <- cov2cor(Sigma)[iupper]
  rnames <- apply(expand.grid(1:n, 1:n), 1, paste0, collapse = "")
  names(rho) <- paste0("rho",rnames[iupper])
  c(sig,rho)
}

Theta_mle <- c(theta_mle, t(Beta_mle), cov2sigrho(Sigma_mle))
names(Theta_mle) <- c("alpha", "lambda",
                      "beta_01", "beta_02", "beta_11", "beta_12",
                      "sigma_1", "sigma_2", "rho_12")
signif(Theta_mle, 2)

## -----------------------------------------------------------------------------
# full _negative_ loglikelihood for the toy model
toy_nll <- function(Theta) {
  theta <- Theta[1:2]
  Beta <- rbind(Theta[3:4], Theta[5:6])
  Sigma <- diag(Theta[7:8]^2)
  Sigma[1,2] <- Sigma[2,1] <- Theta[7]*Theta[8]*Theta[9]
  # calculate loglikelihood
  suff <- toy_suff(theta)
  -lmn_loglik(Beta = Beta, Sigma = Sigma, suff = suff)
}

# uncertainty estimate:
# variance estimator
Theta_ve <- solve(numDeriv::hessian(func = toy_nll, x = Theta_mle))
Theta_se <- sqrt(diag(Theta_ve)) # standard errors

# display
tab <- rbind(true = c(alpha, lambda, t(Beta), cov2sigrho(Sigma)),
             mle = Theta_mle, se = Theta_se)
colnames(tab) <- paste0("$\\", gsub("([0-9]+)", "{\\1}", names(Theta_mle)), "$")
rownames(tab) <- c("True Value", "MLE", "Std. Error")
kableExtra::kable(as.data.frame(signif(tab,2)))

## ----gcir_setup, include = FALSE----------------------------------------------
set.seed(7) # for reproducible results

# simulate data from the gcir model
gcir_sim <- function(N, dt, Theta, x0) {
  # parameters
  gamma <- Theta[1]
  mu <- Theta[2]
  sigma <- Theta[3]
  lambda <- Theta[4]
  Rt <- rep(NA, N+1)
  Rt[1] <- x0
  for(ii in 1:N) {
    Rt[ii+1] <- rnorm(1, mean = Rt[ii] - gamma * (Rt[ii] - mu) * dt,
                      sd = sigma * Rt[ii]^lambda * sqrt(dt))
  }
  Rt
}

# true parameter values
Theta <- c(gamma = .07, mu = .01, sigma = .6, lambda = .9)
dt <- 1/12 # interobservation time (in years)
N <- 12 * 20 # number of observations (20 years)

## ----gcir_sim, include = FALSE------------------------------------------------

Rt <- gcir_sim(N = N, dt = dt, Theta = Theta, x0 = Theta["mu"])

par(mar = c(4,4,.5,.5))
plot(x = 0:N*dt, y = 100*Rt, pch = 16, cex = .8,
     xlab = "Time (years)", ylab = "Interest Rate (%)")

## ---- gcirdata, ref.label = c("gcir_setup", "gcir_sim"), echo = -1, fig.height = 4, fig.width = 6.5, fig.cap = paste0("Simulated interest rates from discrete-time approximation \\@ref(eq:euler) to the gCIR model \\@ref(eq:chan), with $N = ", N, "$, $\\dt = 1/12$, and $\\TTh = (", paste0(Theta, collapse = ", "), ")$.")----
set.seed(7) # for reproducible results

# simulate data from the gcir model
gcir_sim <- function(N, dt, Theta, x0) {
  # parameters
  gamma <- Theta[1]
  mu <- Theta[2]
  sigma <- Theta[3]
  lambda <- Theta[4]
  Rt <- rep(NA, N+1)
  Rt[1] <- x0
  for(ii in 1:N) {
    Rt[ii+1] <- rnorm(1, mean = Rt[ii] - gamma * (Rt[ii] - mu) * dt,
                      sd = sigma * Rt[ii]^lambda * sqrt(dt))
  }
  Rt
}

# true parameter values
Theta <- c(gamma = .07, mu = .01, sigma = .6, lambda = .9)
dt <- 1/12 # interobservation time (in years)
N <- 12 * 20 # number of observations (20 years)

Rt <- gcir_sim(N = N, dt = dt, Theta = Theta, x0 = Theta["mu"])

par(mar = c(4,4,.5,.5))
plot(x = 0:N*dt, y = 100*Rt, pch = 16, cex = .8,
     xlab = "Time (years)", ylab = "Interest Rate (%)")

## ----gcir_mle-----------------------------------------------------------------
# precomputed values
Y <- matrix(diff(Rt))
X <- cbind(-Rt[1:N], 1) * dt
# since Rt^(2*lambda) is calculated as exp(2*lambda * log(Rt)),
# precompute 2*log(Rt) to speed up calculations
lR2 <- 2 * log(Rt[1:N])

# sufficient statistics for gCIR model
gcir_suff <- function(lambda) {
  lmn_suff(Y = Y, X = X,
           V = exp(lambda * lR2) * dt, Vtype = "diag")
}

# _negative_ profile likelihood for gCIR model
gcir_prof <- function(lambda) {
  if(lambda <= 0) return(Inf)
  -lmn_prof(suff = gcir_suff(lambda))
}

# MLE of Theta via profile likelihood

# profile likelihood for lambda
opt <- optimize(f = gcir_prof, interval = c(.001, 10))
lambda_mle <- opt$minimum
# conditional MLE for remaining parameters
suff <- gcir_suff(lambda_mle)
Theta_mle <- c(gamma = suff$Bhat[1,1],
               mu = suff$Bhat[2,1]/suff$Bhat[1,1],
               sigma = sqrt(suff$S[1,1]/suff$n),
               lambda = lambda_mle)
Theta_mle

## ----gcir_prior---------------------------------------------------------------
prior <- lmn_prior(p = 2, q = 1) # default prior
prior

## ----gcir_post_lambda, fig.height = 4, fig.width = 6.5------------------------

# log of marginal posterior p(lambda | R)
gcir_marg <- function(lambda) {
  suff <- gcir_suff(lambda)
  post <- lmn_post(suff, prior)
  lmn_marg(suff = suff, prior = prior, post = post)
}

# grid sampler for lambda ~ p(lambda | R)

# estimate the effective support of lambda by taking
# mode +/- 5 * sqrt(quadrature)
lambda_mode <- optimize(f = gcir_marg,
                        interval = c(.01, 10),
                        maximum = TRUE)$maximum
lambda_quad <- -numDeriv::hessian(func = gcir_marg, x = lambda_mode)[1]
lambda_rng <- lambda_mode + c(-5,5) * 1/sqrt(lambda_quad)

# plot posterior on this range
lambda_seq <- seq(lambda_rng[1], lambda_rng[2], len = 1000)
lambda_lpdf <- sapply(lambda_seq, gcir_marg) # log-pdf
# normalized pdf
lambda_pdf <- exp(lambda_lpdf - max(lambda_lpdf))
lambda_pdf <- lambda_pdf / sum(lambda_pdf) / (lambda_seq[2]-lambda_seq[1])
par(mar = c(2,4,2,.5))
plot(lambda_seq, lambda_pdf, type = "l",
     xlab = expression(lambda), ylab = "",
     main = expression(p(lambda*" | "*bold(R))))

## ----gcir_post, cache = build_cache-------------------------------------------
npost <- 5e4 # number of posterior draws

# marginal sampling from p(lambda | R)
lambda_post <- sample(lambda_seq, size = npost, prob = lambda_pdf,
                      replace = TRUE)

# conditional sampling from p(B, Sigma | lambda, R)
BSig_post <- lapply(lambda_post, function(lambda) {
  lmn_post(gcir_suff(lambda), prior)
})
BSig_post <- list2mniw(BSig_post) # convert to vectorized mniw format
BSig_post <- mniw::rmniw(npost,
                         Lambda = BSig_post$Lambda,
                         Omega = BSig_post$Omega,
                         Psi = BSig_post$Psi,
                         nu = BSig_post$nu)
# convert to Theta = (gamma, mu, sigma, lambda)
Theta_post <- cbind(gamma = BSig_post$X[1,1,],
                    mu = BSig_post$X[2,1,]/BSig_post$X[1,1,],
                    sigma = sqrt(BSig_post$V[1,1,]),
                    lambda = lambda_post)
apply(Theta_post, 2, min)

## ----gcirhist, fig.width = 6.5, fig.height = 5, fig.cap = "Posterior distribution and true value for each gCIR parameter $\\TTh = (\\gamma, \\mu, \\sigma, \\lambda)$."----
# keep only draws for which gamma, mu > 0
ikeep <- pmin(Theta_post[,1], Theta_post[,2]) > 0
mean(ikeep) # a good number of draws get discarded
Theta_post <- Theta_post[ikeep,]
# convert mu to log scale for plotting purposes
Theta_post[,"mu"] <- log10(Theta_post[,"mu"])

# posterior distributions and true parameter values
Theta_names <- c("gamma", "log[10](mu)", "sigma", "lambda")
Theta_true <- Theta
Theta_true["mu"] <- log10(Theta_true["mu"])
par(mfrow = c(2,2), mar = c(2,2,3,.5)+.5)
for(ii in 1:ncol(Theta_post)) {
  hist(Theta_post[,ii], breaks = 40, freq = FALSE,
       xlab = "", ylab = "",
       main = parse(text = paste0("p(",
                                  Theta_names[ii], "*\" | \"*bold(R))")))
  abline(v = Theta_true[ii], col = "red", lwd = 2)
  if(ii == 1) {
    legend("topright", inset = .05,
           legend = c("Posterior Distribution", "True Parameter Value"),
           lwd = c(NA, 2), pch = c(22, NA), seg.len = 1.5,
           col = c("black", "red"), bg = c("white", NA), cex = .85)
  }
}

## ----teardown, include = FALSE------------------------------------------------
par(oldpar)