################################################################################
############## IMPLEMENTATION OF THE LOG-NORMAL MODEL (NO MIXTURE)###############
################################################################################
#' @title MCMC algorithm for the log-normal model
#' @description Adaptive Metropolis-within-Gibbs algorithm with univariate
#' Gaussian random walk proposals for the log-normal model
#' (no mixture)
#' @param N Total number of iterations. Must be a multiple of thin.
#' @param thin Thinning period.
#' @param burn Burn-in period. Must be a multiple of thin.
#' @param Time Vector containing the survival times.
#' @param Cens Censoring indication (1: observed, 0: right-censored).
#' @param X Design matrix with dimensions \eqn{n} x \eqn{k} where \eqn{n} is
#' the number of observations and \eqn{k} is the number of covariates
#' (including the intercept).
#' @param beta0 Starting values for \eqn{\beta}. If not provided, they will
#' be randomly generated from a normal distribution.
#' @param sigma20 Starting value for \eqn{\sigma^2}. If not provided, it
#' will be randomly generated from a gamma distribution.
#' @param prior Indicator of prior (1: Jeffreys, 2: Type I Ind. Jeffreys,
#' 3: Ind. Jeffreys).
#' @param set Indicator for the use of set observations (1: set observations,
#' 0: point observations). The former is strongly recommended over the
#' latter as point observations cause problems in the context of Bayesian
#' inference (due to continuous sampling models assigning zero probability
#' to a point).
#' @param eps_l Lower imprecision \eqn{(\epsilon_l)} for set observations
#' (default value: 0.5).
#' @param eps_r Upper imprecision \eqn{(\epsilon_r)} for set observations
#' (default value: 0.5)
#' @return A matrix with \eqn{(N - burn) / thin + 1} rows. The columns are the
#' MCMC chains for \eqn{\beta} (\eqn{k} columns), \eqn{\sigma^2} (1 column),
#' \eqn{\theta} (1 column, if appropriate), \eqn{\log(t)} (\eqn{n} columns,
#' simulated via data augmentation) and the logarithm of the adaptive
#' variances (the number varies among models). The latter allows the user to
#' evaluate if the adaptive variances have been stabilized.
#' @examples
#' library(BASSLINE)
#'
#' # Please note: N=1000 is not enough to reach convergence.
#' # This is only an illustration. Run longer chains for more accurate
#' # estimations.
#'
#' LN <- MCMC_LN(
#' N = 1000, thin = 20, burn = 40, Time = cancer[, 1],
#' Cens = cancer[, 2], X = cancer[, 3:11]
#' )
#'
#' @export
MCMC_LN <- function(N,
thin,
burn,
Time,
Cens,
X,
beta0 = NULL,
sigma20 = NULL,
prior = 2,
set = TRUE,
eps_l = 0.5,
eps_r = 0.5) {
# Sample starting values if not given
if (is.null(beta0)) beta0 <- beta.sample(n = ncol(X))
if (is.null(sigma20)) sigma20 <- sigma2.sample()
MCMC.param.check(
N,
thin,
burn,
Time,
Cens,
X,
beta0,
sigma20,
prior,
set,
eps_l,
eps_r
)
chain <- MCMC_LN_CPP(
N,
thin,
burn,
Time,
Cens,
X,
beta0,
sigma20,
prior,
set,
eps_l,
eps_r
)
beta.cols <- paste("beta.", colnames(X), sep = "")
logt.cols <- paste("logt.", seq(length(Time)), sep = "")
colnames(chain) <- c(beta.cols, "sigma2", logt.cols)
if (burn > 0) {
burn.period <- 1:(burn / thin)
chain <- chain[-burn.period, ]
}
return(chain)
}
#' @title Log-marginal Likelihood estimator for the log-normal model
#' @inheritParams MCMC_LN
#' @param chain MCMC chains generated by a BASSLINE MCMC function
#' @examples
#' library(BASSLINE)
#'
#' # Please note: N=1000 is not enough to reach convergence.
#' # This is only an illustration. Run longer chains for more accurate
#' # estimations.LM
#'
#' LN <- MCMC_LN(
#' N = 1000, thin = 20, burn = 40, Time = cancer[, 1],
#' Cens = cancer[, 2], X = cancer[, 3:11]
#' )
#' LN.LML <- LML_LN(
#' thin = 20, Time = cancer[, 1], Cens = cancer[, 2],
#' X = cancer[, 3:11], chain = LN
#' )
#'
#' @export
LML_LN <- function(thin,
Time,
Cens,
X,
chain,
prior = 2,
set = TRUE,
eps_l = 0.5,
eps_r = 0.5) {
chain <- as.matrix(chain)
n <- length(Time)
N <- dim(chain)[1]
k <- dim(X)[2]
if (prior == 1) {
p <- 1 + k / 2
}
if (prior == 2) {
p <- 1
}
if (k > 1) {
beta.star <- apply(chain[, 1:k], 2, "median")
} else {
beta.star <- stats::median(chain[, 1])
}
sigma2.star <- stats::median(chain[, k + 1])
# LIKELIHOOD ORDINATE
LL.ord <- log.lik.LN(Time,
Cens,
X,
beta = beta.star,
sigma2 = sigma2.star,
set,
eps_l,
eps_r
)
cat("Likelihood ordinate ready!\n")
# PRIOR ORDINATE
LP.ord <- prior_LN(
beta = beta.star,
sigma2 = sigma2.star,
prior = prior,
logs = TRUE
)
cat("Prior ordinate ready!\n")
# POSTERIOR ORDINATE - sigma2
shape <- (n + 2 * p - 2) / 2
po.sigma2 <- rep(0, times = N)
for (i in 1:N) {
aux1 <- as.vector(as.vector(t(chain[i, (k + 2):(k + 1 + n)])) - X %*%
as.vector(chain[i, 1:k]))
rate.aux <- as.numeric(0.5 * t(aux1) %*% aux1)
po.sigma2[i] <- MCMCpack::dinvgamma(sigma2.star,
shape = shape,
scale = rate.aux
)
}
PO.sigma2 <- mean(po.sigma2)
cat("Posterior ordinate sigma2 ready!\n")
# POSTERIOR ORDINATE - beta
chain.beta <- MCMCR.sigma2.LN(
N = N * thin,
thin = thin,
Time,
Cens,
X,
beta0 = t(chain[N, 1:k]),
sigma20 = sigma2.star,
logt0 = t(chain[N, (k + 2):(k + 1 + n)]),
prior = prior,
set,
eps_l,
eps_r
)
po.beta <- rep(0, times = N)
aux1.beta <- solve(t(X) %*% X)
aux2.beta <- aux1.beta %*% t(X)
for (i in 1:N) {
mu.aux <- as.vector(aux2.beta %*% chain.beta[(i + 1), (k + 1):(k + n)])
po.beta[i] <- mvtnorm::dmvnorm(beta.star,
mean = mu.aux,
sigma = sigma2.star * aux1.beta
)
}
PO.beta <- mean(po.beta)
cat("Posterior ordinate beta ready!\n")
# TAKING LOGARITHM
LPO.sigma2 <- log(PO.sigma2)
LPO.beta <- log(PO.beta)
# MARGINAL LOG-LIKELIHOOD
LML <- LL.ord + LP.ord - LPO.sigma2 - LPO.beta
return(list(
LL.ord = LL.ord,
LP.ord = LP.ord,
LPO.sigma2 = LPO.sigma2,
LPO.beta = LPO.beta,
LML = LML
))
}
#' @title Deviance information criterion for the log-normal model
#' @description Deviance information criterion is based on the deviance function
#' \eqn{D(\theta, y) = -2 log(f(y|\theta))} but also incorporates a
#' penalization factor of the complexity of the model
#' @inheritParams MCMC_LN
#' @param chain MCMC chains generated by a BASSLINE MCMC function
#' @examples
#' library(BASSLINE)
#'
#' # Please note: N=1000 is not enough to reach convergence.
#' # This is only an illustration. Run longer chains for more accurate
#' # estimations.LM
#'
#' LN <- MCMC_LN(
#' N = 1000, thin = 20, burn = 40, Time = cancer[, 1],
#' Cens = cancer[, 2], X = cancer[, 3:11]
#' )
#' LN.DIC <- DIC_LN(
#' Time = cancer[, 1], Cens = cancer[, 2], X = cancer[, 3:11],
#' chain = LN
#' )
#'
#' @export
DIC_LN <- function(Time, Cens, X, chain, set = TRUE, eps_l = 0.5, eps_r = 0.5) {
chain <- as.matrix(chain)
N <- dim(chain)[1]
k <- dim(X)[2]
LL <- rep(0, times = N)
for (iter in 1:N) {
LL[iter] <- log.lik.LN(Time,
Cens,
X,
beta = as.vector(chain[iter, 1:k]),
sigma2 = chain[iter, k + 1],
set = set,
eps_l,
eps_r
)
}
aux <- apply(chain[, 1:(k + 1)], 2, "median")
pd <- -2 * mean(LL) + 2 * log.lik.LN(Time,
Cens,
X,
beta = aux[1:k],
sigma2 = aux[k + 1],
set = set,
eps_l,
eps_r
)
pd.aux <- k + 1
DIC <- -2 * mean(LL) + pd
cat(paste("Effective number of parameters :", round(pd, 2), "\n"))
cat(paste("Actual number of parameters :", pd.aux), "\n")
return(DIC)
}
#' @title Case deletion analysis for the log-normal model
#' @description Leave-one-out cross validation analysis. The function returns a
#' matrix with n rows. The first column contains the logarithm of the CPO
#' (Geisser and Eddy, 1979). Larger values of the CPO indicate better
#' predictive accuracy of the model. The second and third columns contain
#' the KL divergence between \eqn{\pi(\beta, \sigma^2, \theta | t_{-i})}
#' and \eqn{\pi(\beta, \sigma^2, \theta | t)} and its calibration index
#' \eqn{p_i}, respectively.
#' @inheritParams MCMC_LN
#' @param chain MCMC chains generated by a BASSLINE MCMC function
#' @examples
#' library(BASSLINE)
#'
#' # Please note: N=1000 is not enough to reach convergence.
#' # This is only an illustration. Run longer chains for more accurate
#' # estimations.LM
#'
#' LN <- MCMC_LN(
#' N = 1000, thin = 20, burn = 40, Time = cancer[, 1],
#' Cens = cancer[, 2], X = cancer[, 3:11]
#' )
#' LN.CD <- CaseDeletion_LN(
#' Time = cancer[, 1], Cens = cancer[, 2],
#' X = cancer[, 3:11], chain = LN
#' )
#'
#' @export
CaseDeletion_LN <- function(Time,
Cens,
X,
chain,
set = TRUE,
eps_l = 0.5,
eps_r = 0.5) {
chain <- as.matrix(chain)
n <- dim(X)[1]
k <- dim(X)[2]
logCPO <- rep(0, times = n)
KL.aux <- rep(0, times = n)
N <- dim(chain)[1]
for (s in 1:n) {
aux1 <- rep(0, times = N)
aux2 <- rep(0, times = N)
for (ITER in 1:N) {
aux2[ITER] <- log.lik.LN(Time[s],
Cens[s],
X[s, ],
beta = as.vector(chain[ITER, 1:k]),
sigma2 = chain[ITER, (k + 1)],
set,
eps_l,
eps_r
)
aux1[ITER] <- exp(-aux2[ITER])
}
logCPO[s] <- -log(mean(aux1))
KL.aux[s] <- mean(aux2)
if (KL.aux[s] - logCPO[s] < 0) {
cat(paste("Numerical problems for observation:", s, "\n"))
}
}
KL <- abs(KL.aux - logCPO)
CALIBRATION <- 0.5 * (1 + sqrt(1 - exp(-2 * KL)))
return(cbind(logCPO, KL, CALIBRATION))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.