R/LogNormal.R

################################################################################
############# IMPLEMENTATION OF THE LOG-NORMAL MODEL (NO MIXTURE) ##############
################################################################################

#' @title MCMC algorithm for the log-normal model
#' @description  Adaptive Metropolis-within-Gibbs algorithm with univariate
#'     Gaussian random walk proposals for the log-normal model
#'     (no mixture)
#' @param N Total number of iterations. Must be a multiple of thin.
#' @param thin Thinning period.
#' @param burn Burn-in period. Must be a multiple of thin.
#' @param Time Vector containing the survival times.
#' @param Cens Censoring indicator (1: observed, 0: right-censored).
#' @param X Design matrix with dimensions \eqn{n} x  \eqn{k} where \eqn{n} is
#'     the number of observations and \eqn{k} is the number of covariates
#'     (including the intercept).
#' @param beta0 Starting values for \eqn{\beta}. If not provided, they will
#'     be randomly generated from a normal distribution.
#' @param sigma20 Starting value for \eqn{\sigma^2}. If not provided, it
#'     will be randomly generated from a gamma distribution.
#' @param prior Indicator of prior (1: Jeffreys, 2: Type I Ind. Jeffreys,
#'     3: Ind. Jeffreys).
#' @param set Indicator for the use of set observations (TRUE: set
#'     observations, FALSE: point observations). The former is strongly
#'     recommended over the latter, as point observations cause problems for
#'     Bayesian inference (a continuous sampling model assigns zero
#'     probability to any single point).
#' @param eps_l Lower imprecision \eqn{(\epsilon_l)} for set observations
#'     (default value: 0.5).
#' @param eps_r Upper imprecision \eqn{(\epsilon_r)} for set observations
#'     (default value: 0.5).
#' @return A matrix with \eqn{(N - burn) / thin + 1} rows. The columns are the
#'     MCMC chains for \eqn{\beta} (\eqn{k} columns), \eqn{\sigma^2} (1 column),
#'     \eqn{\theta} (1 column, if appropriate), \eqn{\log(t)} (\eqn{n} columns,
#'     simulated via data augmentation) and the logarithm of the adaptive
#'     variances (the number varies among models). The latter allows the user
#'     to check whether the adaptive variances have stabilized (a short
#'     post-processing sketch follows the function definition below).
#' @examples
#' library(BASSLINE)
#'
#' # Please note: N = 1000 is not enough to reach convergence. This is only
#' # an illustration. Run longer chains for more accurate estimates.
#'
#' LN <- MCMC_LN(
#'   N = 1000, thin = 20, burn = 40, Time = cancer[, 1],
#'   Cens = cancer[, 2], X = cancer[, 3:11]
#' )
#'
#' @export
MCMC_LN <- function(N,
                    thin,
                    burn,
                    Time,
                    Cens,
                    X,
                    beta0 = NULL,
                    sigma20 = NULL,
                    prior = 2,
                    set = TRUE,
                    eps_l = 0.5,
                    eps_r = 0.5) {

  # Sample starting values if not given
  if (is.null(beta0)) beta0 <- beta.sample(n = ncol(X))
  if (is.null(sigma20)) sigma20 <- sigma2.sample()

  MCMC.param.check(
    N,
    thin,
    burn,
    Time,
    Cens,
    X,
    beta0,
    sigma20,
    prior,
    set,
    eps_l,
    eps_r
  )

  chain <- MCMC_LN_CPP(
    N,
    thin,
    burn,
    Time,
    Cens,
    X,
    beta0,
    sigma20,
    prior,
    set,
    eps_l,
    eps_r
  )

  beta.cols <- paste("beta.", colnames(X), sep = "")
  logt.cols <- paste("logt.", seq(length(Time)), sep = "")
  colnames(chain) <- c(beta.cols, "sigma2", logt.cols)

  if (burn > 0) {
    burn.period <- 1:(burn / thin)
    chain <- chain[-burn.period, ]
  }
  return(chain)
}
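
# A minimal post-processing sketch (not part of the package API; assumes the
# `LN` chain produced by the example above, with k = 9 covariates taken from
# cancer[, 3:11]): extract the stored draws and eyeball convergence with
# base-R trace plots and summaries.
#
# k <- 9
# beta.draws <- LN[, 1:k]                  # columns "beta.*"
# sigma2.draws <- LN[, k + 1]              # column "sigma2"
# plot(sigma2.draws, type = "l", xlab = "stored iteration", ylab = "sigma2")
# summary(beta.draws)                      # column-wise posterior summaries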

#' @title Log-marginal likelihood estimator for the log-normal model
#' @description Estimates the log-marginal likelihood by combining a
#'     likelihood ordinate, a prior ordinate and posterior ordinates for
#'     \eqn{\sigma^2} and \eqn{\beta}, all evaluated at the posterior medians.
#' @inheritParams MCMC_LN
#' @param chain MCMC chains generated by a BASSLINE MCMC function
#' @return A list with elements \code{LL.ord} (log-likelihood ordinate),
#'     \code{LP.ord} (log-prior ordinate), \code{LPO.sigma2} and
#'     \code{LPO.beta} (log-posterior ordinates) and \code{LML} (the
#'     log-marginal likelihood estimate).
#' @examples
#' library(BASSLINE)
#'
#' # Please note: N = 1000 is not enough to reach convergence. This is only
#' # an illustration. Run longer chains for more accurate estimates.
#'
#' LN <- MCMC_LN(
#'   N = 1000, thin = 20, burn = 40, Time = cancer[, 1],
#'   Cens = cancer[, 2], X = cancer[, 3:11]
#' )
#' LN.LML <- LML_LN(
#'   thin = 20, Time = cancer[, 1], Cens = cancer[, 2],
#'   X = cancer[, 3:11], chain = LN
#' )
#'
#' @export
LML_LN <- function(thin,
                   Time,
                   Cens,
                   X,
                   chain,
                   prior = 2,
                   set = TRUE,
                   eps_l = 0.5,
                   eps_r = 0.5) {
  chain <- as.matrix(chain)
  n <- length(Time)
  N <- dim(chain)[1]
  k <- dim(X)[2]
  if (prior == 1) {
    p <- 1 + k / 2
  }
  if (prior == 2) {
    p <- 1
  }

  if (k > 1) {
    beta.star <- apply(chain[, 1:k], 2, "median")
  } else {
    beta.star <- stats::median(chain[, 1])
  }
  sigma2.star <- stats::median(chain[, k + 1])

  # LIKELIHOOD ORDINATE
  LL.ord <- log.lik.LN(Time,
    Cens,
    X,
    beta = beta.star,
    sigma2 = sigma2.star,
    set,
    eps_l,
    eps_r
  )
  cat("Likelihood ordinate ready!\n")

  # PRIOR ORDINATE
  LP.ord <- prior_LN(
    beta = beta.star,
    sigma2 = sigma2.star,
    prior = prior,
    logs = TRUE
  )
  cat("Prior ordinate ready!\n")

  # POSTERIOR ORDINATE - sigma2
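  # Rao-Blackwellised estimate: average the inverse-gamma full-conditional
  # density of sigma2.star, with shape (n + 2p - 2) / 2 and rate
  # ||log(t) - X %*% beta||^2 / 2, over the stored draws of (beta, log(t)).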
  shape <- (n + 2 * p - 2) / 2
  po.sigma2 <- rep(0, times = N)
  for (i in 1:N) {
    aux1 <- as.vector(as.vector(t(chain[i, (k + 2):(k + 1 + n)])) - X %*%
      as.vector(chain[i, 1:k]))
    rate.aux <- as.numeric(0.5 * t(aux1) %*% aux1)
    po.sigma2[i] <- MCMCpack::dinvgamma(sigma2.star,
      shape = shape,
      scale = rate.aux
    )
  }
  PO.sigma2 <- mean(po.sigma2)
  cat("Posterior ordinate sigma2 ready!\n")

  # POSTERIOR ORDINATE - beta
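  # Reduced MCMC run with sigma2 fixed at sigma2.star; beta.star is then
  # evaluated under its Gaussian full conditional,
  # beta | sigma2*, log(t) ~ N((X'X)^{-1} X' log(t), sigma2* (X'X)^{-1}).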
  chain.beta <- MCMCR.sigma2.LN(
    N = N * thin,
    thin = thin,
    Time,
    Cens,
    X,
    beta0 = t(chain[N, 1:k]),
    sigma20 = sigma2.star,
    logt0 = t(chain[N, (k + 2):(k + 1 + n)]),
    prior = prior,
    set,
    eps_l,
    eps_r
  )
  po.beta <- rep(0, times = N)
  aux1.beta <- solve(t(X) %*% X)
  aux2.beta <- aux1.beta %*% t(X)
  for (i in 1:N) {
    mu.aux <- as.vector(aux2.beta %*% chain.beta[(i + 1), (k + 1):(k + n)])
    po.beta[i] <- mvtnorm::dmvnorm(beta.star,
      mean = mu.aux,
      sigma = sigma2.star * aux1.beta
    )
  }
  PO.beta <- mean(po.beta)
  cat("Posterior ordinate beta ready!\n")

  # TAKING LOGARITHM
  LPO.sigma2 <- log(PO.sigma2)
  LPO.beta <- log(PO.beta)

  # MARGINAL LOG-LIKELIHOOD
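  # Candidate's formula (cf. Chib, 1995):
  #   log m(y) = log f(y | theta*) + log pi(theta*) - log pi(theta* | y),
  # with pi(theta* | y) factorised as pi(sigma2* | y) * pi(beta* | sigma2*, y).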
  LML <- LL.ord + LP.ord - LPO.sigma2 - LPO.beta

  return(list(
    LL.ord = LL.ord,
    LP.ord = LP.ord,
    LPO.sigma2 = LPO.sigma2,
    LPO.beta = LPO.beta,
    LML = LML
  ))
}
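
# A minimal usage sketch (hypothetical): log-marginal likelihoods are mainly
# useful for model comparison via Bayes factors, BF_12 = exp(LML_1 - LML_2).
# Assuming `LN.LML` from the example above and a second fitted model whose
# LML output is stored in `alt.LML` (a placeholder name):
#
# log.BF <- LN.LML$LML - alt.LML$LML   # > 0 favours the log-normal model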


#' @title Deviance information criterion for the log-normal model
#' @description The deviance information criterion is based on the deviance
#'     function \eqn{D(\theta, y) = -2 \log(f(y|\theta))} and also
#'     incorporates a penalization factor for the complexity of the model.
#' @inheritParams MCMC_LN
#' @param chain MCMC chains generated by a BASSLINE MCMC function
#' @examples
#' library(BASSLINE)
#'
#' # Please note: N = 1000 is not enough to reach convergence. This is only
#' # an illustration. Run longer chains for more accurate estimates.
#'
#' LN <- MCMC_LN(
#'   N = 1000, thin = 20, burn = 40, Time = cancer[, 1],
#'   Cens = cancer[, 2], X = cancer[, 3:11]
#' )
#' LN.DIC <- DIC_LN(
#'   Time = cancer[, 1], Cens = cancer[, 2], X = cancer[, 3:11],
#'   chain = LN
#' )
#'
#' @export
DIC_LN <- function(Time, Cens, X, chain, set = TRUE, eps_l = 0.5, eps_r = 0.5) {
  chain <- as.matrix(chain)
  N <- dim(chain)[1]
  k <- dim(X)[2]
  LL <- rep(0, times = N)

  for (iter in 1:N) {
    LL[iter] <- log.lik.LN(Time,
      Cens,
      X,
      beta = as.vector(chain[iter, 1:k]),
      sigma2 = chain[iter, k + 1],
      set = set,
      eps_l,
      eps_r
    )
  }

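  # Effective number of parameters: posterior mean deviance minus the
  # deviance evaluated at the posterior medians (Spiegelhalter et al., 2002).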
  aux <- apply(chain[, 1:(k + 1)], 2, "median")
  pd <- -2 * mean(LL) + 2 * log.lik.LN(Time,
    Cens,
    X,
    beta = aux[1:k],
    sigma2 = aux[k + 1],
    set = set,
    eps_l,
    eps_r
  )
  pd.aux <- k + 1

  DIC <- -2 * mean(LL) + pd

  cat(paste("Effective number of parameters :", round(pd, 2), "\n"))
  cat(paste("Actual number of parameters :", pd.aux), "\n")
  return(DIC)
}
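
# A minimal usage sketch (not part of the package API): DIC is typically used
# to compare competing models fitted to the same data, with smaller values
# preferred. For instance, a full versus a reduced design matrix (the reduced
# column range 3:5 is an illustrative choice):
#
# LN.full <- MCMC_LN(N = 1000, thin = 20, burn = 40, Time = cancer[, 1],
#                    Cens = cancer[, 2], X = cancer[, 3:11])
# LN.red <- MCMC_LN(N = 1000, thin = 20, burn = 40, Time = cancer[, 1],
#                   Cens = cancer[, 2], X = cancer[, 3:5])
# DIC_LN(Time = cancer[, 1], Cens = cancer[, 2], X = cancer[, 3:11],
#        chain = LN.full)
# DIC_LN(Time = cancer[, 1], Cens = cancer[, 2], X = cancer[, 3:5],
#        chain = LN.red)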


#' @title Case deletion analysis for the log-normal model
#' @description Leave-one-out cross-validation analysis. The function returns
#'     a matrix with \eqn{n} rows. The first column contains the logarithm of
#'     the CPO (Geisser and Eddy, 1979); larger values of the CPO indicate
#'     better predictive accuracy of the model. The second and third columns
#'     contain the KL divergence between \eqn{\pi(\beta, \sigma^2, \theta |
#'     t_{-i})} and \eqn{\pi(\beta, \sigma^2, \theta | t)} and its
#'     calibration index \eqn{p_i}, respectively.
#' @inheritParams MCMC_LN
#' @param chain MCMC chains generated by a BASSLINE MCMC function
#' @examples
#' library(BASSLINE)
#'
#' # Please note: N = 1000 is not enough to reach convergence. This is only
#' # an illustration. Run longer chains for more accurate estimates.
#'
#' LN <- MCMC_LN(
#'   N = 1000, thin = 20, burn = 40, Time = cancer[, 1],
#'   Cens = cancer[, 2], X = cancer[, 3:11]
#' )
#' LN.CD <- CaseDeletion_LN(
#'   Time = cancer[, 1], Cens = cancer[, 2],
#'   X = cancer[, 3:11], chain = LN
#' )
#'
#' @export
CaseDeletion_LN <- function(Time,
                            Cens,
                            X,
                            chain,
                            set = TRUE,
                            eps_l = 0.5,
                            eps_r = 0.5) {
  chain <- as.matrix(chain)
  n <- dim(X)[1]
  k <- dim(X)[2]
  logCPO <- rep(0, times = n)
  KL.aux <- rep(0, times = n)
  N <- dim(chain)[1]

  for (s in 1:n) {
    aux1 <- rep(0, times = N)
    aux2 <- rep(0, times = N)
    for (ITER in 1:N) {
      aux2[ITER] <- log.lik.LN(Time[s],
        Cens[s],
        X[s, ],
        beta = as.vector(chain[ITER, 1:k]),
        sigma2 = chain[ITER, (k + 1)],
        set,
        eps_l,
        eps_r
      )
      aux1[ITER] <- exp(-aux2[ITER])
    }
    logCPO[s] <- -log(mean(aux1))
    KL.aux[s] <- mean(aux2)
    if (KL.aux[s] - logCPO[s] < 0) {
      cat(paste("Numerical problems for observation:", s, "\n"))
    }
  }
  KL <- abs(KL.aux - logCPO)
  CALIBRATION <- 0.5 * (1 + sqrt(1 - exp(-2 * KL)))
  return(cbind(logCPO, KL, CALIBRATION))
}
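
# A minimal interpretation sketch (assumes `LN.CD` from the example above):
# the calibration index lies in [0.5, 1], with values near 1 flagging
# observations whose deletion strongly changes the posterior. The 0.8 cutoff
# below is an illustrative choice, not a package default.
#
# influential <- which(LN.CD[, "CALIBRATION"] > 0.8)
# LN.CD[influential, ]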