R/setup_rjMCMC.R

Defines functions setup_rjMCMC

Documented in setup_rjMCMC

#' Set up the rjMCMC sampler
#'
#' Set up a list object to hold the data from the MCMC sampler, and generate starting values for all model parameters. This function is called internally by \code{\link{run_rjMCMC}}.
#'
#' In addition to split/merge moves, three other types of MCMC samplers are implemented in \code{espresso} to facilitate convergence and avoid getting stuck in local maxima: two data-driven samplers (type I and type II), in which proposals are informed by the “cues” present in the original data, and one independent sampler, in which proposals are drawn at random from a Uniform distribution bounded by \code{range.dB} (see \code{\link{read_data}} or \code{\link{simulate_data}}), with no dependence on current values.
#'
#' @param rj.input Input dataset. Must be an object of class \code{rjconfig}.
#' @param n.burn Number of MCMC iterations to treat as burn-in.
#' @param n.iter Number of posterior samples.
#' @param n.chains Number of MCMC chains. Defaults to 3.
#' @param p.split Probability of performing a group split when \code{model.select = TRUE}.
#' @param p.merge Probability of performing a group merge when \code{model.select = TRUE}.
#' @param do.update Logical. Whether to update an existing sampler or set up a new one.
#' @param start.values Starting values to use when updating an existing sampler and \code{do.update = TRUE}.
#' @author Phil J. Bouchet
#' @seealso \code{\link{run_rjMCMC}}
#' @keywords brs dose-response rjmcmc 

setup_rjMCMC <- function(rj.input, 
                         n.burn,
                         n.iter,
                         n.chains = 3,
                         p.split,
                         p.merge,
                         do.update = FALSE,
                         start.values = NULL) {
  
  #' -----------------------------------------------
  # Perform function checks
  #' -----------------------------------------------
  
  if(!do.update & !"rjconfig" %in% class(rj.input)) 
    stop("Input data must be initialised using <rj_initialise>.")
  if(is.null(start.values) & do.update) stop("Missing starting values.")
  
  if(do.update) tot.iter <- n.iter else tot.iter <- n.iter + n.burn
  
  rj <- vector(mode = "list", length = 29)
  names(rj) <- c("mcmc", "t.ij", "sigma", "phi", "mu.i", "mu", "mu.ij", "psi.i", "k.ij", 
                 "pi.ij", "nu",   "tau",  "alpha", "psi", "omega", "include.covariates",
                 "exposed", "sonar", "behaviour", "range", "beta", "phase", "model",
                 "mlist", "nglist", "abbrev", "dat",  "config", "update")
  
  rj$mcmc <- list(n.chains = n.chains,
                         n.burn = n.burn,
                         n.iter = n.iter,
                         tot.iter = tot.iter,
                         iter.rge = paste0(n.burn + 1, ":", tot.iter),
                         move = list(prob = setNames(c(p.split, p.merge), c("split", "merge"))))
  
  #' -----------------------------------------------
  # Create data matrices and vectors
  #' -----------------------------------------------
  
  rj$t.ij = matrix(data = 0, nrow = tot.iter, ncol = rj.input$trials$n)
  
  if(!rj.input$config$biphasic | rj.input$config$function.select){
    
    rj$mu.i <- matrix(data = 0, nrow = tot.iter, ncol = rj.input$whales$n)
    rj$mu <- matrix(data = 0, nrow = tot.iter, ncol = rj.input$species$n)
    rj$phi <- numeric(tot.iter)
    rj$sigma <- numeric(tot.iter)
  
  } else {
    
    rj$mu.i <- rj$mu <- rj$phi <- rj$sigma <- NULL
    
  }
  
  if(rj.input$config$biphasic){
  
    rj$mu.ij <- array(data = 0, dim = c(tot.iter, rj.input$trials$n, 2))
    rj$psi.i <- matrix(data = 0, nrow = tot.iter, ncol = rj.input$whales$n)
    rj$k.ij <- matrix(data = 0, nrow = tot.iter, ncol = rj.input$trials$n)
    rj$pi.ij <- matrix(data = 0, nrow = tot.iter, ncol = rj.input$trials$n)
    rj$nu <- array(data = 0, dim = c(tot.iter, rj.input$species$n, 2),
                                     dimnames = list(NULL, paste0(".", seq_len(rj.input$species$n)),
                                                     c("nu.lower", "nu.upper")))
    rj$tau <- matrix(data = 0, nrow = tot.iter, ncol = 2)
    rj$alpha <- matrix(data = 0, nrow = tot.iter, ncol = rj.input$species$n)
    rj$psi <- numeric(tot.iter)
    rj$omega <- numeric(tot.iter)
  } else{
    rj$mu.ij <- rj$psi.i <- rj$k.ij <- rj$pi.ij <- rj$nu <- rj$tau <- rj$alpha <- 
      rj$psi <- rj$omega  <- NULL
  }
 
  # Covariates, if present
  if (rj.input$covariates$n > 0) {
    for (j in rj.input$covariates$names) {
      rj[[j]] <- matrix(
        data = 0, nrow = tot.iter,
        ncol = ifelse(rj.input$covariates$fL[[j]]$nL == 0, 1, rj.input$covariates$fL[[j]]$nL)
      )
    }
    
    rj$include.covariates <- matrix(
      data = ifelse(!rj.input$config$covariate.select, 1, 0),
      nrow = tot.iter, ncol = rj.input$covariates$n
    )
    
  } else {
    rj$include.covariates <- matrix(data = 0, nrow = tot.iter, ncol = 1)
    rj$beta <- matrix(data = 0, nrow = tot.iter, ncol = 1)
    rj$exposed <- rj$sonar <- rj$behaviour <- rj$range <- NULL
  }
  
  rj$model <- rj$phase <- numeric(tot.iter)
  rj$mlist <- rj.input$config$mlist
  rj$nglist <- rj.input$config$nglist

  if(!rj.input$config$function.select){
    if(rj.input$config$biphasic) rj$phase <- rep(2, tot.iter) else rj$phase <- rep(1, tot.iter)
  }
  
  #' -----------------------------------------------
  # Assign column names
  #' -----------------------------------------------

  colnames(rj$t.ij) <- paste0("t", rep(seq_len(rj.input$whales$n), rj.input$trials$nper), ".", 
                              unlist(sapply(rj.input$trials$nper, seq_len)))
  
  if(!rj.input$config$biphasic | rj.input$config$function.select){
  colnames(rj$mu.i) <- paste0("mu.", 1:rj.input$whales$n)
  colnames(rj$mu) <- paste0("mu.", 1:rj.input$species$n)
  }
  
  if(rj.input$config$biphasic){
    # Note: cannot assign column names to mu.ij and nu as these are stored in arrays
    colnames(rj$psi.i) <- paste0("psi.i.", seq_len(rj.input$whales$n))
    colnames(rj$k.ij) <- paste0("k.ij.", seq_len(rj.input$trials$n))
    colnames(rj$pi.ij) <- paste0("pi.ij.", seq_len(rj.input$trials$n))
    colnames(rj$tau) <- c("tau.lower", "tau.upper")
    colnames(rj$alpha) <- paste0("alpha.", seq_len(rj.input$species$n))
  }
  
  if(is.null(rj.input$species$abbrev)) {
    rj$abbrev <- list(rj.input$species$abbrev) 
    } else { rj$abbrev <- rj.input$species$abbrev }
  
  if (rj.input$covariates$n > 0) {
    for (j in rj.input$covariates$names) {
      if(is.null(rj.input$covariates$fL[[j]]$Lnames)){
        colnames(rj[[j]]) <- j
      } else{
        colnames(rj[[j]]) <- paste0(j, "_", rj.input$covariates$fL[[j]]$Lnames)
      }
    }
    colnames(rj$include.covariates) <- rj.input$covariates$names
  } else {
    colnames(rj$include.covariates) <- "beta"
  }

  #' -----------------------------------------------
  # Generate starting values
  #' -----------------------------------------------
  
  if(do.update){
    
    for(it in names(start.values)){
      if(it == "mlist") rj[[it]] <- start.values[[it]]
      if(it == "nglist") rj[[it]] <- start.values[[it]]
      if(is.null(dim(rj[[it]]))) rj[[it]][1] <- start.values[[it]][1] 
      if(length(dim(rj[[it]])) == 2) rj[[it]][1, ] <- start.values[[it]]
      if(length(dim(rj[[it]])) == 3) rj[[it]][1, ,] <- start.values[[it]]
    }
    
  } else {
    
    # Model ID
    if (rj.input$config$model.select) {
      rj$current.model <- 
        rj$model[1] <-
        sample(x = rj.input$config$clust[[1]]$model, 
               size = 1, 
               prob = rj.input$config$clust[[1]]$p_scale)
    } else {
      # Because the model list only has length 1
      if(rj.input$species$n == 1) {
        rj$current.model <- rj$model[1:tot.iter] <- vec_to_model(1, rj.input$species$names)
      } else {
             rj$current.model <- rj$model[1:tot.iter] <- names(rj.input$config$mlist) 
      }
 
    }

    current.groups <- rj$mlist[[rj$current.model]]
    
    # Functional form
    if(rj.input$config$function.select) rj$phase[1] <- sample(x = 1:2, size = 1)
    
    if(rj.input$covariates$n > 0) {
      
      if(rj.input$config$covariate.select) rj$include.covariates[1, ] <- 
          sample(x = 0:1, size = rj.input$covariates$n, replace = TRUE) 
      
      for (j in rj.input$covariates$names){
        v <- rnorm(n = ifelse(rj.input$covariates$fL[[j]]$nL == 0, 1, 
                              rj.input$covariates$fL[[j]]$nparam), mean = 0, sd = 3)
        if(rj.input$covariates$fL[[j]]$nL == 0){ rj[[j]][1, ] <- v  } else { rj[[j]][1, ] <- c(0, v)}}
      
    }
    
    # Model parameters 
    # -- MONOPHASIC ---
    
    if(!rj.input$config$biphasic | rj.input$config$function.select){
      
    rj$sigma[1] <- rj.input$config$var[1] + 
      rt_norm(n = 1, location = 0, scale = 3, 
             L = rj.input$config$priors["sigma", 1] - rj.input$config$var[1],
             U = rj.input$config$priors["sigma", 2] - rj.input$config$var[1])
    
    rj$phi[1] <- rj.input$config$var[2] + 
      rt_norm(n = 1, location = 0, scale = 3, 
             L = rj.input$config$priors["phi", 1] - rj.input$config$var[2],
             U = rj.input$config$priors["phi", 2] - rj.input$config$var[2])
    
    input.data <- tibble::tibble(species = rj.input$species$trials,
                   y = rj.input$obs$y_ij,
                   censored = rj.input$obs$censored,
                   rc = rj.input$obs$Rc,
                   lc = rj.input$obs$Lc)
    
    mu.start <- purrr::map_dbl(
      .x = seq_len(dplyr::n_distinct(current.groups)),
      .f = ~ {
        
        sp.data <- input.data %>% 
          dplyr::filter(species == .x)
        
        if(all(is.na(sp.data$y))){
          sp.data %>% 
            dplyr::rowwise() %>% 
            dplyr::mutate(y = ifelse(censored == 1,
                   runif(n = 1, min = rc, max = rj.input$config$priors["mu", 2]),
                   runif(n = 1, min = rj.input$config$priors["mu", 1], max = lc))) %>% 
            dplyr::ungroup() %>% 
            dplyr::pull(y) %>% 
            mean(., na.rm = TRUE)
        } else {
          mean(sp.data$y, na.rm = TRUE)
        }})

    mu.deviates <- rt_norm(n = length(mu.start), 
                   location = 0,
                   scale = 5, 
                   L = rj.input$param$dose.range[1] - mu.start,
                   U = rj.input$param$dose.range[2] - mu.start)
    
    rj$mu[1, ] <- (mu.start + mu.deviates)[current.groups]
    
    # rj$mu[1,] <- mu.start + mu.deviates[sapply(X = mu.start, 
    #                         FUN = function(x) which(unique(mu.start) == x), simplify = TRUE)]
    
    rj$mu.i[1, ] <- rt_norm(n = rj.input$whales$n,
                           location = rj$mu[1, rj.input$species$id], 
                           scale = rj$phi[1],
                           L = rj.input$param$dose.range[1], 
                           U = rj.input$param$dose.range[2])
    
    mu.ij.config <- rj$mu.i[1, rj.input$whales$id]
    
    # The scale needs to be large here to prevent numerical issues (returning Inf)
    rj$t.ij[1, ] <- rt_norm(n = rj.input$trials$n, 
                           location = mu.ij.config, 
                           scale = 30,
                           # rj$sigma[1],
                           L = rj.input$param$dose.range[1], 
                           U = rj.input$param$dose.range[2])

    } 
    
    if(rj.input$config$biphasic | rj.input$config$function.select){

    # Model parameters 
    # -- BIPHASIC ----
      
      inits.bi <- purrr::map(.x = seq_len(dplyr::n_distinct(current.groups)),
                             .f = ~{
                               censored <- rj.input$obs$censored[rj.input$species$trials %in% 
                                            which(current.groups == .x)]
                               y.obs <- rj.input$obs$y_ij[rj.input$species$trials %in% 
                                           which(current.groups == .x)]
                               Lc.obs <- rj.input$obs$Lc[rj.input$species$trials %in% 
                                           which(current.groups == .x)]
                               Rc.obs <- rj.input$obs$Rc[rj.input$species$trials %in% 
                                           which(current.groups == .x)]
                               
                               y.obs[is.na(y.obs) & censored == 1] <- 
                                 runif(n = sum(is.na(y.obs) & censored == 1),
                                       min = Rc.obs[is.na(y.obs) & censored == 1],
                                       max = rj.input$param$dose.range[2])
                               
                               y.obs[is.na(y.obs) & censored == -1] <- 
                                 runif(n = sum(is.na(y.obs) & censored == -1),
                                       min = rj.input$param$dose.range[1],
                                       max = Lc.obs[is.na(y.obs) & censored == -1])
                               
                               sort(y.obs)})

      rj$alpha[1, ] <- sapply(X = inits.bi, FUN = function(x){
        
        # xx <- x[x<=rj.input$config$priors["alpha", 2]]
        
        max.x <- min(c(rj.input$config$priors["alpha", 2], max(x)))
        min.x <- max(c(rj.input$config$priors["alpha", 1], min(x)))
        
        rt_norm(n = 1, 
                location = min.x + (max.x-min.x)/2,
                scale = 2, 
                L = rj.input$param$dose.range[1],
                U = rj.input$param$dose.range[2])})[current.groups]
      
      # Add one random deviate from a Uniform distribution here to avoid numerical
      # issues with NAs when one of the list elements of inits.bi is of
      # length 1 and doesn't meet the constraint with respect to alpha
      
      rj$nu[1, ,1] <-
        sapply(X = seq_len(dplyr::n_distinct(current.groups)), 
               FUN = function(x){
                 rt_norm(n = 1, 
                        location = mean(c(inits.bi[[x]][inits.bi[[x]] < unique(rj$alpha[1, which(current.groups == x)])], runif(n = 1, min = rj.input$param$dose.range[1], max = unique(rj$alpha[1, which(current.groups == x)])))), 
                        scale = 2,
                        L = rj.input$param$dose.range[1],
                        U = unique(rj$alpha[1, which(current.groups == x)]))})[current.groups]
      
      rj$nu[1, ,2] <-
        sapply(X = seq_len(dplyr::n_distinct(current.groups)), 
               FUN = function(x){
                 rt_norm(n = 1, 
                        location = mean(c(inits.bi[[x]][inits.bi[[x]] > unique(rj$alpha[1, which(current.groups == x)])], runif(n = 1, min = unique(rj$alpha[1, which(current.groups == x)]), max = rj.input$param$dose.range[2]))), 
                        scale = 2,
                        L = unique(rj$alpha[1, which(current.groups == x)]),
                        U = rj.input$param$dose.range[2])})[current.groups]
      

      inits.tau <-
        rbind(sapply(X = seq_len(dplyr::n_distinct(current.groups)), FUN = function(x){
          sd(inits.bi[[x]][inits.bi[[x]] < unique(rj$alpha[1, which(current.groups == x)])])}),
          sapply(X = seq_len(dplyr::n_distinct(current.groups)), FUN = function(x){
            sd(inits.bi[[x]][inits.bi[[x]] > unique(rj$alpha[1, which(current.groups == x)])])}))
      
      inits.tau[is.na(inits.tau)] <- runif(n = sum(is.na(inits.tau)),
                                           min = rj.input$config$priors["tau", 1],
                                           max = rj.input$config$priors["tau", 2])
      
      rj$tau[1, ] <- 
        rt_norm(n = 2, 
                             location = rowMeans(inits.tau),
                             scale = 2, 
                             L = rj.input$config$priors["tau", 1],
                             U = rj.input$config$priors["tau", 2])

      rj$psi[1] <- rnorm(n = 1, mean = rj.input$config$priors["psi", 1], 
                         sd = rj.input$config$priors["psi", 2])
      
      rj$omega[1] <- runif(n = 1, min = rj.input$config$priors["omega", 1], 
                           max = rj.input$config$priors["omega", 2])
      rj$psi.i[1, ] <- rnorm(n = rj.input$whales$n, mean = rj$psi[1], sd = rj$omega[1])
      
      included.cov <- rj$include.covariates[1, ]
      
      if (sum(included.cov) == 0) {
        covnames <- "nil"
        cov.effects <- 0
      } else {
        included.cov <- included.cov[included.cov == 1]
        covvalues <- do.call(c, purrr::map(
          .x = names(included.cov),
          .f = ~ {qnorm(pnorm(q = rj[[.x]][1, ], 
                              mean = rj.input$config$priors[.x, 1], 
                              sd = rj.input$config$priors[.x, 2]))}))
        cov.effects <- colSums(t(do.call(cbind, rj.input$covariates$dummy[names(included.cov)])) * covvalues)
      }
      
      psi_ij <- rj$psi.i[1, rj.input$whales$id] + cov.effects

    rj$pi.ij[1, ] <- pnorm(q = psi_ij)  
    
    rj$mu.ij[1, , 1] <- unname(rt_norm(n = rj.input$trials$n, 
           location = rj$nu[1, , 1][rj.input$species$trials],
           scale = rj$tau[1, 1], 
           L = rj.input$param$dose.range[1], 
           U = rj$alpha[1, rj.input$species$trials]))

    rj$mu.ij[1, , 2] <- unname(rt_norm(n = rj.input$trials$n, 
          location = rj$nu[1, , 2][rj.input$species$trials],
          scale = rj$tau[1, 2], 
          L = rj$alpha[1, rj.input$species$trials],
          U = rj.input$param$dose.range[2]))

    rj$k.ij[1, ] <- (1 - rbinom(n = rj.input$trials$n, size = 1, prob = rj$pi.ij[1, ])) + 1
    rj$t.ij[1, ] <- ((2 - rj$k.ij[1, ]) * rj$mu.ij[1, , 1]) +
      ((1 - (2 - rj$k.ij[1, ])) * rj$mu.ij[1, , 2])
    }
    
    # Censoring
    if (!all(rj.input$obs$censored == 0)) {

      if(!rj.input$config$biphasic & !rj.input$config$function.select){
      
        # Right-censored
        if(sum(rj.input$obs$censored == 1) > 0){
          rj$t.ij[1, rj.input$obs$censored == 1] <-
            rt_norm(n = sum(rj.input$obs$censored == 1),
                    location = mu.ij.config[rj.input$obs$censored == 1],
                    scale = 30,
                    L = rj.input$obs$Rc[rj.input$obs$censored == 1], 
                    U = rj.input$param$dose.range[2])
        }
        
        # Left-censored
        if(sum(rj.input$obs$censored == -1) > 0){
          rj$t.ij[1, rj.input$obs$censored == -1] <-
            rt_norm(n = sum(rj.input$obs$censored == -1),
                    location = mu.ij.config[rj.input$obs$censored == -1],
                    scale = 30,
                    L = rj.input$param$dose.range[1],
                    U = rj.input$obs$Lc[rj.input$obs$censored == -1])
        }
      
      } else {
       
        rj$k.ij[1, rj.input$obs$censored == 1] <- 2
        rj$k.ij[1, rj.input$obs$censored == -1] <- 1
        
        for (a in 1:rj.input$trials$n) {
          
          if (rj.input$obs$censored[a] == 1){
            if(rj$mu.ij[1, a, 2] < rj.input$obs$Rc[a]){
              rj$mu.ij[1, a, 2] <- rt_norm(
                n = 1,
                location = rj$nu[1, rj.input$species$trials[a], 2],
                scale = 30,
                L = max(rj.input$obs$Rc[a], rj$alpha[1, rj.input$species$trials][a]),
                U = rj.input$param$dose.range[2])
            }} 
          
          if (rj.input$obs$censored[a] == -1){
            if(rj$mu.ij[1, a, 1] > rj.input$obs$Lc[a]){ 
              rj$mu.ij[1, a, 1] <- rt_norm(
                n = 1,
                location = rj$nu[1, rj.input$species$trials[a], 1],
                scale = 30,
                L = rj.input$param$dose.range[1],
                U = min(rj.input$obs$Lc[a], rj$alpha[1, rj.input$species$trials][a]))
              
            }}

          rj$t.ij[1, ] <- (2 - rj$k.ij[1, ]) * rj$mu.ij[1, , 1] + 
            ((1 - (2 - rj$k.ij[1, ])) * rj$mu.ij[1, , 2])
          
        }
      }} # End if censoring

    if(any(is.na(rj$t.ij[1, ]))) warning("NA(s) returned as initial values for t.ij")
    if(any(is.infinite(rj$t.ij[1, ]))) warning("Inf returned as initial values for t.ij")
    
    if(sum(rj$t.ij[1, rj$k.ij[1, ] == 2] < rj$alpha[1, rj.input$species$trials][rj$k.ij[1, ] == 2]) > 0) stop("[setup] t.ij cannot be less than alpha when k = 2")
    if(sum(rj$t.ij[1, rj$k.ij[1, ] == 1] > rj$alpha[1, rj.input$species$trials][rj$k.ij[1, ] == 1]) > 0) stop("[setup] t.ij cannot be greater than alpha when k = 1")
    
  } # End if do.update
  
  # Acceptance rates for model, covariate, and functional form moves
  if(rj.input$config$model.select) rj$accept.model <- numeric(tot.iter)
  if(rj.input$config$covariate.select) rj$accept.covariates <- numeric(tot.iter)
  if(rj.input$config$function.select) rj$accept.phase <- numeric(tot.iter)
  
  # Finally, store the data
  rj$dat <- rj.input
  rj$config <- rj$dat$config
  rj$dat$config <- NULL
  rj$update <- do.update
  
  return(rj)
  
}
pjbouchet/espresso documentation built on July 27, 2024, 12:31 p.m.