tempo: Temporally-Explicit Models for Phenological Observations

Documented in tempo_simulate

#' Simulate phenology observations
#' 
#' This function generates random time-to-event data that may be
#' left, right, and/or interval censored and arises from the tvgeom
#' distribution with time-varying transition probabilities as a
#' function of time-dependent covaraiates that can be provided
#' to the function or generated randomly.
# #' \deqn{p[i,j] = inv.logit(\beta_0 + x_ij*\beta)}
#' 
#' @param n integer; The number of data points to simulate.
#'
#' @param t integer; The number of timesteps over which the event may occur.
#' 
#' @param censor_intensity positive number; Governs how wide censor intervals
#' will be. Interval bounds are determined by adding and subtracting Poisson
#' random variables with rate \code{censor_intesity} to and from the exact 
#' observatoin values.
#' 
#' @param beta_mu numeric vector; Regression coefficients used to determine 
#' transition probabilities in the tvgeom distribution for each observation. The
#' first element corresponds to the intercept term.
#' If random effects are specified, these will be the means of the distributions
#' , otherwise, these will be the fixed values.
#' 
#' @param random_betas integer vector; indices of beta_mu specifying which betas
#' should have random effects. \code{c(3,4)} would specify that the 3rd and 4th
#' elements in beta_mu will have random effects. Defaults to \code{NULL}, in
#' which case no random effects are used.
#' 
#' @param beta_sd numeric vector; The standard deviations for random effects on
#' beta regression coefficients corresponding to idicies in \code{random_betas}.
#' Must be specified if random_betas is specified. Defaults to \code{NULL} when
#' random effects are not used.
#' 
#' @param covariates list; Defaults to \code{NULL}, in which case covariates are
#' randomly generated. If provided, a list of \code{c} covariates matrics, each 
#' of dimension \code{[n,t]}, where n is the number of observations, t is the
#' number of time steps during which state transition could take place, and c
#' is the number of covariates, \code{length(beta_mu)-1)}. You may use the 
#' function \code{tempo_wrangle} provided in this package to convert long form
#' covariate data to the format necessary for input into \code{tempo_simulate}.
#' 
#' @param n_group integer; the number of groups in which betas should vary 
#' randomly. Must be specified if \code{random_betas} is specified. Defaults to
#' \code{NULL} when random effects are not used.
#' 
#' @param rho numeric vector; A vector of length \eqn{p*(p-1)/2}, where p is 
#' the length of \code{beta_mu}. Correlations between beta random effects. 
#' The order is as follows: For example, if p = 3, then the elements of rho, in 
#' order, would correspond the the following indices in beta_mu: [1,2],
#' [1,3], [2,3]. For p = 4, the order would be [1,2], [1,3], [1,4], 
#' [2,3], [2,4], [3,4]. Defaults to \code{NULL} in which case random 
#' effects are not correlated.
#' @importFrom stats rnorm rpois sd
#' @importFrom rlang .data
#' @importFrom mvtnorm rmvnorm
#' @importFrom abind abind
#' @importFrom magrittr "%>%"
#' @importFrom boot inv.logit
#' @importFrom dplyr mutate row_number n_distinct
#' @importFrom tvgeom rtvgeom
#' @name tempo_simulate
#' @rdname tempo_simulate
#' @export
tempo_simulate <- function(n, t,
                           censor_intensity,
                           beta_mu,
                           random_betas = NULL,
                           beta_sd = NULL,
                           covariates = NULL,
                           n_group = NULL,
                           rho = NULL) {  
  ## Create various Boolean flags
  random_effects <- !is.null(random_betas)
  single_random_effect <- length(random_betas) == 1
  multiple_random_effects <- length(random_betas) > 1
  correlated <- !is.null(rho) & multiple_random_effects # multiple_random_effects 
                                                        # as failsafe in case user 
                                                        # wrongly adds rho arg
  simulate_covariates <- is.null(covariates)

  ## Catch errors
  if(!is.null(n_group)) {
    if (random_effects & n_group > n) {
      stop("n_group cannot be greater than n")
    }
  }
  ## Create correlation matrix
  if (correlated) {
    n_param <-length(random_betas)
    Rho <- diag(n_param)
    rho_indices <- NULL
    for (p in 1:(n_param - 1)){
      for (q in (p+1):n_param) {
        rho_indices <- rbind(rho_indices, c(p,q))
      }
    }
    
    for (i in 1:nrow(rho_indices)) {
      Rho[rho_indices[i, 1], rho_indices[i, 2]] <- rho[i]
      Rho[rho_indices[i, 2], rho_indices[i, 1]] <- rho[i]
    }
    
    Sigma <- beta_sd %*% t(beta_sd) * Rho
  } else if (random_effects) {
    Sigma <- beta_sd %*% t(beta_sd)
  }
  
  if(random_effects) {
    j <- NA
    while (n_distinct(j) < n_group) {
      j <- sort(round(runif(n, 0.5, 0.5 + n_group)))
    }
  }
  
  # Add covariates if not supplied by user
  if(simulate_covariates) {
    # Always include one trend covariate
    covariates <- abind(matrix(1, nrow = n, ncol = t),
                        matrix((1:t)/sd(1:t), nrow = n, ncol = t, byrow = TRUE),
                        along = 3)
    if (length(beta_mu) > 2) {
      for (i in 1:(length(beta_mu) - 2)){
        covariates <- abind(covariates, matrix(rnorm(n*t), nrow = n), along = 3)
      }
    }
  } else {
    orig_cov_names <- names(covariates)
    covariates <- c(list(intercept = array(1, dim = dim(covariates[[1]]))),
                    covariates)
    covariates <- do.call(abind, c(covariates, along = 3))
  }
  
  if(random_effects) {
    if(multiple_random_effects) {
      beta_matrix <- matrix(NA,
                            nrow = length(unique(j)),
                            ncol = length(beta_mu))
      
      for (row in 1:nrow(beta_matrix)){
        # fill in random betas
        beta_matrix[row, random_betas] <- 
          as.vector(rmvnorm(n = 1,
                            mean = beta_mu[random_betas],
                            sigma = Sigma))
        
        # fill in fixed betas
        beta_matrix[row, -random_betas] <- beta_mu[-random_betas]
      }
    } else if (single_random_effect) {
      beta_matrix <- matrix(NA,
                            nrow = length(unique(j)),
                            ncol = length(beta_mu))
      
      for (row in 1:nrow(beta_matrix)){
        # fill in random beta
        beta_matrix[row, random_betas] <- 
          as.vector(rnorm(n = 1,
                          mean = beta_mu[random_betas],
                          sd = beta_sd))
        # fill in fixed betas
        beta_matrix[row, -random_betas] <- beta_mu[-random_betas]
      }
    }
    
    prob <- matrix(NA, nrow = n, ncol = t)
    for (i in 1:n){ # lazy, but works
      for (time in 1:t){
        prob[i, time] <- 
          inv.logit(covariates[i, time, ] %*% beta_matrix[j[i], ])
      }
    }
  } else { # if no random effects
    prob <- apply(covariates, c(1, 2), function(x, beta) {
      inv.logit(x %*% beta)
    }, beta = beta_mu)
  }
  
  # Simulate uncensored observations
  y <- apply(prob, 1, FUN = function(x) {rtvgeom(1, x)})
  
  C <- data.frame(obs_id = seq_along(y), c1 = y - 1, c2 = y)
  
  # Impose censoring
  C <- C %>% 
    mutate(
      c1 = .data$c1 - rpois(n, censor_intensity),
      c2 = .data$c2 + rpois(n, censor_intensity)
    ) %>% 
    mutate( # replace any -'s with NA and >t with NA
      c1 = ifelse(.data$c1 < 1, NA, .data$c1),
      c2 = ifelse(.data$c2 > t, NA, .data$c2),
      obs_id = paste0("obs_", row_number())
    )
  # Name the dimensions of covariates
  dimnames(covariates)[1] <- list(paste0("obs_", 1:n))
  
  ## Create output object
  covariates <- asplit(covariates[ , , -1, drop = FALSE], 3)
  
  if(simulate_covariates) {
    names(covariates) <- c(paste0("covariate", 1:(length(beta_mu)-1)))
  } else {
    names(covariates) <- orig_cov_names
  }
  generating_vals <- list(beta_mu = beta_mu)
  
  if (random_effects) {
    group_betas <- as.data.frame(beta_matrix)
    colnames(group_betas) <- c("intercept", paste0("beta_", names(covariates)))
    group_betas$group <- sort(unique(j))
  
    generating_vals <- c(generating_vals, list(beta_sd = beta_sd,
                                               random_betas = random_betas,
                                               group_betas = group_betas
                                               ))
    if(correlated) {
      generating_vals <- c(generating_vals, list(Rho = Rho))
    }
  }
  out <- list(y = C, covariates = covariates)
  
  if(random_effects) {
    out <- c(out, list(j = j))
  }
  out <- c(out, list(generating_vals = generating_vals))
  
  out
}