R/sim_data.R

#' Simulate data based on a model and parameter distributions
#'
#' @param design a design dataset. See example
#' @param model A function with the first argument the simulation design, i.e. a dataset with the columns ... The second argument to this function is a dataset with parameters for every individual. This can be supplied by the user, or generated by this sim_data if theta and omega_mat are supplied.
#' @param theta vector of fixed effect parameters
#' @param omega_mat vector of between subject random effects, specified as lower triangle
#' @param par_names A character vector linking the parameters in the model to the variables in the dataset. See example.
#' @param par_values parameter values
#' @param draw_iiv draw between subject random effects?
#' @param error see example
#' @param n number of simulations to perform
#' @return a vector of simulated dependent variables (for us in the VPC plotting function)
#' @family aggregate functions
#' @seealso \code{\link{vpc}}
#' @export
#' @details
#' This function generates the simulated dependent values for use in the VPC plotting function.
sim_data <- function (design = cbind(id = c(1,1,1), idv = c(0,1,2)),
                      model = function(x) { return(x$alpha + x$beta) },
                      theta,
                      omega_mat,
                      par_names,
                      par_values = NULL,
                      draw_iiv = "mvrnorm",
                      error = list(proportional = 0, additive = 0, exponential = 0),
                      n=100) {
  if (is.null(par_values)) {
    param <- draw_params_mvr( # draw parameter values. can also be just population values, or specified manually ()
      ids = 1:n,
      n_sim = n,
      theta,
      omega_mat = triangle_to_full(omega_mat),
      par_names = par_names)
  } else {
    param <- par_values
  }
  sim_des <- do.call("rbind", replicate(n, design, simplify = FALSE))
  sim_des$sim  <- rep(1:n, each=nrow(design[,1]))
  sim_des$join <- paste(sim_des$sim, sim_des$id, sep="_")
  param$join   <- paste(param$sim, param$id, sep="_")
  tmp <- dplyr::as_tibble(merge(sim_des, param,
                      by.x="join", by.y="join"))
  tmp_pred <- cbind(data.frame(design), matrix(rep(theta, each=nrow(design[,1])), ncol=length(theta)))
  colnames(tmp_pred)[length(tmp_pred)-length(par_names)+1:3] <- par_names
  tmp$dv <- add_noise(model(tmp), ruv = error)
  tmp$pred <- rep(model(tmp_pred), n)

  colnames(tmp) <- gsub("\\.x", "", colnames(tmp))
  tmp %>%
    dplyr::arrange(sim, id, time)
}

sim_data_tte <- function (fit, t_cens = NULL, n = 100) {
  fit$coefficients <- as.list(fit$coefficients)
  dat <- data.frame(model.matrix(fit))
  for (i in seq(fit$coefficients)) { fit$coefficients[[i]] <- as.numeric (fit$coefficients[[i]]) }
  fact <- as.matrix(attr(fit$terms, "factors"))
  parm <- t(fact) %*% as.numeric(fit$coefficients)
  tmp.single <- data.frame (
    par = exp(as.numeric(fit$coefficients[1]) + as.matrix(dat[,rownames(parm)]) %*% parm),
    dv = 1
  )
  tmp <- do.call("rbind", replicate(n, tmp.single, simplify = FALSE))
  tmp$sim  <- rep(1:n, each=nrow(tmp.single[,1]))
  if (!fit$dist %in% c("exponential", "weibull")) {
    cat (paste("Simulation of ", fit$dist, "distribution not yet implemented, sorry."))
    return()
  }
  if (fit$dist == "exponential") {
    tmp$t = rweibull(nrow(dat[,1]) * n, shape = 1, scale = tmp$par)
    # or using: tmp$t = rexp(length(design$id), 1/tmp$par)
  }
  if (fit$dist == "weibull") {
    # annoyinly, the survreg and rweibull mix up the shape/scale parameter names and also take the inverse!!!
    tmp$t = rweibull(nrow(dat[,1]) * n, shape = 1/fit$scale, scale = tmp$par)
  }
  if (sum(tmp$t > t_cens) > 0) {
    tmp[tmp$t > t_cens,]$t <- t_cens
  }
  out <- c()
  for (i in 1:n) {
    km_fit <- compute_kaplan(tmp[tmp$sim == i,])
    idx_new <- idx + length(unique(tmp[tmp$sim == i,]$t))-1
    out <- rbind(out, cbind(i, km_fit$time, km_fit$surv))
  }
  colnames(out) <- c("sim", "time", "dv")
  dplyr::as_tibble(data.frame(out))
}
ronkeizer/vpc documentation built on May 11, 2023, 11:09 p.m.