R/generate_all_data.R

Defines functions generate_all_data

Documented in generate_all_data

#' Data simulated from a directed acyclic graph (DAG) model
#'
#' @description
#' Simulate data from a directed acyclic graph (DAG) model.
#'
#' @importFrom stats rnorm
#'
#' @param n a sample size
#' @param alpha_S a parameter in mediator model M~S+X for S
#' @param beta_M a parameter in outcome model Y~S+M+X for M
#' @param tau_S a parameter in outcome model Y~S+M+X for S
#' @param alpha_vec a parameter vector in mediator model M~S+X for X
#' @param beta_vec a parameter vector in outcome model Y~S+M+X for X
#' @param M.family an error distribution and link function to be used in the mediator model. See \code{\link{family}} for details of family functions. The default family is \code{\link{gaussian}}.
#' @param Y.family an error distribution and link function to be used in the outcome model. See \code{\link{family}} for details of family functions. The default family is \code{\link{gaussian}}.
#' @param sigma_M the variance of the error term in the mediator model M~S+X
#' @param sigma_Y the variance of the error term in the outcome model Y~S+M+X
#'
#' @examples
#' out <- generate_all_data(n = 200)
#' summary(out$S)
#' summary(out$M)
#' summary(out$Y)
#' summary(out$X)
#'
#' @returns A list with the following components:
#' \item{S}{exposure}
#' \item{M}{mediator}
#' \item{Y}{outcome}
#' \item{X}{confounder}
#' @export
generate_all_data <- function(n = 200,
                              alpha_S = 0,
                              beta_M = 0,
                              tau_S = 1,
                              alpha_vec = rep(1, 3),
                              beta_vec = rep(1, 3),
                              M.family = stats::gaussian(),
                              Y.family = stats::gaussian(),
                              sigma_M = 0.5,
                              sigma_Y = 0.5) {
  stopifnot(
    "The length of alpha_vec should equal that of beta_vec" = length(alpha_vec) ==
      length(beta_vec)
  )
  stopifnot("The length of alpha_vec should equal that of beta_vec" = length(alpha_S) ==
              length(beta_M))
  p <- length(alpha_vec)
  J <- length(alpha_S)
  linkinv <- M.family$linkinv

  if (Y.family$family == "gaussian") {
    alpha_vec <- as.matrix(alpha_vec)
    beta_vec <- as.matrix(beta_vec)
    alpha_S <- as.matrix(alpha_S)
    beta_M <- as.matrix(beta_M)

    S <- matrix(stats::rbinom(n, 1, 0.5), n, 1)
    X <- cbind(1, matrix(rnorm((p - 1) * n, 0, 1), n, p - 1))


    eps_M <- matrix(stats::rnorm(n * J, 0, sigma_M), n, J)
    eps_Y <- stats::rnorm(n, 0, sigma_Y)

    mu_glm <- M.family$linkinv(S %*% t(alpha_S) + X %*% alpha_vec %*% matrix(1, 1, J))

    if (M.family$family == "gaussian") {
      M <- S %*% t(alpha_S) + X %*% alpha_vec %*% matrix(1, 1, J) + eps_M
    } else if (M.family$family == "Gamma") {
      M <- stats::rgamma(n * J, shape = 1, scale = mu_glm)
      M <- matrix(M, n, J)
    } else if (M.family$family == "poisson") {
      M <- stats::rpois(n * J, lambda = mu_glm)
      M <- matrix(M, n, J)
    } else if (M.family$family == "binomial") {
      M <- stats::rbinom(n * J, 1, prob = mu_glm)
      M <- matrix(M, n, J)
    } else {
      stop("We only support Gaussian, Gamma, Poisson and Binomial family for the mediator M.")
    }


    Y <-   M %*% beta_M +
      X %*% beta_vec +
      tau_S * S +
      eps_Y


    return(structure(list(
      S = S,
      M = M,
      Y = Y,
      X = X[, -1, drop = F]
    ), class = "sim_data"))
  } else {
    stop("We only support Gaussian family for the outcome Y.")
  }

}

Try the abima package in your browser

Any scripts or data that you put into this service are public.

abima documentation built on Oct. 26, 2024, 1:08 a.m.