TOMS_submission/archive/simulation_study/data.R

# function for generating continuous covariate dependent data
cont_cov_dep_data <- function(p, n1, n2, n3){

  # create covariate for observations in each of the three intervals

  # define number of samples
  n <- sum(n1, n2, n3)

  # define the intervals
  limits1 <- c(-3, -1)
  limits2 <- c(-1, 1)
  limits3 <- c(1, 3)

  # define the interval labels
  interval <- c(rep(1, n1), rep(2, n2), rep(3, n3))

  # draw the covariate values within each interval
  z1 <- sort(stats::runif(n1, limits1[1], limits1[2]))
  z2 <- sort(stats::runif(n2, limits2[1], limits2[2]))
  z3 <- sort(stats::runif(n3, limits3[1], limits3[2]))
  Z <- matrix(c(z1, z2, z3), n, 1)

  # the shared part of the structure for all three intervals is a 2 on the
  # diagonal and a 1 in the (2, 3) position
  common_str <- diag(p)
  common_str[2, 3] <- 1

  # define constants for the structure of interval 2
  beta1 <- diff(limits2)^-1
  beta0 <- -limits2[1] * beta1

  # define omega12 and omega 13
  omega12 <- (Z < 1) * pmin(1, 1 - beta0 - beta1 * Z)
  omega13 <- (Z > -1) * pmin(1, beta0 + beta1 * Z)

  # interval 2 has two different linear functions of Z in the (1, 2) position
  # and (1, 3) positions; define structures for each of these components
  str12 <- str13 <- matrix(0, p, p)
  str12[1, 2] <- str13[1, 3] <- 1

  # create the precision matrices
  prec_mats <- vector("list", n)
  for (j in 1:n){
    prec_mats[[j]] <- common_str + omega12[j] * str12 + omega13[j] * str13
  }

  # symmetrize the precision matrices
  true_precision <- lapply(prec_mats, function(mat) t(mat) + mat)

  # invert the precision matrices to get the covariance matrices
  cov_mats <- lapply(true_precision, solve)

  # generate the data using the covariance matrices
  data_mat <- t(sapply(cov_mats, MASS::mvrnorm, n = 1, mu = rep(0, p)))

  return(list(X = data_mat, Z = Z, true_precision = true_precision,
              interval = interval))
}

# function for generating multivariate continuous covariate dependent data
cont_multi_cov_dep_data <- function(p, n){

  # create covariate for observations in each of the three intervals

  # define the intervals
  limits1 <- c(-3, -1)
  limits2 <- c(-1, 1)
  limits3 <- c(1, 3)
  intervals <- list(limits1, limits2, limits3)

  # draw the covariate values within each interval
  Z <- matrix(NA, 0, 2)
  for (int_x in intervals){
    for (int_y in intervals){
      x <- runif(n, int_x[1], int_x[2])
      y <- runif(n, int_y[1], int_y[2])
      Z <- rbind(Z, cbind(x, y))
    }
  }

  # the shared part of the structure for all three intervals is a 2 on the
  # diagonal and a 1 in the (2, 3) position
  common_str <- diag(p)
  common_str[2, 3] <- 1

  # define constants for the structure of interval 2
  beta1 <- diff(limits2)^-1
  beta0 <- -limits2[1] * beta1

  # define omega12 and omega 13
  omega12 <- (Z[ , 1] < 1) * pmin(1, 1 - beta0 - beta1 * Z[ , 1])
  omega13 <- (Z[ , 2] > -1) * pmin(1, beta0 + beta1 * Z[ , 2])

  # interval 2 has two different linear functions of Z in the (1, 2) position
  # and (1, 3) positions; define structures for each of these components
  str12 <- str13 <- matrix(0, p, p)
  str12[1, 2] <- str13[1, 3] <- 1

  # create the precision matrices
  prec_mats <- vector("list", n)
  for (j in 1:(9 * n)){
    prec_mats[[j]] <- common_str + omega12[j] * str12 + omega13[j] * str13
  }

  # symmetrize the precision matrices
  true_precision <- lapply(prec_mats, function(mat) t(mat) + mat)

  # invert the precision matrices to get the covariance matrices
  cov_mats <- lapply(true_precision, solve)

  # generate the data using the covariance matrices
  data_mat <- t(sapply(cov_mats, MASS::mvrnorm, n = 1, mu = rep(0, p)))

  return(list(X = data_mat, Z = Z, true_precision = true_precision))
}
JacobHelwig/covdepGE documentation built on April 11, 2024, 7:22 a.m.