tests/dataGeneration/get.cluster.dat.Abin.R

# -------------------------------------------------------------------------------------------------------
# DATA SET 4. TESTS FOR FITTING COMMUNITY-LEVEL BINARY EXPOSURE A IN CLUSTER DATA (IID within Cluster)
# -------------------------------------------------------------------------------------------------------
# Fitting binary exposure by logistic regression (One A with Binary/ Continuous Y)
# individual exposure is normal with mu for each observation being a function of (E1, E2, W1, W2);
# -------------------------------------------------------------------------------------------------------

get.cluster.dat.Abin <- function(id, n.ind = 10000, rndseed = NULL, is.Y.bin = TRUE, working.model = TRUE) {
  set.seed(rndseed)
  E1 <- runif(n = 1, min = 0, max = 1)
  E2 <- sample(x = c(0, 0.2, 0.4, 0.8, 1), size = 1)
  W1 <- rbinom(n = n.ind, size = 1, prob = plogis(- 0.13 + 1.5 * E1 - 0.6 * E2))
  W2_mean <- - 0.6 + 0.8 * E1 - 0.4 * E2 
  W3_mean <- 0.5 + 0.2 * E1
  W2W3 <- MASS::mvrnorm(n = n.ind, mu = c(W2_mean, W3_mean), Sigma = matrix(c(1, 0.6, 0.6, 1), ncol = 2))
  W2 <- W2W3[, 1]
  W3 <- W2W3[, 2] 
  A <- rbinom(n = 1, size = 1, prob = plogis(- 2.4 + 3 * E1 + 0.4 * E2 + 3 * mean(W1)))  # community-level A
  if (working.model) {  # working.model holds
    if (is.Y.bin) {  # Y binary
      Y <- rbinom(n = n.ind, size = 1, prob = plogis(- 0.2 + 0.5 * A + 0.4 * E1 + 0.2 * E2 + W2 - 0.2 * W3))
      Y1 <- rbinom(n = n.ind, size = 1, prob = plogis(- 0.2 + 0.5 * 1 + 0.4 * E1 + 0.2 * E2 + W2 - 0.2 * W3))
      Y0 <- rbinom(n = n.ind, size = 1, prob = plogis(- 0.2 + 0.55 * 0 + 0.4 * E1 + 0.2 * E2 + W2 - 0.2 * W3))
    } else {  # Y continuous 
      Y <- rnorm(n = n.ind, mean = 1 + 0.3 * A - 0.2 * E1 + 0.4 * E2 + 0.4 * W2 + 0.2 * W3, sd = 1)
      Y1 <- rnorm(n = n.ind, mean = 1 + 0.3 * A - 0.2 * E1 + 0.4 * E2 + 0.4 * W2 + 0.2 * W3, sd = 1)
      Y0 <- rnorm(n = n.ind, mean = 1 + 0.3 * A - 0.2 * E1 + 0.4 * E2 + 0.4 * W2 + 0.2 * W3, sd = 1)
    }
  } else {  # working.model fails
    if (is.Y.bin) {  # Y binary
      Y <- rbinom(n = n.ind, size = 1, prob = plogis(- 0.2 + 0.8 * A + 0.5 * E1 + 0.2 * E2 + 0.3 * W1 
                                                     - 0.15 * W3 + 0.75 * mean(W2) + 0.4 * mean(W3)))
      Y1 <- rbinom(n = n.ind, size = 1, prob = plogis(- 0.2 + 0.8 * 1 + 0.5 * E1 + 0.2 * E2 + 0.3 * W1 
                                                      - 0.15 * W3 + 0.75 * mean(W2) + 0.4 * mean(W3)))
      Y0 <- rbinom(n = n.ind, size = 1, prob = plogis(- 0.2 + 0.8 * 0 + 0.5 * E1 + 0.2 * E2 + 0.3 * W1 
                                                      - 0.15 * W3 + 0.75 * mean(W2) + 0.4 * mean(W3)))
    } else {  # Y continuous 
      Y <- rnorm(n = n.ind, mean = 1 + 0.3 * A - 0.2 * E1 + 0.4 * E2 + 0.3 * W1 - 0.15 * W3 + 0.75 * mean(W2) + 0.4 * mean(W3), sd = 1)
      Y1 <- rnorm(n = n.ind, mean = 1 + 0.3 * A - 0.2 * E1 + 0.4 * E2 + 0.3 * W1 - 0.15 * W3 + 0.75 * mean(W2) + 0.4 * mean(W3), sd = 1)
      Y0 <- rnorm(n = n.ind, mean = 1 + 0.3 * A - 0.2 * E1 + 0.4 * E2 + 0.3 * W1 - 0.15 * W3 + 0.75 * mean(W2) + 0.4 * mean(W3), sd = 1)
    }
  }
  return(data.frame(cbind(id, E1, E2, W1, W2, W3, A, Y, Y1, Y0)))
}

get.fullDat.Abin <- function(J, n.ind, rndseed = NULL, is.Y.bin = TRUE, working.model = TRUE, 
                             n.ind.fix = FALSE, onlyYkeep = FALSE, verbose = TRUE) {
  `%+%` <- function(a, b) paste0(a, b)
  set.seed(rndseed)
  if (n.ind.fix) {
    n.ind <- rep(n.ind, J)
  } else {  # don't fix the number of individuals in each community
    n.ind <- round(rnorm(J, n.ind, 10))  
    n.ind[n.ind <= 0] <- n.ind  # set to n.ind if any generated number less than 1
  }
  
  if (onlyYkeep) {
    message("Only the observed & (two) post-intervened outcomes are useful, only just keep these three")
    Y <- Y1 <- Y0 <- NULL
  } else {
    full.data <- NULL
  }
  
  for(j in 1:J) {
    if (verbose) message("#### generating " %+% j %+% "th cluster ####")
    cluster.data.j <- get.cluster.dat.Abin(id = j, n.ind = n.ind[j], is.Y.bin = is.Y.bin, working.model = working.model) 
    if (onlyYkeep) {
      Y <- c(Y, cluster.data.j[, "Y"])
      Y1 <- c(Y1, cluster.data.j[, "Y1"])
      Y0 <- c(Y0, cluster.data.j[, "Y0"])
    } else {
      full.data <- rbind(full.data, cluster.data.j)
    }
  }  
  if (!onlyYkeep) { full.data$id <- as.integer(full.data$id) }
  ifelse(onlyYkeep, return(data.frame(cbind(Y, Y1, Y0))), return(full.data))
}

J <- 1000
n.ind <- 50
rndseed <- 12345

#### Data 1. One binary, community-level A with Binary Y, when working model holds
comPop.wmT.bA.bY <- get.fullDat.Abin(J = 4000, n.ind = 4000, rndseed = NULL, is.Y.bin = TRUE, 
                                     working.model = TRUE, n.ind.fix = FALSE, onlyYkeep = TRUE)
mean(comPop.wmT.bA.bY$Y1) # 0.5150197
mean(comPop.wmT.bA.bY$Y0) # 0.4113038
psi0.Y <- mean(comPop.wmT.bA.bY$Y1) - mean(comPop.wmT.bA.bY$Y0) # 0.103716

comSample.wmT.bA.bY <- get.fullDat.Abin(J = J, n.ind = n.ind, rndseed = rndseed, is.Y.bin = TRUE, working.model = TRUE)
comSample.wmT.bA.bY <- comSample.wmT.bA.bY[, c("id", "E1", "E2", "W1", "W2", "W3", "A", "Y")]
comSample.wmT.bA.bY_list <- list(comSample.wmT.bA.bY = comSample.wmT.bA.bY, psi0.Y = psi0.Y, rndseed = rndseed)
comSample.wmT.bA.bY_list$psi0.Y  # 0.103716
save(comSample.wmT.bA.bY_list, file = "comSample.wmT.bA.bY_list.rda")

#### Data 2. One binary, community-level A with Binary Y, when working model fails
comPop.wmF.bA.bY <- get.fullDat.Abin(J = 4000, n.ind = 4000, rndseed = NULL, is.Y.bin = TRUE, 
                                     working.model = FALSE, n.ind.fix = FALSE, onlyYkeep = TRUE)
mean(comPop.wmF.bA.bY$Y1) # 0.7179001
mean(comPop.wmF.bA.bY$Y0) # 0.5410842
psi0.Y <- mean(comPop.wmF.bA.bY$Y1) - mean(comPop.wmF.bA.bY$Y0) # 0.1768159

comSample.wmF.bA.bY <- get.fullDat.Abin(J = J, n.ind = n.ind, rndseed = rndseed, is.Y.bin = TRUE, working.model = FALSE)
mean(comSample.wmF.bA.bY$Y1) - mean(comSample.wmF.bA.bY$Y0) # 0.1754167 (changes everytime)
comSample.wmF.bA.bY <- comSample.wmF.bA.bY[, c("id", "E1", "E2", "W1", "W2", "W3", "A", "Y")]
comSample.wmF.bA.bY_list <- list(comSample.wmF.bA.bY = comSample.wmF.bA.bY, psi0.Y = psi0.Y, rndseed = rndseed)
save(comSample.wmF.bA.bY_list, file = "comSample.wmF.bA.bY_list.rda")
chizhangucb/tmleCommunity documentation built on May 20, 2019, 3:34 p.m.