# R/simulation_functions.R

#################################
### This file contains all    ###
### functions for simulations ###
#################################



norta <- function(N = N, cor.matrix = cor.matrix, conf.corr.X = 0, instrument.strength = 0.8, beta0 = 0, beta1 = 1) 
{
  # generate correlated random variables with the IV assumptions using the NORTA method
  
  if (!is.numeric(N) | N < 1) 
    stop("'N' must be greater than or equal to one")
  
  N     <- as.integer(N)
  ans   <- rsmvnorm(R = N, cor.matrix = cor.matrix)
  probs <- pnorm(ans)
  U     <- ans[,1] #confounder
  Z     <- rnorm(N)
  X1    <- instrument.strength * Z + sqrt(1 - instrument.strength^2) * rnorm(N)
  X     <- conf.corr.X * U + sqrt(1 - conf.corr.X^2) * X1
  ans   <- cbind(Z, X, qexp(probs[,2], rate = exp(1 * (beta0 + beta1 * X))), U)
  
  colnames(ans) <- c("Z", "X", "Y", "U")
  data.frame(ans)
}
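
# A minimal usage sketch for norta(). The rsmvnorm() call above matches the
# signature of SimCorMultRes::rsmvnorm(R, cor.matrix); that dependency is an
# assumption, as are all values chosen below.
if (FALSE) {
  sigma <- cbind(c(1, 0.5), c(0.5, 1))  # correlation between the confounder and the outcome scale
  dat   <- norta(N = 500, cor.matrix = sigma, conf.corr.X = 0.25,
                 instrument.strength = 0.8, beta0 = 0, beta1 = 1)
  round(cor(dat), 2)  # inspect the induced correlations among Z, X, Y, U
}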

genIVData <- function(N = N, Z2XCoef, U2XCoef, U2YCoef, beta0 = 0, beta1 = 1,
                      survival.distribution = c("exponential", "normal"),
                      confounding.function = c("linear", "inverted", "exponential", "square", "sine"),
                      dichotomous.exposure = FALSE,
                      break2sls = FALSE, break.method = c("collider", "error", "error.u", "z.on.y"), 
                      error.amount = 0.01) 
{
  if (!is.numeric(N) | N < 1) 
    stop("'N' must be greater than or equal to one")
  surv.dist <- match.arg(survival.distribution)
  confounding.function <- match.arg(confounding.function)
  break.method <- match.arg(break.method)
  
  
  
  N <- as.integer(N)
  
  # the log-scale error SD is currently the same for both exposure types
  sd.Y <- 1
  if (break2sls) {
    if (break.method == "collider") {
      uy.confounder <- rnorm(N)
      U <- rnorm(N, sd = 1) + U2YCoef * uy.confounder
      err <- rnorm(N) + U2YCoef * uy.confounder
    } else if (break.method == "error") {
      U <- rnorm(N, sd = 1)
      err <- rnorm(N)
    } else if (break.method == "error.u") {
      err <- rnorm(N)
      U <- rnorm(N, sd = 1) + err * error.amount
    } else if (break.method == "z.on.y") {
      U <- rnorm(N, sd = 1)
      err <- rnorm(N)
    }
  } else {
    U <- rnorm(N, sd = 1)
    err <- rnorm(N, sd = sd.Y)
  }
  
  
  #U <- U / sd(U)
  
  #Z <- Z / sd(Z)
  if (dichotomous.exposure)
  {
    Z <- rbinom(N, 1, 0.5)
    if (break2sls & break.method == "error") {
      zb <- switch(confounding.function,
                  linear = (Z2XCoef * Z) + (U2XCoef * U) + error.amount * err,
                  inverted = (Z2XCoef * Z / abs(sin(Z))) + (U2XCoef * U) + error.amount * err,
                  exponential = (Z2XCoef * Z) * exp(U2XCoef * U) + error.amount * err,
                  # sine case added to mirror the non-break branch; previously
                  # this switch returned NULL for confounding.function = "sine"
                  sine = Z2XCoef * (Z + sin(Z * pi)) + (U2XCoef * U) + error.amount * err,
                  square = (Z2XCoef * Z) * (U2XCoef * U)^2 + error.amount * err)
    } else {
      zb <- switch(confounding.function,
                  linear = Z2XCoef * Z + (U2XCoef * U),
                  inverted = Z2XCoef * Z / (1 + abs(Z)) + (U2XCoef * U),
                  exponential = Z2XCoef * exp(Z) + (U2XCoef * U),
                  sine = Z2XCoef * (Z + sin(Z * pi)) + (U2XCoef * U),
                  square = Z2XCoef * (Z - 1 * Z^2) + (U2XCoef * U))
      #X <- X/sd(X)
    }
    #prob <- 1 / (1 + exp(-zb))
    X <- 1 * (sign(beta1)*zb > rnorm(N, sd = 0.5))
  } else {
    Z <- rnorm(N, sd = 1)
    if (break2sls & break.method == "error") {
      X <- switch(confounding.function,
                  linear = (Z2XCoef * Z) + (U2XCoef * U) + error.amount * err + rnorm(N, sd = 1),
                  inverted = (Z2XCoef * Z / abs(sin(Z))) + (U2XCoef * U) + error.amount * err + rnorm(N, sd = 1),
                  exponential = (Z2XCoef * Z) * exp(U2XCoef * U) + error.amount * err + rnorm(N, sd = 1),
                  # sine case added to mirror the non-break branch; previously
                  # this switch returned NULL for confounding.function = "sine"
                  sine = Z2XCoef * (Z + sin(Z * pi)) + (U2XCoef * U) + error.amount * err + rnorm(N, sd = 1),
                  square = (Z2XCoef * Z) * (U2XCoef * U)^2 + error.amount * err + rnorm(N, sd = 1))
    } else {
      X <- switch(confounding.function,
                  linear = Z2XCoef * Z + (U2XCoef * U) + rnorm(N, sd = 1),
                  inverted = Z2XCoef * Z / (1 + abs(Z)) + (U2XCoef * U) + rnorm(N, sd = 1),
                  exponential = Z2XCoef * exp(Z) + (U2XCoef * U) + rnorm(N, sd = 1),
                  sine = Z2XCoef * (Z + sin(Z * pi)) + (U2XCoef * U) + rnorm(N, sd = 1),
                  square = Z2XCoef * (Z - 1 * Z^2) + (U2XCoef * U) + rnorm(N, sd = 1))
      #X <- X/sd(X)
    }
  }
  #X <- X / sd(X)
  
  if (!break2sls) {
    # even with break2sls = FALSE, break.method = "z.on.y" adds a direct
    # (exclusion-restriction-violating) effect of Z on Y
    if (break.method == "z.on.y") {
      Y <- switch(surv.dist,
                  exponential = rexp(N, rate = exp(-(beta0 + beta1 * X + U2YCoef * U + error.amount * Z))),
                  normal = exp( ( beta0 + beta1 * X + U2YCoef * U + error.amount * Z + err) ))
    } else {
      Y <- switch(surv.dist,
                  exponential = rexp(N, rate = exp(-(beta0 + beta1 * X + U2YCoef * U))),
                  normal = exp( ( beta0 + beta1 * X + U2YCoef * U + err) ))
    }
  } else {
    if (break.method == "error" | break.method == "error.u") {
      Y <- exp(beta0 + beta1 * X + U2YCoef * U + err)
    } else if (break.method == "z.on.y") {
      Y <- switch(surv.dist,
                  exponential = rexp(N, rate = exp(-(beta0 + beta1 * X + U2YCoef * U + error.amount * Z))),
                  normal = exp( ( beta0 + beta1 * X + sign(beta1) * U2YCoef * U + error.amount * Z + err) ))
    } else {
      Y <- exp(beta0 + beta1 * X + U2YCoef * U + err)
    }
  }

  ans <- cbind(Z, X, Y, U)
  colnames(ans) <- c("Z", "X", "Y", "U")
  data.frame(ans)
}
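
# Usage sketch for genIVData(): under linear confounding with a log-normal
# outcome, the naive regression of log(Y) on X is biased by U, while a
# two-stage least-squares fit using Z approximately recovers beta1.
# (Illustrative values only; lm() and fitted() are base R.)
if (FALSE) {
  set.seed(1)
  dat <- genIVData(N = 5000, Z2XCoef = 1, U2XCoef = 0.5, U2YCoef = 0.5,
                   beta0 = 0, beta1 = 1,
                   survival.distribution = "normal",
                   confounding.function = "linear")
  coef(lm(log(Y) ~ X, data = dat))      # slope biased away from 1 by U
  x.hat <- fitted(lm(X ~ Z, data = dat))
  coef(lm(log(dat$Y) ~ x.hat))          # slope approximately 1
}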

genIVDataOld <- function(N = N, Z2XCoef, U2XCoef, U2YCoef, beta0 = 0, beta1 = 1,
                      survival.distribution = c("exponential", "normal")) 
{
  if (!is.numeric(N) | N < 1) 
    stop("'N' must be greater than or equal to one")
  surv.dist <- match.arg(survival.distribution)
  N <- as.integer(N)
  U <- rnorm(N, sd = 1)
  #U <- U / sd(U)
  Z <- rnorm(N, sd = 1)
  #Z <- Z / sd(Z)
  X <- (Z2XCoef * Z) + (U2XCoef * U) + rnorm(N, sd = 1)
  #X <- X / sd(X)
  Y <- switch(surv.dist,
              exponential = rexp(N, rate = exp(-(beta0 + beta1 * X + U2YCoef * U))),
              normal = exp(beta0 + beta1 * X + U2YCoef * U + rnorm(N)))
  
  ans <- cbind(Z, X, Y, U)
  colnames(ans) <- c("Z", "X", "Y", "U")
  data.frame(ans)
}


genMultivarIVData <- function(N = N, Z2XCoef, U2XCoef, U2YCoef, beta, num.confounded,
                              survival.distribution = c("exponential", "normal"),
                              intercept = FALSE, break2sls = FALSE, 
                              confounding.function = c("exponential", "linear", "sine", "square", "inverted"),
                              sd = 0.5) 
{
  if (!is.numeric(N) | N < 1) 
    stop("'N' must be greater than or equal to one")
  surv.dist <- match.arg(survival.distribution)
  confounding.function <- match.arg(confounding.function)
  num.vars <- length(beta)
  N <- as.integer(N)
  U <- matrix(rnorm(N * num.confounded), ncol = num.confounded)
  Z <- matrix(rnorm(N * num.confounded), ncol = num.confounded)
  # NOTE: the intercept argument is overridden here, so the intercept
  # branch below is currently never reached
  intercept <- FALSE
  if (intercept) {
    no.int.beta <- beta[-1]
    Z.append <- U.append <- array(0, dim = c(N,(num.vars - 1)))
    Z.append[, 1:num.confounded] <- Z
    U.append[, 1:num.confounded] <- U
    rand.mat <- matrix(rnorm(N * (num.vars - 1)), ncol = (num.vars - 1))
    X <- Z.append * Z2XCoef + U.append * U2XCoef + rand.mat
    #X2Y <- t(t(X) * no.int.beta)
    beta <- no.int.beta
    colnames(X) <- paste("X", 1:length(no.int.beta), sep = "")
  } else {
    Z.append <- U.append <- array(0, dim = c(N, num.vars))
    Z.append[, 1:num.confounded] <- Z
    U.append[, 1:num.confounded] <- U
    rand.mat <- matrix(rnorm(N * num.vars), ncol = num.vars)
    
    if (confounding.function == "linear") {
      X <- Z2XCoef * Z.append + U2XCoef * U.append + rand.mat
    } else if (confounding.function == "inverted") {
      X <- Z2XCoef * Z.append / (1 + abs(Z.append)) + (U2XCoef * U.append) + rand.mat
    } else if (confounding.function == "exponential") {
      X <- Z2XCoef * exp(Z.append) + (U2XCoef * U.append) + rand.mat
    } else if (confounding.function == "sine") {
      X <- Z2XCoef * (sin(Z.append * pi)) + (U2XCoef * U.append) + rand.mat
    } else if (confounding.function == "square") {
      X <- Z2XCoef * (Z.append^2) + (U2XCoef * U.append) + rand.mat
    } else {
      X <- Z2XCoef * Z.append + U2XCoef * U.append + rand.mat
    }
    
    #X <- Z.append * Z2XCoef + U.append * U2XCoef + rand.mat
    #X2Y <- t(t(X) * beta)
    colnames(X) <- paste("X", 1:length(beta), sep = "")
  }

  # note: the confounders enter the outcome with unit coefficients via
  # rowSums(U); the U2YCoef argument is not used here
  Y <- switch(surv.dist,
              exponential = rexp(N, rate = exp(-(X %*% beta + rowSums(U)))),
              normal = exp(X %*% beta + rowSums(U) + rnorm(N, sd = sd)))
  
  ret <- list(Z = Z, X = X, Y = Y, U = U)
  ret
}
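
# Usage sketch for genMultivarIVData(): three covariates, the first of which
# is confounded and instrumented. All values below are illustrative.
if (FALSE) {
  set.seed(1)
  dat <- genMultivarIVData(N = 1000, Z2XCoef = 1, U2XCoef = 0.5, U2YCoef = 0.5,
                           beta = c(1, -0.5, 0.25), num.confounded = 1,
                           survival.distribution = "normal",
                           confounding.function = "linear")
  str(dat)                # list with instrument Z, covariates X, outcome Y, confounder U
  cor(dat$Z, dat$X[, 1])  # instrument strength for the confounded covariate
}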



#' @export
simIVSurvivalData <- function(sample.size, 
                              conf.corr.X = 0.0, 
                              conf.corr.Y, 
                              instrument.strength, 
                              lambda, 
                              beta0, 
                              beta1, 
                              verbose = FALSE, 
                              norta = FALSE,
                              betax.cens, 
                              betaz.cens,
                              dichotomous.exposure = FALSE,
                              survival.distribution = c("exponential", "normal"),
                              confounding.function = c("linear", "inverted", "exponential", "square", "sine"),
                              dependent.censoring = FALSE,
                              break2sls = FALSE, 
                              break.method = c("collider", "error", "error.u", "z.on.y"),
                              error.amount = 0.01, 
                              cens.distribution = c("exp", "lognormal", "weibull", "unif", "fixed"),
                              plotcens = TRUE) 
{
  #conf.corr.X == confounder correlation with X
  #conf.corr.Y == confounder correlation with Y
  
  surv.dist <- match.arg(survival.distribution)
  confounding.function <- match.arg(confounding.function)
  break.method <- match.arg(break.method)
  cens.distribution <- match.arg(cens.distribution)
  
  if (dependent.censoring)
  {
    stopifnot(length(beta1) == length(betax.cens))
    stopifnot(length(betaz.cens) == 1)
  }
  
  if (norta)
  {
    #correlation matrix for U, Y
    sigma <- cbind(c(1,conf.corr.Y), c(conf.corr.Y, 1))
    #generate IV random variables
    vars <- norta(sample.size, sigma, conf.corr.X = conf.corr.X, instrument.strength = instrument.strength, beta0, beta1)
  } else 
  {
    #generate IV random variables
    vars <- genIVData(sample.size, 
                      Z2XCoef = instrument.strength, 
                      U2XCoef = conf.corr.X, 
                      U2YCoef = conf.corr.Y, 
                      beta0, 
                      beta1, 
                      confounding.function = confounding.function,
                      survival.distribution = surv.dist, 
                      break2sls = break2sls, 
                      dichotomous.exposure = dichotomous.exposure,
                      break.method = break.method,
                      error.amount = error.amount)
  }
  
  #if specified, print sample correlation between Z, X, U, and Y
  if (verbose) {print (cor(vars))}
  
  X <- vars$X #covariate
  Z <- vars$Z #instrument
  #failure time, truncated at its 99th percentile
  lim.time  <- quantile(vars$Y, probs = 0.99)
  Fail.time <- pmin(vars$Y, lim.time)
  if (dependent.censoring)
  {
    # censoring-model coefficients come from the user-supplied arguments
    beta.z <- betaz.cens
    beta.x <- betax.cens
    
    if (cens.distribution == "exp")
    {
      cens.effect <- exp(drop(X * beta.x + beta.z * Z))
      Cen.time <- rexp(sample.size, rate = 1/cens.effect)
    } else if (cens.distribution == "weibull")
    {
      shape <- 0.25
      
      xbeta <- drop(X * beta.x + beta.z * Z)
      
      # inverse-CDF construction (kept for reference):
      # S(t|x) = exp(-H_0(t) exp(x'beta)), V ~ Unif(0,1)
      # T = S^{-1}(V|x) = H_0^{-1}(-log(V) / exp(x'beta))
      # Weibull: H_0(t) = lambda * t^rho  =>  H_0^{-1}(t) = (t/lambda)^(1/rho)
      #     Cen.time <- (-log(runif(sample.size)) / (lambda * exp(xbeta))) ^ (1 / shape)
      Cen.time <- rweibull(sample.size, shape = shape, scale = exp(xbeta / gamma(1 + 1/shape)) )
    } else if (cens.distribution == "lognormal" || 
               cens.distribution == "unif") ## "unif" falls back to lognormal here: uniform censoring cannot depend on covariates
    {
      xbeta <- drop(X * beta.x + beta.z * Z)
      
      Cen.time <- exp(rnorm(sample.size, mean = xbeta, sd = 1))
      
    }
  } else 
  {
    if (cens.distribution == "exp")
    {
      # generate independent censoring data
      Cen.time <- rexp(sample.size, rate = lambda)
    } else if (cens.distribution == "weibull")
    {
      shape <- 0.25
      # inverse-CDF construction (kept for reference):
      # S(t|x) = exp(-H_0(t) exp(x'beta)), V ~ Unif(0,1)
      # T = S^{-1}(V|x) = H_0^{-1}(-log(V) / exp(x'beta))
      # Weibull: H_0(t) = lambda * t^rho  =>  H_0^{-1}(t) = (t/lambda)^(1/rho)
      #   Cen.time <- (-log(runif(sample.size)) / lambda) ^ (1 / shape)
      Cen.time <- rweibull(sample.size, shape = shape, scale = lambda)
    } else if (cens.distribution == "lognormal")
    {
      Cen.time <- exp(rnorm(sample.size, mean = mean(log(Fail.time))))
    } else if (cens.distribution == "unif")
    {
      Cen.time <- exp(runif(sample.size, min = min(log(Fail.time)),
                                     max = quantile(log(vars$Y), probs = 0.98)))
    } else if (cens.distribution == "fixed")
    {
      mean.fail <- quantile(Fail.time, probs = 0.5)
      Cen.time <- rep(mean.fail, NROW(Fail.time)) + runif(NROW(Fail.time), max = 0.025, min = -0.025)
    }
  }
  if (plotcens)
  {
    plot(density(Fail.time), ylim = c(0, 1), xlim = c(0, quantile(vars$Y, probs = 0.975)))
    lines(density(Cen.time), col = "blue")
    plot(density(Cen.time - Fail.time))
  }
  #Cen.time <- pmin(Cen.time, quantile(Cen.time, prob = 0.99))
  
  
  
  #event indicator: 1 = failure observed, 0 = censored
  delta <- 1 * (Cen.time >= Fail.time)
  
  #observed time is the minimum of failure and censoring times
  t <- pmin(Fail.time, Cen.time)
  #return variables in data.frame
  data.simu <- data.frame(t, delta, X, Z, Fail.time, Cen.time)
  #store correlations as attribute
  attr(data.simu, "cor") <- cor(vars)
  data.simu
}
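
# Usage sketch for simIVSurvivalData(): one simulated data set with
# independent exponential censoring. betax.cens/betaz.cens are only used when
# dependent.censoring = TRUE but have no defaults, so placeholders are
# supplied. All values are illustrative.
if (FALSE) {
  dat <- simIVSurvivalData(sample.size = 500,
                           conf.corr.X = 0.25, conf.corr.Y = 0.25,
                           instrument.strength = 1, lambda = 0.25,
                           beta0 = 0, beta1 = 1,
                           betax.cens = 0, betaz.cens = 0,
                           survival.distribution = "exponential",
                           confounding.function = "linear",
                           cens.distribution = "exp", plotcens = FALSE)
  mean(1 - dat$delta)  # observed censoring proportion
  attr(dat, "cor")     # sample correlations among Z, X, Y, U
}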

#' @export
simIVMultivarSurvivalData <- function(sample.size, 
                                      conf.corr.X = 0.0, 
                                      conf.corr.Y = 0, 
                                      instrument.strength, 
                                      lambda, 
                                      beta, 
                                      survival.distribution = c("exponential", "normal"),
                                      num.confounded, 
                                      intercept = FALSE, 
                                      break2sls = FALSE,
                                      confounding.function = c("linear", "inverted", "exponential", "square", "sine"),
                                      dependent.censoring = FALSE,
                                      cens.distribution = c("exp", "lognormal", "weibull")) 
{
  #conf.corr.X == confounder correlation with X
  #conf.corr.Y == confounder correlation with Y
  
  surv.dist <- match.arg(survival.distribution)
  cens.distribution <- match.arg(cens.distribution)
  confounding.function <- match.arg(confounding.function)
  
  #generate IV random variables
  vars <- genMultivarIVData(N = sample.size, 
                            Z2XCoef = instrument.strength, 
                            U2XCoef = conf.corr.X, 
                            U2YCoef = conf.corr.Y, 
                            beta, 
                            num.confounded = num.confounded,
                            survival.distribution = surv.dist, 
                            intercept = intercept,
                            confounding.function = confounding.function)
  
  X <- vars$X #covariate
  Z <- vars$Z #instrument
  # failure time, truncated at its 97.5th percentile
  lim.time <- quantile(vars$Y, probs = 0.975)
  Fail.time <- pmin(vars$Y, lim.time)
  if (dependent.censoring)
  {
    beta.x <- runif(NCOL(X), min = -0.5, max = 0.5)
    beta.z <- runif(1, min = -0.5, max = 0.5)
    
    if (cens.distribution == "exp")
    {
      cens.effect <- exp(drop(X %*% beta.x + beta.z * Z))
      Cen.time <- rexp(sample.size, rate = cens.effect)
    } else if (cens.distribution == "weibull")
    {
      shape <- 0.5
      xbeta <- drop(X %*% beta.x + beta.z * Z)
      nu <- runif(sample.size)
      
      # S(t|x) = exp(-H_0(t)exp(x'beta))
      # T = S^{-1}(V|x) = H_0^{-1}(-log(V) / exp(x'beta))
      # V ~ unif(0,1)
      # Weibull: H_0(t) = lambda * t ^ rho => H_0^{-1}(t) = (t/lambda)^(1/rho)
      Cen.time <- (-log(nu) / (lambda * exp(xbeta))) ^ (1 / shape)
    } else if (cens.distribution == "lognormal")
    {
      xbeta <- drop(X %*% beta.x + beta.z * Z)
      
      Cen.time <- exp(rnorm(sample.size, mean = -xbeta))
      
    }
  } else 
  {
    if (cens.distribution == "exp")
    {
      # generate independent censoring data
      Cen.time <- rexp(sample.size, rate = lambda)
    } else if (cens.distribution == "weibull")
    {
      shape <- 0.5
      nu <- runif(sample.size)
      
      # S(t|x) = exp(-H_0(t)exp(x'beta))
      # T = S^{-1}(V|x) = H_0^{-1}(-log(V) / exp(x'beta))
      # V ~ unif(0,1)
      # Weibull: H_0(t) = lambda * t ^ rho => H_0^{-1}(t) = (t/lambda)^(1/rho)
      Cen.time <- (-log(nu) / (lambda * 1)) ^ (1 / shape)
      
    } else if (cens.distribution == "lognormal")
    {
      Cen.time <- exp(rnorm(sample.size))
    }
  }
  
  Cen.time <- pmin(Cen.time, quantile(Cen.time, prob = 0.98))
  
  # failure indicator
  delta <- 1 * (Cen.time >= Fail.time)
  # observed time is the minimum of failure and censoring times
  t <- pmin(Fail.time, Cen.time)
  log.t <- log(t)
  if (intercept) 
  {
    log.t <- log.t + beta[1]
    t     <- exp(log.t)
  }
  # return variables in data.frame
  survival <- data.frame(t, delta, log.t)
  
  # return a list of the survival aspects of 
  # the data and the covariate matrix
  ret <- list(survival = survival, X = X, Z = Z)
  # note: this correlation summary assumes num.confounded == 1
  dat <- data.frame(Z, X[, 1:ncol(Z)], t, vars$U)
  names(dat) <- c("Z", "X", "Y", "U")
  attr(ret, "cor") <- cor(dat)
  class(ret) <- "survival.data"
  ret
}
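
# Usage sketch for simIVMultivarSurvivalData(): multivariate covariates with
# the first one confounded; returns a list with $survival, $X, and $Z.
# All values are illustrative.
if (FALSE) {
  dat <- simIVMultivarSurvivalData(sample.size = 500,
                                   conf.corr.X = 0.25, conf.corr.Y = 0.25,
                                   instrument.strength = 1, lambda = 0.25,
                                   beta = c(1, -0.5, 0.25), num.confounded = 1,
                                   survival.distribution = "exponential",
                                   confounding.function = "linear")
  head(dat$survival)            # observed time, event indicator, log time
  mean(1 - dat$survival$delta)  # censoring proportion
}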

#' @export
SimIVDataCompareEstimators <- function(type, 
                                       n.sims, 
                                       sample.size, 
                                       conf.corr.X = 0.0, 
                                       conf.corr.Y = 0.0, 
                                       instrument.strength,
                                       lambda, 
                                       beta0, 
                                       beta1, 
                                       dichotomous.exposure = FALSE,
                                       seed = NULL, 
                                       norta=FALSE, 
                                       betax.cens, 
                                       betaz.cens,
                                       B = 200L, 
                                       conf.level = 0.95,
                                       bootstrap = FALSE, 
                                       boot.method = c("ls", "sv", "full.bootstrap"),
                                       survival.distribution = c("exponential", "normal"), 
                                       confounding.function, 
                                       break2sls = FALSE,
                                       break.method = c("collider", "error", "error.u", "z.on.y"), 
                                       error.amount = 0.01,
                                       dependent.censoring = FALSE, 
                                       cens.misspec = FALSE,
                                       cens.distribution = c("exp", "lognormal", "weibull", "unif", "fixed"),
                                       plotcens = TRUE)
{
  # This function simulates ('n.sims'-times) survival data with a confounding variable U and an instrument Z
  # and estimates beta using the regular AFT estimating equation and also using the IV estimating equation
  # proposed by Professor Yu. It stores the results in vectors and returns a list containing these vectors.
  
  #set seed if supplied by user
  if (!is.null(seed)) 
  {
    set.seed(seed)
  }
  
  boot.method <- match.arg(boot.method)
  
  #check to make sure user specified allowed estimating equations
  types <- c("AFT", "AFT-IV", "AFT-2SLS", "AFT-IPCW", "AFT-2SLS-xhat", "RnT")
  funcs <- c("vEvalAFTScore", "vEvalAFTivScore", "vEvalAFT2SLSScorePrec", "vEvalAFTivIPCWScorePrec", "vEvalAFT2SLSxhatScore", "vEvalRnTAFTivScore")
  for (i in seq_along(type)) {if (!is.element(type[i], types)) {stop("'type' must only contain 'AFT', 'AFT-IV', 'AFT-2SLS', 'AFT-IPCW', 'AFT-2SLS-xhat', or 'RnT'")}}
  
  
  zval <- qnorm((1 + conf.level)/2, 0, 1)
  
  beta.store <- array(0, dim = c(n.sims, length(type)))
  coverage.store <- array(NA, dim = c(n.sims, length(type)))
  l <- 0
  pct.censored <- NULL
  num.errors <- tot.cors <- 0
  pb <- txtProgressBar(min = 0, max = n.sims, style = 3)
  while (l < n.sims)
  {
    possibleError <- tryCatch({
      #####  GENERATE DATA  #########
      Data.simu <- simIVSurvivalData(sample.size, 
                                     conf.corr.X = conf.corr.X, 
                                     conf.corr.Y = conf.corr.Y, 
                                     instrument.strength = instrument.strength, 
                                     lambda = lambda, 
                                     beta0 = beta0, 
                                     beta1 = beta1, 
                                     norta = norta,
                                     betax.cens = betax.cens, 
                                     betaz.cens = betaz.cens,
                                     dichotomous.exposure = dichotomous.exposure,
                                     survival.distribution = survival.distribution, 
                                     confounding.function = confounding.function,
                                     dependent.censoring = dependent.censoring,
                                     break2sls = break2sls, 
                                     break.method = break.method,
                                     error.amount = error.amount, 
                                     cens.distribution = cens.distribution,
                                     plotcens = plotcens)

      beta.tmp <- array(0, dim = length(type))
      coverage.tmp <- array(NA, dim = length(type))
      
      #solve each estimating equation for beta1
      for (e in 1:length(type))
      {
        #return correct estimating equation function
        est.eqn <- match.fun(funcs[[match(type[e], types)]])
        
        if (type[e] == "AFT-2SLS") 
        {
          #predict X with Z (1st stage of 2SLS process) to get Xhat
          Data.simu$X.hat <- lm(X ~ Z, data = Data.simu)$fitted.values
        }
        if (type[e] == "AFT-IPCW") 
        {
          #generate the censoring survivor function G_c() for IPCW
          if (dependent.censoring & !cens.misspec)
          {
            GC.list <- list(genKMCensoringFunc(Data.simu, cox = TRUE, 
                                          X = cbind(Data.simu$X, Data.simu$Z)),
                                          #X = Data.simu$X),
                            "placeholder", "placeholder")
          } else
          {
            GC.list                   <- genKMCensoringFuncVar(Data.simu)
            attr(GC.list[[1]], "cox") <- FALSE
          }
          GC         <- GC.list[[1]]
          #GC <- function(x) 1 - pexp(x,1)
          VarFunc    <- GC.list[[2]]
          n.riskFunc <- GC.list[[3]]
          #solve for beta via root finding (or squared-score minimization
          #when the score does not change sign on the interval)
          
          est.eqn.sq <- function(beta, data.simu) sum((est.eqn(beta = beta, data.simu = data.simu, GC = GC)) ^ 2)
          
          vals <- est.eqn(beta = c(beta1 - 2, beta1 + 2), data.simu = Data.simu, GC = GC)
          if (all(vals > 0) | all(vals < 0))
          {
            beta.tmp[e] <- optimize(est.eqn.sq,
                                    interval = c(beta1 - 2, beta1 + 2),
                                    data.simu = Data.simu,
                                    GC = GC)$minimum
          } else
          {
            beta.tmp[e] <- uniroot(est.eqn, 
                                   interval = c(beta1 - 2, beta1 + 2), 
                                   tol = 0.0001, 
                                   "data.simu" = Data.simu, 
                                   "GC" = GC)$root
          }
          
          
        } else 
        {
          GC          <- NULL
          VarFunc     <- NULL
          n.riskFunc  <- NULL
          #solve for beta via root finding (or squared-score minimization
          #when the score does not change sign on the interval)
          est.eqn.sq <- function(beta, data.simu) sum((est.eqn(beta = beta, data.simu = data.simu)) ^ 2)
          
          
          
          # this estimating equation is extraordinarily hard to minimize,
          # even in one dimension, so it is fit with rpsftm() (rank-preserving
          # structural failure time model, from the rpsftm package), with
          # root finding as a fallback.
          # note: the parameter in Robins & Tsiatis (1991) is the negative
          # of the AFT model parameter, hence the sign flips below
          if (attr(est.eqn, "name") == "evalRnTAFTivScore")
          {
            beta.tmp[e] <- rpsftm(formula=Surv(t, delta) ~ rand(Z, X), 
                                  data=Data.simu,
                                  low_psi = -beta1 - 2, hi_psi = -beta1 + 2,
                                  censor_time=Data.simu$Cen.time)$psi
            
            if (is.na(beta.tmp[e]) | is.nan(beta.tmp[e]) | is.null(beta.tmp[e]))
            {
              vals <- est.eqn(beta = c(-beta1 - 2, -beta1 + 2), data.simu = Data.simu)
              if (all(vals > 0) | all(vals < 0))
              {
                beta.tmp[e] <- optimize(est.eqn.sq,
                                        interval = c(-beta1 - 2, -beta1 + 2),
                                        data.simu = Data.simu)$minimum
              } else
              {
                beta.tmp[e] <- uniroot(est.eqn,
                                       interval = c(-beta1 - 2, -beta1 + 2),
                                       tol = 0.0001,
                                       "data.simu" = Data.simu)$root
              }
            }
              
          } else
          {
            vals <- est.eqn(beta = c(beta1 - 2, beta1 + 2), data.simu = Data.simu)
            if (all(vals > 0) | all(vals < 0))
            {
              beta.tmp[e] <- optimize(est.eqn.sq,
                                      interval = c(beta1 - 2, beta1 + 2),
                                      data.simu = Data.simu)$minimum
            } else
            {
              beta.tmp[e] <- uniroot(est.eqn,
                                     interval = c(beta1 - 2, beta1 + 2),
                                     tol = 0.0001,
                                     "data.simu" = Data.simu)$root
            }
          }
        }

        cat("sim ", l, ", beta = ", beta.tmp[e], "\n")
        
        
        if (bootstrap) 
        {
          # bootstrap estimate
          se.hat <- bootstrapVarUnivar(beta.tmp[e], 
                                       est.eqn, 
                                       Data.simu, 
                                       B = B, 
                                       method = boot.method, 
                                       GC = GC, 
                                       VarFunc = VarFunc, 
                                       n.riskFunc = n.riskFunc)$se.hat
          
          tmp <- c(beta.tmp[e] - zval * se.hat,
                   beta.tmp[e] + zval * se.hat)
          
          coverage.tmp[e] <- 1 * (beta1 >= tmp[1] & beta1 <= tmp[2])
        }
      }
      
    }, error = function(e) e)
    if(inherits(possibleError, "error"))
    {
      print(possibleError)
      num.errors <- num.errors + 1
      next
    } else 
    {
      l <- l + 1
      #store results on successful attempt
      beta.store[l, ] <- beta.tmp
      coverage.store[l, ] <- coverage.tmp
      print(colMeans(beta.store[1:l,,drop=FALSE], na.rm=TRUE))
      print(colMeans(coverage.store, na.rm=TRUE))
      setTxtProgressBar(pb, l)
      pct.censored[l] <- mean(1 - Data.simu$delta)
      cat("\n Pct censored:", mean(1 - Data.simu$delta), "\n")
      #add correlation matrix so we can take mean
      tot.cors <- tot.cors + attr(Data.simu, "cor")
    }
  }
  avg.cors <- tot.cors / n.sims
  close(pb)
  print (paste("Number of errors:", num.errors))
  res <- list()
  for (i in 1:ncol(beta.store)){res[[i]] <- beta.store[, i]}
  names(res) <- type
  attr(res, "truth") <- beta1
  attr(res, "pct.censored") <- mean(pct.censored)
  attr(res, "avg.cor") <- avg.cors
  attr(res, "coverage") <- colMeans(coverage.store)
  class(res) <- "AFTsim"
  res
}
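
# Usage sketch for SimIVDataCompareEstimators(): a small comparison of the
# naive AFT and IV-based estimators. This assumes the score functions named
# in 'funcs' are available from this package; all settings are illustrative.
if (FALSE) {
  sim <- SimIVDataCompareEstimators(type = c("AFT", "AFT-IV", "AFT-2SLS"),
                                    n.sims = 50, sample.size = 500,
                                    conf.corr.X = 0.25, conf.corr.Y = 0.25,
                                    instrument.strength = 1, lambda = 0.25,
                                    beta0 = 0, beta1 = 1, seed = 123,
                                    betax.cens = 0, betaz.cens = 0,
                                    survival.distribution = "exponential",
                                    confounding.function = "linear",
                                    plotcens = FALSE)
}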


#' @export
SimIVDataCompareEstimatorsMultivar <- function(type, 
                                               n.sims, 
                                               sample.size, 
                                               conf.corr.X = 0.0, 
                                               conf.corr.Y = 0.0, 
                                               instrument.strength,
                                               lambda, 
                                               beta, 
                                               seed = NULL, 
                                               norta = FALSE, 
                                               intercept = FALSE,
                                               B = 200L, 
                                               conf.level = 0.95,
                                               bootstrap = FALSE, 
                                               boot.method = c("ls", "sv", "full.bootstrap"),
                                               survival.distribution = c("exponential", "normal"),
                                               break2sls = FALSE,
                                               confounding.function)
{
  # This function simulates ('n.sims'-times) survival data with a confounding variable U and an instrument Z
  # and estimates beta using the regular AFT estimating equation and also using the IV estimating equation
  # proposed by Professor Yu. It stores the results in vectors and returns a list containing these vectors.
  
  #set seed if supplied by user
  if (!is.null(seed)) {set.seed(seed)}
  
  boot.method <- match.arg(boot.method)
  
  #check to make sure user specified allowed estimating equations
  types <- c("AFT", "AFT-IV", "AFT-2SLS", "AFT-IPCW")
  funcs <- c("AFTScorePre", "AFTivScorePre", "AFT2SLSScorePre", "AFTivIPCWScorePre")
  # note: the smoothed score for AFT-IPCW currently reuses the 2SLS version
  funcs.sm <- c("AFTScoreSmoothPre", "AFTivScoreSmoothPre", "AFT2SLSScoreSmoothPre", "AFT2SLSScoreSmoothPre")
  for (i in seq_along(type)) {if (!is.element(type[i], types)) {stop("'type' must only contain 'AFT', 'AFT-IV', 'AFT-2SLS', or 'AFT-IPCW'")}}
  
  
  zval <- qnorm((1 + conf.level)/2, 0, 1)
  
  beta.store     <- array(0, dim = c(n.sims, length(type)))
  coverage.store <- array(NA, dim = c(n.sims, length(type)))
  l              <- 0
  pct.censored   <- NULL
  num.errors     <- tot.cors <- 0
  pb <- txtProgressBar(min = 0, max = n.sims, style = 3)
  while (l < n.sims)
  {
    possibleError <- tryCatch({
      #####  GENERATE DATA  #########
      Data.simu <- simIVMultivarSurvivalData(sample.size, conf.corr.X = conf.corr.X, conf.corr.Y = conf.corr.Y, 
                                             instrument.strength = instrument.strength, lambda = lambda, 
                                             beta = beta, intercept = intercept,
                                             survival.distribution = survival.distribution,
                                             num.confounded = 1, break2sls = break2sls,
                                             confounding.function = confounding.function)
      if (intercept) 
      {
        Data.simu$X <- cbind(rep(1, nrow(Data.simu$X)), Data.simu$X)
        colnames(Data.simu$X)[1] <- "Intercept"
      }
      beta.tmp     <- array(0,  dim = length(type))
      coverage.tmp <- array(NA, dim = length(type))
      
      #solve each estimating equation for beta1
      for (e in 1:length(type))
      {
        #return correct estimating equation function
        est.eqn                  <- match.fun(funcs[[match(type[e], types)]])
        est.eqn.sm               <- match.fun(funcs.sm[[match(type[e], types)]])
        attr(est.eqn, "name")    <- funcs[[match(type[e], types)]]
        attr(est.eqn.sm, "name") <- funcs.sm[[match(type[e], types)]]
        dfsane.tol <- 0.000001
        if (type[e] == "AFT-IPCW") {dfsane.tol <- 8}
        # warm starts from the previous estimator's solution are currently
        # disabled, so the initial parameter is always NULL
        init.par <- NULL

        #solve for beta using deriv-free spectral method
        est <- repFitAFT(tol = dfsane.tol, 
                         data = Data.simu, 
                         est.eqn = est.eqn, 
                         est.eqn.sm = est.eqn.sm,
                         instrument.names = "prop_endo", 
                         confounded.x.names = paste("X", 1:1, sep=""), 
                         fit.method = "dfsane", 
                         init.par = init.par, 
                         final.fit = FALSE,
                         method = c(2), 
                         control = list(tol   = dfsane.tol, 
                                        maxit = 1000, 
                                        trace = FALSE, 
                                        M     = c(500)), 
                         quiet = TRUE)
        
        conf.x.idx  <- match(paste("X", 1:1, sep=""), names(est$par))
        beta.tmp[e] <- est$par[conf.x.idx]
        
        if (bootstrap) 
        {
          # bootstrap estimate
          se.hat <- bootstrapVar(est, Data.simu, B = B, method = boot.method)$se.hat

          tmp    <- c(beta.tmp[e] - zval * se.hat[conf.x.idx],
                      beta.tmp[e] + zval * se.hat[conf.x.idx])

          
          coverage.tmp[e] <- 1 * (beta[conf.x.idx] >= tmp[1] & beta[conf.x.idx] <= tmp[2])
        }
        

        
      }
      
    }, error=function(e) e)
    if(inherits(possibleError, "error"))
    {
      num.errors <- num.errors + 1
      print (possibleError)
      next
    } else 
    {
      l <- l + 1
      #store results on successful attempt
      beta.store[l, ]     <- beta.tmp
      coverage.store[l, ] <- coverage.tmp
      setTxtProgressBar(pb, l)
      pct.censored[l]     <- mean(1 - Data.simu$survival$delta)
      print(colMeans(coverage.store, na.rm=TRUE))
      #add correlation matrix so we can take mean
      tot.cors <- tot.cors + attr(Data.simu, "cor")
    }
  }
  avg.cors <- tot.cors / n.sims
  close(pb)
  print (paste("Number of errors:", num.errors))
  res <- list()
  for (i in 1:ncol(beta.store))
  {
    res[[i]] <- beta.store[, i]
  }
  names(res)                <- type
  attr(res, "truth")        <- beta[1]
  attr(res, "pct.censored") <- mean(pct.censored)
  attr(res, "avg.cor")      <- avg.cors
  attr(res, "coverage")     <- colMeans(coverage.store)
  class(res) <- "AFTsim"
  res
}

#' @export
summary.AFTsim <- function(object, ...) 
{
  # summary method for the output of the 
  # 'SimIVDataCompareEstimators' function
  res <- object
  stopifnot(inherits(res, "AFTsim"))
  results     <- data.frame(array(0, dim = c(length(res), 10)))
  n.data      <- length(res[[1]])
  results[,1] <- names(res)
  colnames(results) <- c("Estimator", "Mean", 
                         "LCI",       "UCI", 
                         "sd",        "Q2.5", 
                         "Q5",        "Med", 
                         "Q95",       "Q97.5")
  for (i in 1:length(res))
  {
    results[i, 2]    <- mean(res[[i]])
    sd.i             <- sd(res[[i]])
    results[i, 5]    <- sd.i
    results[i, 3:4]  <- c(mean(res[[i]]) - 1.96 * (sd.i / sqrt(n.data)), 
                          mean(res[[i]]) + 1.96 * (sd.i / sqrt(n.data)))
    results[i, 6:10] <- quantile(res[[i]], probs = c(0.025, 0.05, 0.5, 0.95, 0.975))
  }
  results[,2:10] <- round(results[,2:10], 4)
  print(paste("Average Pct Censored:", 100 * attr(res, "pct.censored")))
  attr(results, "cor.U.Y") <- attr(res, "avg.cor")[3,4]
  attr(results, "cor.U.X") <- attr(res, "avg.cor")[2,4]
  attr(results, "cor.Z.X") <- attr(res, "avg.cor")[1,2]
  print(sprintf("Cor(U,Y): %g  ||  Cor(U,X): %g  ||  Cor(Z,X): %g", 
          round(attr(res, "avg.cor")[3,4], 4), round(attr(res, "avg.cor")[2,4], 4), 
                round(attr(res, "avg.cor")[1,2], 4)))
  results
}
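
# The summary method can then be applied to simulation output, e.g.
# (continuing the hypothetical 'sim' object from the sketch above):
if (FALSE) {
  summary(sim)  # per-estimator means, CIs, and quantiles of the estimates
}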

#' @export
createGrid <- function(U2X.range, U2Y.range, Z2X.range, n.data, sample.size, lambda.range)
{
  stopifnot(is.numeric(U2X.range) & is.numeric(U2Y.range) & is.numeric(Z2X.range) 
            & is.numeric(lambda.range))
  if (!all(n.data %% 1 == 0) | !all(sample.size %% 1 == 0)) {stop("n.data and sample.size must be integers")}
  grid <- expand.grid(U2X.range, U2Y.range, Z2X.range, n.data, sample.size, lambda.range)
  colnames(grid) <- c("Conf.Corr.X", "Conf.Corr.Y", "Instrument.Strength", "n.data", "sample.size", "lambda")
  attr(grid, "grid") <- "sim.grid"
  grid
}
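
# Usage sketch for createGrid(): a small grid of simulation settings.
# All values are illustrative.
if (FALSE) {
  grid <- createGrid(U2X.range = c(0, 0.25, 0.5),
                     U2Y.range = c(0.25, 0.5),
                     Z2X.range = 1,
                     n.data = 100, sample.size = 500,
                     lambda.range = 0.25)
  dim(grid)  # one row per combination of settings
}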

#' @export
simulateGrid <- function(est.eqns, 
                         grid, beta, 
                         seed = NULL, 
                         B = 200L, 
                         conf.level = 0.95,
                         bootstrap = FALSE, 
                         boot.method = c("ls", "sv", "full.bootstrap"),
                         survival.distribution = c("exponential", "normal"),
                         dependent.censoring  = FALSE,
                         cens.misspec = FALSE,
                         dichotomous.exposure = FALSE,
                         confounding.function  = c("linear", "inverted", "exponential", "square", "sine"),
                         break2sls = FALSE, 
                         break.method = c("collider", "error", "error.u", "z.on.y"),
                         error.amount = 0.01, 
                         use.uniroot = TRUE, 
                         betax.cens,
                         betaz.cens,
                         cens.distribution = c("exp", "lognormal", "weibull", "unif", "fixed"),
                         plotcens = TRUE) 
{
  if (is.null(attr(grid, "grid"))) {stop("Grid must be created with createGrid function")}
  
  results <- array(0, dim = c(length(est.eqns), nrow(grid), (ncol(grid) + 13)))
  dimnames(results)[[1]] <- est.eqns
  dimnames(results)[[3]] <- c(colnames(grid), "Cor.U.Y", "Cor.U.X", "Cor.Z.X", "Mean", 
                              "LCI", "UCI", "sd", "Q2.5", "Q5", "Med", "Q95", "Q97.5", "Coverage")
  
  stopifnot(length(betax.cens) == length(beta))
  
  stopifnot(length(betaz.cens) == 1)
  
  #simulate situation when there IS a confounder present for
  #varying levels of correlation between U and X and U and Y.
  #note: the foreach/%dopar% loops below require a registered parallel
  #backend and a character vector 'packages' (packages to load on the
  #workers) in the calling environment
  if (length(beta) == 1 & use.uniroot) 
  {
    raw.results <- foreach(i = 1:nrow(grid), .packages = packages) %dopar% {
      print(sprintf("Simulation %g / %g", i, nrow(grid)))
      res <- SimIVDataCompareEstimators(type = est.eqns, n.sims = grid$n.data[i], 
                                        sample.size = grid$sample.size[i], 
                                        conf.corr.X = grid$Conf.Corr.X[i], 
                                        conf.corr.Y = grid$Conf.Corr.Y[i], 
                                        instrument.strength = grid$Instrument.Strength[i], 
                                        lambda = grid$lambda[i], beta0 = 0, beta1 = beta, seed = seed,
                                        betax.cens = betax.cens, betaz.cens = betaz.cens,
                                        B = B, conf.level = conf.level,
                                        bootstrap = bootstrap, boot.method = boot.method,
                                        survival.distribution = survival.distribution, 
                                        dependent.censoring = dependent.censoring,
                                        dichotomous.exposure = dichotomous.exposure, 
                                        confounding.function = confounding.function, break2sls = break2sls,
                                        break.method = break.method, error.amount = error.amount,
                                        cens.distribution = cens.distribution,
                                        cens.misspec = cens.misspec,
                                        plotcens = plotcens)
      for (j in 1:ncol(grid)) {attr(res, colnames(grid)[j]) <- grid[i, j]}
      res
    }
  } else 
  {
    raw.results <- foreach(i = 1:nrow(grid), .packages = packages) %dopar% {
      print(sprintf("Simulation %g / %g", i, nrow(grid)))
      res <- SimIVDataCompareEstimatorsMultivar(type = est.eqns, n.sims = grid$n.data[i], 
                                                sample.size = grid$sample.size[i], 
                                                conf.corr.X = grid$Conf.Corr.X[i], 
                                                conf.corr.Y = grid$Conf.Corr.Y[i], 
                                                instrument.strength = grid$Instrument.Strength[i], 
                                                lambda = grid$lambda[i], beta = beta, seed = seed,
                                                B = B, conf.level = conf.level,
                                                bootstrap = bootstrap, boot.method = boot.method,
                                                survival.distribution = survival.distribution,
                                                confounding.function = confounding.function)
      for (j in 1:ncol(grid)) {attr(res, colnames(grid)[j]) <- grid[i, j]}
      res
    }
  }
  sum.results <- lapply(raw.results, function(res) 
  {
    cors      <- c(attr(res, "avg.cor")[3,4], attr(res, "avg.cor")[2,4], attr(res, "avg.cor")[1,2])
    cors      <- matrix(rep(cors, length(est.eqns)), ncol = length(cors), byrow = TRUE)
    coverages <- attr(res, "coverage")
    print(round(attr(res, "avg.cor"), 4))
    cbind(cors, as.matrix(summary(res)[,2:10]), coverages)
  })
  for (i in 1:length(sum.results)) {results[, i, (ncol(grid) + 1):(ncol(grid) + 13)] <- sum.results[[i]]}
  for (i in 1:dim(results)[1]){results[i, , (1:ncol(grid))] <- as.matrix(grid)}
  return.results <- list(raw = raw.results, summary.array = results)
  class(return.results) <- "simulation.grid"
  return.results
}
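
# Usage sketch for simulateGrid(): requires a registered parallel backend for
# %dopar% and a character vector 'packages' visible to the workers (an
# assumption of the current implementation). All values are illustrative.
if (FALSE) {
  library(doParallel)
  registerDoParallel(cores = 2)
  packages <- "aftiv"   # packages loaded on each worker
  grid <- createGrid(U2X.range = 0.25, U2Y.range = 0.25, Z2X.range = 1,
                     n.data = 50, sample.size = 500, lambda.range = 0.25)
  out <- simulateGrid(est.eqns = c("AFT", "AFT-IV", "AFT-2SLS"),
                      grid = grid, beta = 1,
                      confounding.function = "linear",
                      betax.cens = 0, betaz.cens = 0,
                      plotcens = FALSE)
  out$summary.array
}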