#################################
### This file contains all ###
### functions for simulations ###
#################################
norta <- function(N, cor.matrix, conf.corr.X = 0, instrument.strength = 0.8, beta0 = 0, beta1 = 1)
{
# generate correlated random variables with the IV assumptions using the NORTA method
if (!is.numeric(N) || N < 1)
stop("'N' must be a numeric value greater than or equal to one")
N <- as.integer(N)
ans <- rsmvnorm(R = N, cor.matrix = cor.matrix)
probs <- pnorm(ans)
U <- ans[,1] #confounder
Z <- rnorm(N)
X1 <- instrument.strength * Z + sqrt(1 - instrument.strength^2) * rnorm(N)
X <- conf.corr.X * U + sqrt(1 - conf.corr.X^2) * X1
ans <- cbind(Z, X, qexp(probs[,2], rate = exp(beta0 + beta1 * X)), U)
colnames(ans) <- c("Z", "X", "Y", "U")
data.frame(ans)
}
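# Illustrative usage (a non-executing sketch; parameter values are hypothetical
# and 'rsmvnorm' must be available from this package's dependencies):
if (FALSE) {
  sigma <- cbind(c(1, 0.5), c(0.5, 1)) # correlation matrix for U and Y
  dat <- norta(N = 1000, cor.matrix = sigma, conf.corr.X = 0.3, instrument.strength = 0.8)
  round(cor(dat), 2) # Z should correlate with X but only negligibly with U
}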
genIVData <- function(N, Z2XCoef, U2XCoef, U2YCoef, beta0 = 0, beta1 = 1,
survival.distribution = c("exponential", "normal"),
confounding.function = c("linear", "inverted", "exponential", "square", "sine"),
dichotomous.exposure = FALSE,
break2sls = FALSE, break.method = c("collider", "error", "error.u", "z.on.y"),
error.amount = 0.01)
{
if (!is.numeric(N) || N < 1)
stop("'N' must be a numeric value greater than or equal to one")
surv.dist <- match.arg(survival.distribution)
confounding.function <- match.arg(confounding.function)
break.method <- match.arg(break.method)
N <- as.integer(N)
# the outcome error SD is currently identical for both exposure types
sd.Y <- 1
if (break2sls) {
if (break.method == "collider") {
uy.confounder <- rnorm(N)
U <- rnorm(N, sd = 1) + U2YCoef * uy.confounder
err <- rnorm(N) + U2YCoef * uy.confounder
} else if (break.method == "error") {
U <- rnorm(N, sd = 1)
err <- rnorm(N)
} else if (break.method == "error.u") {
err <- rnorm(N)
U <- rnorm(N, sd = 1) + err * error.amount
} else if (break.method == "z.on.y") {
U <- rnorm(N, sd = 1)
err <- rnorm(N)
}
} else {
U <- rnorm(N, sd = 1)
err <- rnorm(N, sd = sd.Y)
}
#U <- U / sd(U)
#Z <- Z / sd(Z)
if (dichotomous.exposure)
{
Z <- rbinom(N, 1, 0.5)
if (break2sls & break.method == "error") {
zb <- switch(confounding.function,
linear = (Z2XCoef * Z) + (U2XCoef * U) + error.amount * err,
inverted = (Z2XCoef * Z / abs(sin(Z))) + (U2XCoef * U) + error.amount * err,
exponential = (Z2XCoef * Z) * exp(U2XCoef * U) + error.amount * err,
sine = Z2XCoef * (Z + sin(Z * pi)) + (U2XCoef * U) + error.amount * err, # mirrors the non-break sine case
square = (Z2XCoef * Z) * (U2XCoef * U)^2 + error.amount * err)
} else {
zb <- switch(confounding.function,
linear = Z2XCoef * Z + (U2XCoef * U),
inverted = Z2XCoef * Z / (1 + abs(Z)) + (U2XCoef * U),
exponential = Z2XCoef * exp(Z) + (U2XCoef * U),
sine = Z2XCoef * (Z + sin(Z * pi)) + (U2XCoef * U),
square = Z2XCoef * (Z - 1 * Z^2) + (U2XCoef * U))
#X <- X/sd(X)
}
#prob <- 1 / (1 + exp(-zb))
X <- 1 * (sign(beta1)*zb > rnorm(N, sd = 0.5))
} else {
Z <- rnorm(N, sd = 1)
if (break2sls & break.method == "error") {
X <- switch(confounding.function,
linear = (Z2XCoef * Z) + (U2XCoef * U) + error.amount * err + rnorm(N, sd = 1),
inverted = (Z2XCoef * Z / abs(sin(Z))) + (U2XCoef * U) + error.amount * err + rnorm(N, sd = 1),
exponential = (Z2XCoef * Z) * exp(U2XCoef * U) + error.amount * err + rnorm(N, sd = 1),
sine = Z2XCoef * (Z + sin(Z * pi)) + (U2XCoef * U) + error.amount * err + rnorm(N, sd = 1), # mirrors the non-break sine case
square = (Z2XCoef * Z) * (U2XCoef * U)^2 + error.amount * err + rnorm(N, sd = 1))
} else {
X <- switch(confounding.function,
linear = Z2XCoef * Z + (U2XCoef * U) + rnorm(N, sd = 1),
inverted = Z2XCoef * Z / (1 + abs(Z)) + (U2XCoef * U) + rnorm(N, sd = 1),
exponential = Z2XCoef * exp(Z) + (U2XCoef * U) + rnorm(N, sd = 1),
sine = Z2XCoef * (Z + sin(Z * pi)) + (U2XCoef * U) + rnorm(N, sd = 1),
square = Z2XCoef * (Z - 1 * Z^2) + (U2XCoef * U) + rnorm(N, sd = 1))
#X <- X/sd(X)
}
}
#X <- X / sd(X)
if (!break2sls) {
# when break.method == "z.on.y", Z enters Y directly, violating the exclusion restriction
if (break.method == "z.on.y") {
Y <- switch(surv.dist,
exponential = rexp(N, rate = exp(-(beta0 + beta1 * X + U2YCoef * U + error.amount * Z))),
normal = exp(beta0 + beta1 * X + U2YCoef * U + error.amount * Z + err))
} else {
Y <- switch(surv.dist,
exponential = rexp(N, rate = exp(-(beta0 + beta1 * X + U2YCoef * U))),
normal = exp(beta0 + beta1 * X + U2YCoef * U + err))
}
} else {
if (break.method == "error" | break.method == "error.u") {
Y <- exp(beta0 + beta1 * X + U2YCoef * U + err)
} else if (break.method == "z.on.y") {
Y <- switch(surv.dist,
exponential = rexp(N, rate = exp(-(beta0 + beta1 * X + U2YCoef * U + error.amount * Z))),
normal = exp(beta0 + beta1 * X + sign(beta1) * U2YCoef * U + error.amount * Z + err))
} else {
Y <- exp(beta0 + beta1 * X + U2YCoef * U + err)
}
}
ans <- cbind(Z, X, Y, U)
colnames(ans) <- c("Z", "X", "Y", "U")
data.frame(ans)
}
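# Illustrative check (hypothetical values): simulate a linear first stage and
# verify the IV structure, i.e. Cor(Z, X) > 0 while Cor(Z, U) is near zero.
if (FALSE) {
  dat <- genIVData(N = 5000, Z2XCoef = 1, U2XCoef = 0.5, U2YCoef = 0.5,
                   survival.distribution = "exponential",
                   confounding.function = "linear")
  round(cor(dat), 2)
}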
genIVDataOld <- function(N, Z2XCoef, U2XCoef, U2YCoef, beta0 = 0, beta1 = 1,
survival.distribution = c("exponential", "normal"))
{
if (!is.numeric(N) || N < 1)
stop("'N' must be a numeric value greater than or equal to one")
surv.dist <- match.arg(survival.distribution)
N <- as.integer(N)
U <- rnorm(N, sd = 1)
#U <- U / sd(U)
Z <- rnorm(N, sd = 1)
#Z <- Z / sd(Z)
X <- (Z2XCoef * Z) + (U2XCoef * U) + rnorm(N, sd = 1)
#X <- X / sd(X)
Y <- switch(surv.dist,
exponential = rexp(N, rate = exp(-(beta0 + beta1 * X + U2YCoef * U))),
normal = exp(beta0 + beta1 * X + U2YCoef * U + rnorm(N)))
ans <- cbind(Z, X, Y, U)
colnames(ans) <- c("Z", "X", "Y", "U")
data.frame(ans)
}
genMultivarIVData <- function(N, Z2XCoef, U2XCoef, U2YCoef, beta, num.confounded,
survival.distribution = c("exponential", "normal"),
intercept = FALSE, break2sls = FALSE,
confounding.function = c("exponential", "linear", "sine", "square", "inverted"),
sd = 0.5)
{
if (!is.numeric(N) || N < 1)
stop("'N' must be a numeric value greater than or equal to one")
surv.dist <- match.arg(survival.distribution)
confounding.function <- match.arg(confounding.function)
num.vars <- length(beta)
N <- as.integer(N)
U <- matrix(rnorm(N * num.confounded), ncol = num.confounded)
Z <- matrix(rnorm(N * num.confounded), ncol = num.confounded)
# NOTE: the intercept branch below is currently disabled; 'intercept' is forced to FALSE
intercept <- FALSE
if (intercept) {
no.int.beta <- beta[-1]
Z.append <- U.append <- array(0, dim = c(N,(num.vars - 1)))
Z.append[, 1:num.confounded] <- Z
U.append[, 1:num.confounded] <- U
rand.mat <- matrix(rnorm(N * (num.vars - 1)), ncol = (num.vars - 1))
X <- Z.append * Z2XCoef + U.append * U2XCoef + rand.mat
#X2Y <- t(t(X) * no.int.beta)
beta <- no.int.beta
colnames(X) <- paste("X", 1:length(no.int.beta), sep = "")
} else {
Z.append <- U.append <- array(0, dim = c(N, num.vars))
Z.append[, 1:num.confounded] <- Z
U.append[, 1:num.confounded] <- U
rand.mat <- matrix(rnorm(N * num.vars), ncol = num.vars)
if (confounding.function == "linear") {
X <- Z2XCoef * Z.append + U2XCoef * U.append + rand.mat
} else if (confounding.function == "inverted") {
X <- Z2XCoef * Z.append / (1 + abs(Z.append)) + (U2XCoef * U.append) + rand.mat
} else if (confounding.function == "exponential") {
X <- Z2XCoef * exp(Z.append) + (U2XCoef * U.append) + rand.mat
} else if (confounding.function == "sine") {
X <- Z2XCoef * (sin(Z.append * pi)) + (U2XCoef * U.append) + rand.mat
} else if (confounding.function == "square") {
X <- Z2XCoef * (Z.append^2) + (U2XCoef * U.append) + rand.mat
} else {
X <- Z2XCoef * Z.append + U2XCoef * U.append + rand.mat
}
#X <- Z.append * Z2XCoef + U.append * U2XCoef + rand.mat
#X2Y <- t(t(X) * beta)
colnames(X) <- paste("X", 1:length(beta), sep = "")
}
# note: U enters the outcome with unit coefficient here; 'U2YCoef' is not applied
Y <- switch(surv.dist,
exponential = rexp(N, rate = exp(-(X %*% beta + rowSums(U)))),
normal = exp(X %*% beta + rowSums(U) + rnorm(N, sd = sd)))
ret <- list(Z = Z, X = X, Y = Y, U = U)
ret
}
#' @export
simIVSurvivalData <- function(sample.size,
conf.corr.X = 0.0,
conf.corr.Y,
instrument.strength,
lambda,
beta0,
beta1,
verbose = FALSE,
norta = FALSE,
betax.cens,
betaz.cens,
dichotomous.exposure = FALSE,
survival.distribution = c("exponential", "normal"),
confounding.function = c("linear", "inverted", "exponential", "square", "sine"),
dependent.censoring = FALSE,
break2sls = FALSE,
break.method = c("collider", "error", "error.u", "z.on.y"),
error.amount = 0.01,
cens.distribution = c("exp", "lognormal", "weibull", "unif", "fixed"),
plotcens = TRUE)
{
#conf.corr.X == confounder correlation with X
#conf.corr.Y == confounder correlation with Y
surv.dist <- match.arg(survival.distribution)
confounding.function <- match.arg(confounding.function)
break.method <- match.arg(break.method)
cens.distribution <- match.arg(cens.distribution)
if (dependent.censoring)
{
stopifnot(length(beta1) == length(betax.cens))
stopifnot(length(betaz.cens) == 1)
}
if (norta)
{
#correlation matrix for U, Y
sigma <- cbind(c(1,conf.corr.Y), c(conf.corr.Y, 1))
#generate IV random variables
vars <- norta(sample.size, sigma, conf.corr.X = conf.corr.X, instrument.strength = instrument.strength, beta0, beta1)
} else
{
#generate IV random variables
vars <- genIVData(sample.size,
Z2XCoef = instrument.strength,
U2XCoef = conf.corr.X,
U2YCoef = conf.corr.Y,
beta0,
beta1,
confounding.function = confounding.function,
survival.distribution = surv.dist,
break2sls = break2sls,
dichotomous.exposure = dichotomous.exposure,
break.method = break.method,
error.amount = error.amount)
}
#if specified, print sample correlation between Z, X, U, and Y
if (verbose) print(cor(vars))
X <- vars$X #covariate
Z <- vars$Z #instrument
#failure time, truncated at its 99th percentile to limit extreme values
lim.time <- quantile(vars$Y, prob = 0.99)
Fail.time <- pmin(vars$Y, lim.time)
if (dependent.censoring)
{
beta.x <- betax.cens
beta.z <- betaz.cens
if (cens.distribution == "exp")
{
cens.effect <- exp(drop(X * beta.x + beta.z * Z))
Cen.time <- rexp(sample.size, rate = 1/cens.effect)
} else if (cens.distribution == "weibull")
{
shape <- 0.25
xbeta <- drop(X * beta.x + beta.z * Z)
nu <- runif(sample.size)
# S(t|x) = exp(-H_0(t)exp(x'beta))
# T = S^{-1}(V|x) = H_0^{-1}(-log(V) / exp(x'beta))
# V ~ unif(0,1)
# Weibull: H_0(t) = lambda * t ^ rho => H_0^{-1}(t) = (t/lambda)^(1/rho)
# Cen.time <- (-log(nu) / (lambda * exp(xbeta))) ^ (1 / shape)
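# rweibull() parameterizes the survivor function as S(t) = exp(-(t/scale)^shape),
# i.e. H_0(t) = (t/scale)^shape, so the inverse-CDF draw commented above is
# equivalent to rweibull(shape = shape, scale = (lambda * exp(xbeta))^(-1/shape))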
Cen.time <- rweibull(sample.size, shape = shape, scale = exp(xbeta / gamma(1 + 1/shape)) )
} else if (cens.distribution == "lognormal" ||
cens.distribution == "unif") ## can't have unif when C depends on covariates
{
xbeta <- drop(X * beta.x + beta.z * Z)
Cen.time <- exp(rnorm(sample.size, mean = xbeta, sd = 1))
}
} else
{
if (cens.distribution == "exp")
{
# generate independent censoring data
Cen.time <- rexp(sample.size, rate = lambda)
} else if (cens.distribution == "weibull")
{
shape <- 0.25
nu <- runif(sample.size)
# S(t|x) = exp(-H_0(t)exp(x'beta))
# T = S^{-1}(V|x) = H_0^{-1}(-log(V) / exp(x'beta))
# V ~ unif(0,1)
# Weibull: H_0(t) = lambda * t ^ rho => H_0^{-1}(t) = (t/lambda)^(1/rho)
# Cen.time <- (-log(nu) / (lambda * 1)) ^ (1 / shape)
Cen.time <- rweibull(sample.size, shape = shape, scale = lambda )
} else if (cens.distribution == "lognormal")
{
Cen.time <- exp(rnorm(sample.size, mean = mean(log(Fail.time))))
} else if (cens.distribution == "unif")
{
Cen.time <- exp(runif(sample.size, min = min(log(Fail.time)),
max = quantile(log(vars$Y), prob = 0.98)))
} else if (cens.distribution == "fixed")
{
mean.fail <- quantile(Fail.time, probs = 0.5)
Cen.time <- rep(mean.fail, NROW(Fail.time)) + runif(NROW(Fail.time), max = 0.025, min = -0.025)
}
}
if (plotcens)
{
plot(density(Fail.time), ylim = c(0, 1) , xlim = c(0, quantile(vars$Y, prob = c(0.975))))
lines(density(Cen.time), col = "blue")
plot(density(Cen.time - Fail.time))
}
#Cen.time <- pmin(Cen.time, quantile(Cen.time, prob = 0.99))
#failure indicator
delta <- 1 * (Cen.time >= Fail.time)
#observed time is the minimum of failure and censoring times
t <- pmin(Fail.time, Cen.time)
#return variables in data.frame
data.simu <- data.frame(t, delta, X, Z, Fail.time, Cen.time)
#store correlations as attribute
attr(data.simu, "cor") <- cor(vars)
data.simu
}
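# Example call (hypothetical parameter values): exponential failure times with
# independent exponential censoring; 'delta' is the event indicator.
if (FALSE) {
  dat <- simIVSurvivalData(sample.size = 1000, conf.corr.X = 0.4, conf.corr.Y = 0.4,
                           instrument.strength = 1, lambda = 0.2,
                           beta0 = 0, beta1 = 1,
                           betax.cens = 0, betaz.cens = 0,
                           plotcens = FALSE)
  mean(1 - dat$delta) # observed censoring proportion
}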
#' @export
simIVMultivarSurvivalData <- function(sample.size,
conf.corr.X = 0.0,
conf.corr.Y = 0,
instrument.strength,
lambda,
beta,
survival.distribution = c("exponential", "normal"),
num.confounded,
intercept = FALSE,
break2sls = FALSE,
confounding.function = c("linear", "inverted", "exponential", "square", "sine"),
dependent.censoring = FALSE,
cens.distribution = c("exp", "lognormal", "weibull"))
{
#conf.corr.X == confounder correlation with X
#conf.corr.Y == confounder correlation with Y
surv.dist <- match.arg(survival.distribution)
cens.distribution <- match.arg(cens.distribution)
confounding.function <- match.arg(confounding.function)
#generate IV random variables
vars <- genMultivarIVData(N = sample.size,
Z2XCoef = instrument.strength,
U2XCoef = conf.corr.X,
U2YCoef = conf.corr.Y,
beta,
num.confounded = num.confounded,
survival.distribution = surv.dist,
intercept = intercept,
confounding.function = confounding.function)
X <- vars$X #covariate
Z <- vars$Z #instrument
# failure variable
lim.time <- quantile(vars$Y, prob = c(0.975))
Fail.time <- pmin(vars$Y, lim.time)
if (dependent.censoring)
{
beta.x <- runif(NCOL(X), min = -0.5, max = 0.5)
beta.z <- runif(1, min = -0.5, max = 0.5)
if (cens.distribution == "exp")
{
cens.effect <- exp(drop(X %*% beta.x + beta.z * Z))
Cen.time <- rexp(sample.size, rate = cens.effect)
} else if (cens.distribution == "weibull")
{
shape <- 0.5
xbeta <- drop(X %*% beta.x + beta.z * Z)
nu <- runif(sample.size)
# S(t|x) = exp(-H_0(t)exp(x'beta))
# T = S^{-1}(V|x) = H_0^{-1}(-log(V) / exp(x'beta))
# V ~ unif(0,1)
# Weibull: H_0(t) = lambda * t ^ rho => H_0^{-1}(t) = (t/lambda)^(1/rho)
Cen.time <- (-log(nu) / (lambda * exp(xbeta))) ^ (1 / shape)
} else if (cens.distribution == "lognormal")
{
xbeta <- drop(X %*% beta.x + beta.z * Z)
Cen.time <- exp(rnorm(sample.size, mean = -xbeta))
}
} else
{
if (cens.distribution == "exp")
{
# generate independent censoring data
Cen.time <- rexp(sample.size, rate = lambda)
} else if (cens.distribution == "weibull")
{
shape <- 0.5
nu <- runif(sample.size)
# S(t|x) = exp(-H_0(t)exp(x'beta))
# T = S^{-1}(V|x) = H_0^{-1}(-log(V) / exp(x'beta))
# V ~ unif(0,1)
# Weibull: H_0(t) = lambda * t ^ rho => H_0^{-1}(t) = (t/lambda)^(1/rho)
Cen.time <- (-log(nu) / (lambda * 1)) ^ (1 / shape)
} else if (cens.distribution == "lognormal")
{
Cen.time <- exp(rnorm(sample.size))
}
}
Cen.time <- pmin(Cen.time, quantile(Cen.time, prob = 0.98))
# failure indicator
delta <- 1 * (Cen.time >= Fail.time)
# observed time is the minimum of failure and censoring times
t <- pmin(Fail.time, Cen.time)
log.t <- log(t)
if (intercept)
{
log.t <- log.t + beta[1]
t <- exp(log.t)
}
# return variables in data.frame
survival <- data.frame(t, delta, log.t)
# return a list of the survival aspects of
# the data and the covariate matrix
ret <- list(survival = survival, X = X, Z = Z)
dat <- data.frame(Z, X[,1:ncol(Z)], t, vars$U)
names(dat) <- c( "Z", "X","Y", "U")
attr(ret, "cor") <- cor(dat)
class(ret) <- "survival.data"
ret
}
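# Example call (hypothetical values): returns a 'survival.data' list holding the
# survival data.frame and the X and Z matrices, with the sample correlation
# matrix attached as an attribute.
if (FALSE) {
  dat <- simIVMultivarSurvivalData(sample.size = 1000, conf.corr.X = 0.4, conf.corr.Y = 0.4,
                                   instrument.strength = 1, lambda = 0.2,
                                   beta = c(1, 0.5), num.confounded = 1)
  head(dat$survival)
  attr(dat, "cor")
}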
#' @export
SimIVDataCompareEstimators <- function(type,
n.sims,
sample.size,
conf.corr.X = 0.0,
conf.corr.Y = 0.0,
instrument.strength,
lambda,
beta0,
beta1,
dichotomous.exposure = FALSE,
seed = NULL,
norta=FALSE,
betax.cens,
betaz.cens,
B = 200L,
conf.level = 0.95,
bootstrap = FALSE,
boot.method = c("ls", "sv", "full.bootstrap"),
survival.distribution = c("exponential", "normal"),
confounding.function,
break2sls = FALSE,
break.method = c("collider", "error", "error.u", "z.on.y"),
error.amount = 0.01,
dependent.censoring = FALSE,
cens.misspec = FALSE,
cens.distribution = c("exp", "lognormal", "weibull", "unif", "fixed"),
plotcens = TRUE)
{
# This function simulates survival data 'n.sims' times with a confounder U and an
# instrument Z, estimates beta with both the regular AFT estimating equation and the
# IV estimating equation proposed by Professor Yu, and returns a list of result vectors.
#set seed if supplied by user
if (!is.null(seed))
{
set.seed(seed)
}
boot.method <- match.arg(boot.method)
#check to make sure user specified allowed estimating equations
types <- c("AFT", "AFT-IV", "AFT-2SLS", "AFT-IPCW", "AFT-2SLS-xhat", "RnT")
funcs <- c("vEvalAFTScore", "vEvalAFTivScore", "vEvalAFT2SLSScorePrec", "vEvalAFTivIPCWScorePrec", "vEvalAFT2SLSxhatScore", "vEvalRnTAFTivScore")
for (i in seq_along(type)) {
if (!is.element(type[i], types))
stop("'type' must only contain 'AFT', 'RnT', 'AFT-IV', 'AFT-2SLS', 'AFT-IPCW', or 'AFT-2SLS-xhat'")
}
#if (bootstrap) {
#warning("Bootstrap does not work yet")
#}
zval <- qnorm((1 + conf.level)/2, 0, 1)
beta.store <- array(0, dim = c(n.sims, length(type)))
coverage.store <- array(NA, dim = c(n.sims, length(type)))
l <- 0
pct.censored <- NULL
num.errors <- tot.cors <- 0
pb <- txtProgressBar(min = 0, max = n.sims, style = 3)
while (l < n.sims)
{
possibleError <- tryCatch({
##### GENERATE DATA #########
Data.simu <- simIVSurvivalData(sample.size,
conf.corr.X = conf.corr.X,
conf.corr.Y = conf.corr.Y,
instrument.strength = instrument.strength,
lambda = lambda,
beta0 = beta0,
beta1 = beta1,
norta = norta,
betax.cens = betax.cens,
betaz.cens = betaz.cens,
dichotomous.exposure = dichotomous.exposure,
survival.distribution = survival.distribution,
confounding.function = confounding.function,
dependent.censoring = dependent.censoring,
break2sls = break2sls,
break.method = break.method,
error.amount = error.amount,
cens.distribution = cens.distribution,
plotcens = plotcens)
beta.tmp <- array(0, dim = length(type))
coverage.tmp <- array(NA, dim = length(type))
#solve each estimating equation for beta1
for (e in 1:length(type))
{
#return correct estimating equation function
est.eqn <- match.fun(funcs[[match(type[e], types)]])
if (type[e] == "AFT-2SLS")
{
#predict X with Z (1st stage of 2SLS process) to get Xhat
Data.simu$X.hat <- lm(X ~ Z, data = Data.simu)$fitted.values
}
if (type[e] == "AFT-IPCW")
{
#generate function G_c() for ICPW
if (dependent.censoring & !cens.misspec)
{
GC.list <- list(genKMCensoringFunc(Data.simu, cox = TRUE,
X = cbind(Data.simu$X, Data.simu$Z)),
#X = Data.simu$X),
"placeholder", "placeholder")
} else
{
GC.list <- genKMCensoringFuncVar(Data.simu)
attr(GC.list[[1]], "cox") <- FALSE
}
GC <- GC.list[[1]]
#GC <- function(x) 1 - pexp(x,1)
VarFunc <- GC.list[[2]]
n.riskFunc <- GC.list[[3]]
#solve for beta using bisection method
est.eqn.sq <- function(beta, data.simu) sum((est.eqn(beta = beta, data.simu = data.simu, GC = GC)) ^ 2)
vals <- est.eqn(beta = c(beta1 - 2, beta1 + 2), data.simu = Data.simu, GC = GC)
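# if the score has the same sign at both interval endpoints, no root is
# bracketed for uniroot(), so fall back to minimizing the squared score instead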
if (all(vals > 0) || all(vals < 0))
{
beta.tmp[e] <- optimize(est.eqn.sq,
interval = c(beta1 - 2, beta1 + 2),
data.simu = Data.simu,
GC = GC)$minimum
} else
{
beta.tmp[e] <- uniroot(est.eqn,
interval = c(beta1 - 2, beta1 + 2),
tol = 0.0001,
"data.simu" = Data.simu,
"GC" = GC)$root
}
} else
{
GC <- NULL
VarFunc <- NULL
n.riskFunc <- NULL
#solve for beta using bisection method
#beta.tmp[e] <- uniroot(est.eqn,
# interval = c(beta1 - 2, beta1 + 2),
# tol = 0.0001,
# "data.simu" = Data.simu)$root
est.eqn.sq <- function(beta, data.simu) sum((est.eqn(beta = beta, data.simu = data.simu)) ^ 2)
# if (attr(est.eqn, "name") == "evalRnTAFTivScore")
# {
# # this estimating equation is extraordinarily hard to minimize
# # even in 1 dimension
#
# opt.obj <- vector(mode = "list", length = 3)
#
# opt.obj[[1]] <- optimize(est.eqn.sq,
# interval = c(beta1 - 0.5, beta1 + 0.5),
# data.simu = Data.simu)
#
# opt.obj[[2]] <- optimize(est.eqn.sq,
# interval = c(beta1 - 1, beta1 + 1.1),
# data.simu = Data.simu)
#
# opt.obj[[3]] <- optimize(est.eqn.sq,
# interval = c(beta1 - 2, beta1 + 2.1),
# data.simu = Data.simu)
#
# vals <- sapply(opt.obj, function(ob) ob$objective)
# best.val <- which.min(vals)
# print(vals)
# print(sapply(opt.obj, function(ob) ob$minimum))
# beta.tmp[e] <- opt.obj[[best.val]]$minimum
#
# the parameter for R & T 1991 is negative that of the AFT model
if (attr(est.eqn, "name") == "evalRnTAFTivScore")
{
beta.tmp[e] <- rpsftm(formula=Surv(t, delta) ~ rand(Z, X),
data=Data.simu,
low_psi = -beta1 - 2, hi_psi = -beta1 + 2,
censor_time=Data.simu$Cen.time)$psi
if (is.null(beta.tmp[e]) || is.na(beta.tmp[e]) || is.nan(beta.tmp[e]))
{
vals <- est.eqn(beta = c(-beta1 - 2, -beta1 + 2), data.simu = Data.simu)
if (all(vals > 0) || all(vals < 0))
{
beta.tmp[e] <- optimize(est.eqn.sq,
interval = c(-beta1 - 2, -beta1 + 2),
data.simu = Data.simu)$minimum
} else
{
beta.tmp[e] <- uniroot(est.eqn,
interval = c(-beta1 - 2, -beta1 + 2),
tol = 0.0001,
"data.simu" = Data.simu)$root
}
}
} else
{
vals <- est.eqn(beta = c(beta1 - 2, beta1 + 2), data.simu = Data.simu)
if (all(vals > 0) || all(vals < 0))
{
beta.tmp[e] <- optimize(est.eqn.sq,
interval = c(beta1 - 2, beta1 + 2),
data.simu = Data.simu)$minimum
} else
{
beta.tmp[e] <- uniroot(est.eqn,
interval = c(beta1 - 2, beta1 + 2),
tol = 0.0001,
"data.simu" = Data.simu)$root
}
}
}
cat("sim ", l, ", beta = ", beta.tmp[e], "\n")
if (bootstrap)
{
# bootstrap estimate
se.hat <- bootstrapVarUnivar(beta.tmp[e],
est.eqn,
Data.simu,
B = B,
method = boot.method,
GC = GC,
VarFunc = VarFunc,
n.riskFunc = n.riskFunc)$se.hat
tmp <- c(beta.tmp[e] - zval * se.hat,
beta.tmp[e] + zval * se.hat)
coverage.tmp[e] <- 1 * (beta1 >= tmp[1] & beta1 <= tmp[2])
}
}
}, error = function(e) e)
if(inherits(possibleError, "error"))
{
print(possibleError)
num.errors <- num.errors + 1
next
} else
{
l <- l + 1
#store results on successful attempt
beta.store[l, ] <- beta.tmp
coverage.store[l, ] <- coverage.tmp
print(colMeans(beta.store[1:l,,drop=FALSE], na.rm=TRUE))
print(colMeans(coverage.store, na.rm=TRUE))
setTxtProgressBar(pb, l)
pct.censored[l] <- mean(1 - Data.simu$delta)
cat("\n Pct censored:", mean(1 - Data.simu$delta), "\n")
#add correlation matrix so we can take mean
tot.cors <- tot.cors + attr(Data.simu, "cor")
}
}
avg.cors <- tot.cors / n.sims
close(pb)
print (paste("Number of errors:", num.errors))
res <- list()
for (i in 1:ncol(beta.store)){res[[i]] <- beta.store[, i]}
names(res) <- type
attr(res, "truth") <- beta1
attr(res, "pct.censored") <- mean(pct.censored)
attr(res, "avg.cor") <- avg.cors
attr(res, "coverage") <- colMeans(coverage.store)
class(res) <- "AFTsim"
res
}
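# Example run (hypothetical settings): compare the naive AFT and IV estimators
# over a handful of simulations, then summarize the results.
if (FALSE) {
  res <- SimIVDataCompareEstimators(type = c("AFT", "AFT-IV"), n.sims = 10,
                                    sample.size = 500,
                                    conf.corr.X = 0.4, conf.corr.Y = 0.4,
                                    instrument.strength = 1, lambda = 0.2,
                                    beta0 = 0, beta1 = 1,
                                    betax.cens = 0, betaz.cens = 0,
                                    confounding.function = "linear",
                                    plotcens = FALSE)
  summary(res)
}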
#' @export
SimIVDataCompareEstimatorsMultivar <- function(type,
n.sims,
sample.size,
conf.corr.X = 0.0,
conf.corr.Y = 0.0,
instrument.strength,
lambda,
beta,
seed = NULL,
norta = FALSE,
intercept = FALSE,
B = 200L,
conf.level = 0.95,
bootstrap = FALSE,
boot.method = c("ls", "sv", "full.bootstrap"),
survival.distribution = c("exponential", "normal"),
break2sls = FALSE,
confounding.function)
{
# This function simulates survival data 'n.sims' times with a confounder U and an
# instrument Z, estimates beta with both the regular AFT estimating equation and the
# IV estimating equation proposed by Professor Yu, and returns a list of result vectors.
#set seed if supplied by user
if (!is.null(seed)) {set.seed(seed)}
boot.method <- match.arg(boot.method)
#check to make sure user specified allowed estimating equations
types <- c("AFT", "AFT-IV", "AFT-2SLS", "AFT-IPCW")
funcs <- c("AFTScorePre", "AFTivScorePre", "AFT2SLSScorePre", "AFTivIPCWScorePre")
funcs.sm <- c("AFTScoreSmoothPre", "AFTivScoreSmoothPre", "AFT2SLSScoreSmoothPre", "AFT2SLSScoreSmoothPre")
for (i in seq_along(type)) {
if (!is.element(type[i], types))
stop("'type' must only contain 'AFT', 'AFT-IV', 'AFT-2SLS' or 'AFT-IPCW'")
}
zval <- qnorm((1 + conf.level)/2, 0, 1)
beta.store <- array(0, dim = c(n.sims, length(type)))
coverage.store <- array(NA, dim = c(n.sims, length(type)))
l <- 0
pct.censored <- NULL
num.errors <- tot.cors <- 0
pb <- txtProgressBar(min = 0, max = n.sims, style = 3)
while (l < n.sims)
{
possibleError <- tryCatch({
##### GENERATE DATA #########
Data.simu <- simIVMultivarSurvivalData(sample.size, conf.corr.X = conf.corr.X, conf.corr.Y = conf.corr.Y,
instrument.strength = instrument.strength, lambda = lambda,
beta = beta, intercept = intercept,
survival.distribution = survival.distribution,
num.confounded = 1, break2sls = break2sls,
confounding.function = confounding.function)
if (intercept)
{
Data.simu$X <- cbind(rep(1, nrow(Data.simu$X)), Data.simu$X)
colnames(Data.simu$X)[1] <- "Intercept"
}
beta.tmp <- array(0, dim = length(type))
coverage.tmp <- array(NA, dim = length(type))
#solve each estimating equation for beta1
for (e in 1:length(type))
{
#return correct estimating equation function
est.eqn <- match.fun(funcs[[match(type[e], types)]])
est.eqn.sm <- match.fun(funcs.sm[[match(type[e], types)]])
attr(est.eqn, "name") <- funcs[[match(type[e], types)]]
attr(est.eqn.sm, "name") <- funcs.sm[[match(type[e], types)]]
dfsane.tol <- 0.000001
if (type[e] == "AFT-IPCW") {dfsane.tol <- 8}
# warm starts from the previous estimator's solution are currently disabled
init.par <- NULL
#solve for beta using deriv-free spectral method
est <- repFitAFT(tol = dfsane.tol,
data = Data.simu,
est.eqn = est.eqn,
est.eqn.sm = est.eqn.sm,
instrument.names = "prop_endo",
confounded.x.names = paste("X", 1:1, sep=""),
fit.method = "dfsane",
init.par = init.par,
final.fit = FALSE,
method = c(2),
control = list(tol = dfsane.tol,
maxit = 1000,
trace = FALSE,
M = c(500)),
quiet = TRUE)
conf.x.idx <- match(paste("X", 1:1, sep=""), names(est$par))
beta.tmp[e] <- est$par[conf.x.idx]
if (bootstrap)
{
# bootstrap estimate
se.hat <- bootstrapVar(est, Data.simu, B = B, method = boot.method)$se.hat
tmp <- c(beta.tmp[e] - zval * se.hat[conf.x.idx],
beta.tmp[e] + zval * se.hat[conf.x.idx])
coverage.tmp[e] <- 1 * (beta[conf.x.idx] >= tmp[1] & beta[conf.x.idx] <= tmp[2])
}
}
}, error=function(e) e)
if(inherits(possibleError, "error"))
{
num.errors <- num.errors + 1
print (possibleError)
next
} else
{
l <- l + 1
#store results on successful attempt
beta.store[l, ] <- beta.tmp
coverage.store[l, ] <- coverage.tmp
setTxtProgressBar(pb, l)
pct.censored[l] <- mean(1 - Data.simu$survival$delta)
print(colMeans(coverage.store, na.rm=TRUE))
#add correlation matrix so we can take mean
tot.cors <- tot.cors + attr(Data.simu, "cor")
}
}
avg.cors <- tot.cors / n.sims
close(pb)
print (paste("Number of errors:", num.errors))
res <- list()
for (i in 1:ncol(beta.store))
{
res[[i]] <- beta.store[, i]
}
names(res) <- type
attr(res, "truth") <- beta[1]
attr(res, "pct.censored") <- mean(pct.censored)
attr(res, "avg.cor") <- avg.cors
attr(res, "coverage") <- colMeans(coverage.store)
class(res) <- "AFTsim"
res
}
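# Example run (hypothetical settings) for the multivariate comparison; note that
# 'confounding.function' must be supplied explicitly here.
if (FALSE) {
  res <- SimIVDataCompareEstimatorsMultivar(type = c("AFT", "AFT-2SLS"), n.sims = 10,
                                            sample.size = 500,
                                            conf.corr.X = 0.4, conf.corr.Y = 0.4,
                                            instrument.strength = 1, lambda = 0.2,
                                            beta = c(1, 0.5),
                                            confounding.function = "linear")
  summary(res)
}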
#' @export
summary.AFTsim <- function(res)
{
# summary function for the output of the
# 'SimIVDataCompareEstimators' function
stopifnot(inherits(res, "AFTsim"))
results <- data.frame(array(0, dim = c(length(res), 10)))
n.data <- length(res[[1]])
results[,1] <- names(res)
colnames(results) <- c("Estimator", "Mean",
"LCI", "UCI",
"sd", "Q2.5",
"Q5", "Med",
"Q95", "Q97.5")
for (i in 1:length(res))
{
results[i, 2] <- mean(res[[i]])
results[i, 5] <- sd.i <- sd(res[[i]])
results[i, 3:4] <- c(mean(res[[i]]) - 1.96 * (sd.i / sqrt(n.data)),
mean(res[[i]]) + 1.96 * (sd.i / sqrt(n.data)))
results[i, 6:10] <- quantile(res[[i]], probs = c(0.025, 0.05, 0.5, 0.95, 0.975))
}
results[,2:10] <- round(results[,2:10], 4)
print(paste("Average Pct Censored:", 100 * attr(res, "pct.censored")))
attr(results, "cor.U.Y") <- attr(res, "avg.cor")[3,4]
attr(results, "cor.U.X") <- attr(res, "avg.cor")[2,4]
attr(results, "cor.Z.X") <- attr(res, "avg.cor")[1,2]
print(sprintf("Cor(U,Y): %g || Cor(U,X): %g || Cor(Z,X): %g",
round(attr(res, "avg.cor")[3,4], 4), round(attr(res, "avg.cor")[2,4], 4),
round(attr(res, "avg.cor")[1,2], 4)))
results
}
#' @export
createGrid <- function(U2X.range, U2Y.range, Z2X.range, n.data, sample.size, lambda.range)
{
stopifnot(is.numeric(U2X.range) & is.numeric(U2Y.range) & is.numeric(Z2X.range)
& is.numeric(lambda.range))
if (!all(n.data %% 1 == 0) | !all(sample.size %% 1 == 0)) {stop("n.data and sample.size must be integers")}
grid <- expand.grid(U2X.range, U2Y.range, Z2X.range, n.data, sample.size, lambda.range)
colnames(grid) <- c("Conf.Corr.X", "Conf.Corr.Y", "Instrument.Strength", "n.data", "sample.size", "lambda")
attr(grid, "grid") <- "sim.grid"
grid
}
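# createGrid() expands every combination of the supplied settings, e.g.:
if (FALSE) {
  grid <- createGrid(U2X.range = c(0.2, 0.4), U2Y.range = 0.4, Z2X.range = 1,
                     n.data = 100, sample.size = c(500, 1000), lambda.range = 0.2)
  nrow(grid) # 2 x 1 x 1 x 1 x 2 x 1 = 4 scenarios
}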
#' @export
simulateGrid <- function(est.eqns,
grid, beta,
seed = NULL,
B = 200L,
conf.level = 0.95,
bootstrap = FALSE,
boot.method = c("ls", "sv", "full.bootstrap"),
survival.distribution = c("exponential", "normal"),
dependent.censoring = FALSE,
cens.misspec = FALSE,
dichotomous.exposure = FALSE,
confounding.function = c("linear", "inverted", "exponential", "square", "sine"),
break2sls = FALSE,
break.method = c("collider", "error", "error.u", "z.on.y"),
error.amount = 0.01,
use.uniroot = TRUE,
betax.cens,
betaz.cens,
cens.distribution = c("exp", "lognormal", "weibull", "unif", "fixed"),
plotcens = TRUE)
{
if (is.null(attr(grid, "grid"))) {stop("Grid must be created with createGrid function")}
results <- array(0, dim = c(length(est.eqns), nrow(grid), (ncol(grid) + 13)))
dimnames(results)[[1]] <- est.eqns
dimnames(results)[[3]] <- c(colnames(grid), "Cor.U.Y", "Cor.U.X", "Cor.Z.X", "Mean",
"LCI", "UCI", "sd", "Q2.5", "Q5", "Med", "Q95", "Q97.5", "Coverage")
stopifnot(length(betax.cens) == length(beta))
stopifnot(length(betaz.cens) == 1)
#simulate situation when there IS a confounder present for
#varying levels of correlation between U and X and U and Y
if (length(beta) == 1 & use.uniroot)
{
raw.results <- foreach(i = 1:nrow(grid), .packages = packages) %dopar% {
print(sprintf("Simulation %g / %g", i, nrow(grid)))
res <- SimIVDataCompareEstimators(type = est.eqns, n.sims = grid$n.data[i],
sample.size = grid$sample.size[i],
conf.corr.X = grid$Conf.Corr.X[i],
conf.corr.Y = grid$Conf.Corr.Y[i],
instrument.strength = grid$Instrument.Strength[i],
lambda = grid$lambda[i], beta0 = 0, beta1 = beta, seed = seed,
betax.cens = betax.cens, betaz.cens = betaz.cens,
B = B, conf.level = conf.level,
bootstrap = bootstrap, boot.method = boot.method,
survival.distribution = survival.distribution,
dependent.censoring = dependent.censoring,
dichotomous.exposure = dichotomous.exposure,
confounding.function = confounding.function, break2sls = break2sls,
break.method = break.method, error.amount = error.amount,
cens.distribution = cens.distribution,
cens.misspec = cens.misspec,
plotcens = plotcens)
for (j in 1:ncol(grid)) {attr(res, colnames(grid)[j]) <- grid[i, j]}
res
}
} else
{
raw.results <- foreach(i = 1:nrow(grid), .packages = packages) %dopar% {
print(sprintf("Simulation %g / %g", i, nrow(grid)))
res <- SimIVDataCompareEstimatorsMultivar(type = est.eqns, n.sims = grid$n.data[i],
sample.size = grid$sample.size[i],
conf.corr.X = grid$Conf.Corr.X[i],
conf.corr.Y = grid$Conf.Corr.Y[i],
instrument.strength = grid$Instrument.Strength[i],
lambda = grid$lambda[i], beta = beta, seed = seed,
B = B, conf.level = conf.level,
bootstrap = bootstrap, boot.method = boot.method,
survival.distribution = survival.distribution,
confounding.function = confounding.function)
for (j in 1:ncol(grid)) {attr(res, colnames(grid)[j]) <- grid[i, j]}
res
}
}
sum.results <- lapply(raw.results, function(res)
{
cors <- c(attr(res, "avg.cor")[3,4], attr(res, "avg.cor")[2,4], attr(res, "avg.cor")[1,2])
cors <- matrix(rep(cors, length(est.eqns)), ncol = length(cors), byrow = TRUE)
coverages <- attr(res, "coverage")
print(round(attr(res, "avg.cor"), 4))
cbind(cors, as.matrix(summary(res)[,2:10]), coverages)
})
for (i in 1:length(sum.results)) {results[, i, (ncol(grid) + 1):(ncol(grid) + 13)] <- sum.results[[i]]}
for (i in 1:dim(results)[1]){results[i, , (1:ncol(grid))] <- as.matrix(grid)}
return.results <- list(raw = raw.results, summary.array = results)
class(return.results) <- "simulation.grid"
return.results
}
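# simulateGrid() evaluates each grid row with %dopar%, so a parallel backend
# must be registered and a character vector 'packages' (the packages each
# worker needs) must exist in the calling environment. A minimal sketch with
# hypothetical settings:
if (FALSE) {
  library(doParallel)
  registerDoParallel(cores = 2)
  packages <- c("survival") # loaded on each worker by foreach
  grid <- createGrid(U2X.range = 0.4, U2Y.range = 0.4, Z2X.range = 1,
                     n.data = 100, sample.size = 500, lambda.range = 0.2)
  sim <- simulateGrid(est.eqns = c("AFT", "AFT-IV"), grid = grid, beta = 1,
                      betax.cens = 0, betaz.cens = 0,
                      confounding.function = "linear", plotcens = FALSE)
  sim$summary.array
}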