#################################
## Estimating Non-Inferiority Type I Error Rates for Difference in Means with Left Censored Data underlying Truncated Normal
## Version 1.3
## Created by: Justin Williams
## Alcon Intern, R&D
## Produced: July-August 2018
#################################
#installing and loading the needed packages
.list.of.packages <- c("devtools", "tictoc", "future.apply", "tcensReg")
lapply(.list.of.packages, function(x) if(!requireNamespace(x, quietly=TRUE)) install.packages(x))
vapply(.list.of.packages, require, character.only=TRUE, quietly=TRUE)
#enabling parallel processing
future::plan(multisession)
#function for censoring the data based on 12.0 CPD specifications
cens_method <- function(x, method, tobit_val){
if(method == "DL"){
ifelse(x < 0.61, 0.61, x)
} else if (method == "DL_half"){
ifelse(x < 0.61, 0.305, x)
} else if (method == "Tobit"){
ifelse(x < 0.61, tobit_val, x)
#want close to 0.61 since in the real data will not be able to distinguish from 0 and -1 otherwise
#rather than using arbitrarily precise number it is easier to detect trend using 0.60
}
}
cens_diff_sim_noninf <- function(rand_seed, mu1_vec, non_inf_margin, sd_vec, n1, n2, B, tobit_val, a, alpha){
# rand_seed : scalar numeric value used as the argument in the random seed generator. used for reproducability of simulation results
# mu1_vec : vector of opbservations for the mean of Population 1
# true_diff : vector of difference values which defines the mean difference between Population 1 and Population 2
# sd_vec : vector of standard deviation values
# n1 : scalar numeric value defining the number of observations in Sample 1
# n2 : scalar numeric value defining the number of observations in Sample 2
# B : scalar numeric value indicating the number of replications to use
# tobit_val : scalar numeric value indicating the tobit threshold value where censoring is occuring
# a : scalar numeric value indicating the truncation value
# alpha : scalar numeric value used to set the Type I probability for the non-inferiority test. Error rate should be alpha/2
#setting the intercept mean to be the mean of the first population
beta_0 <- mu1_vec
true_diff <- non_inf_margin
#calculating the number of parameters in each vector
num_mu1 <- length(mu1_vec)
num_sd <- length(sd_vec)
#creating the design matrix
# X <- cbind(rep(1, n1+n2), c(rep(0, n1), rep(1, n2)))
#the corresponding mu2 values based on the difference values
beta_1 <- true_diff
mu2_vec <- mu1_vec + true_diff
beta <- cbind(beta_0 = beta_0, beta_1 = true_diff)
# Xb <- X%*%t(beta)
# max_trunc_prob <- pnorm(a, mean = min(mu2_vec), sd = max(sd_vec), lower.tail = TRUE)
set.seed(rand_seed)
ls_dt <- future.apply::future_lapply(seq_len(B), function(num_reps){ #looping the function over the number of replicates
lapply(sd_vec, function(s){ #applying over the number of different standard deviation values
lapply(1:nrow(beta), function(x){ #each row of beta matrix represents a unique mu1, mu2 combination
y_star_1 <- tcensReg::rtnorm(n = n1, mu = unname(beta[x, 1]), sd = s, a = a) #oversampling from a normal distribution
y_star_2 <- tcensReg::rtnorm(n = n2, mu = sum(beta[x, ]), sd = s, a = a)
y_star <- c(y_star_1, y_star_2)
y_dl <- cens_method(y_star, method = "DL", tobit_val)
y_dl_half <- cens_method(y_star, method = "DL_half", tobit_val)
y_tobit <- cens_method(y_star, method = "Tobit", tobit_val)
cens_ind <- ifelse(y_tobit == tobit_val, 1, 0)
group <- c(rep(0, n1), rep(1, n2))
data.frame(y_star, y_dl, y_dl_half, y_tobit, cens_ind, group)
})})}, future.seed=rand_seed)
dt <- array(unlist(ls_dt), dim = c(n1+n2, 6, nrow(beta), num_sd, B),
dimnames = list(NULL, c("y_star","y_dl", "y_dl_half", "y_tobit", "cens_ind", "group"), NULL, NULL, NULL))
#dt is a large array containing all of the different vector values
#dimensions are (n1+n2) x dt_vars x num_mu_combo x num_sd x num_reps
#dimension one is the number of observations drawn in the total sample which is equal to n1+n2
#dimension two is the number of variables in the data.frame which includes the outcome variable y for each method, censoring indicator, and group variable
#dimension three is the number of different mu combinations. This is equal to length(mu1_vec)
#dimension four is the number of standard deviation values
#dimension five is the total number of replicates
#### Calculate Censoring Percentage #####
#calculate the percent censoring within each group over the number of mu combinations, sd combinations, and number of replicates
prop_cens_rep <- apply(dt, c(3, 4, 5), function(X){
df <- data.frame(X)
cens_tbl <- round(prop.table(table(df$cens_ind, df$group), 2), 4)
return(cens_tbl)
})
#averaging the censoring results over the number of replicates
prop_cens <- apply(prop_cens_rep[c(2,4),,,], c(1,2,3), mean)
row.names(prop_cens) <- c("Group_1", "Group_2")
percent_cens <- data.frame(cbind(t(matrix(prop_cens, nrow = 2, ncol = num_mu1 * num_sd, dimnames = list(paste0(c("Group_1", "Group_2"), "_pctcens"), NULL))),
beta_0 = mu1_vec,
beta_1 = true_diff,
sd = rep(sd_vec, each = num_mu1)))
percent_cens$mu1 <- percent_cens$beta_0
percent_cens$mu2 <- percent_cens$beta_0 + percent_cens$beta_1
percent_cens$delta <- percent_cens$beta_1
#calculating the censoring percentage for each scenario
writeLines("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
writeLines("Censoring Percentage")
writeLines("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
print(round(percent_cens, 4))
writeLines("")
writeLines("")
##### Estimating the Parameters for each of the Six Methods
method_names <- c("Uncens_NT", "GS", "DL", "DL_half", "Tobit", "tcensReg")
param_rep <- future.apply::future_apply(dt, c(3, 4, 5), function(dt_X){
df <- data.frame(dt_X)
#fitting the models
comp_mod <- lm(y_star ~ group, data = df)
dl_mod <- lm(y_dl ~ group, data = df)
dl_half_mod <- lm(y_dl_half ~ group, data = df)
# comp_trunc_mod <- tcensReg(y_star ~ group, data = df, a = a)
comp_trunc_mod <- truncreg::truncreg(y_star ~ group, data = df, point = 0)
if(sum(df$cens_ind == 1) == 0){ #checking to see if there are zero censored obsrvations
tobit_mod <- lm(y_tobit ~ group, data = df)
tobit_diff <- coef(tobit_mod)["group"]
tobit_sd <- summary(tobit_mod)$sigma
#if there are no censored observations then a truncated regression should be run
tcensReg_mod <- tcensReg(y_tobit ~ group, data = df, a = a)
tcensReg_diff <- tcensReg_mod$theta[2]
tcensReg_sd <- exp(tcensReg_mod$theta[3])
} else{
#note that for censReg it returns an estimate of the log_sd since it calculates the score equations with respect to this parameter
tobit_mod <- tcensReg(y_tobit ~ group, data = df, v = tobit_val)
tobit_diff <- tobit_mod$theta[2]
tobit_sd <- exp(tobit_mod$theta[3])
tcensReg_mod <- tcensReg(y_tobit ~ group, a = a, v = tobit_val, data = df)
tcensReg_diff <- tcensReg_mod$theta[2]
tcensReg_sd <- exp(tcensReg_mod$theta[3])
}
#difference estimates
comp_diff <- coef(comp_mod)[2]
dl_diff <- coef(dl_mod)[2]
dl_half_diff <- coef(dl_half_mod)[2]
# comp_trunc_diff <- comp_trunc_mod$theta[2]
comp_trunc_diff <- coef(comp_trunc_mod)[2]
diff_est <- rbind(comp_diff, comp_trunc_diff, dl_diff, dl_half_diff, tobit_diff, tcensReg_diff)
#standard deviation estimates
comp_sd <- summary(comp_mod)$sigma
dl_sd <- summary(dl_mod)$sigma
dl_half_sd <- summary(dl_half_mod)$sigma
# comp_trunc_sd <- exp(comp_trunc_mod$theta[3])
comp_trunc_sd <- coef(comp_trunc_mod)[3]
sd_est <- rbind(comp_sd, comp_trunc_sd, dl_sd, dl_half_sd, tobit_sd, tcensReg_sd)
#returning the combined mean estimate and standard deviation estimates
#first four rows are the mean estimate
#next four rows are the standard deviation estimate
return(rbind(diff_est, sd_est))
})
#these rep arrays are 12 x num_mu1 x num_sd x B
#dim1 represent each of the 6 methods of analysis x 2 parameters (delta and sigma)
#dim2 represents the number of different mean combinations depending on mu1
#dim3 represents the number of different standard deviation parameter settings
#dim4 represents the number of replications
diff_rep <- param_rep[1:6,,,]
row.names(diff_rep) <- method_names
sd_rep <- param_rep[7:12,,,]
row.names(sd_rep) <- method_names
#####################################
#####Non-Inferiority Testing#########
#####################################
if(sum(true_diff==-0.15)>=1){ #if the true difference is set to -0.15 then we want to run non-inferiority testing
non_inf_values <- which(beta[, "beta_1"]== -0.15)
non_inferior_margin <- non_inf_margin #this value is chosen based on D.8.2.2 of ISO 11979-7 2014; and C.3.2.2 of ISO 11979-9 2006 (typically -0.15 for our example)
sd_rep_adj <- ((n1+n2)/(n1+n2-2)) * sd_rep[,non_inf_values,,] #applying an adjustment since the mle was biased
lb_ci <- diff_rep[,non_inf_values,,] - qnorm(1 - alpha)*(sd_rep_adj*sqrt(1/n1 + 1/n2)) #calculating the lower_bound confidence interval
reject_avg <- apply(lb_ci, c(1, 2, 3), function(x) mean(x>non_inf_margin)) #if the lb confidence interval is above the non_inf margin then we reject
reject_df <-data.frame(t(matrix(reject_avg, nrow = 6, ncol = length(non_inf_values) * num_sd,
dimnames = list(paste0(method_names, "_nullreject"), NULL))))
reject_results <- data.frame(percent_cens[which(percent_cens$delta==-0.15), c("mu1", "mu2", "delta", "sd")], reject_df)
writeLines("=======================================")
writeLines("Rejecting the Null Hyptothesis of Non-Inferiority:")
writeLines("=======================================")
print(round(reject_results, 4))
}
return(reject_results)
}
tictoc::tic()
sim_results <- cens_diff_sim_noninf(rand_seed = 1509587, mu1_vec = c(1.1, 1.0), non_inf_margin = -0.15, sd_vec = c(0.4, 0.45, 0.5),
n1 = 100, n2 = 100, B = 10000, tobit_val = 0.61, a = 0, alpha = 0.05)
tictoc::toc()
# parallization is done locally using 4 cores
# total time for 10000 repitions 1197.677 sec elapsed (~aprox 20 mins)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.