knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = FALSE, cache = FALSE )
Here we present the analysis on the alcohol consumption real data in @zhang2022flexible. The data is made available in the package, see ?data(student)
. This dataset was originally collected and studied by @cortez2008using. In this analysis, we suppose that the binary response alc
is subject to a false negative rate of 5\%. We warn the readers that the code presented here may take time to run on a personal computer. Indeed, for the paper, the computations were performed on the "Baobab" and "Yggdrasil" HPC clusters provided by the University of Geneva.
# packages library(IBpaper) library(ib) # for simulating logistic with misclassified response misclassification <- function(object, control, extra){ ftd <- fitted(object) n <- length(ftd) N <- n * control$H u1 <- 0.0 u2 <- 0.05 mu_star <- u1 * (1 - rep(ftd,control$H)) + (1 - u2) * rep(ftd,control$H) matrix(ifelse(runif(N) > mu_star,0,1), ncol=control$H) } # Verify where a value lies given a confidence interval check_ci <- function(ci,theta){ if(!is.matrix(ci)) ci <- as.matrix(ci) if(ncol(ci)!=2) ci <- t(ci) if(nrow(ci)!=length(theta)) stop("verify ci and theta dimensions match") apply(cbind(ci,theta), 1, function(x){ if(x[3] <= min(x[1:2])){"left"} else if(x[3] >= max(x[1:2])){"right"}else{"center"} }) } # IB specifics H <- 200 # number of simulated estimators ib_control <- ibControl(H = H, sim = misclassification, maxit = 100L) # control for iterative bootstrap (IB) B <- 200 # number of bootstrap replicates set.seed(432) seeds <- sample.int(1e7,B) # seeds for bootstrap # For coverage alpha <- 0.05 # nominal level for confidence interval
The code for the analysis on the real data is provided as follows.
## Fit initial estimator fit_initial <- glm(alc ~ ., family = binomial(link = "logit"), data = student) ## Fit the MLE x <- as.matrix(student[,-1]) y <- student[,1] fit_mle <- logistic_misclassification_mle(x, y, fp = 0, fn = 0.05) ## Fit the JINI fit_ib <- ib(fit_initial, control = ib_control) ## Retrieve coefficients beta_hat <- fit_mle beta_tilde <- coef(fit_ib) p <- length(beta_hat) ## Standard errors for the MLE (based on Fisher information) se1 <- sqrt(diag(inverse_FIM(x,beta_hat,0.0,0.05))) ## Asymptotic confidence interval for MLE ci_mle <- beta_hat + se1 * matrix(rep(qnorm(c(alpha/2,1-alpha/2)),p),nc=2,byr=T) ## Standard errors for the JINI (estimated via bootstrap) logistic_object <- make_logistic(x, beta_tilde) boot <- matrix(nrow = B, ncol = p) for(b in 1:B){ ##------- simulate the process --------- # simulate logistic y <- simulation(logistic_object, control=list(seed=b, sim=misclassification)) ##------ Initial estimation ---------------- fit_tmp <- glm(y ~ x, family=binomial(link="logit")) ##------ IB bias correction ------------ ib_control$seed <- seeds[b] # update seed in control fit_ib <- ib(fit_tmp, control = ib_control) boot[b,] <- getEst(fit_ib) } se2 <- sqrt(diag(cov(boot))) ## Asymptotic confidence interval for JINI ci_jini <- beta_tilde + se2 * matrix(rep(qnorm(c(alpha/2,1-alpha/2)),p),nc=2,byr=T)
For the numerical experiment, we use beta_tilde
from the real data analysis as the true parameter.
MC <- 1e4 # number of simulation logistic_object <- make_logistic(x, beta_tilde) # for simulating response # seed for RNG set.seed(5847329) seed <- vector("list",3) seed$process <- sample.int(1e7,MC) seed$ib <- sample.int(1e7,MC) seed$boot <- sample.int(1e7,B) # for saving results res <- list( mle = matrix(nrow = MC, ncol = p), lci_mle = matrix(nrow = MC, ncol = p), coverage_mle = matrix(nrow = MC, ncol = p), ib = matrix(nrow = MC, ncol = p), lci_ib = matrix(nrow = MC, ncol = p), coverage_ib = matrix(nrow = MC, ncol = p) ) ib_control2 <- ib_control # copy for bootstrap # Simulations (careful, it is very long) for(m in 1:MC){ ##------- simulate the process --------- # simulate logistic y <- simulation(logistic_object, control = list(seed = seed$process[m], sim = misclassification)) ##------ Initial estimation ---------------- fit_initial <- glm(y ~ x, family=binomial(link="logit")) ##------ MLE estimation ---------------- fit_mle <- logistic_misclassification_mle(x,y,0.0,0.05) beta_hat <- fit_mle ci_mle <- beta_hat - sqrt(diag(inverse_FIM(x,beta_hat,0.0,0.05))) * matrix(rep(qnorm(alpha[2:1]),p),nc=2,byr=T) res$mle[m,] <- beta_hat res$lci_mle[m,] <- apply(ci_mle,1,diff) res$coverage_mle[m,] <- check_ci(ci_mle,beta) ##------ IB bias correction ------------ ib_control$seed <- seed$ib[m] fit_ib <- ib(fit_initial, control = ib_control) beta_tilde <- getEst(fit_ib) # bootstrap approximation of covariance lo <- make_logistic(x, beta_tilde) boot <- matrix(nrow = B, ncol = p) for(b in 1:B){ ##------- simulate the process --------- # simulate logistic y <- simulation(lo, control=list(seed=b, sim=misclassification)) ##------ MLE estimation ---------------- fit_tmp <- glm(y~x, family=binomial(link="logit")) ##------ IB bias correction ------------ ib_control2$seed <- seed$boot[b] fit_ib <- ib(fit_tmp, control = ib_control2) boot[b,] <- getEst(fit_ib) } se2 <- sqrt(diag(cov(boot))) ci_ib <- beta_tilde - se2 * matrix(rep(qnorm(alpha[2:1]),p),nc=2,byr=T) res$ib[m,] <- beta_tilde res$lci_ib[m,] <- apply(ci_ib,1,diff) res$coverage_ib[m,] <- check_ci(ci_ib,beta) cat(m,"\n") }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.