knitr::opts_chunk$set(echo = TRUE)
library(knitr) library(kableExtra)
In this simulation study, we compare the performance of the three estimators considered in @guerrier2020accurate, namely the sample survey, $\bar{\pi}$, the MLE $\hat{\pi}$ as well as the moment estimator $\tilde{\pi}$. All simulation results are based on $B = 10^4$ Monte-Carlo replications and we consider 24 simulation settings, which are presented in the table below:
# Load package library(CPreval) # Number of Monte-Carlo simulations B = 10^4 # Initial simulation seed seed = 1832 # Simulation settings simu = data.frame(Simulation = 1:24, p0 = 100*rep(c(2/1000, 20/1000, 150/1000, 500/1000), 6), pi0 = 100*rep(c(1/1000, 10/1000, 100/1000, 300/1000), 6), n = rep(c(rep(1500, 4), rep(15000, 4)), 3), alpha = 100*c(rep(0, 8), rep(0, 8), rep(0.01,8)), beta = 100*c(rep(0, 8), rep(0.02,8), rep(0.02,8))) # Print table kable(simu, col.names = c("Simulation ID", "$p_0$ (%)", "$\\pi_0$ (%)", "Sample size $n$", "$\\alpha_0 = \\alpha$ (%)", "$\\beta_0 = \\beta$ (%)")) %>% kable_styling(bootstrap_options = c("striped", "hover"))
The code below performs the simulation:
# Initialisation error_survey = error_moment = error_mle = matrix(NA, B, 24) boundary_survey = boundary_mle = boundary_moment = matrix(NA, B, 24) length_survey = length_mle = matrix(NA, B, 24) coverage_survey = coverage_mle = matrix(NA, B, 24) # Start Monte-Carlo for (j in 1:24){ for (i in 1:B){ # Simulate data X = sim_Rs(p = simu$p0[j]/100, pi0 = simu$pi0[j]/100, n = simu$n[j], seed = seed + i, alpha0 = simu$alpha[j]/100, alpha = simu$alpha[j]/100, beta0 = simu$beta[j]/100, beta = simu$beta[j]/100) # Fit survey sample estimator survey = survey_sample(R = X$R, n = X$n, alpha = X$alpha, beta = X$beta, pi0 = X$pi0, simulation = TRUE) # Estimation error error_survey[i,j] = survey$estimate - simu$p0[j]/100 # Check boundary boundary_survey[i,j] = survey$boundary # Length of CI length_survey[i,j] = survey$ci_cp[2] - survey$ci_cp[1] # Coverage coverage_survey[i,j] = survey$ci_cp[1] < simu$p0[j]/100 && survey$ci_cp[2] > simu$p0[j]/100 # Moment estimator moment = moment_estimator(R0 = X$R0, R = X$R, pi0 = X$pi0, n = X$n, alpha0 = X$alpha0, alpha = X$alpha, beta0 = X$beta0, beta = X$beta, simulation = TRUE) # Estimation error error_moment[i,j] = moment$estimate - simu$p0[j]/100 # Boundary boundary_moment[i,j] = moment$boundary # MLE ml = mle(R0 = X$R0, R = X$R, pi0 = X$pi0, n = X$n, alpha0 = X$alpha0, alpha = X$alpha, beta0 = X$beta0, beta = X$beta, simulation = TRUE) # Estimation error error_mle[i,j] = ml$estimate - simu$p0[j]/100 # Boundary boundary_mle[i,j] = ml$boundary # Length of CI length_mle[i,j] = ml$ci_cp[2] - ml$ci_cp[1] # Coverage coverage_mle[i,j] = ml$ci_cp[1] < simu$p0[j]/100 && ml$ci_cp[2] > simu$p0[j]/100 } }
The table below presents the percentage of times each estimator had a solution either outside of the interval $[\pi_0, 1]$ (moment estimator) or at its boundary (MLE and sample survey):
# Simulation settings boundary = data.frame(Simulation = 1:24, survey = apply(boundary_survey, 2, mean), moment = apply(boundary_moment, 2, mean), mle = apply(boundary_mle, 2, mean)) # Print table kable(boundary, col.names = c("Simulation ID", "Survey sample $\\bar{\\pi}$", "Moment Estimator $\\tilde{\\pi}$", "MLE $\\hat{\\pi}$")) %>% kable_styling(bootstrap_options = c("striped", "hover"))
The same information is presented as a barplot in the figure below:
boundary_barplot = 100*t(as.matrix(boundary[,2:4])) colnames(boundary_barplot) = boundary[,1] cols = hcl(h = seq(15, 375, length = 4), l = 65, c = 100)[1:3] barplot(boundary_barplot, beside = TRUE, col = cols[c(1,3,2)], ylab = "Percentage of simulations with no reliable solution", xlab = "Simulation ID") legend("topleft", c("Survey", "Moment", "MLE"), fill = cols[c(1,3,2)], bty = "n")
In the figure below we compare the ratio of the RMSEs:
RMSE = function(x) sqrt(mean(x)^2 + var(x)) nb_settings = tail(simu$Simulation, n=1) cols = hcl(h = seq(15, 375, length = 4), l = 65, c = 100)[1:3] rmse_survey = apply(error_survey, 2, RMSE) rmse_moment = apply(error_moment, 2, RMSE) rmse_mle = apply(error_mle, 2, RMSE) plot(NA, xlim = c(1,nb_settings), ylim = c(0.2, 1.15), xlab = "Simulation ID", ylab = "Ratio of RMSE", axes = FALSE) rect(0.5, -0.01, 4.5, 1, col = "grey95", border = "grey95") rect(8.5, -0.01, 12.5, 1, col = "grey95", border = "grey95") rect(16.5, -0.01, 20.5, 1, col = "grey95", border = "grey95") text(2.5, 0.25, "n = 1,500") text(6.5, 0.25, "n = 15,000") text(10.5, 0.25, "n = 1,500") text(14.5, 0.25, "n = 15,000") text(18.5, 0.25, "n = 1,500") text(22.5, 0.25, "n = 15,000") text(4.5, 1.1, "without measurement error") text(12.5, 1.1, "with false positive") text(20.5, 1.1, "with false positive and negative") abline(v = 8.5, lwd = 2, lty = 2) abline(v = 16.5, lwd = 2, lty = 2) axis(1, 1:nb_settings) axis(2) grid() abline(v = 1:nb_settings, col = "grey80", lty = 3) abline(h = (0:20)/10, col = "grey80", lty = 3) box() abline(h = 1, lwd = 2) lines(1:nb_settings, rmse_mle/rmse_survey, col = cols[1], pch = 15, type = "b") lines(1:nb_settings, rmse_mle/rmse_moment, col = cols[3], pch = 16, type = "b") abline(h = 1, lwd = 2) legend(1, 0.45, c("Survey Sample", "Moment Estimator"), col = cols[c(1,3)], pch = c(15, 16), lwd = 1, bty = "n")
par(mfrow = c(2,1), mar = c(0.5,0.5,0.5,0.5), oma = c(5,5,3,0)) # Compute empirical coverage cov_survey = 100*apply(coverage_survey, 2, mean) cov_mle = 100*apply(coverage_mle, 2, mean) # Compute average length of CI len_survey = apply(length_survey, 2, mean) len_mle = apply(length_mle, 2, mean) # Plot coverage plot(NA, xlim = c(1,nb_settings), ylim = c(93.5, 100.8), xlab = " ", ylab = " ", axes = FALSE) rect(0.5, 0, 4.5, 150, col = "grey95", border = "grey95") rect(8.5, 0, 12.5, 150, col = "grey95", border = "grey95") rect(16.5, 0, 20.5, 150, col = "grey95", border = "grey95") text(2.5, 94, "n = 1,500") text(6.5, 94, "n = 15,000") text(10.5, 94, "n = 1,500") text(14.5, 94, "n = 15,000") text(18.5, 94, "n = 1,500") text(22.5, 94, "n = 15,000") text(4.5, 100.5, "without measurement error") text(12.5, 100.5, "with false positive") text(20.5, 100.5, "with false positive and negative") abline(v = 8.5, lwd = 2, lty = 2) abline(v = 16.5, lwd = 2, lty = 2) axis(2) grid() abline(v = 1:nb_settings, col = "grey80", lty = 3) abline(h = 90 + (0:10), col = "grey80", lty = 3) box() abline(h = 95, lwd = 2) mtext("Empirical coverage (95% conf. level)", side = 2, line = 3) lines(cov_survey, col = cols[1], pch = 16, type = "b") lines(cov_mle, col = cols[2], pch = 17, type = "b") legend(2.5, 99.5, c("Survey Sample", "MLE"), col = cols[c(1,2)], pch = c(15, 17), lwd = 1, bty = "n") plot(NA, xlim = c(1,nb_settings), ylim = c(0.595, 1.02), xlab = " ", ylab = " ", axes = FALSE) rect(0.5, 0, 4.5, 1.50, col = "grey95", border = "grey95") rect(8.5, 0, 12.5, 1.50, col = "grey95", border = "grey95") rect(16.5, 0, 20.5, 1.50, col = "grey95", border = "grey95") text(2.5, 0.59, "n = 1,500") text(6.5, 0.59, "n = 15,000") text(10.5, 0.59, "n = 1,500") text(14.5, 0.59, "n = 15,000") text(18.5, 0.59, "n = 1,500") text(22.5, 0.59, "n = 15,000") text(4.5, 1.01, "without measurement error") text(12.5, 1.01, "with false positive") text(20.5, 1.01, "with false positive and negative") abline(v = 8.5, lwd = 2, lty = 2) abline(v = 16.5, lwd = 2, lty = 2) axis(2) axis(1, at = 1:24) grid() abline(v = 1:nb_settings, col = "grey80", lty = 3) abline(h = (0:10)/10, col = "grey80", lty = 3) box() mtext("Ratio of average length of 95% CI", side = 2, line = 3) mtext("Simulation ID", side = 1, line = 2) abline(h = 1, lwd = 2) cols2 = hcl(h = seq(15, 375, length = 5), l = 65, c = 100)[1:4] lines(len_mle/len_survey, type = "b", ylim = c(0.6, 1.01), col = cols2[4], pch = 18, cex = 1.4)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.