knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(fig.pos = 'H')
This vignette documents fitting an MCEM algorithm via the refitME
R
-package. For more specific details see: refitME
: Measurement Error Modelling using Monte Carlo Expectation Maximization in R
. Also, see ?refitME
for further details on the fitting function, input arguments and output.
We use the Framingham heart study data set. In addition to the naive model, we also fit a simex
model and compare it with MCEM. Computational times for both models are also reported.
R
-packages.suppressWarnings(suppressMessages(library(refitME))) suppressWarnings(suppressMessages(library(simex))) set.seed(2020) data(Framinghamdata)
The first stored variable w1
is the error contaminated predictor as used in the original Carroll et al. (2006) analysis.
glm_naiv <- glm(Y ~ w1 + z1 + z2 + z3, x = TRUE, family = binomial, data = Framinghamdata)
sigma.sq.u <- 0.006295 # ME variance, as obtained from Carroll et al. (2006). B <- 100 # The number of Monte Carlo replication values/SIMEX simulations.
\clearpage
start <- Sys.time() glm_simex <- simex(glm_naiv, SIMEXvariable = c("w1"), measurement.error = cbind(sqrt(sigma.sq.u)), B = B) # SIMEX. end <- Sys.time() t1 <- difftime(end, start, units = "secs") comp.time <- c(t1)
start <- Sys.time() glm_MCEM <- refitME(glm_naiv, sigma.sq.u, B) end <- Sys.time() t2 <- difftime(end, start, units = "secs") comp.time <- c(comp.time, t2)
est.beta <- rbind(coef(glm_naiv), coef(glm_simex), coef(glm_MCEM)) est.beta.se <- rbind(sqrt(diag(vcov(glm_naiv))), sqrt(diag(glm_simex$variance.jackknife)), sqrt(diag(vcov(glm_MCEM)))) row.names(est.beta) = row.names(est.beta.se) <- c("Naive GLM", "SIMEX", "MCEM") colnames(est.beta) = colnames(est.beta.se) <- c("(Intercept)", "SBP", "chol. level", "age", "smoke") round(est.beta, digits = 3) round(est.beta.se, digits = 3) # Standard error estimates. names(comp.time) <- c("SIMEX", "MCEM") comp.time # SIMEX and MCEM.
The Milan mortality air pollution data set is part of the SemiPar
package. Here, we fit GAM models via the mgcv
package where one predictor (daily total suspended particles measurements) is error-contaminated.
R
-packages.suppressWarnings(suppressMessages(library(refitME))) suppressWarnings(suppressMessages(library(dplyr))) suppressWarnings(suppressMessages(library(SemiPar))) suppressWarnings(suppressMessages(library(mgcv))) set.seed(2021) data(milan.mort) dat.air <- sample_n(milan.mort, 100) # Take a random sample of 100.
Y <- dat.air[, 6] # Mortality counts. n <- length(Y) z1 <- (dat.air[, 1]) z2 <- (dat.air[, 4]) z3 <- (dat.air[, 5]) w1 <- log(dat.air[, 9]) # The error-contaminated predictor (total suspended particles). dat <- data.frame(cbind(Y, w1, z1, z2, z3))
gam_naiv <- gam(Y ~ s(w1) + s(z1, k = 25) + s(z2) + s(z3), family = "poisson", data = dat)
plot_gam_naiv <- plot(gam_naiv, select = 1)
\clearpage
sigma.sq.u <- 0.0915 # This gives a reliability ratio of 0.7. rel.rat <- round((1 - sigma.sq.u/var(dat$w1))*100, digits = 0) gam_MCEM1 <- refitME(gam_naiv, sigma.sq.u)
```r Plots of smooths against each predictor. TSP (top left is the error contaminated variable)."} xlab.names <- c("log(TSP)", "Day", "Temp", "Humidity")
op <- par(mfrow = c(2, 2), las = 1)
for(i in 1:4) { if (i == 1) { plot(gam_MCEM1, select = i, ylim = c(-0.35, 0.2), xlim = range(plot_gam_naiv[[1]]$x), rug = FALSE, col = "blue", all.terms = TRUE, xlab = xlab.names[i], ylab = "s(Mortality counts)", lwd = 2, cex.lab = 1.3, cex.axis = 1.3, cex.main = 2, font.lab = 1.1, cex = 1.4, shade = T) lines(plot_gam_naiv[[1]]$x, plot_gam_naiv[[1]]$fit, type = "l", col = "red", lwd = 2, lty = 2) title(main = bquote("Reliability ratio of predictor is"~.(rel.rat) ~ "%"), outer = F, line = 1, cex = 1.4) legend("bottomright", c("Naive GAM", "MCEM GAM"), col = c("red", "blue"), lty = c(2, 1), lwd = 2, bty = "n") for(j in 1:2) { axis(j, labels = FALSE) } } if (i == 2) { plot(gam_MCEM1, select = i, ylim = c(-0.25, 0.3), rug = FALSE, col = "blue", all.terms = TRUE, xlab = xlab.names[i], ylab = "s(Mortality counts)", lwd = 2, cex.lab = 1.3, cex.axis = 1.3, cex.main = 2, font.lab = 1.1, cex = 1.4, shade = T) lines(plot_gam_naiv[[2]]$x, plot_gam_naiv[[2]]$fit, type = "l", col = "red", lwd = 2, lty = 2) for(j in 1:2) { axis(j, labels = FALSE) } } if (i == 3) { plot(gam_MCEM1, select = i, ylim = c(-0.4, 0.4), rug = FALSE, col = "blue", all.terms = TRUE, xlab = xlab.names[i], ylab = "s(Mortality counts)", lwd = 2, cex.lab = 1.3, cex.axis = 1.3, cex.main = 2, font.lab = 1.1, cex = 1.4, shade = T) lines(plot_gam_naiv[[3]]$x, plot_gam_naiv[[3]]$fit, type = "l", col = "red", lwd = 2, lty = 2) for(j in 1:2) { axis(j, labels = FALSE) } } if (i == 4) { plot(gam_MCEM1, select = i, ylim = c(-0.1, 0.1), rug = FALSE, col = "blue", all.terms = TRUE, xlab = xlab.names[i], ylab = "s(Mortality counts)", lwd = 2, cex.lab = 1.1, cex.axis = 1.1, cex.main = 2, font.lab = 1.1, cex = 1.4, shade = T) lines(plot_gam_naiv[[4]]$x, plot_gam_naiv[[4]]$fit, type = "l", col = "red", lwd = 2, lty = 2) for(j in 1:2) { axis(j, labels = FALSE) } } }
title(main = "MCEM (Poisson GAM) fitted to the air pollution data.", outer = T, line = -2) par(op)
detach(package:mgcv)
\clearpage ## Example 3: A point-process model using presence-only data We use the _Corymbia eximia_ presence-only data set from Renner and Warton (2013). Here, we fit a naive models point-process model (PPM) and an MCEM PPM. ### Load data and `R`-packages. ```r suppressWarnings(suppressMessages(library(refitME))) suppressWarnings(suppressMessages(library(caret))) data(Corymbiaeximiadata) dat <- Corymbiaeximiadata
Y1 <- dat$Y.obs Rain <- dat$Rain D.Main <- dat$D.Main MNT <- dat$MNT p.wt <- rep(1.e-6, length(Y1)) p.wt[Y1 == 0] <- 1 Y <- Y1/p.wt X <- cbind(rep(1, length(Y)), poly(MNT, degree = 2, raw = TRUE), poly(Rain, degree = 2, raw = TRUE), poly(sqrt(D.Main), degree = 2, raw = TRUE)) colnames(X) <- c("(Intercept)", "X1", "X2", "Z1", "Z2", "Z3", "Z4") dat1 <- data.frame(cbind(Y, X, p.wt)) colnames(dat1)[c(1, ncol(dat1))] <- c("Y", "p.wt")
PPM_naiv1 <- glm(Y ~ X1 + X2 + Z1 + Z2 + Z3 + Z4, family = "poisson", weights = p.wt, data = dat1)
\clearpage
sigma.sq.u <- 0.25 B <- 20 # Consider increasing `B` if you want a more accurate answer. PPM_MCEM1 <- refitME(PPM_naiv1, sigma.sq.u, B) # Should take about 2 mins.
Plot the predicted presences of Corymbia eximia
```r Plot of predicted presences of Corymbia eximia using presence-only data when fitting the MCEM model. Here the max temperature predictor is assumed to be error-contaminated."} coord.dat <- cbind(Corymbiaeximiadata$X, Corymbiaeximiadata$Y)
colnames(coord.dat) <- c("Longitude", "Latitude")
pred.dats <- as.data.frame(cbind(coord.dat[, 1], coord.dat[, 2], PPM_MCEM1$fitted.values))
colnames(pred.dats) <- c("x", "y", "preds")
levelplot(preds ~ x + y, data = pred.dats, asp = "iso", ylab = "Latitude", xlab = "Longitude", col.regions = heat.colors(1024)[900:1], cuts = 900, main = list("", cex = 5), scales = list(y = list(draw = FALSE), x = list(draw = FALSE)), colorkey = list(labels = list(cex = 0.8)))
## Example 4: A `VGAM` example using the Prinia flaviventris capture-recapture data. We use the _Prinia flaviventris_ capture-recapture data set from Hwang, Huang and Wang (2007). Here, we fit naive `vglm` and `vgam` capture-recapture models, and the MCEM capture-recapture model. For all models we used the `posbinomial()` family provided in `VGAM`. ### Load data and `R`-packages. ```r suppressWarnings(suppressMessages(library(refitME))) suppressMessages(library(VGAM)) data(Priniadata)
tau <- 17 # No. of capture occasions. w2 <- c(scale(Priniadata$w1)) # Standardize the wing length predictor. Priniadata$w2 <- w2
vglm
and vgam
capture-recapture models.CR_naiv1 <- vglm(cbind(cap, noncap) ~ w2, VGAM::posbinomial(omit.constant = TRUE, parallel = TRUE ~ w2), data = Priniadata, trace = FALSE) CR_naiv2 <- vgam(cbind(cap, noncap) ~ s(w2, df = 2), VGAM::posbinomial(omit.constant = TRUE, parallel = TRUE ~ s(w2, df = 2)), data = Priniadata, trace = FALSE)
sigma.sq.u <- 0.37/var(Priniadata$w1) # Measurement error variance. B <- 100 CR_MCEM <- refitME(CR_naiv2, sigma.sq.u, B) detach(package:VGAM)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.