#-------------------------------------------------------------------------------------------------------
# Some functions used in the MCEM algorithm measurement error paper. These function are mainly required
# for the capture-recapture modelling example (Section 4.4) and the simulation studies (Section 5) of
# the manuscript.
#-------------------------------------------------------------------------------------------------------
# Load R-packages.
library(splines)
library(simex)
library(rootSolve)
library(numDeriv)
library(ggplot2)
library(nleqslv)
library(fGarch)
library(gamlss)
library(scales)
#------------------------------------------------------------------------------------------------------
# A logit function.
logit_fun <- function(xb) 1/(1 + exp(-xb))
# Some conditional score (CS) functions required for the example 4.
# Since we need to solve (non-linear) estimating equations we require differentiation solvers
# (e.g., Newton's method). We need to set up the functions and set some tolerance parameter like
# the error tols. and some termination conditions.
# Numerical partial differential solver. Used in newton.1.
df.1 <- function(f, par0, y, w1, tau, sigma.sq.u, delta = 0.0005) {
k <- length(par0)
d1 <- diag(rep(delta, k))
df.1 <- matrix(0, k, k)
for(i in 1:k) {
df.1[, i] <- (f(par0 + d1[i, ], y, w1, tau, sigma.sq.u) - f(par0 - d1[i, ], y, w1, tau, sigma.sq.u))/(2*delta)
}
return(df.1)
}
# Estimating functions for the conditional score method.
est.cs <- function(betain, y, w1, tau, sigma.sq.u) {
D1 <- length(y)
YK <- 1:tau
Delta <- w1 + y*betain[2]*sigma.sq.u
eta <- betain[1] + betain[2]*Delta
expt1 <- exp(outer(eta, YK, "*") - 0.5*betain[2]^2*sigma.sq.u*matrix(YK^2, D1, tau, byrow = TRUE))
expt2 <- matrix(choose(tau, YK), D1, tau, byrow = TRUE)*expt1
normc <- apply(expt2, 1, sum)
PY.CD <- expt2/normc
EY.CD <- apply(matrix(YK, D1, tau, byrow = TRUE)*PY.CD, 1, sum, na.rm = TRUE)
score.0 <- cbind(rep(1, D1), Delta)*(c(y - EY.CD))
score.1 <- c()
for(kk in 1:D1) {
scoreAA <- score.0[kk, ]
if (is.matrix(scoreAA)) score.1a <- apply(scoreAA, 2, sum)
if (!is.matrix(scoreAA)) score.1a <- scoreAA
score.1 <- rbind(score.1, score.1a)
}
score <- apply(score.1, 2, sum)
return(score)
}
# Nhat calculation for the conditional score method.
N.CS.est <- function(beta.est, y, w1, tau, sigma.sq.u) {
D1 <- length(y)
YK <- 1:tau
Delta <- w1 + y*beta.est[2]*sigma.sq.u
eta <- beta.est[1] + beta.est[2]*Delta
expt1 <- exp(outer(eta, YK, "*") - 0.5*beta.est[2]^2*sigma.sq.u*matrix(YK^2, D1, tau, byrow = TRUE))
expt2 <- matrix(choose(tau, YK), D1, tau, byrow = TRUE)*expt1
normc <- apply(expt2, 1, sum)
pbar1 <- 1 - (1/(1 + normc))
N.hat <- sum(1/pbar1)
return(N.hat)
}
est.cs2 <- function(betain, y, w1, tau, sigma.sq.u) {
D1 <- length(y)
YK <- 1:tau
Delta <- w1 + y*betain[2]*sigma.sq.u
eta <- betain[1] + betain[2]*Delta
expt1 <- exp(outer(eta, YK, "*") - 0.5*betain[2]^2*sigma.sq.u*matrix(YK^2, D1, tau, byrow = TRUE))
expt2 <- matrix(choose(tau, YK), D1, tau, byrow = TRUE)*expt1
normc <- apply(expt2, 1, sum)
PY.CD <- expt2/normc
EY.CD <- apply(matrix(YK, D1, tau, byrow = TRUE)*PY.CD, 1, sum, na.rm = TRUE)
score.0 <- cbind(rep(1, D1), Delta)*(c(y - EY.CD))
score <- apply(score.0, 2, sum)
return(score)
}
est.cs3 <- function(betain, y, w1, tau, sigma.sq.u) {
D1 <- length(y)
YK <- 1:tau
Delta <- w1 + y*betain[2]*sigma.sq.u
eta <- betain[1] + betain[2]*Delta
expt1 <- exp(outer(eta, YK, "*") - 0.5*betain[2]^2*sigma.sq.u*matrix(YK^2, D1, tau, byrow = TRUE))
expt2 <- matrix(choose(tau, YK), D1, tau, byrow = TRUE)*expt1
normc <- apply(expt2, 1, sum)
PY.CD <- expt2/normc
EY.CD <- apply(matrix(YK, D1, tau, byrow = TRUE)*PY.CD, 1, sum, na.rm = TRUE)
score.0 <- cbind(rep(1, D1), Delta)*(c(y - EY.CD))
return(score.0)
}
# CS variance calculation function.
var.CS <- function(beta.est, y, w1, tau, sigma.sq.u) {
D1 <- length(y)
YK <- 1:tau
Delta <- w1 + y*beta.est[2]*sigma.sq.u
eta <- beta.est[1] + beta.est[2]*Delta
expt1 <- exp(outer(eta, YK, "*") - 0.5*beta.est[2]^2*sigma.sq.u*matrix(YK^2, D1, tau, byrow = TRUE))
expt2 <- matrix(choose(tau, YK), D1, tau, byrow = TRUE)*expt1
normc <- apply(expt2, 1, sum)
cprob <- c()
for(kk in 1:D1) {
cprob1 <- sum(1/normc[kk])
cprob <- c(cprob, cprob1)
}
pbar1 <- 1/(1 + (1/y)*(cprob))
ScA <- ScA <- t(est.cs3(beta.est, y, w1, tau, sigma.sq.u))%*%(est.cs3(beta.est, y, w1, tau, sigma.sq.u))
vv0 <- nleqslv(beta.est, est.cs2, y = y, w1 = w1, tau = tau, sigma.sq.u = sigma.sq.u, jacobian = T, method = c("Newton"))$jac
vv <- (solve(vv0))%*%ScA%*%t(solve(vv0))
aa <- df.1(N.CS.est, beta.est, y, w1, tau, sigma.sq.u)
bb <- matrix(aa[1, 1:2], 2, 1)
vv.2 <- t(bb)%*%vv%*%bb
var.N <- (sum((1 - pbar1)/(pbar1*pbar1)) + vv.2)
var.p <- c(diag(vv), var.N)
return(var.p)
}
# One (quadratic) covariate predict function (output is the RMSE).
predict_B_glm <- function(mod, datTest, sigma.sq.uTest, B, eta_true = NA) {
n <- nrow(datTest)
sigma.sq.u1 <- sigma.sq.uTest
K1 <- length(mod$coef)
U1_j <- rnorm(n*B, 0, sd = sqrt(rep(sigma.sq.u1, B)))
X1_j <- rep(datTest[, 2], B) - U1_j
if (K1 == 2) X <- data.frame(cbind(rep(1, B*n), X1_j))
if (K1 == 3) X <- data.frame(cbind(rep(1, B*n), X1_j, (X1_j)^2))
colnames(X) <- names(coef(mod))
etaPred <- as.matrix(X)%*%coef(mod)
RMSE <- sqrt(mean((rep(eta_true, B) - etaPred)^2, na.rm = TRUE))
values <- list(RMSE = RMSE)
return(values)
}
predict_B_gam <- function(mod, datTest, sigma.sq.uTest, B, eta_true = NA) {
n <- nrow(datTest)
sigma.sq.u1 <- sigma.sq.uTest
U1_j <- rnorm(n*B, 0, sd = sqrt(rep(sigma.sq.u1, B)))
X1_j <- rep(datTest[, 2], B) - U1_j
datTest_new <- data.frame(cbind(X1_j))
colnames(datTest_new) <- "x1"
etaPred <- predict(mod, newdata = datTest_new, type = "link")
RMSE <- sqrt(mean((rep(eta_true, B) - etaPred)^2, na.rm = TRUE))
values <- list(RMSE = RMSE)
return(values)
}
#------------------------------------------------------------------------------------------------------
# Multiple plot function for ggplot.
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects).
#
# - cols: Number of columns in layout.
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1, 2, 3, 3), nrow = 2, byrow = TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
require(grid)
# Make a list from the ... arguments and plotlist.
plots <- c(list(...), plotlist)
numPlots <- length(plots)
# If layout is NULL, then use 'cols' to determine layout.
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
if (is.null(layout)) layout <- matrix(seq(1, cols*ceiling(numPlots/cols)), ncol = cols, nrow = ceiling(numPlots/cols))
if (numPlots == 1) print(plots[[1]])
# Set up the page.
else{
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location.
# Get the i, j matrix positions of the regions that contain this subplot.
for(i in 1:numPlots) {
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, layout.pos.col = matchidx$col))
}
}
}
#------------------------------------------------------------------------------------------------------
# Simulation functions (Section 5).
# A coverage probability function.
cov.pc <- function(true, est, esd, N.sim) {
p <- length(true)
count <- rep(0, p)
for(i in 1:p) {
count[i] <- length(which(((est[, i] - 1.96*esd[, i]) < true[i] & true[i] < (est[, i] + 1.96*esd[, i])) == TRUE))/N.sim
}
count
}
# A coverage probability and interval score function.
cov.pc_N <- function(true, est, esd, N.sim) {
count1 <- length(which(((est - 1.96*esd) < true & true < (est + 1.96*esd)) == TRUE))/N.sim
list(cp = count1)
}
# Simulation function for a logistic regression ME and missing CR sim.
genCRdata <- function(N, beta, tau, sigma.sq.u, par.int) {
x1 <- rnorm(N, 0, sd = 1)
if (par.int == 2) X <- cbind(rep(1, N), x1)
if (par.int == 3) X <- cbind(rep(1, N), x1, x1^2)
if (par.int == "spline") X <- NA
w1 <- x1 + rnorm(N, 0, sd = sqrt(sigma.sq.u)) # Add/contaminate the covariate with error.
if (par.int == 2) {
W <- cbind(rep(1, N), w1)
colnames(W) <- c("Intercept", "w1")
}
if (par.int == 3) {
W <- cbind(rep(1, N), w1, w1^2)
colnames(W) <- c("Intercept", "w1", "I(w1^2)")
}
if (par.int == "spline") W <- NA
if (par.int != "spline") eta <- X%*%beta
if (par.int == "spline") eta <- sin(0.85*x1)
if (par.int == "spline") eta <- cos(2*x1) + 2
p <- 1/(1 + exp(-drop(eta)))
y1 <- matrix(runif(N*tau) < p, N, tau)
y <- apply(y1, 1, sum)
y2 <- y1 + 0
hist <- y2[y > 0, ]
X <- X[y > 0, ]
W <- W[y > 0, ]
list(hist = hist, y = apply(hist, 1, sum), W = W, X = X)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.