knitr::opts_chunk$set( warning = FALSE, message = FALSE, collapse = TRUE, comment = "#>" )
Generate data with a non-random sample selection mechanism and exclusion restriction
library(GJRM) set.seed(0) n <- 2000 Sigma <- matrix(0.5, 2, 2); diag(Sigma) <- 1 u <- rMVN(n, rep(0,2), Sigma) SigmaC <- matrix(0.5, 3, 3); diag(SigmaC) <- 1 cov <- rMVN(n, rep(0,3), SigmaC) cov <- pnorm(cov) bi <- round(cov[,1]); x1 <- cov[,2]; x2 <- cov[,3] f11 <- function(x) -0.7*(4*x + 2.5*x^2 + 0.7*sin(5*x) + cos(7.5*x)) f12 <- function(x) -0.4*( -0.3 - 1.6*x + sin(5*x)) f21 <- function(x) 0.6*(exp(x) + sin(2.9*x)) ys <- 0.58 + 2.5*bi + f11(x1) + f12(x2) + u[, 1] > 0 y <- -0.68 - 1.5*bi + f21(x1) + + u[, 2] > 0 yo <- y*(ys > 0) dataSim <- data.frame(y, ys, yo, bi, x1, x2)
Testing the hypothesis of absence of non-random sample selection...
LM.bpm(list(ys ~ bi + s(x1) + s(x2), yo ~ bi + s(x1)), dataSim, Model = "BSS")
The p-value suggests presence of sample selection, hence fit a bivariate model.
For a semiparametric sample selection bivariate probit model, the first equation MUST be the selection equation.
out <- gjrm(list(ys ~ bi + s(x1) + s(x2), yo ~ bi + s(x1)), data = dataSim, Model = "BSS", margins = c("probit", "probit")) conv.check(out) gt.bpm(out)
Let us compare the two summary outputs. The second output produces a summary of the results obtained when selection bias is not accounted for.
summary(out) summary(out$gam2)
Corrected predicted probability that 'yo' is equal to 1.
mb(ys, yo, Model = "BSS") prev(out, hd.plot = TRUE) prev(out, type = "univariate", hd.plot = TRUE)
The estimated smooth function plots are as follows. The red line is the true curve, whereas the blue line is the univariate model curve not accounting for selection bias.
x1.s <- sort(x1[dataSim$ys>0]) f21.x1 <- f21(x1.s)[order(x1.s)]-mean(f21(x1.s)) plot(out, eq = 2, ylim = c(-1.65,0.95)); lines(x1.s, f21.x1, col="red") plot(out$gam2, se = FALSE, col = "blue", ylim = c(-1.65,0.95), ylab = "", rug = FALSE)
Try a Clayton copula model...
outC <- gjrm(list(ys ~ bi + s(x1) + s(x2), yo ~ bi + s(x1)), data = dataSim, Model = "BSS", BivD = "C0", margins = c("probit", "probit")) conv.check(outC) summary(outC) prev(outC)
library(mice) ys <- dataSim$ys dataSim$yo[dataSim$ys == FALSE] <- NA dataSim <- dataSim[, -c(1:2)] imp <- mice(dataSim, m = 1, method = c("copulaSS", "", "", ""), mice.formula = outC$mice.formula, margins = outC$margins, BivD = outC$BivD, maxit = 1) comp.yo <- dataSim$yo comp.yo[ys == 0] <- imp$imp$yo[[1]] mean(comp.yo)
See also ?hiv
for additional information.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.