#' Function for fit of the Generalized
#' Heckman Model
#'
#' @description
#' Estimates the parameters of the Generalized
#' Heckman model
#'
#' @return
#'
#' Returns a list with the following components.
#'
#' Coefficients: Returns a numerical vector with the best estimated values
#' of the model parameters;
#'
#' Value: The value of function to be minimized (or maximized) corresponding
#' to par.
#'
#' loglik: Negative of value. Minimum (or maximum) of the likelihood function
#' calculated from the estimated coefficients.
#'
#' counts: Component of the Optim function. A two-element integer vector
#' giving the number of calls to fn and gr respectively. This excludes
#' those calls needed to compute the Hessian, if requested, and any calls
#' to fn to compute a finite-difference approximation to the gradient.
#'
#' hessian: Component of the Optim function, with pre-defined option
#' hessian=TRUE. A symmetric matrix giving an estimate of the Hessian
#' at the solution found. Note that this is the Hessian of the unconstrained
#' problem even if the box constraints are active.
#'
#' fisher_infoHG: Fisher information matrix
#'
#' prop_sigmaHG: Square root of the Fisher information matrix diagonal
#'
#' level: Selection variable levels
#'
#' nObs: Numeric value representing the size of the database
#'
#' nParam: Numerical value representing the number of model parameters
#'
#' N0: Numerical value representing the number of unobserved entries
#'
#' N1: Numerical value representing the number of complete entries
#'
#' NXS: Numerical value representing the number of parameters of the
#' selection model
#'
#' NXO: Numerical value representing the number of parameters of the
#' regression model
#'
#' df: Numerical value that represents the difference between the size
#' of the response vector of the selection equation and the number of
#' model parameters
#'
#' aic: Numerical value representing Akaike's information criterion.
#'
#' bic: Numerical value representing Schwarz's Bayesian Criterion
#'
#' initial.value: Numerical vector that represents the input values
#' (Initial Values) used in the parameter estimation.
#'
#' NE: Numerical value that represents the number
#' of parameters related to the covariates fitted
#' to the dispersion parameter considering the
#' constant parameter.
#'
#' NV: Numerical value that represents the number
#' of parameters related to the covariates fitted
#' to the correlation parameter considering the
#' constant parameter.
#'
#' @details
#' The HeckmanGe() function fits a generalization of the Heckman sample
#' selection model, allowing sample selection bias and dispersion parameters
#' to depend on covariates. For more information, see
#' \insertCite{bastosBarreto;textual}{ssmodels}
#'
#' @param selection Selection equation.
#' @param outcome Primary Regression Equation.
#' @param outcomeS Matrix with Covariates for fit of the Dispersion Parameter.
#' @param outcomeC Matrix with Covariates for fit of the Correlation Parameter.
#' @param start initial values.
#' @param data Database.
#' @examples
#' data(MEPS2001)
#' attach(MEPS2001)
#' selectEq <- dambexp ~ age + female + educ + blhisp + totchr + ins + income
#' outcomeEq <- lnambx ~ age + female + educ + blhisp + totchr + ins
#' outcomeS <- cbind(age,female,totchr,ins)
#' outcomeC <- 1
#' HeckmanGe(selectEq, outcomeEq,outcomeS, outcomeC, data = MEPS2001)
#' @importFrom Rdpack reprompt
#' @references {
#' \insertAllCited{}
#' }
#' @export HeckmanGe
#' @export
HeckmanGe <- function(selection, outcome, outcomeS, outcomeC, data = sys.frame(sys.parent()), start = NULL) {
##############################################################################
# Extract model matrix and matrix from selection and regression equations
##############################################################################
mf <- match.call(expand.dots = FALSE)
m <- match(c("selection", "data", "subset"), names(mf), 0)
mfS <- mf[c(1, m)]
mfS$drop.unused.levels <- TRUE
mfS$na.action <- na.pass
mfS[[1]] <- as.name("model.frame")
names(mfS)[2] <- "formula"
# model.frame requires the parameter to be formula
mfS <- eval(mfS, parent.frame())
mtS <- terms(mfS)
XS <- model.matrix(mtS, mfS)
NXS <- ncol(XS)
YS <- model.response(mfS)
YSLevels <- levels(as.factor(YS))
############################## Regression Matrix #
m <- match(c("outcome", "data", "subset", "weights", "offset"), names(mf), 0)
mfO <- mf[c(1, m)]
mfO$na.action <- na.pass
mfO$drop.unused.levels <- TRUE
mfO$na.action <- na.pass
mfO[[1]] <- as.name("model.frame")
names(mfO)[2] <- "formula"
mfO <- eval(mfO, parent.frame())
mtO <- attr(mfO, "terms")
XO <- model.matrix(mtO, mfO)
NXO <- ncol(XO)
YO <- model.response(mfO)
#################### Dispersion Matrix #
E <- outcomeS
# NE <- ncol(E)
if (length(E) == 1) {
NE <- 1
Msigma <- cbind(rep(1, nrow(XO)))
} else {
NE <- dim(model.matrix(~E))[2] - 1
Msigma <- E
}
################## Correlation Matrix #
V <- outcomeC
if (length(V) == 1) {
NV <- 1
Mrho <- cbind(rep(1, nrow(XO)))
} else {
NV <- dim(model.matrix(~V))[2] - 1
Mrho <- V
}
############################ Likelihood #
loglik_gen <- function(start) {
NXS <- dim(model.matrix(~XS))[2] - 1
NXO <- dim(model.matrix(~XO))[2] - 1
if (length(E) == 1) {
NE <- 1
Msigma <- cbind(rep(1, nrow(XO)))
} else {
NE <- dim(model.matrix(~E))[2]
Msigma <- E
}
if (length(V) == 1) {
NV <- 1
Mrho <- cbind(rep(1, nrow(XO)))
} else {
NV <- dim(model.matrix(~V))[2]
Mrho <- V
}
## parameter indices
istartS <- 1:NXS
istartO <- seq(tail(istartS, 1) + 1, length = NXO)
ilambda <- seq(tail(istartO, 1) + 1, length = NE)
ikappa <- seq(tail(ilambda, 1) + 1, length = NV)
g <- start[istartS]
b <- start[istartO]
lambda <- start[ilambda]
# if(sigma < 0) return(NA)
kappa <- start[ikappa]
# if( ( rho < -1) || ( rho > 1)) return(NA)
mu2 <- XS %*% g
mu1 <- XO %*% b
# sigma <- exp(model.matrix(~E) %*% lambda) rho <- tanh(model.matrix(~1,data =
# data.frame(rep(1,nrow(XO))))%*% kappa)
if (NE == 1) {
sigma <- exp(model.matrix(~1, data = data.frame(rep(1, nrow(XO)))) %*%
lambda)
} else {
sigma <- exp(model.matrix(~Msigma) %*% lambda)
}
if (NV == 1) {
rho <- tanh(model.matrix(~1, data = data.frame(rep(1, nrow(XO)))) %*%
kappa)
} else {
rho <- tanh(model.matrix(~Mrho) %*% kappa)
}
z1 <- YO - mu1
z <- z1 * (1/sigma)
r <- sqrt(1 - rho^2)
A_rho <- 1/r
A_rrho <- rho/r
zeta <- mu2 * A_rho + z * A_rrho
ll <- ifelse(YS == 0, (pnorm(-mu2, log.p = TRUE)), dnorm(z, log = TRUE) -
log(sigma) + (pnorm(zeta, log.p = TRUE)))
sum(ll)
}
#######Gradient
gradlik_gen <- function(start) {
NXS <- dim(model.matrix(~XS))[2] - 1
NXO <- dim(model.matrix(~XO))[2] - 1
if (length(E) == 1) {
NE <- 1
Msigma <- cbind(rep(1, nrow(XO)))
} else {
NE <- dim(model.matrix(~E))[2]
Msigma <- E
}
if (length(V) == 1) {
NV <- 1
Mrho <- cbind(rep(1, nrow(XO)))
} else {
NV <- dim(model.matrix(~V))[2]
Mrho <- V
}
nObs <- length(YS)
NO <- length(YS[YS > 0])
nParam <- NXS + NXO + NE + NV
XS0 <- XS[YS == 0, , drop = FALSE]
XS1 <- XS[YS == 1, , drop = FALSE]
YO[is.na(YO)] <- 0
YO1 <- YO[YS == 1]
XO1 <- XO[YS == 1, , drop = FALSE]
ES0 <- Msigma[YS == 0, , drop = FALSE]
ES1 <- Msigma[YS == 1, , drop = FALSE]
VS0 <- Mrho[YS == 0, , drop = FALSE]
VS1 <- Mrho[YS == 1, , drop = FALSE]
N0 <- sum(YS == 0)
N1 <- sum(YS == 1)
M1 <- rep(1, N0 + N1)
u1 <- rep(1, N1) #YS=1
u2 <- rep(1, N0) #YS=0
## parameter indices
istartS <- 1:NXS
istartO <- seq(tail(istartS, 1) + 1, length = NXO)
ilambda <- seq(tail(istartO, 1) + 1, length = NE)
ikappa <- seq(tail(ilambda, 1) + 1, length = NV)
# if(sigma < 0) return(NA) ikappa <- tail(ilambda, 1) + 1
g <- start[istartS]
b <- start[istartO]
lambda <- start[ilambda]
# if(sigma < 0) return(matrix(NA, nObs, nParam))
kappa <- start[ikappa]
# if( ( rho < -1) || ( rho > 1)) return(matrix(NA, nObs, nParam))
mu20 <- as.numeric(XS0 %*% g)
mu21 <- as.numeric(XS1 %*% g)
mu11 <- as.numeric(XO1 %*% b)
# sigma0 <- exp(as.numeric(model.matrix(~ES0) %*% lambda)) sigma1 <-
# exp(as.numeric(model.matrix(~ES1) %*% lambda)) rho0 <-
# tanh(as.numeric(model.matrix(~VS0) %*% kappa)) rho1 <-
# tanh(as.numeric(model.matrix(~VS1) %*% kappa))
if (NE == 1) {
sigma0 <- exp(as.numeric(model.matrix(~ES0 - 1) %*% lambda))
sigma1 <- exp(as.numeric(model.matrix(~ES1 - 1) %*% lambda))
} else {
sigma0 <- exp(as.numeric(model.matrix(~ES0) %*% lambda))
sigma1 <- exp(as.numeric(model.matrix(~ES1) %*% lambda))
}
if (NV == 1) {
rho0 <- tanh(as.numeric(model.matrix(~VS0 - 1) %*% kappa))
rho1 <- tanh(as.numeric(model.matrix(~VS1 - 1) %*% kappa))
} else {
rho0 <- tanh(as.numeric(model.matrix(~VS0) %*% kappa))
rho1 <- tanh(as.numeric(model.matrix(~VS1) %*% kappa))
}
z1 <- YO1 - mu11
z <- z1/sigma1
r <- sqrt(1 - rho1^2)
A_rho <- 1/r
A_rrho <- rho1/r
# B <- (XS1.g + rho/sigma*u2)/r
zeta <- (mu21 * A_rho + z * A_rrho)
MZeta <- exp(dnorm(zeta, log = TRUE) - pnorm(zeta, log.p = TRUE))
Mmu2 <- exp(dnorm(-mu20, log = TRUE) - pnorm(-mu20, log.p = TRUE))
Q_rho <- mu21 * rho1 * ((A_rho)^2) + z * (1 + A_rrho^2)
Q_rrho <- mu21 * (1 + 2 * A_rrho^2) + 2 * z * rho1 * (1 + A_rrho^2)
gradient <- matrix(0, nObs, nParam)
gradient[YS == 0, istartS] <- -u2 * XS0 * Mmu2
gradient[YS == 1, istartS] <- u1 * XS1 * MZeta * A_rho
gradient[YS == 1, istartO] <- u1 * XO1 * (z - MZeta * A_rrho) * (1/sigma1)
if (NE == 1) {
gradient[YS == 1, ilambda] <- u1 * model.matrix(~ES1 - 1) * (z^2 - 1 -
MZeta * z * A_rrho)
} else {
gradient[YS == 1, ilambda] <- u1 * model.matrix(~ES1) * (z^2 - 1 - MZeta *
z * A_rrho)
}
if (NV == 1) {
gradient[YS == 1, ikappa] <- u1 * model.matrix(~VS1 - 1) * MZeta * A_rho *
((pracma::sech(as.numeric(model.matrix(~VS1 - 1) %*% kappa)))^2) *
(mu21 * rho1 * (A_rho^2) + z * (1 + (A_rrho^2)))
} else {
gradient[YS == 1, ikappa] <- u1 * model.matrix(~VS1) * MZeta * A_rho *
((pracma::sech(as.numeric(model.matrix(~VS1) %*% kappa)))^2) * (mu21 *
rho1 * (A_rho^2) + z * (1 + (A_rrho^2)))
}
colSums(gradient)
}
##############################Start#
if (is.null(start))
start <- step2(YS, XS, YO, XO)
####################################
ilambda <- rep(1, NE)
ikappa <- rep(0, NV)
if (length(V) == 1 & length(E) == 1) {
start <- c(start[(1:NXS)], start[((NXS + 1):(NXS + NXO))], start[(NXS + NXO +
1)], start[(NXS + NXO + 2)])
names(start) <- c(colnames(XS), colnames(XO), "sigma", colnames(E), "correlation")
} else {
if (length(V) == 1 & length(E) != 1) {
start <- c(start[(1:NXS)], start[((NXS + 1):(NXS + NXO))], start[(NXS +
NXO + 1)], ilambda, start[(NXS + NXO + 2)])
names(start) <- c(colnames(XS), colnames(XO), "interceptS", colnames(E),
"correlation")
} else {
if (length(V) != 1 & length(E) == 1) {
start <- c(start[(1:NXS)], start[((NXS + 1):(NXS + NXO))], start[(NXS +
NXO + 1)], start[(NXS + NXO + 2)], ikappa)
names(start) <- c(colnames(XS), colnames(XO), "sigma", "interceptC",
colnames(V))
} else {
start <- c(start[(1:NXS)], start[((NXS + 1):(NXS + NXO))], start[(NXS +
NXO + 1)], ilambda, start[(NXS + NXO + 2)], ikappa)
names(start) <- c(colnames(XS), colnames(XO), "interceptS", colnames(E),
"interceptC", colnames(V))
}
}
}
###################### optim function
theta_HG <- optim(start, loglik_gen, gradlik_gen, method = "BFGS", hessian = T,
control = list(fnscale = -1))
############################Results#
if (length(E) == 1 & length(V) == 1) {
names(theta_HG$par) <- c(colnames(XS), colnames(XO), "Sigma", colnames(E),
"Rho", colnames(V))
} else {
if (length(E) != 1 & length(V) == 1) {
names(theta_HG$par) <- c(colnames(XS), colnames(XO), "interceptS", colnames(E),
"rho")
} else {
if (length(E) == 1 & length(V) != 1) {
names(theta_HG$par) <- c(colnames(XS), colnames(XO), "Sigma", "interceptC",
colnames(V))
} else {
names(theta_HG$par) <- c(colnames(XS), colnames(XO), "interceptS",
colnames(E), "interceptC", colnames(V))
}
}
}
if (length(E) == 1 & length(V) == 1) {
theta_HG$par <- c(theta_HG$par[1:(NXS+NXO)], exp(theta_HG$par[NXS+NXO+1]),tanh(theta_HG$par[NXS+NXO+2]))
} else {
if (length(E) != 1 & length(V) == 1) {
theta_HG$par <- c(theta_HG$par[1:(NXS+NXO)], theta_HG$par[(NXS+NXO+1):(NXS + NXO + NE+1)],tanh(theta_HG$par[NXS + NXO + NE+2]))
} else {
if (length(E) == 1 & length(V) != 1) {
theta_HG$par <- c(theta_HG$par[1:(NXS+NXO)], exp(theta_HG$par[(NXS+NXO+NE)]), theta_HG$par[(NXS + NXO + NE+1):(NXS + NXO + NE + NV + 1)])
} else {
theta_HG$par <- theta_HG$par
}
}
}
a <- start
a1 <- theta_HG$par
a2 <- theta_HG$value
a3 <- theta_HG$counts[2]
a4 <- theta_HG$hessian
a5 <- solve(-a4)
a6 <- sqrt(diag(a5))
a7 <- YSLevels
a8 <- length(YS)
a9 <- length(start)
a10 <- sum(YS == 0)
a11 <- sum(YS == 1)
a12 <- ncol(XS)
a13 <- ncol(XO)
a14 <- (a8-a9)
a15 <- -2*a2 + 2*a9
a16 <- -2*a2 + a9*log(a8)
if (length(E) == 1) {
NE <- 1
} else {
NE <- dim(model.matrix(~E))[2]
}
if (length(V) == 1) {
NV <- 1
} else {
NV <- dim(model.matrix(~V))[2]
}
cl <- class(theta_HG)
result <- list(coefficients=a1,
value = a2,
loglik = -a2,
counts = a3,
hessian = a4,
fisher_infoHG = a5,
prop_sigmaHG = a6,
level = a7,
nObs = a8,
nParam = a9,
N0 = a10,
N1 = a11,
NXS = a12,
NXO = a13,
df = a14,
aic = a15,
bic = a16,
initial.value = a,
NE = NE,
NV = NV)
class(result) <- c("HeckmanGe", cl)
result
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.