Nothing
#' Truncated response model
#'
#' Estimation of models for which the response is truncated, either on
#' censored or truncated samples using OLS, NLS, maximum
#' likelihood, two-steps estimators or trimmed estimators
#'
#' @name tobit1
#' @param formula a symbolic description of the model; if two right
#' hand sides are provided, the second one described the set of
#' instruments if `scedas` is `NULL`, which is the
#' default. Otherwise, the second part indicates the set of
#' covariates for the variance function
#' @param data,subset,weights,na.action,offset,contrasts see `lm`
#' @param start an optional vector of starting values
#' @param left,right left and right truncation points for the response
#' The default is respectively 0 and +Inf which corresponds to the
#' most classic (left-zero truncated) tobit model
#' @param scedas the functional form used to specify the conditional
#' variance, either `"exp"` or `"pnorm"`
#' @param sample either `"censored"` (the default) to estimate the
#' censored (tobit) regression model or `"truncated"` to estimated
#' the truncated regression model
#' @param method one of `"ml"` for maximum likelihood, `"lm"` for
#' (biased) least squares estimators, `"twostep"` for two-steps
#' consistent estimators, `"trimmed"` for symetrically censored
#' estimator, `"minchisq"` and `"test"`. The last two are only
#' relevant for instrumental variable estimation (when the formula
#' is a two-parts formula and `scedas` is `NULL`)
#' @param opt optimization method
#' @param maxit maximum number of iterations
#' @param trace printing of intermediate result
#' @param check_gradient if `TRUE` the numeric gradient and hessian
#' are computed and compared to the analytical gradient and
#' hessian
#' @param object a `tobit1` object
#' @param ... further arguments
#' @importFrom stats binomial coef dnorm glm lm model.matrix
#' model.response pnorm sigma df.residual fitted logLik
#' model.frame printCoefmat residuals terms vcov nobs
#' model.weights .getXlevels predict delete.response predict
#' update
#' @keywords models
#' @return An object of class `c("tobit1", "micsr")`, see
#' `micsr::micsr` for further details.
#' @author Yves Croissant
#' @references \insertRef{POWE:86}{micsr}
#' @examples
#' charitable$logdon <- with(charitable, log(donation) - log(25))
#' ml <- tobit1(logdon ~ log(donparents) + log(income) + education +
#' religion + married + south, data = charitable)
#' scls <- update(ml, method = "trimmed")
#' tr <- update(ml, sample = "truncated")
#' nls <- update(tr, method = "nls")
#' @export
tobit1 <- function(formula, data, subset, weights, na.action, offset, contrasts = NULL,
start = NULL, left = 0, right = Inf,
scedas = NULL,
sample = c("censored", "truncated"),
method = c("ml", "lm", "twostep", "trimmed",
"nls", "minchisq", "test"),
opt = c("bfgs", "nr", "newton"),
maxit = 100, trace = 0,
check_gradient = FALSE,
...){
.call <- match.call()
.method <- match.arg(method)
.sample <- match.arg(sample)
zerotrunc <- ifelse(left == 0 & is.infinite(right) & (right > 0), TRUE, FALSE)
.scedas <- scedas
if (! is.null(scedas)){
if (! scedas %in% c("exp", "pnorm"))
stop("scedas should be either equal to exp or to pnorm")
.scedas <- .scedas
}
mf <- cl <- match.call(expand.dots = FALSE)
.formula <- Formula(formula)
if (length(.formula)[2] == 2 & is.null(.scedas)){
mf$model <- "tobit"
if (! .method %in% c("twostep", "minchisq", "ml", "test"))
stop("method should be one of twostep, minchisq, ml and test")
mf$method <- .method
mf[[1L]] <- as.name("ivldv")#quote(micsr::ivldv())
result <- eval(mf, parent.frame())
result$call <- .call
return(result)
} else {
if (! .method %in% c("twostep", "ml", "nls", "lm", "trimmed"))
stop("method should be one of twostep, ml, nls and lm")
}
.formula <- mf$formula <- Formula(formula)
m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"),
names(mf), 0L)
# construct the model frame and components
mf <- mf[c(1L, m)]
mf[[1L]] <- quote(stats::model.frame)
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
if (length(.formula)[2] > 1){
X <- model.matrix(.formula, mf, rhs = 1, contrasts.arg = contrasts)
formh <- formula(.formula, rhs = 2)
if (attr(terms(formh), "intercept") == 1L) formh <- update(formh, . ~ . - 1)
Z <- model.matrix(formh, mf)
}
else{
X <- model.matrix(.formula, mf, contrasts)
Z <- NULL
}
K <- ncol(X)
y <- model.response(mf)
N <- length(y)
wt <- model.weights(mf)
if (is.null(wt)) wt <- rep(1, N) else wt <- wt #/ mean(wt)
.offset <- model.offset(mf)
# identify the untruncated observations
P <- as.numeric(y > left & y < right)
Plog <- as.logical(P)
share_untr <- mean(Plog)
# check whether the sample is censored or truncated
is_cens_smpl <- any(P == 0)
if (.sample == "censored" & ! is_cens_smpl)
stop("the tobit model requires a censored sample")
if (.method != "twostep" & is_cens_smpl & .sample == "truncated"){
X <- X[Plog, ]
y <- y[Plog]
wt <- wt[Plog]
}
# compute the starting values if they are not provided
if (is.null(start)){
init_lm <- cl
init_lm <- cl[c(1L, m)]
init_lm[[1L]] <- as.name("lm")
if (length(.formula)[2] > 1)
init_lm$formula <- formula(.formula, rhs = 1)
init_lm <- eval(init_lm, parent.frame())
coefs_init <- c(coef(init_lm), sigma = sigma(init_lm))
}
# lm estimator, biased
if (.method == "lm"){
if (.sample == "truncated") result <- lm(y ~ X - 1, subset = Plog)
else result <- lm(y ~ X - 1)
coefs <- coef(result)
names(coefs) <- colnames(X)
result <- list(coefficients = coefs,
linear.predictor = as.numeric(X %*% coefs),
fitted.values = fitted(result),
residuals = residuals(result),
df.residual = df.residual(result),
vcov = vcov(result),
npar = structure(c(covariates = length(coefs)), default = "covariates"),
logLik = NA,
fomula = .formula,
model = model.frame(result),
terms = terms(model.frame(result)),
call = .call,
est_method = "lm")
}
# Two-steps estimator a la Heckman
if (.method == "twostep"){
if (! is_cens_smpl)
stop("2 steps estimator requires a censored sample")
pbt <- glm(P ~ X - 1, family = binomial(link = 'probit'))
Xbs <- pbt$linear.predictor
mls <- mills(Xbs)
if (.sample == "truncated"){
Z <- cbind(X, sigma = mls)
result <- lm(y ~ Z - 1, subset = Plog)
coefs <- coef(result)
names(coefs) <- colnames(Z)
}
else{
Z <- cbind(X * pnorm(Xbs), sigma = dnorm(Xbs))
result <- lm(y ~ Z - 1)
coefs <- coef(result)
names(coefs) <- colnames(Z)
}
# consistent estimate of the covariance matrix
sigma <- coefs["sigma"]
ZZM1 <- solve(crossprod(Z))
MRP <- mills(Xbs) * Xbs + mills(Xbs) ^ 2
SIGMA <- (1 - MRP)
A <- crossprod(Z * sqrt(SIGMA))
DX <- X * MRP
DXZ <- crossprod(DX, Z)
Q <- t(DXZ) %*% vcov(pbt) %*% DXZ
V <- sigma ^ 2 * ZZM1 %*% (A + Q) %*% ZZM1
result <- list(coefficients = coefs,
linear.predictor = as.numeric(Z %*% coefs),
fitted.values = fitted(result),
residuals = residuals(result),
df.residual = df.residual(result),
formula = .formula,
vcov = V,
logLik = NA,
npar = structure(c(covariates = length(coefs)), default = "covariates"),
model = model.frame(result),
terms = terms(model.frame(result)),
call = .call,
est_method = "twostep")
}
# trimmed estimator
if (.method == "trimmed"){
if (! zerotrunc) stop("trimmed estimator only implemented for simple tobit")
trim_trunc <- function(param, X, y, sum = TRUE, gradient = FALSE, hessian = TRUE){
bX <- as.numeric(X %*% param)
f <- (y - pmax(1 / 2 * y, bX)) ^ 2
if (sum) f <- sum(f)
if (gradient | hessian) ymin <- pmin(y, 2 * bX)
if (gradient){
grad <- - 2 * (y < (2 * bX)) * (y - bX) * X
if (sum) grad <- apply(grad, 2, sum)
attr(f, "gradient") <- grad
}
if (hessian) attr(f, "hessian") <- 2 * crossprod( (y < (2 * bX)) * X, X)
f
}
trim_cens <- function(param, X, y, sum = TRUE, gradient = FALSE, hessian = FALSE){
sgn <- function(x) ifelse(x > 0, 1, -1)
bX <- as.numeric(X %*% param)
f <- (bX < 0) * (y ^ 2 / 2) +
(bX > 0 & y < (2 * bX)) * ((y - bX) ^ 2)+
(bX > 0 & y > (2 * bX)) * (y ^ 2 / 2 - bX ^ 2)
if (sum) f <- sum(f)
if (gradient | hessian) ymin <- pmin(y, 2 * bX)
if (gradient){
grad <- - 2 * (bX > 0)* (ymin - bX) * X
if (sum) grad <- apply(grad, 2, sum)
attr(f, "gradient") <- grad
}
if (hessian) attr(f, "hessian") <- 2 * crossprod( (bX > 0) * sgn(2 * bX - y) * X, X)
f
}
coefs <- coefs_init[1:(length(coefs_init) - 1)]
if (.sample == "truncated") coefs <- newton(trim_trunc, coefs, trace = trace, X = X, y = y)
else coefs <- newton(trim_cens, coefs, trace = trace, X = X, y = y)
vcov_trim <- function(coefs){
# only relevant for censored samples
bX <- as.numeric(X %*% coefs)
N <- sum(y > 0 & y < (2 * bX))
N <- length(y)
C_mat <- crossprod( (y > 0 & y < (2 * bX)) * X, X) / N
D_mat <- crossprod( (bX > 0) * (pmin(y, 2 * bX) - bX) ^ 2 * X, X) / N
solve(C_mat) %*% D_mat %*% solve(C_mat) / N
}
bX <- as.numeric(X %*% coefs)
status <- rep(NA, nrow(X))
status[y == 0] <- "left-cens"
status[y > 2 * bX & bX > 0] <- "right-trimmed"
status[bX < 0 & y > 0] <- "neg-linpred"
.df.residual <- nrow(X) - length(coefs)
result <- list(coefficients = coefs,
linear.predictor = bX,
fitted.values = bX,
residuals = y - bX,
df.residual = .df.residual,
formula = .formula,
vcov = vcov_trim(coefs),
npar = structure(c(covariates = length(coefs)), default = "covariates"),
logLik = NA,
model = mf,
terms = NA,
status = status,
call = .call,
est_method = "trimmed")
}
# non-linear least-squares
if (.method == "nls"){
if (! zerotrunc | .sample != "truncated") stop("nls estimator only implemented for simple truncated model")
if (.sample == "truncated"){
fun_nls <- function(x, gradient = FALSE, hessian = FALSE){
beta <- x[1:ncol(X)]
sigma <- x[ncol(X) + 1]
e <- as.numeric(y - X %*% beta - sigma * mills(X %*% beta / sigma))
bXs <- as.numeric(X %*% beta) / sigma
if (gradient | hessian){
e_beta <- - (1 + mills(bXs, 1))
e_sigma <- mills(bXs, 1) * bXs - mills(bXs)
e_beta_beta <- - mills(bXs, 2) / sigma
e_beta_sigma <- mills(bXs, 2) * bXs / sigma
e_sigma_sigma <- - mills(bXs, 2) * bXs ^ 2 / sigma
grad_beta <- (- 2 * e * e_beta) * X
grad_sigma <- - 2 * e * e_sigma
}
if (hessian){
hess_beta_beta <- crossprod(- 2 * (e * e_beta_beta + e_beta ^ 2) * X, X)
hess_beta_sigma <- apply( - 2 * (e * e_beta_sigma + e_beta * e_sigma) * X, 2, sum)
hess_sigma_sigma <- sum( - 2 * (e * e_sigma_sigma + e_sigma ^ 2))
}
f <- sum(e ^ 2)
if (gradient){
g <- - cbind(grad_beta, grad_sigma)
attr(f, "gradient") <- g
}
if (hessian){
h <- - rbind(cbind(hess_beta_beta, hess_beta_sigma),
c(hess_beta_sigma, hess_sigma_sigma))
attr(f, "hessian") <- h
}
f
}
coefs <- coefs_init
coefs <- newton(fun_nls, coefs, trace = trace)
f <- fun_nls(coefs, gradient = TRUE, hessian = TRUE)
h <- attr(f, "hessian")
g_n <- attr(f, "gradient")
B <- crossprod(g_n)
A <- h
.vcov <- solve(A) %*% B %*% solve(A)# / length(y)
}
.terms <- terms(mf)
attr(.terms, ".Environment") <- NULL
bX <- as.numeric(X %*% coefs[1:ncol(X)])
result <- list(coefficients = coefs,
linear.predictor = bX,
fitted.values = bX,
residuals = y - bX,
formula = .formula,
df.residual = length(y) - length(coefs),
vcov = .vcov,
npar = structure(c(covariates = length(coefs)), default = "covariates"),
logLik = NA,
model = mf,
terms = .terms,
call = .call,
est_method = "nls")
}
# Maximum likelihood estimator
if (.method == "ml"){
coefs_init[1:K] <- coefs_init[1:K] / coefs_init[K + 1]
coefs_init[K + 1] <- 1 / coefs_init[K + 1]
coefs <- newton(lnl_tp_olsen, coefs_init, trace = trace, X = X, y = y, wt = wt,
sum = FALSE, left = left, right = right, direction = "max", sample = .sample)
coefs[1:K] <- coefs[1:K] / coefs[K + 1]
coefs[K + 1] <- 1 / coefs[K + 1]
if (is.null(Z)){
lnl_conv <- lnl_tp(coefs, X = X, y = y, wt = wt,
sum = FALSE, gradient = TRUE, hessian = TRUE,
left = left, right = right, sample = .sample)
npar <- structure(c(covariates = ncol(X), vcov = 1), default = c("covariates", "vcov"))
}
else{
sup_coef <- rep(0, ncol(Z))
names(sup_coef) <- paste("sig_", colnames(Z), sep = "")
coefs <- c(coefs, sup_coef)
coefs <- newton(lnl_tp, coefs, trace = TRUE, X = X, y = y, wt = wt,
Z = Z, scedas = .scedas,
left = left, right = right, direction = "max", sample = .sample)
lnl_conv <- lnl_tp(coefs, X = X, y = y, wt = wt,
scedas = .scedas, Z = Z,
sum = FALSE, gradient = TRUE, hessian = TRUE,
left = left, right = right, sample = .sample)
npar <- structure(c(covariates = ncol(X), scedas = ncol(Z) + 1), default = c("covariates"))
}
fun <- function(x) lnl_tp(x, X = X, y = y, wt = wt,
scedas = .scedas, Z = Z,
sum = TRUE, gradient = TRUE, hessian = TRUE,
left = left, right = right, sample = .sample)
if(check_gradient) z <- check_gradient(fun, coefs) else z <- NA
.hessian <- attr(lnl_conv, "hessian")
.info <- attr(lnl_conv, "info")
.gradient <- attr(lnl_conv, "gradient")
.logLik <- sum(as.numeric(lnl_conv))
beta <- coefs[1:K]
sigma <- coefs[K + 1]
.linpred <- as.numeric(X %*% beta)
h <- .linpred / sigma
Ppos <- pnorm(h)
Epos <- .linpred + sigma * mills(h)
.fitted <- data.frame(y = y, Ppos = Ppos, Epos = Epos, lp = .linpred)
class(.fitted) <- c("tbl_df", "tbl", "data.frame")
.vcov <- solve(- .hessian)
dimnames(.vcov) <- list(names(coefs), names(coefs))
.terms <- terms(mf)
attr(.terms, ".Environment") <- NULL
# tobit without covariates (mu and sigma), only for the standard tobit
if (left == 0 & is.infinite(right) & .sample == "censored"){
coef0 <- function(x){
mu2 <- mean(x[x > 0] ^ 2)
yb <- mean(x[x > 0])
pr <- mean(x > 0)
alpha <- qnorm(pr)
h <- (alpha + dnorm(alpha) / pnorm(alpha)) / yb
za <- function(h){
alpha <- (h * mu2 - 1 / h) / yb
pr * alpha - pr * h * yb + (1 - pr) * mills(- alpha)
}
.sig <- 1 / h
h_min <- 1 / (.sig * 10)
h_max <- 1 / (.sig / 2)
h_conv <- uniroot(za, c(0, h_max), tol = 1E-10)$root
alpha_conv <- (h_conv * mu2 - 1 / h_conv) / yb
.sig <- 1 / h_conv
.mu <- .sig * alpha_conv
.lnL <- (1 - pr) * pnorm(- alpha_conv, log.p = TRUE) - pr * log(2 * pi) / 2 +
pr * log(h_conv) - pr / 2 * h_conv ^ 2 * mu2 - pr / 2 * alpha_conv ^ 2 +
pr * alpha_conv * h_conv * yb
c(mu = .mu, sigma = .sig, lnl = .lnL)
}
values <- cbind(model = as.numeric(lnl_conv),
saturated = (- 1 / 2 * log(2 * pi)) * (y > 0) + 0 * (y == 0),
null = coef0(y)["lnl"])
.logLik <- apply(values * wt, 2, sum)
} else {
values <- as.numeric(lnl_conv)
.logLik <- c(model = sum(as.numeric(lnl_conv)))
}
d <- ifelse(y > 0, 1, 0)
gres <- - (1 - d) * sigma * dnorm(h) / (1 - pnorm(h)) + d * (y - .linpred)
gres <- - (y == 0) * sigma * dnorm(h) / (1 - pnorm(h)) + (y > 0) * (y - .linpred)
env <- new.env(parent = .GlobalEnv)
.sigma <- coefs["sigma"]
assign(".sigma", .sigma, envir = env)
.variance <- function(mu) .sigma ^ 2 * (1 + mills(mu / .sigma, deriv = 1)) * pnorm(mu / .sigma)
.gres <- function(mu) - (y == 0) * .sigma * dnorm(mu / .sigma) / (1 - pnorm(mu / .sigma)) + (y > 0) * (y - mu)
environment(.variance) <- env
.variance
.family <- structure(list(
link = "identity",
linkfun = function(mu) mu,
linkinv = function(eta) eta,
mu.eta = function(eta) 1,
variance = .variance),
class = "family")
result <- list(coefficients = coefs,
model = mf,
terms = mt,
value = values,
gradient = .gradient,
hessian = .hessian,
info = .info,
fitted.values = .fitted,
linear.predictor = .linpred,
logLik = .logLik,
# residuals = y - .fitted$Epos,
df.residual = length(y) - length(coefs),
npar = npar,
est_method = "ml",
call = .call,
na.action = attr(mf, "na.action"),
weights = wt,
offset = .offset,
contrasts = attr(X, "contrasts"),
xlevels = .getXlevels(mt, mf),
check_gradient = z,
family = .family,
residuals = gres)
# formula = .formula,
# vcov = .vcov,
}
structure(result, class = c("tobit1", "micsr"))
}
#' @rdname tobit1
#' @export
fitted.tobit1 <- function(object, ...) object$fitted.values$Ppos * object$fitted.values$Ppos
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.