Nothing
tobit2Intfit <- function(YS, XS, YO, XO, boundaries, start = "ml",
weights = NULL, printLevel = 0, returnLogLikStart = FALSE,
maxMethod = "BHHH",
...) {
## Fit intervaL regression model with sample selection
## The model is as follows:
##
## The latent variables are:
## YS* = XS'BS + u1
## YO* = XO'BO + u2
##
## The observables are:
## / 0 if YS* <= 0
## YS = \ 1 if YS* > 0
##
## / NA if YS = 0
## | 1 if a1 < YO* <= a2 & YS = 1
## YO = | 2 if a2 < YO* <= a3 & YS = 1
## | ...
## \ M if a(M) < YO* <= a(M+1) & YS = 1
##
## M number of intervals
##
## YS binary or logical vector, 0 (FALSE) corresponds to
## YO not observed, 1 (TRUE) if observed
## XS matrix of explanatory variables for selection equation,
## should include exclusion restriction
## YO outcome vector: vector of integers with values between 1 and M
## XS matrix of explanatory variables for outcomes
## ... additional parameters for maxLik
##
## Result:
## Object of class 'selection', derived from 'maxLik'.
## Includes all the components of maxLik and additionally
## ...
##
loglik <- function( beta) {
betaS <- beta[ibetaS]
betaO <- beta[ibetaO]
sigma <- exp(beta[iSigma])
rho <- tanh(beta[iRho])
if( ( rho < -1) || ( rho > 1)) return(NA)
vcovMat <- matrix(c(1,-rho,-rho,1), 2, 2)
XS.b <- drop(XS %*% betaS)
XO.b <- drop(XO %*% betaO)
# pre-compute the difference between the CDF of the bivariate normal
# distribution for the interval between the boundaries
pmvnDiff <- rep( NA, nObs )
for( i in which(YS==1) ) {
pmvnDiff[i] <-
pmvnorm( upper = c( ( boundaries[ YO[i] + 1 ] - XO.b[i] ) / sigma,
XS.b[i] ), sigma = vcovMat ) -
pmvnorm( upper = c( ( boundaries[ YO[i] ] - XO.b[i] ) / sigma,
XS.b[i] ), sigma = vcovMat )
}
pmvnDiff <- pmax( pmvnDiff, .Machine$double.eps )
loglik <- rep( NA, nObs )
## YS == 0, YO == NA
loglik[YS==0] <- pnorm( -XS.b[YS==0], log.p = TRUE )
## YS == 1
loglik[YS==1] <- log( pmvnDiff[YS==1] )
## --- gradient ---
grad <- matrix(0, nObs, nParam)
# pre-compute the difference between the PDF of the bivariate normal
# distribution for the interval between the boundaries
dmvnDiff <- rep( NA, nObs )
for( i in which(YS==1) ) {
dmvnDiff[i] <-
dmvnorm( x = c( ( boundaries[ YO[i] ] - XO.b[i] ) / sigma,
XS.b[i] ), sigma = vcovMat ) -
dmvnorm( x = c( ( boundaries[ YO[i] + 1 ] - XO.b[i] ) / sigma,
XS.b[i] ), sigma = vcovMat )
}
# gradients for the parameters for selection into policy (betaS)
grad[YS==0, ibetaS] <-
- dnorm( -XS.b[YS==0] ) * XS[YS==0, ] / pnorm( -XS.b[YS==0] )
grad[YS==1, ibetaS] <- (
pnorm( ( ( boundaries[ YO[YS==1] + 1 ] - XO.b[YS==1] ) / sigma
+ rho * XS.b[YS==1] ) / sqrt( 1 - rho^2 ) ) -
pnorm( ( ( boundaries[ YO[YS==1] ] - XO.b[YS==1] ) / sigma
+ rho * XS.b[YS==1] ) / sqrt( 1 - rho^2 ) ) ) *
dnorm( XS.b[YS==1] ) * XS[ YS==1, ] /
pmvnDiff[YS==1]
# gradients for the parameters for the outcome (betaO)
grad[YS==1, ibetaO] <- (
pnorm( ( XS.b[YS==1]
+ rho * ( ( boundaries[ YO[YS==1] + 1 ] - XO.b[YS==1] ) / sigma )
) / sqrt( 1 - rho^2 ) ) *
dnorm( ( boundaries[ YO[YS==1] + 1 ] - XO.b[YS==1] ) / sigma ) -
pnorm( ( XS.b[YS==1]
+ rho * ( ( boundaries[ YO[YS==1] ] - XO.b[YS==1] ) / sigma )
) / sqrt( 1 - rho^2 ) ) *
dnorm( ( boundaries[ YO[YS==1] ] - XO.b[YS==1] ) / sigma ) ) *
( -XO[ YS==1, ] / sigma ) /
pmvnDiff[YS==1]
# gradient for the standard deviation (sigma)
grad[YS==1, iSigma] <- (
ifelse( is.infinite( boundaries[ YO[YS==1] + 1 ] ), 0,
pnorm( ( XS.b[YS==1] + rho *
( ( boundaries[ YO[YS==1] + 1 ] - XO.b[YS==1] ) / sigma ) ) /
sqrt( 1 - rho^2 ) ) *
dnorm( ( boundaries[ YO[YS==1] + 1 ] - XO.b[YS==1] ) / sigma ) *
( ( XO.b[YS==1] - boundaries[ YO[YS==1] + 1 ] ) / sigma^2 ) ) -
ifelse( is.infinite( boundaries[ YO[YS==1] ] ), 0,
pnorm( ( XS.b[YS==1] + rho *
( ( boundaries[ YO[YS==1] ] - XO.b[YS==1] ) / sigma ) ) /
sqrt( 1 - rho^2 ) ) *
dnorm( ( boundaries[ YO[YS==1] ] - XO.b[YS==1] ) / sigma ) *
( ( XO.b[YS==1] - boundaries[ YO[YS==1] ] ) / sigma^2 ) ) ) *
sigma / ( pmvnDiff[YS==1] )
# gradient for the correlation parameter (rho)
grad[YS==1, iRho] <- ( dmvnDiff[YS==1] * (1 - rho^2) ) / pmvnDiff[YS==1]
attr(loglik, "gradient") <- grad
return(loglik)
}
gradlik <- function(x) {
l <- loglik(x)
return(attr(l, "gradient"))
}
YOorig <- YO
YO <- as.integer( YO )
if( min( YO[YS==1] ) <= 0 ) {
stop( "YO should only have strictly positive integer values" )
}
if( 0 %in% table(YO)) {
stop( "At least one intervals does not contain observations")
}
nInterval <- max( YO[YS==1] )
if( length( boundaries ) != nInterval + 1 ) {
stop( "argument 'boundaries' must have (number of intervals + 1 = ",
nInterval + 1, ") elements but it has ", length( boundaries ),
" elements" )
}
if( !all( sort( boundaries ) == boundaries ) ) {
stop( "the boundaries in the vector definded by argument 'boundaries' ",
"must be in ascending order" )
}
if( is.factor( YOorig ) ){
intervals <- data.frame( YO = levels( YOorig ) )
} else {
intervals <- data.frame( YO = 1:nInterval )
}
intervals$lower <- boundaries[ - length(boundaries) ]
intervals$upper <- boundaries[-1]
intervals$count <- sapply( c( 1:nInterval ),
function(x) sum( YO[ YS == 1 ] == x, na.rm = TRUE ) )
if( printLevel >= 1 ) {
print( intervals )
}
## If no starting values for the parameters are given, 2-step Heckman is
## estimated with first stage probit and second stage OLS on interval
## midpoints
# Calculating Interval midpoints
if( is.null( start ) ) {
start <- "ml"
}
if( is.numeric(start) ){
if( length(start) != (ncol(XS) + ncol(XO) + 2) ) {
stop( "The vector of starting values has an incorrect length.",
" Number of parameters: ", ncol(XS) + ncol(XO) + 2,
". Length of provided vector: ", length( start ) )
}
startVal <- start
} else if( start %in% c( "ml", "2step" ) ) {
intMeans <- ( intervals$lower + intervals$upper ) / 2
# For infinite boundaries we use mean interval width as value
intWidths <- intervals$upper - intervals$lower
meanWidth <- mean( intWidths[ is.finite( intWidths ) ] )
negInf <- is.infinite( intMeans ) & intMeans < 0
if( any( negInf ) ) {
intMeans[ negInf ] <- intervals$upper[ negInf ] - meanWidth
}
posInf <- is.infinite( intMeans ) & intMeans > 0
if( any( posInf ) ) {
intMeans[ posInf ] <- intervals$lower[ posInf ] + meanWidth
}
yMean <- intMeans[YO]
# estimation as a normal tobit-2 model (either by ML or the 2-step method)
Est <- heckit( YS ~ XS - 1, yMean ~ XO - 1, method = start )
# Extracting starting values
startVal <- as.numeric(coef(Est))
if(start == "2step") {
startVal <- startVal[ - ( length( startVal ) - 2 ) ]
}
startVal[length(startVal)-1] <- log(startVal[length(startVal)-1]^2)
startVal[length(startVal)] <- atanh(startVal[length(startVal)])
} else {
stop( "argument 'start' must be \"ml\", \"2step\", or",
" a numeric vector" )
}
## ---------------
NXS <- ncol( XS )
if(is.null(colnames(XS))) {
colnames(XS) <- paste0( rep( "XS", NXS ),
seq( from = 1, length.out = NXS ) )
}
NXO <- ncol( XO )
if(is.null(colnames(XO))) {
colnames(XO) <- paste0( rep( "XO", NXO ),
seq( from = 1, length.out = NXO ) )
}
nObs <- length( YS )
NO <- length( YS[YS > 0] )
## parameter indices
ibetaS <- seq( from = 1, length.out = NXS )
ibetaO <- seq( from = NXS+1, length.out = NXO )
iSigma <- NXS + NXO + 1
iRho <- NXS + NXO + 2
nParam <- iRho
# names of parameters (through their starting values)
names( startVal ) <-
c( colnames( XS ), colnames( XO ), "logSigma", "atanhRho" )
# weights
if( !is.null( weights ) ) {
stop( "weights have not been implemented yet. Sorry!" )
}
## output, if asked for it
if( printLevel > 0) {
cat("YO observed:", NO, "times; not observed:", nObs - NO,
"times:\n")
cat( "Number of intervals: ", nInterval, "\n" )
print(table(YS, YO, exclude=NULL))
cat( "Boundaries:\n")
print(boundaries)
cat( "Initial values:\n")
print(startVal)
}
if( printLevel > 1) {
cat( "Log-likelihood value at initial values:\n")
print(loglik(startVal))
}
# browser()
# # check if the likelihood values of all possible outcomes sum up to one
# YS[] <- 0; ll0 <- loglik( startVal )
# YS[] <- 1; YO[] <- 1; ll11 <- loglik( startVal )
# YS[] <- 1; YO[] <- 2; ll12 <- loglik( startVal )
# YS[] <- 1; YO[] <- 3; ll13 <- loglik( startVal )
# all.equal( exp(ll0) + exp(ll11) + exp(ll12) + exp(ll13), rep( 1, nObs ) )
# # check analytical derivatives
# compareDerivatives(loglik, gradlik, t0=startVal )
# range(numericGradient(loglik, t0=startVal)-gradlik(startVal))
if( returnLogLikStart ) {
return( loglik( startVal ) )
}
## estimate
result <- maxLik(loglik,
start=startVal,
method=maxMethod,
print.level = printLevel, ... )
if( result$code == 100 ) {
stop( "maxLik: Return code 100: Initial value out of range" )
}
result$tobitType <- 2
result$method <- "ml"
result$start <- startVal
result$intervals <- intervals
# Calculating sigma, sigmaSq, and rho with standard errors
result$coefAll <- c( result$estimate,
sigma = unname( exp( result$estimate[ "logSigma" ] ) ),
sigmaSq = unname( exp( 2 * result$estimate[ "logSigma" ] ) ),
rho = unname( tanh( result$estimate[ "atanhRho" ] ) ) )
jac <- cbind( diag( length( result$estimate ) ),
matrix( 0, length( result$estimate ), 3 ) )
rownames( jac ) <- names( result$estimate )
colnames( jac ) <- c( names( result$estimate ), "sigma", "sigmaSq", "rho" )
jac[ "logSigma", "sigma" ] <- exp( result$estimate[ "logSigma" ] )
jac[ "logSigma", "sigmaSq" ] <- 2 * exp( 2 * result$estimate[ "logSigma" ] )
jac[ "atanhRho", "rho" ] <- 1 - ( tanh( result$estimate[ "atanhRho" ] ) )^2
result$vcovAll <- t( jac ) %*% vcov( result ) %*% jac
class( result ) <- c( "selection", class( result ) )
return( result )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.