# Nothing
################################################################################
# #
# R functions for the sfaR package #
# #
################################################################################
#------------------------------------------------------------------------------#
# Model: Sample Selection Stochastic Frontier Analysis #
# Data: Cross sectional data & Pooled data #
#------------------------------------------------------------------------------#
#' Sample selection in stochastic frontier estimation using cross-section data
#'
#' @description
#' \code{\link{sfaselectioncross}} is a symbolic formula based function for the
#' estimation of the stochastic frontier model in the presence of sample
#' selection. The model accommodates cross-sectional or pooled cross-sectional data.
#' The model can be estimated using different quadrature approaches or
#' maximum simulated likelihood (MSL). See Greene (2010).
#'
#' Only the half-normal distribution is possible for the one-sided error term.
#' Eleven optimization algorithms are available.
#'
#' The function also accounts for heteroscedasticity in both one-sided and
#' two-sided error terms, as in Reifschneider and Stevenson (1991), Caudill and
#' Ford (1993), Caudill \emph{et al.} (1995) and Hadri (1999).
#'
#' @aliases sfaselectioncross print.sfaselectioncross
#'
#' @param selectionF A symbolic (formula) description of the selection equation.
#' @param frontierF A symbolic (formula) description of the outcome (frontier) equation.
#' @param uhet A one-part formula to consider heteroscedasticity in the
#' one-sided error variance (see section \sQuote{Details}).
#' @param vhet A one-part formula to consider heteroscedasticity in the
#' two-sided error variance (see section \sQuote{Details}).
#' @param modelType Character string. Model used to solve the selection bias. Only the
#' model discussed in Greene (2010) is currently available.
#' @param logDepVar Logical. Informs whether the dependent variable is logged
#' (\code{TRUE}) or not (\code{FALSE}). Default = \code{TRUE}.
#' @param data The data frame containing the data.
#' @param subset An optional vector specifying a subset of observations to be
#' used in the optimization process.
#' @param weights An optional vector of weights to be used for weighted log-likelihood.
#' Should be \code{NULL} or a numeric vector with positive values. When \code{NULL},
#' a vector of ones is used.
#' @param wscale Logical. When \code{weights} is not \code{NULL}, a scaling transformation
#' is used such that the \code{weights} sum to the sample size. Default \code{TRUE}.
#' When \code{FALSE} no scaling is used.
#' @param S If \code{S = 1} (default), a production (profit) frontier is
#' estimated: \eqn{\epsilon_i = v_i-u_i}. If \code{S = -1}, a cost frontier is
#' estimated: \eqn{\epsilon_i = v_i+u_i}.
#' @param udist Character string. Distribution specification for the one-sided
#' error term. Only the half normal distribution \code{'hnormal'} is currently
#' implemented.
#' @param start Numeric vector. Optional starting values for the maximum
#' likelihood (ML) estimation.
#' @param method Optimization algorithm used for the estimation. Default =
#' \code{'bfgs'}. 11 algorithms are available: \itemize{ \item \code{'bfgs'},
#' for Broyden-Fletcher-Goldfarb-Shanno (see
#' \code{\link[maxLik:maxBFGS]{maxBFGS}}) \item \code{'bhhh'}, for
#' Berndt-Hall-Hall-Hausman (see \code{\link[maxLik:maxBHHH]{maxBHHH}}) \item
#' \code{'nr'}, for Newton-Raphson (see \code{\link[maxLik:maxNR]{maxNR}})
#' \item \code{'nm'}, for Nelder-Mead (see \code{\link[maxLik:maxNM]{maxNM}})
#' \item \code{'cg'}, for Conjugate Gradient (see \code{\link[maxLik:maxCG]{maxCG}})
#' \item \code{'sann'}, for Simulated Annealing (see \code{\link[maxLik:maxSANN]{maxSANN}})
#' \item \code{'ucminf'}, for a quasi-Newton type optimization with BFGS updating of the
#' inverse Hessian and soft line search with a trust region type monitoring of
#' the input to the line search algorithm (see \code{\link[ucminf:ucminf]{ucminf}})
#' \item \code{'mla'}, for general-purpose optimization based on
#' Marquardt-Levenberg algorithm (see \code{\link[marqLevAlg:mla]{mla}})
#' \item \code{'sr1'}, for Symmetric Rank 1 (see
#' \code{\link[trustOptim:trust.optim]{trust.optim}}) \item \code{'sparse'}, for trust
#' regions and sparse Hessian (see \code{\link[trustOptim:trust.optim]{trust.optim}}) \item
#' \code{'nlminb'}, for optimization using PORT routines (see
#' \code{\link[stats:nlminb]{nlminb}})}
#' @param hessianType Integer. If \code{1}, the analytic Hessian is
#' returned. If \code{2}, the BHHH Hessian is estimated (\eqn{g'g}). The BHHH Hessian is
#' estimated by default as the estimation is conducted in two steps.
#' @param lType Specifies the way the likelihood is estimated. Five possibilities are
#' available: \code{kronrod} for Gauss-Kronrod quadrature
#' (see \code{\link[stats:integrate]{integrate}}), \code{hcubature} and
#' \code{pcubature} for adaptive integration over hypercubes
#' (see \code{\link[cubature:hcubature]{hcubature}} and
#' \code{\link[cubature:pcubature]{pcubature}}), \code{ghermite} for Gauss-Hermite
#' quadrature (see \code{\link[fastGHQuad:gaussHermiteData]{gaussHermiteData}}), and
#' \code{msl} for maximum simulated likelihood. Default \code{ghermite}.
#' @param Nsub Integer. Number of subdivisions/nodes used for quadrature approaches.
#' Default \code{Nsub = 100}.
#' @param uBound Numeric. Upper bound for the inefficiency component when solving
#' integrals using quadrature approaches except Gauss-Hermite for which the upper
#' bound is automatically infinite (\code{Inf}). Default \code{uBound = Inf}.
#' @param simType Character string. If \code{simType = 'halton'} (Default),
#' Halton draws are used for maximum simulated likelihood (MSL). If
#' \code{simType = 'ghalton'}, Generalized-Halton draws are used for MSL. If
#' \code{simType = 'sobol'}, Sobol draws are used for MSL. If \code{simType =
#' 'uniform'}, uniform draws are used for MSL. (see section \sQuote{Details}).
#' @param Nsim Number of draws for MSL (default 100).
#' @param prime Prime number considered for Halton and Generalized-Halton
#' draws. Default = \code{2}.
#' @param burn Number of the first observations discarded in the case of Halton
#' draws. Default = \code{10}.
#' @param antithetics Logical. Default = \code{FALSE}. If \code{TRUE},
#' antithetics counterpart of the uniform draws is computed. (see section
#' \sQuote{Details}).
#' @param seed Numeric. Seed for the random draws.
#' @param itermax Maximum number of iterations allowed for optimization.
#' Default = \code{2000}.
#' @param printInfo Logical. Print information during optimization. Default =
#' \code{FALSE}.
#' @param intol Numeric. Integration tolerance for quadrature approaches
#' (\code{kronrod, hcubature, pcubature}).
#' @param tol Numeric. Convergence tolerance. Default = \code{1e-12}.
#' @param gradtol Numeric. Convergence tolerance for gradient. Default =
#' \code{1e-06}.
#' @param stepmax Numeric. Step max for \code{ucminf} algorithm. Default =
#' \code{0.1}.
#' @param qac Character. Quadratic Approximation Correction for \code{'bhhh'}
#' and \code{'nr'} algorithms. If \code{'stephalving'}, the step length is
#' decreased but the direction is kept. If \code{'marquardt'} (default), the
#' step length is decreased while also moving closer to the pure gradient
#' direction. See \code{\link[maxLik:maxBHHH]{maxBHHH}} and
#' \code{\link[maxLik:maxNR]{maxNR}}.
#' @param x an object of class sfaselectioncross (returned by the function \code{\link{sfaselectioncross}}).
#' @param ... additional arguments of frontier are passed to sfaselectioncross;
#' additional arguments of the print, bread, estfun, nobs methods are currently ignored.
#'
#' @details
#' The current model is an extension of Heckman's (1976, 1979) sample selection model to
#' nonlinear models, particularly the stochastic frontier model. The model was first discussed in
#' Greene (2010), and an application can be found in Dakpo et al. (2022). Practically, we have:
#'
#' \deqn{
#' y_{1i} = \left\{ \begin{array}{ll}
#' 1 & \mbox{if} \quad y_{1i}^* > 0 \\
#' 0 & \mbox{if} \quad y_{1i}^* \leq 0 \\
#' \end{array}
#' \right.
#' }
#'
#' where
#'
#' \deqn{
#' y_{1i}^*=\mathbf{Z}_{si}^{\prime} \mathbf{\gamma} + w_i, \quad
#' w_i \sim \mathcal{N}(0, 1)
#' }
#'
#' and
#'
#' \deqn{
#' y_{2i} = \left\{ \begin{array}{ll}
#' y_{2i}^* & \mbox{if} \quad y_{1i}^* > 0 \\
#' NA & \mbox{if} \quad y_{1i}^* \leq 0 \\
#' \end{array}
#' \right.
#' }
#'
#' where
#'
#' \deqn{
#' y_{2i}^*=\mathbf{x_{i}^{\prime}} \mathbf{\beta} + v_i - Su_i, \quad
#' v_i = \sigma_vV_i \quad \wedge \quad V_i \sim \mathcal{N}(0, 1), \quad
#' u_i = \sigma_u|U_i| \quad \wedge \quad U_i \sim \mathcal{N}(0, 1)
#' }
#'
#' \eqn{y_{1i}} describes the selection equation while \eqn{y_{2i}} represents
#' the frontier equation. The selection bias arises from the correlation
#' between the two symmetric random components \eqn{v_i} and \eqn{w_i}:
#'
#' \deqn{
#' (w_i, v_i) \sim \mathcal{N}_2\left\lbrack(0,0), (1, \rho \sigma_v, \sigma_v^2) \right\rbrack
#' }
#'
#' Conditionally on \eqn{|U_i|}, the probability associated with each observation is:
#'
#'
#' \deqn{
#' Pr \left\lbrack y_{1i}^* \leq 0 \right\rbrack^{1-y_{1i}} \cdot \left\lbrace
#' f(y_{2i}|y_{1i}^* > 0) \times Pr\left\lbrack y_{1i}^* > 0
#' \right\rbrack \right\rbrace^{y_{1i}}
#' }
#'
#' Using the conditional probability formula:
#'
#' \deqn{
#' P\left(A\cap B\right) = P(A) \cdot P(B|A) = P(B) \cdot P(A|B)
#' }
#'
#' Therefore:
#'
#' \deqn{
#' f(y_{2i}|y_{1i}^* \geq 0) \cdot Pr\left\lbrack y_{1i}^* \geq 0\right\rbrack =
#' f(y_{2i}) \cdot Pr(y_{1i}^* \geq 0|y_{2i})
#' }
#'
#' Using the properties of a bivariate normal distribution, we have:
#'
#' \deqn{
#' y_{1i}^* | y_{2i} \sim N\left(\mathbf{Z_{si}^{\prime}} \bm{\gamma}+\frac{\rho}{
#' \sigma_v}v_i, 1-\rho^2\right)
#' }
#'
#' Hence conditionally on \eqn{|U_i|}, we have:
#'
#' \deqn{
#' f(y_{2i}|y_{1i}^* \geq 0) \cdot Pr\left\lbrack y_{1i}^* \geq 0\right\rbrack =
#' \frac{1}{\sigma_v}\phi\left(\frac{v_i}{\sigma_v}\right)\Phi\left(\frac{
#' \mathbf{Z_{si}^{\prime}} \bm{\gamma}+\frac{\rho}{\sigma_v}v_i}{
#' \sqrt{1-\rho^2}}\right)
#' }
#'
#' The conditional likelihood is equal to:
#'
#' \deqn{
#' L_i\big||U_i| = \Phi(-\mathbf{Z_{si}^{\prime}} \bm{\gamma})^{1-y_{1i}} \times
#' \left\lbrace \frac{1}{\sigma_v}\phi\left(\frac{y_{2i}-\mathbf{x_{i}^{\prime}}
#' \bm{\beta} + S\sigma_u|U_i|}{\sigma_v}\right)\Phi\left(\frac{
#' \mathbf{Z_{si}^{\prime}} \bm{\gamma}+\frac{\rho}{\sigma_v}\left(y_{2i}-
#' \mathbf{x_{i}^{\prime}} \bm{\beta} + S\sigma_u|U_i|\right)}{\sqrt{1-\rho^2}}
#' \right) \right\rbrace ^{y_{1i}}
#' }
#'
#' Since the non-selected observations bring no additional information,
#' the conditional likelihood to be considered is:
#'
#' \deqn{
#' L_i\big||U_i| = \frac{1}{\sigma_v}\phi\left(\frac{y_{2i}-\mathbf{x_{i}^{\prime}}
#' \bm{\beta} + S\sigma_u|U_i|}{\sigma_v}\right) \Phi\left(\frac{\mathbf{Z_{si}^{\prime}}
#' \bm{\gamma}+\frac{\rho}{\sigma_v}\left(y_{2i}-\mathbf{x_{i}^{\prime}} \bm{\beta} +
#' S\sigma_u|U_i|\right)}{\sqrt{1-\rho^2}}\right)
#' }
#'
#' The unconditional likelihood is obtained by integrating \eqn{|U_i|} out of the conditional likelihood. Thus
#'
#' \deqn{
#' L_i\\ = \int_{|U_i|} \frac{1}{\sigma_v}\phi\left(\frac{y_{2i}-\mathbf{x_{i}^{\prime}}
#' \bm{\beta} + S\sigma_u|U_i|}{\sigma_v}\right) \Phi\left(\frac{\mathbf{Z_{si}^{\prime}}
#' \bm{\gamma}+ \frac{\rho}{\sigma_v}\left(y_{2i}-\mathbf{x_{i}^{\prime}} \bm{\beta} +
#' S\sigma_u|U_i|\right)}{\sqrt{1-\rho^2}}\right)p\left(|U_i|\right)d|U_i|
#' }
#'
#' To simplify the estimation, the likelihood can be estimated using a two-step approach.
#' In the first step, the probit model can be run and an estimate of \eqn{\gamma} can be obtained.
#' Then, in the second step, the following model is estimated:
#'
#' \deqn{
#' L_i\\ = \int_{|U_i|} \frac{1}{\sigma_v}\phi\left(\frac{y_{2i}-\mathbf{x_{i}^{\prime}}
#' \bm{\beta} + S\sigma_u|U_i|}{\sigma_v}\right) \Phi\left(\frac{a_i +
#' \frac{\rho}{\sigma_v}\left(y_{2i}-\mathbf{x_{i}^{\prime}} \bm{\beta} +
#' S\sigma_u|U_i|\right)}{\sqrt{1-\rho^2}}\right)p\left(|U_i|\right)d|U_i|
#' }
#'
#' where \eqn{a_i = \mathbf{Z_{si}^{\prime}} \hat{\bm{\gamma}}}. This likelihood can be estimated using
#' five different approaches: Gauss-Kronrod quadrature, adaptive integration over hypercubes
#' (hcubature and pcubature), Gauss-Hermite quadrature, and
#' maximum simulated likelihood. We also use the BHHH estimator to obtain
#' the asymptotic standard errors for the parameter estimators.
#'
#' \code{sfaselectioncross} allows for the maximization of weighted log-likelihood.
#' When option \code{weights} is specified and \code{wscale = TRUE}, the weights
#' are scaled as:
#'
#' \deqn{
#' new_{weights} = sample_{size} \times \frac{old_{weights}}{\sum(old_{weights})}
#' }
#'
#' For complex problems, non-gradient methods (e.g. \code{nm} or \code{sann}) can be
#' used to warm start the optimization and zoom in the neighborhood of the
#' solution. Then a gradient-based method is recommended in the second step. In the case
#' of \code{sann}, we recommend to significantly increase the iteration limit
#' (e.g. \code{itermax = 20000}). The Conjugate Gradient (\code{cg}) can also be used
#' in the first stage.
#'
#' A set of extractor functions for fitted model objects is available for objects of class
#' \code{'sfaselectioncross'} including methods to the generic functions \code{\link[=print.sfaselectioncross]{print}},
#' \code{\link[=summary.sfaselectioncross]{summary}}, \code{\link[=coef.sfaselectioncross]{coef}},
#' \code{\link[=fitted.sfaselectioncross]{fitted}}, \code{\link[=logLik.sfaselectioncross]{logLik}},
#' \code{\link[=residuals.sfaselectioncross]{residuals}}, \code{\link[=vcov.sfaselectioncross]{vcov}},
#' \code{\link[=efficiencies.sfaselectioncross]{efficiencies}}, \code{\link[=ic.sfaselectioncross]{ic}},
#' \code{\link[=marginal.sfaselectioncross]{marginal}},
#' \code{\link[=estfun.sfaselectioncross]{estfun}} and
#' \code{\link[=bread.sfaselectioncross]{bread}} (from the \CRANpkg{sandwich} package),
#' [lmtest::coeftest()] (from the \CRANpkg{lmtest} package).
#'
#' @return \code{\link{sfaselectioncross}} returns a list of class \code{'sfaselectioncross'}
#' containing the following elements:
#'
#' \item{call}{The matched call.}
#'
#' \item{selectionF}{The selection equation formula.}
#'
#' \item{frontierF}{The frontier equation formula.}
#'
#' \item{S}{The argument \code{'S'}. See the section \sQuote{Arguments}.}
#'
#' \item{typeSfa}{Character string. 'Stochastic Production/Profit Frontier, e =
#' v - u' when \code{S = 1} and 'Stochastic Cost Frontier, e = v + u' when
#' \code{S = -1}.}
#'
#' \item{Ninit}{Number of initial observations in all samples.}
#'
#' \item{Nobs}{Number of observations used for optimization.}
#'
#' \item{nXvar}{Number of explanatory variables in the production or cost
#' frontier.}
#'
#' \item{logDepVar}{The argument \code{'logDepVar'}. See the section
#' \sQuote{Arguments}.}
#'
#' \item{nuZUvar}{Number of variables explaining heteroscedasticity in the
#' one-sided error term.}
#'
#' \item{nvZVvar}{Number of variables explaining heteroscedasticity in the
#' two-sided error term.}
#'
#' \item{nParm}{Total number of parameters estimated.}
#'
#' \item{udist}{The argument \code{'udist'}. See the section
#' \sQuote{Arguments}.}
#'
#' \item{startVal}{Numeric vector. Starting value for M(S)L estimation.}
#'
#' \item{dataTable}{A data frame (tibble format) containing information on data
#' used for optimization along with residuals and fitted values of the OLS and
#' M(S)L estimations, and the individual observation log-likelihood. When argument \code{weights}
#' is specified, an additional variable is provided in \code{dataTable}.}
#'
#' \item{lpmObj}{Linear probability model used for initializing the first step
#' probit model.}
#'
#' \item{probitObj}{Probit model. Object of class \code{'maxLik'} and \code{'maxim'}.}
#'
#' \item{ols2stepParam}{Numeric vector. OLS second step estimates for
#' selection correction. Inverse Mills Ratio is introduced as an additional
#' explanatory variable.}
#'
#' \item{ols2stepStder}{Numeric vector. Standard errors of OLS second step estimates.}
#'
#' \item{ols2stepSigmasq}{Numeric. Estimated variance of OLS second step random error.}
#'
#' \item{ols2stepLoglik}{Numeric. Log-likelihood value of OLS second step estimation.}
#'
#' \item{ols2stepSkew}{Numeric. Skewness of the residuals of the OLS second step estimation.}
#'
#' \item{ols2stepM3Okay}{Logical. Indicating whether the residuals of the OLS
#' second step estimation have the expected skewness.}
#'
#' \item{CoelliM3Test}{Coelli's test for OLS residuals skewness. (See Coelli,
#' 1995).}
#'
#' \item{AgostinoTest}{D'Agostino's test for OLS residuals skewness. (See
#' D'Agostino and Pearson, 1973).}
#'
#' \item{isWeights}{Logical. If \code{TRUE} weighted log-likelihood is
#' maximized.}
#'
#' \item{lType}{Type of likelihood estimated. See the section \sQuote{Arguments}.}
#'
#' \item{optType}{Optimization algorithm used.}
#'
#' \item{nIter}{Number of iterations of the ML estimation.}
#'
#' \item{optStatus}{Optimization algorithm termination message.}
#'
#' \item{startLoglik}{Log-likelihood at the starting values.}
#'
#' \item{mlLoglik}{Log-likelihood value of the M(S)L estimation.}
#'
#' \item{mlParam}{Parameters obtained from M(S)L estimation.}
#'
#' \item{gradient}{Each variable gradient of the M(S)L estimation.}
#'
#' \item{gradL_OBS}{Matrix. Each variable individual observation gradient of
#' the M(S)L estimation.}
#'
#' \item{gradientNorm}{Gradient norm of the M(S)L estimation.}
#'
#' \item{invHessian}{Covariance matrix of the parameters obtained from the
#' M(S)L estimation.}
#'
#' \item{hessianType}{The argument \code{'hessianType'}. See the section
#' \sQuote{Arguments}.}
#'
#' \item{mlDate}{Date and time of the estimated model.}
#'
#' \item{simDist}{The argument \code{'simDist'}, only if \code{lType =
#' 'msl'}. See the section \sQuote{Arguments}.}
#'
#' \item{Nsim}{The argument \code{'Nsim'}, only if \code{lType = 'msl'}.
#' See the section \sQuote{Arguments}.}
#'
#' \item{FiMat}{Matrix of random draws used for MSL, only if \code{lType =
#' 'msl'}.}
#'
#' \item{gHermiteData}{List. Gauss-Hermite quadrature rule as provided by
#' \code{\link[fastGHQuad:gaussHermiteData]{gaussHermiteData}}. Only if \code{lType =
#' 'ghermite'}.}
#'
#' \item{Nsub}{Number of subdivisions used for quadrature approaches.}
#'
#' \item{uBound}{Upper bound for the inefficiency component when solving
#' integrals using quadrature approaches except Gauss-Hermite for which the upper
#' bound is automatically infinite (\code{Inf}).}
#'
#' \item{intol}{Integration tolerance for quadrature approaches except Gauss-Hermite.}
#'
#' @note For the Halton draws, the code is adapted from the \pkg{mlogit}
#' package.
#'
# @author K Hervé Dakpo, Yann Desjeux, and Laure Latruffe
#'
#' @seealso \code{\link[=print.sfaselectioncross]{print}} for printing \code{sfaselectioncross} object.
#'
#' \code{\link[=summary.sfaselectioncross]{summary}} for creating and printing
#' summary results.
#'
#' \code{\link[=coef.sfaselectioncross]{coef}} for extracting coefficients of the
#' estimation.
#'
#' \code{\link[=efficiencies.sfaselectioncross]{efficiencies}} for computing
#' (in-)efficiency estimates.
#'
#' \code{\link[=fitted.sfaselectioncross]{fitted}} for extracting the fitted frontier
#' values.
#'
#' \code{\link[=ic.sfaselectioncross]{ic}} for extracting information criteria.
#'
#' \code{\link[=logLik.sfaselectioncross]{logLik}} for extracting log-likelihood
#' value(s) of the estimation.
#'
#' \code{\link[=marginal.sfaselectioncross]{marginal}} for computing marginal effects of
#' inefficiency drivers.
#'
#' \code{\link[=residuals.sfaselectioncross]{residuals}} for extracting residuals of the
#' estimation.
#'
#' \code{\link[=vcov.sfaselectioncross]{vcov}} for computing the variance-covariance
#' matrix of the coefficients.
#'
#' \code{\link[=bread.sfaselectioncross]{bread}} for bread for sandwich estimator.
#'
#' \code{\link[=estfun.sfaselectioncross]{estfun}} for gradient extraction for each
#' observation.
#'
#' @references Caudill, S. B., and Ford, J. M. 1993. Biases in frontier estimation due to
#' heteroscedasticity. \emph{Economics Letters}, \bold{41}(1), 17--20.
#'
#' Caudill, S. B., Ford, J. M., and Gropper, D. M. 1995. Frontier estimation
#' and firm-specific inefficiency measures in the presence of
#' heteroscedasticity. \emph{Journal of Business & Economic Statistics},
#' \bold{13}(1), 105--111.
#'
#' Coelli, T. 1995. Estimators and hypothesis tests for a stochastic frontier
#' function - a Monte-Carlo analysis. \emph{Journal of Productivity Analysis},
#' \bold{6}:247--268.
#'
#' D'Agostino, R., and E.S. Pearson. 1973. Tests for departure from normality.
#' Empirical results for the distributions of \eqn{b_2} and \eqn{\sqrt{b_1}}.
#' \emph{Biometrika}, \bold{60}:613--622.
#'
#' Dakpo, K. H., Latruffe, L., Desjeux, Y., Jeanneaux, P., 2022.
#' Modeling heterogeneous technologies in the presence of sample selection:
#' The case of dairy farms and the adoption of agri-environmental schemes in France.
#' \emph{Agricultural Economics}, \bold{53}(3), 422-438.
#'
#' Greene, W., 2010. A stochastic frontier model with correction
#' for sample selection. \emph{Journal of Productivity Analysis}. \bold{34}, 15--24.
#'
#' Hadri, K. 1999. Estimation of a doubly heteroscedastic stochastic frontier
#' cost function. \emph{Journal of Business & Economic Statistics},
#' \bold{17}(3), 359--363.
#'
#' Heckman, J., 1976. Discrete, qualitative and limited dependent variables.
#' \emph{Ann Econ Soc Meas.} \bold{4}, 475--492.
#'
#' Heckman, J., 1979. Sample Selection Bias as a Specification Error.
#' \emph{Econometrica}. \bold{47}, 153--161.
#'
#' Reifschneider, D., and Stevenson, R. 1991. Systematic departures from the
#' frontier: A framework for the analysis of firm inefficiency.
#' \emph{International Economic Review}, \bold{32}(3), 715--723.
#'
#' @keywords models optimize cross-section likelihood
#'
#' @examples
#'
#' \dontrun{
#'
#' ## Simulated example
#'
#' N <- 2000 # sample size
#' set.seed(12345)
#' z1 <- rnorm(N)
#' z2 <- rnorm(N)
#' v1 <- rnorm(N)
#' v2 <- rnorm(N)
#' e1 <- v1
#' e2 <- 0.7071 * (v1 + v2)
#' ds <- z1 + z2 + e1
#' d <- ifelse(ds > 0, 1, 0)
#' u <- abs(rnorm(N))
#' x1 <- rnorm(N)
#' x2 <- rnorm(N)
#' y <- x1 + x2 + e2 - u
#' data <- cbind(y = y, x1 = x1, x2 = x2, z1 = z1, z2 = z2, d = d)
#'
#' ## Estimation using quadrature (Gauss-Kronrod)
#'
#' selecRes1 <- sfaselectioncross(selectionF = d ~ z1 + z2, frontierF = y ~ x1 + x2,
#' modelType = 'greene10', method = 'bfgs',
#' logDepVar = TRUE, data = as.data.frame(data),
#' S = 1L, udist = 'hnormal', lType = 'kronrod', Nsub = 100, uBound = Inf,
#' simType = 'halton', Nsim = 300, prime = 2L, burn = 10, antithetics = FALSE,
#' seed = 12345, itermax = 2000, printInfo = FALSE)
#'
#' summary(selecRes1)
#'
#' ## Estimation using maximum simulated likelihood
#'
#' selecRes2 <- sfaselectioncross(selectionF = d ~ z1 + z2, frontierF = y ~ x1 + x2,
#' modelType = 'greene10', method = 'bfgs',
#' logDepVar = TRUE, data = as.data.frame(data),
#' S = 1L, udist = 'hnormal', lType = 'msl', Nsub = 100, uBound = Inf,
#' simType = 'halton', Nsim = 300, prime = 2L, burn = 10, antithetics = FALSE,
#' seed = 12345, itermax = 2000, printInfo = FALSE)
#'
#' summary(selecRes2)
#'
#' }
#'
#' @export
sfaselectioncross <- function(selectionF, frontierF, uhet, vhet,
modelType = "greene10", logDepVar = TRUE, data, subset, weights,
wscale = TRUE, S = 1L, udist = "hnormal", start = NULL, method = "bfgs",
hessianType = 2L, lType = "ghermite", Nsub = 100, uBound = Inf,
simType = "halton", Nsim = 100, prime = 2L, burn = 10, antithetics = FALSE,
seed = 12345, itermax = 2000, printInfo = FALSE, intol = 1e-06,
tol = 1e-12, gradtol = 1e-06, stepmax = 0.1, qac = "marquardt") {
# u distribution check -------
udist <- tolower(udist)
if (udist != "hnormal") {
stop("Currently selection model only handles half-normal distribution ... ",
call. = FALSE)
}
# selection model check -------
modelType <- tolower(modelType)
if (modelType != "greene10") {
stop("Currently only Greene (2010) selection model is available ... ",
call. = FALSE)
}
# method to solve selection model -------
lType <- tolower(lType)
if (!(lType %in% c("kronrod", "hcubature", "pcubature", "ghermite",
"msl"))) {
stop("Unknown inefficiency distribution: ", paste(lType),
call. = FALSE)
}
# Formula manipulation -------
if (length(Formula(selectionF))[2] != 1) {
stop("argument 'selectionF' must have one RHS part",
call. = FALSE)
}
if (length(Formula(frontierF))[2] != 1) {
stop("argument 'frontierF' must have one RHS part", call. = FALSE)
}
cl <- match.call()
mc <- match.call(expand.dots = FALSE)
## Selection formula -------
m_S <- match(c("selectionF", "data", "subset", "weights"),
names(mc), nomatch = 0L)
mc_S <- mc[c(1L, m_S)]
mc_S$drop.unused.levels <- TRUE
selectionF <- interCheckSelection(formula = selectionF)
## Generate required datasets -------
names(mc_S)[2] <- "formula"
mc_S$na.action <- na.omit
mc_S[[1L]] <- quote(stats::model.frame)
mc_S <- eval(mc_S, parent.frame())
Yvar_S <- model.response(mc_S)
selecLevels <- levels(as.factor(Yvar_S))
if (length(selecLevels) != 2) {
stop("the dependent variable of the 'selection' equation has to contain",
"exactly two levels (e.g. FALSE and TRUE)")
}
Yvar_S <- as.integer(Yvar_S == selecLevels[2])
mtX_S <- terms(selectionF, data = data, rhs = 1)
Xvar_S <- model.matrix(mtX_S, mc_S)
N_S <- nrow(Xvar_S)
if (N_S == 0L) {
stop("0 (non-NA) cases in selection model", call. = FALSE)
}
wProbit <- as.vector(model.weights(mc_S))
if (length(wscale) != 1 || !is.logical(wscale[1])) {
stop("argument 'wscale' must be a single logical value",
call. = FALSE)
}
if (!is.null(wProbit)) {
if (!is.numeric(wProbit)) {
stop("'weights' must be a numeric vector", call. = FALSE)
} else {
if (any(wProbit < 0 | is.na(wProbit)))
stop("missing or negative weights not allowed",
call. = FALSE)
}
if (wscale) {
wProbit <- wProbit/sum(wProbit) * N_S
}
} else {
wProbit <- rep(1, N_S)
}
## frontier formula -------
m_F <- match(c("frontierF", "data", "subset", "weights"),
names(mc), nomatch = 0L)
mc_F <- mc[c(1L, m_F)]
mc_F$drop.unused.levels <- TRUE
frontierF <- interCheckMain(formula = frontierF, data = data)
## Generate required datasets -------
names(mc_F)[2] <- "formula"
mc_F$na.action <- na.omit
mc_F[[1L]] <- quote(stats::model.frame)
mc_F <- eval(mc_F, parent.frame())
Yvar_F <- model.response(mc_F, "numeric")[Yvar_S == 1]
mtX_F <- terms(frontierF, data = data, rhs = 1)
Xvar_F <- model.matrix(mtX_F, mc_F)[Yvar_S == 1, , drop = FALSE]
nXvar_F <- ncol(Xvar_F)
N_F <- nrow(Xvar_F)
if (N_F == 0L) {
stop("0 (non-NA) cases in frontier model", call. = FALSE)
}
wHvar <- wProbit[Yvar_S == 1]
if (!missing(uhet)) {
uhet <- clhsCheck_u(formula = uhet)
} else {
uhet <- ~1
}
if (!missing(vhet)) {
vhet <- clhsCheck_v(formula = vhet)
} else {
vhet <- ~1
}
formula <- formDist_sfaselectioncross(udist = udist, formula = frontierF,
uhet = uhet, vhet = vhet)
mtuH <- delete.response(terms(formula, data = data, rhs = 2))
uHvar <- model.matrix(mtuH, mc_F)[Yvar_S == 1, , drop = FALSE]
nuZUvar <- ncol(uHvar)
mtvH <- delete.response(terms(formula, data = data, rhs = 3))
vHvar <- model.matrix(mtvH, mc_F)[Yvar_S == 1, , drop = FALSE]
nvZVvar <- ncol(vHvar)
# Check other supplied options -------
if (length(S) != 1 || !(S %in% c(-1L, 1L))) {
stop("argument 'S' must equal either 1 or -1: 1 for production or profit frontier
and -1 for cost frontier",
call. = FALSE)
}
typeSfa <- if (S == 1L) {
"Stochastic Production/Profit Frontier, e = v - u"
} else {
"Stochastic Cost Frontier, e = v + u"
}
if (length(logDepVar) != 1 || !is.logical(logDepVar[1])) {
stop("argument 'logDepVar' must be a single logical value",
call. = FALSE)
}
# Number of parameters -------
nParm <- nXvar_F + nuZUvar + nvZVvar + 1
# Checking starting values when provided -------
if (!is.null(start)) {
if (length(start) != nParm) {
stop("Wrong number of initial values: model has ",
nParm, " parameters", call. = FALSE)
}
}
if (nParm > N_F) {
stop("Model has more parameters than observations", call. = FALSE)
}
# Check algorithms -------
method <- tolower(method)
if (!(method %in% c("ucminf", "bfgs", "bhhh", "nr", "nm",
"cg", "sann", "sr1", "mla", "sparse", "nlminb"))) {
stop("Unknown or non-available optimization algorithm: ",
paste(method), call. = FALSE)
}
# Check hessian type
if (length(hessianType) != 1 || !(hessianType %in% c(1L,
2L))) {
stop("argument 'hessianType' must equal either 1 or 2",
call. = FALSE)
}
# Draws for MSL -------
if (lType == "msl") {
if (!(simType %in% c("halton", "ghalton", "sobol", "uniform"))) {
stop("Unknown or non-available random draws method",
call. = FALSE)
}
if (!is.numeric(Nsim) || length(Nsim) != 1) {
stop("argument 'Nsim' must be a single numeric scalar",
call. = FALSE)
}
if (!is.numeric(burn) || length(burn) != 1) {
stop("argument 'burn' must be a single numeric scalar",
call. = FALSE)
}
if (!is_prime(prime)) {
stop("argument 'prime' must be a single prime number",
call. = FALSE)
}
if (length(antithetics) != 1 || !is.logical(antithetics[1])) {
stop("argument 'antithetics' must be a single logical value",
call. = FALSE)
}
if (antithetics && (Nsim%%2) != 0) {
Nsim <- Nsim + 1
}
simDist <- if (simType == "halton") {
"Halton"
} else {
if (simType == "ghalton") {
"Generalized Halton"
} else {
if (simType == "sobol") {
"Sobol"
} else {
if (simType == "uniform") {
"Uniform"
}
}
}
}
cat("Initialization of", Nsim, simDist, "draws per observation ...\n")
FiMat <- drawMat(N = N_F, Nsim = Nsim, simType = simType,
prime = prime, burn = burn, antithetics = antithetics,
seed = seed)
}
# Other optimization options -------
if (!is.numeric(Nsub) || length(Nsub) != 1) {
stop("argument 'Nsub' must be a single numeric scalar",
call. = FALSE)
}
if (Nsub != round(Nsub)) {
stop("argument 'Nsub' must be an integer", call. = FALSE)
}
if (Nsub <= 0) {
stop("argument 'Nsub' must be positive", call. = FALSE)
}
Nsub <- as.integer(Nsub)
uBound <- abs(uBound)
if (!is.numeric(itermax) || length(itermax) != 1) {
stop("argument 'itermax' must be a single numeric scalar",
call. = FALSE)
}
if (itermax != round(itermax)) {
stop("argument 'itermax' must be an integer", call. = FALSE)
}
if (itermax <= 0) {
stop("argument 'itermax' must be positive", call. = FALSE)
}
itermax <- as.integer(itermax)
if (length(printInfo) != 1 || !is.logical(printInfo[1])) {
stop("argument 'printInfo' must be a single logical value",
call. = FALSE)
}
if (!is.numeric(intol) || length(intol) != 1) {
stop("argument 'intol' must be numeric", call. = FALSE)
}
if (intol < 0) {
stop("argument 'intol' must be non-negative", call. = FALSE)
}
if (!is.numeric(tol) || length(tol) != 1) {
stop("argument 'tol' must be numeric", call. = FALSE)
}
if (tol < 0) {
stop("argument 'tol' must be non-negative", call. = FALSE)
}
if (!is.numeric(gradtol) || length(gradtol) != 1) {
stop("argument 'gradtol' must be numeric", call. = FALSE)
}
if (gradtol < 0) {
stop("argument 'gradtol' must be non-negative", call. = FALSE)
}
if (!is.numeric(stepmax) || length(stepmax) != 1) {
stop("argument 'stepmax' must be numeric", call. = FALSE)
}
if (stepmax < 0) {
stop("argument 'stepmax' must be non-negative", call. = FALSE)
}
if (!(qac %in% c("marquardt", "stephalving"))) {
stop("argument 'qac' must be either 'marquardt' or 'stephalving'",
call. = FALSE)
}
# Nodes for Gauss Hermite quadrature -------
if (lType == "ghermite") {
gH <- gaussHermiteData(n = Nsub * 2)
}
# Step 1: 2step Heckman correction -------
## OLS with linear probability model -------
lpm <- if (colnames(Xvar_S)[1] == "(Intercept)") {
if (dim(Xvar_S)[2] == 1) {
lm(Yvar_S ~ 1)
} else {
lm(Yvar_S ~ ., data = as.data.frame(Xvar_S[, -1,
drop = FALSE]), weights = wProbit)
}
} else {
lm(Yvar_S ~ -1 + ., data = as.data.frame(Xvar_S), weights = wProbit)
}
if (any(is.na(lpm$coefficients))) {
stop("at least one of the 'selectionF' coefficients is NA: ",
paste(colnames(Xvar_S)[is.na(lpm$coefficients)],
collapse = ", "), "This may be due to a singular matrix
due to potential perfect multicollinearity",
call. = FALSE)
}
## Solution for probit model -------
cat("First step probit model...\n")
probRes <- maxLik::maxLik(logLik = probit_likelihood, grad = probit_gradient,
start = coefficients(lpm), method = "bfgs", control = list(iterlim = itermax,
printLevel = printInfo, reltol = tol, tol = tol,
qac = qac), Xvar = Xvar_S, Yvar = Yvar_S, wHvar = wProbit)
probitParam <- probRes$estimate
## heckit model using IMR and OLS -------
if (inherits(data, "pdata.frame")) {
dataTable <- data[, names(index(data))]
} else {
dataTable <- data.frame(IdObs = 1:N_S)
}
dataTable <- data.frame(cbind(dataTable, data[, c(all.vars(terms(selectionF)),
all.vars(terms(frontierF)))], weights = wProbit))
dataTable$PROBIT_PREDICTIONS <- as.numeric(crossprod(matrix(probitParam),
t(Xvar_S)))
dataTable$IMR <- dnorm(dataTable[["PROBIT_PREDICTIONS"]])/pnorm(dataTable[["PROBIT_PREDICTIONS"]])
ols2step <- if (colnames(Xvar_F)[1] == "(Intercept)") {
if (dim(Xvar_F)[2] == 1) {
lm(Yvar_F ~ 1)
} else {
lm(Yvar_F ~ ., data = as.data.frame(cbind(Xvar_F[,
-1, drop = FALSE], IMR = dataTable$IMR[Yvar_S ==
1])), weights = wHvar)
}
} else {
lm(Yvar_F ~ -1 + ., data = as.data.frame(cbind(Xvar_F,
IMR = dataTable$IMR[Yvar_S == 1])), weights = wHvar)
}
if (any(is.na(ols2step$coefficients))) {
stop("at least one of the 'frontierF' coefficients is NA: ",
paste(colnames(Xvar_F)[is.na(ols2step$coefficients)],
collapse = ", "), "This may be due to a singular matrix
due to potential perfect multicollinearity",
call. = FALSE)
}
names(ols2step$coefficients) <- colnames(Xvar_F)
ols2stepParam <- ols2step$coefficients
ols2stepSigmasq <- summary(ols2step)$sigma^2
ols2stepStder <- sqrt(diag(vcov(ols2step)))
ols2stepLoglik <- logLik(ols2step)[1]
dataTable$ols2stepResiduals <- NA
dataTable$ols2stepResiduals[Yvar_S == 1] <- residuals(ols2step)
dataTable$ols2stepFitted <- NA
dataTable$ols2stepFitted[Yvar_S == 1] <- fitted(ols2step)
# possibility to have duplicated columns if ID or TIME
# appears in ols in the case of panel data
dataTable <- dataTable[!duplicated(as.list(dataTable))]
## skewness is run on the 2step model -------
ols2stepSkew <- skewness(dataTable[["ols2stepResiduals"]][Yvar_S ==
1])
ols2stepM3Okay <- if (S * ols2stepSkew < 0) {
"Residuals have the expected skeweness"
} else {
"Residuals do not have the expected skeweness"
}
if (S * ols2stepSkew > 0) {
warning("The residuals of the OLS are", if (S == 1) {
" right"
} else {
" left"
}, "-skewed. This may indicate the absence of inefficiency or
model misspecification or sample 'bad luck'",
call. = FALSE)
}
CoelliM3Test <- c(z = sum(dataTable[["ols2stepResiduals"]]^3,
na.rm = TRUE)/N_F/sqrt(6 * (sum(dataTable[["ols2stepResiduals"]]^2,
na.rm = TRUE)/N_F)^3/N_F), p.value = 2 * pnorm(-abs(sum(dataTable[["ols2stepResiduals"]]^3,
na.rm = TRUE)/N_F/sqrt(6 * (sum(dataTable[["ols2stepResiduals"]]^2,
na.rm = TRUE)/N_F)^3/N_F))))
AgostinoTest <- dagoTest(dataTable[["ols2stepResiduals"]][Yvar_S ==
1])
class(AgostinoTest) <- "dagoTest"
# Step 2: MLE arguments -------
FunArgs <- if (lType %in% c("kronrod", "hcubature", "pcubature")) {
list(start = start, olsParam = ols2stepParam, dataTable = dataTable,
nXvar = nXvar_F, nuZUvar = nuZUvar, nvZVvar = nvZVvar,
uHvar = uHvar, vHvar = vHvar, Yvar = Yvar_F, Xvar = Xvar_F,
selectDum = Yvar_S, wHvar = wHvar, S = S, PREDICTIONS = dataTable[["PROBIT_PREDICTIONS"]][Yvar_S ==
1], uBound = uBound, subdivisions = Nsub, intol,
method = method, printInfo = printInfo, itermax = itermax,
stepmax = stepmax, tol = tol, gradtol = gradtol,
hessianType = hessianType, qac = qac)
} else {
if (lType == "ghermite") {
list(start = start, olsParam = ols2stepParam, dataTable = dataTable,
nXvar = nXvar_F, nuZUvar = nuZUvar, nvZVvar = nvZVvar,
uHvar = uHvar, vHvar = vHvar, Yvar = Yvar_F,
Xvar = Xvar_F, selectDum = Yvar_S, wHvar = wHvar,
S = S, PREDICTIONS = dataTable[["PROBIT_PREDICTIONS"]][Yvar_S ==
1], gH = gH, N = N_F, method = method, printInfo = printInfo,
itermax = itermax, stepmax = stepmax, tol = tol,
gradtol = gradtol, hessianType = hessianType,
qac = qac)
} else {
if (lType == "msl") {
list(start = start, olsParam = ols2stepParam,
dataTable = dataTable, nXvar = nXvar_F, nuZUvar = nuZUvar,
nvZVvar = nvZVvar, uHvar = uHvar, vHvar = vHvar,
Yvar = Yvar_F, Xvar = Xvar_F, selectDum = Yvar_S,
wHvar = wHvar, S = S, PREDICTIONS = dataTable[["PROBIT_PREDICTIONS"]][Yvar_S ==
1], FiMat = FiMat, N = N_F, method = method,
printInfo = printInfo, itermax = itermax, stepmax = stepmax,
tol = tol, gradtol = gradtol, hessianType = hessianType,
qac = qac)
}
}
}
## MLE run -------
cat("Second step Frontier model...\n")
mleList <- tryCatch(switch(lType, kronrod = do.call(halfnormAlgOpt_ss_GK,
FunArgs), hcubature = do.call(halfnormAlgOpt_ss_HCUB,
FunArgs), pcubature = do.call(halfnormAlgOpt_ss_PCUB,
FunArgs), ghermite = do.call(halfnormAlgOpt_ss_GH, FunArgs),
msl = do.call(halfnormAlgOpt_ss_MSL, FunArgs)), error = function(e) e)
if (inherits(mleList, "error")) {
stop("The current error occurs during optimization:\n",
mleList$message, call. = FALSE)
}
# Inverse Hessian + other -------
mleList$invHessian <- vcovObj(mleObj = mleList$mleObj, hessianType = hessianType,
method = method, nParm = nParm)
mleList <- c(mleList, if (method == "ucminf") {
list(type = "ucminf max.", nIter = unname(mleList$mleObj$info["neval"]),
status = mleList$mleObj$message, mleLoglik = -mleList$mleObj$value,
gradient = mleList$mleObj$gradient)
} else {
if (method %in% c("bfgs", "bhhh", "nr", "nm", "cg", "sann")) {
list(type = substr(mleList$mleObj$type, 1, 27), nIter = mleList$mleObj$iterations,
status = mleList$mleObj$message, mleLoglik = mleList$mleObj$maximum,
gradient = mleList$mleObj$gradient)
} else {
if (method == "sr1") {
list(type = "SR1 max.", nIter = mleList$mleObj$iterations,
status = mleList$mleObj$status, mleLoglik = -mleList$mleObj$fval,
gradient = -mleList$mleObj$gradient)
} else {
if (method == "mla") {
list(type = "Lev. Marquardt max.", nIter = mleList$mleObj$ni,
status = switch(mleList$mleObj$istop, `1` = "convergence criteria were satisfied",
`2` = "maximum number of iterations was reached",
`4` = "algorithm encountered a problem in the function computation"),
mleLoglik = -mleList$mleObj$fn.value, gradient = -mleList$mleObj$grad)
} else {
if (method == "sparse") {
list(type = "Sparse Hessian max.", nIter = mleList$mleObj$iterations,
status = mleList$mleObj$status, mleLoglik = -mleList$mleObj$fval,
gradient = -mleList$mleObj$gradient)
} else {
if (method == "nlminb") {
list(type = "nlminb max.", nIter = mleList$mleObj$iterations,
status = mleList$mleObj$message, mleLoglik = -mleList$mleObj$objective,
gradient = mleList$mleObj$gradient)
}
}
}
}
}
})
# quick renaming -------
names(mleList$startVal) <- fName_uvr_sfaselectioncross(Xvar = Xvar_F,
uHvar = uHvar, vHvar = vHvar)
names(mleList$mlParam) <- names(mleList$startVal)
rownames(mleList$invHessian) <- colnames(mleList$invHessian) <- names(mleList$mlParam)
names(mleList$gradient) <- names(mleList$mlParam)
colnames(mleList$mleObj$gradL_OBS) <- names(mleList$mlParam)
# Return object -------
mlDate <- format(Sys.time(), "Model was estimated on : %b %a %d, %Y at %H:%M")
dataTable$mlResiduals <- NA
dataTable$mlResiduals[Yvar_S == 1] <- Yvar_F - as.numeric(crossprod(matrix(mleList$mlParam[1:nXvar_F]),
t(Xvar_F)))
dataTable$mlFitted <- NA
dataTable$mlFitted[Yvar_S == 1] <- as.numeric(crossprod(matrix(mleList$mlParam[1:nXvar_F]),
t(Xvar_F)))
dataTable$logL_OBS <- NA
dataTable$logL_OBS[Yvar_S == 1] <- mleList$mleObj$logL_OBS
returnObj <- list()
returnObj$call <- cl
returnObj$selectionF <- selectionF
returnObj$frontierF <- frontierF
returnObj$formula <- formula
returnObj$S <- S
returnObj$typeSfa <- typeSfa
returnObj$Ninit <- N_S
returnObj$Nobs <- N_F
returnObj$nXvar <- nXvar_F
returnObj$logDepVar <- logDepVar
returnObj$nuZUvar <- nuZUvar
returnObj$nvZVvar <- nvZVvar
returnObj$nParm <- nParm
returnObj$udist <- udist
returnObj$startVal <- mleList$startVal
returnObj$dataTable <- dataTable
returnObj$lpmObj <- lpm
returnObj$probitObj <- probRes
returnObj$ols2stepParam <- ols2stepParam
returnObj$ols2stepSigmasq <- ols2stepSigmasq
returnObj$ols2stepStder <- ols2stepStder
returnObj$ols2stepLoglik <- ols2stepLoglik
returnObj$ols2stepSkew <- ols2stepSkew
returnObj$ols2stepM3Okay <- ols2stepM3Okay
returnObj$CoelliM3Test <- CoelliM3Test
returnObj$AgostinoTest <- AgostinoTest
returnObj$isWeights <- !all.equal(wHvar, rep(1, N_F))
returnObj$lType <- lType
returnObj$optType <- mleList$type
returnObj$nIter <- mleList$nIter
returnObj$optStatus <- mleList$status
returnObj$startLoglik <- mleList$startLoglik
returnObj$mlLoglik <- mleList$mleLoglik
returnObj$mlParam <- mleList$mlParam
returnObj$gradient <- mleList$gradient
returnObj$gradL_OBS <- matrix(NA, nrow = N_S, ncol = nParm)
returnObj$gradL_OBS[Yvar_S == 1, ] <- mleList$mleObj$gradL_OBS
returnObj$gradientNorm <- sqrt(sum(mleList$gradient^2))
returnObj$invHessian <- mleList$invHessian
returnObj$hessianType <- if (hessianType == 1) {
"Analytic Hessian"
} else {
if (hessianType == 2) {
"BHHH Hessian"
}
}
returnObj$mlDate <- mlDate
if (lType == "msl") {
returnObj$simDist <- simDist
returnObj$Nsim <- Nsim
returnObj$FiMat <- FiMat
} else {
if (lType == "ghermite") {
returnObj$gHermiteData <- gH
} else {
returnObj$Nsub <- Nsub
returnObj$intol <- intol
returnObj$uBound <- uBound
}
}
rm(mleList)
class(returnObj) <- "sfaselectioncross"
return(returnObj)
}
# print for sfaselectioncross ----------
#' @rdname sfaselectioncross
#' @export
print.sfaselectioncross <- function(x, ...) {
  # Echo the call that produced the fit.
  cat("Call:\n")
  cat(deparse(x$call))
  cat("\n\n")
  # Estimation metadata; paste() joins with a single space, which
  # reproduces the spacing cat() would insert between its arguments.
  cat(paste("Likelihood estimates using", x$optType, "\n"))
  cat(paste("Normal-Half Normal Sample Selection Stochastic Frontier Model",
    "\n"))
  cat("Data Type: Cross Section \n")
  cat(paste("Status:", x$optStatus, "\n\n"))
  cat(paste(x$typeSfa, "\n"))
  # Formatted ML coefficients with a two-space gap between columns.
  print.default(format(x$mlParam), print.gap = 2, quote = FALSE)
  # Standard print-method contract: return the object invisibly.
  invisible(x)
}
# Bread for Sandwich Estimator ----------
#' @rdname sfaselectioncross
#' @export
bread.sfaselectioncross <- function(x, ...) {
  # Bread of the sandwich variance estimator: (inverse Hessian) * Nobs.
  #
  # When the model was fitted with the analytic Hessian, the stored
  # inverse Hessian is reused directly. Otherwise (BHHH Hessian) the
  # analytic Hessian is recomputed from the data kept in x$dataTable,
  # dispatching on the likelihood type used at estimation (x$lType).
  #
  # Args:
  #   x   : an object of class 'sfaselectioncross'.
  #   ... : currently ignored (kept for generic compatibility).
  # Returns: a named square matrix (nParm x nParm).
  if (x$hessianType == "Analytic Hessian") {
    return(x$invHessian * x$Nobs)
  }
  cat("Computing Analytical Hessian \n")
  # Selected (observed) subsample: first variable of the selection
  # formula equals 1.
  sel <- x$dataTable[all.vars(x$selectionF)[1]] == 1
  selData <- x$dataTable[sel, ]
  Yvar <- model.response(model.frame(x$formula, data = selData))
  Xvar <- model.matrix(x$formula, rhs = 1, data = selData)
  uHvar <- model.matrix(x$formula, rhs = 2, data = selData)
  vHvar <- model.matrix(x$formula, rhs = 3, data = selData)
  # Arguments shared by every likelihood-type-specific Hessian function.
  # NOTE: the weights are subset to the selected subsample for ALL
  # likelihood types; the previous code passed the full-length weight
  # vector in the 'kronrod' branch, inconsistently with the other four
  # branches.
  commonArgs <- list(x$mlParam, nXvar = ncol(Xvar),
    nuZUvar = ncol(uHvar), nvZVvar = ncol(vHvar), uHvar = uHvar,
    vHvar = vHvar, Yvar = Yvar, Xvar = Xvar,
    wHvar = x$dataTable$weights[sel], S = x$S,
    PREDICTIONS = x$dataTable[["PROBIT_PREDICTIONS"]][sel])
  # Likelihood-type-specific function and extra arguments (empty
  # switch alternatives fall through to the next one).
  hessFun <- switch(x$lType, kronrod = chesshalfnormlike_ss_GK,
    hcubature = chesshalfnormlike_ss_HCUB,
    pcubature = chesshalfnormlike_ss_PCUB,
    ghermite = chesshalfnormlike_ss_GH,
    msl = chesshalfnormlike_ss_MSL)
  extraArgs <- switch(x$lType, kronrod = , hcubature = ,
    pcubature = list(uBound = x$uBound, subdivisions = x$Nsub,
      intol = x$intol),
    ghermite = list(gH = x$gHermiteData, N = x$Nobs),
    msl = list(FiMat = x$FiMat, N = x$Nobs))
  hessAnalytical <- do.call(hessFun, c(commonArgs, extraArgs))
  invHess <- invHess_fun(hess = hessAnalytical)
  colnames(invHess) <- rownames(invHess) <- names(x$mlParam)
  return(invHess * x$Nobs)
}
# Gradients Evaluated at each Observation ----------
#' @rdname sfaselectioncross
#' @export
estfun.sfaselectioncross <- function(x, ...) {
  # Observation-level gradient contributions for the sandwich estimator.
  # Rows belonging to non-selected observations are stored as all-NA and
  # are dropped, so only the estimation (selected) subsample is returned.
  gradObs <- x$gradL_OBS
  keep <- complete.cases(gradObs)
  gradObs[keep, , drop = FALSE]
}
# NOTE(review): the three lines below are rdrr.io embed-widget boilerplate
# that was accidentally captured with the source; commented out so the
# file parses as valid R.
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.