Nothing
#' @name unitquantreg.control
#'
#' @title Control parameters for unit quantile regression
#'
#' @description Auxiliary function that control fitting of unit quantile
#' regression models using \code{\link{unitquantreg}}.
#'
#' @author André F. B. Menezes
#'
#' @param method string. Specify the method argument passed to \code{\link[optimx]{optimx}}.
#' @param hessian logical. Should use the numerically Hessian matrix to compute
#' variance-covariance? Default is \code{FALSE}, i.e., use the analytic
#' Hessian.
#' @param gradient logical. Should use the analytic gradient? Default is
#' \code{TRUE}.
#' @param maxit integer. Specify the maximal number of iterations passed to
#' \code{optimx}.
#' @param factr numeric.Controls the convergence of the \code{"L-BFGS-B"} method.
#' @param reltol numeric. Relative convergence tolerance passed to \code{\link[optimx]{optimx}}.
#' @param trace non-negative integer. If positive, tracing information on the
#' progress of the optimization is produced.
#' @param starttests logical. Should \code{\link[optimx]{optimx}} run tests of the functions
#' and parameters? Default is \code{FALSE}.
#' @param dowarn logical. Show warnings generated by \code{\link[optimx]{optimx}}? Default is
#' \code{FALSE}.
#' @param ... arguments passed to \code{\link[optimx]{optimx}}.
#'
#'
#' @details The \code{control} argument of
#' \code{\link{unitquantreg}} uses the arguments of
#' \code{\link{unitquantreg.control}}. In particular, the
#' parameters in \code{\link{unitquantreg}} are estimated by
#' maximum likelihood using the \code{\link[optimx]{optimx}}, which is a
#' general-purpose optimization wrapper function that calls other R tools for
#' optimization, including the existing \code{\link[stats]{optim}} function.
#' The main advantage of \code{\link[optimx]{optimx}} is to unify the tools
#' allowing a number of different optimization methods and provide sanity checks.
#'
#'
#' @seealso \code{\link[optimx]{optimx}} for more details about control
#' parameters and \code{\link{unitquantreg.fit}} the fitting
#' procedure used by \code{\link{unitquantreg}}.
#'
#' @return A list with components named as the arguments.
#'
#' @examples
#' data(sim_bounded, package = "unitquantreg")
#' sim_bounded_curr <- sim_bounded[sim_bounded$family == "uweibull", ]
#'
#' # Fitting using the analytical gradient
#' fit_gradient <- unitquantreg(formula = y1 ~ x,
#' data = sim_bounded_curr, tau = 0.5,
#' family = "uweibull",
#' control = unitquantreg.control(gradient = TRUE,
#' trace = 1))
#'
#' # Fitting without using the analytical gradient
#' fit_nogradient <- unitquantreg(formula = y1 ~ x,
#' data = sim_bounded_curr, tau = 0.5,
#' family = "uweibull",
#' control = unitquantreg.control(gradient = FALSE,
#' trace = 1))
#' # Compare estimated coefficients
#' cbind(gradient = coef(fit_gradient), no_gradient = coef(fit_nogradient))
#'
#' @references Nash, J. C. and Varadhan, R. (2011). Unifying Optimization Algorithms to Aid Software System Users: optimx for R., \emph{Journal of Statistical Software}, \bold{43}(9), 1--14.
#'
#' @rdname unitquantreg.control
#' @export
unitquantreg.control <- function(method = "BFGS", hessian = FALSE,
gradient = TRUE,
maxit = 5000, factr = 1e7,
reltol = sqrt(.Machine$double.eps),
trace = 0L, starttests = FALSE,
dowarn = FALSE,...) {
out <- list(
method = method,
hessian = hessian,
gradient = gradient,
maxit = maxit,
reltol = reltol,
factr = factr,
trace = trace,
dowarn = dowarn,
starttests = starttests
)
out <- c(out, list(...))
if (method == "L-BFGS-B") out$reltol <- out$abstol <- NULL
if (!is.null(out$fnscale)) warning("fnscale must not be modified")
if (!is.null(out$all.method)) {
out$all.method <- NULL
warning("it is no possible use all.method control from optimx")
}
out$fnscale <- 1
out
}
#' @name unitquantreg
#'
#' @title Parametric unit quantile regression models
#'
#' @description Fit a collection of parametric unit quantile regression model
#' by maximum likelihood using the log-likelihood function, the score vector
#' and the hessian matrix implemented in \code{C++}.
#'
#'
#' @param formula symbolic description of the quantile model like \code{y ~ x}
#' or \code{y ~ x | z}. See below for details.
#' @param tau numeric vector. The quantile(s) to be estimated, i.e.,
#' number between 0 and 1. If just one quantile is specified an object of class
#' \code{unitquantreg} is returned. If a numeric vector of values between 0 and 1
#' is specified an object of class \code{unitquantregs} is returned. See below for
#' details.
#' @param data data.frame contain the variables in the model.
#' @param subset an optional vector specifying a subset of observations to
#' be used in the fitting process.
#' @param na.action a function which indicates what should happen when the
#' data contain \code{NA}s.
#' @param family character. Specify the distribution family.
#' @param link character. Specify the link function in the quantile model.
#' Currently supported are \code{logit}, \code{probit}, \code{cloglog} and
#' \code{cauchit}. Default is \code{logit}.
#' @param link.theta character. Specify the link function in the shape model.
#' Currently supported are \code{identity}, \code{log} and \code{sqrt}.
#' Default is \code{log}.
#' @param start numeric vector. An optional vector with starting values for all parameters.
#' @param control list. Control arguments specified via \code{\link{unitquantreg.control}}.
#' @param x,y logical. If \code{TRUE} the corresponding components of the fit
#' (model frame, response, model matrix) are returned. For \code{\link{unitquantreg.fit}}
#' \code{y} should be the numeric response vector with values in (0,1).
#' @param X,Z numeric matrix. Regressor matrix for the quantile and shape model,
#' respectively. Default is constant shape model, i.e., \code{Z} is matrix with
#' column of ones.
#' @param model logical. Indicates whether model frame should be included as a
#' component of the returned value.
#'
#'
#' @details
#' The parameter estimation and inference are performed under the frequentist paradigm.
#' The \code{\link[optimx]{optimx}} R package is use, since allows different optimization
#' technique to maximize the log-likelihood function. The analytical score function are
#' use in the maximization and the standard errors are computed using the
#' analytical hessian matrix, both are implemented in efficient away using \code{C++}.
#'
#'
#' @return \code{\link{unitquantreg}} can return an object of
#' class \code{unitquantreg} if \code{tau} is a scalar, i.e., a list with
#' the following components.
#' \item{family}{the distribution family name.}
#' \item{coefficients}{a list with elements \code{"quantile"} and \code{"shape"}
#' containing the coefficients from the respective models.}
#' \item{fitted.values}{a list with elements \code{"quantile"} and \code{"shape"}
#' containing the fitted parameters from the respective models.}
#' \item{linear.predictors}{a list with elements \code{"quantile"} and \code{"shape"}
#' containing the fitted linear predictors from the respective models.}
#' \item{link}{a list with elements \code{"quantile"} and \code{"shape"}
#' containing the link objects from the respective models.}
#' \item{tau}{the quantile specify.}
#' \item{loglik}{log-likelihood of the fitted model.}
#' \item{gradient}{gradient evaluate at maximum likelihood estimates.}
#' \item{vcov}{covariance matrix of all parameters in the model.}
#' \item{nobs}{number of observations.}
#' \item{npar}{number of parameters.}
#' \item{df.residual}{residual degrees of freedom in the fitted model.}
#' \item{theta_const}{logical indicating if the \eqn{\theta} parameter was treated as nuisance parameter.}
#' \item{control}{the control parameters used to fit the model.}
#' \item{iterations}{number of iterations of optimization method.}
#' \item{converged}{logical, if \code{TRUE} indicates successful convergence.}
#' \item{kkt}{a list of logical \code{kkt1} and \code{kkt2} provide check on
#' Kuhn-Karush-Tucker conditions, first-order KKT test (\code{kkt1}) checks
#' whether the gradient at the final parameters estimates is "small" and the
#' second-order KKT test (\code{kkte}) checks whether the Hessian at the final
#' parameters estimates is positive definite.}
#' \item{elapsed_time}{time elapsed to fit the model.}
#' \item{call}{the original function call.}
#' \item{formula}{the original model formula.}
#' \item{terms}{a list with elements \code{"quantile"}, \code{"shape"} and
#' \code{"full"} containing the \code{terms} objects for the respective models.}
#' \item{model}{the full model frame, if \code{model = TRUE}.}
#' \item{y}{the response vector, if \code{y = TRUE}.}
#' \item{x}{a list with elements \code{"quantile"} and \code{"shape"}
#' containing the model matrices from the respective models, if \code{x = TRUE}.}
#'
#' While \code{\link{unitquantreg.fit}} returns an unclassed list with
#' components up to \code{elapsed_time}.
#'
#' If \code{tau} is a numeric vector with length greater than one an object of
#' class \code{unitquantregs} is returned, which consist of list of objects of
#' class \code{unitquantreg} for each specified quantiles.
#'
#' @author André F. B. Menezes
#'
#' @importFrom stats make.link model.frame model.matrix model.response na.omit delete.response terms
#' @importFrom Formula as.Formula Formula
#' @importFrom numDeriv jacobian
NULL
#' @rdname unitquantreg
#' @export
unitquantreg <- function(formula, data, subset, na.action, tau, family,
link = c("logit", "probit", "cloglog", "cauchit"),
link.theta = c("identity", "log", "sqrt"),
start = NULL, control = unitquantreg.control(),
model = TRUE, x = FALSE, y = TRUE) {
# Call
cl <- match.call()
if (missing(data)) data <- environment(formula)
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "na.action"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf$drop.unused.levels <- TRUE
# Formula
formula <- Formula::as.Formula(formula)
if (length(formula)[2L] < 2L) {
formula <- Formula::as.Formula(formula(formula), ~ 1)
simple_formula <- TRUE
} else {
if (length(formula)[2L] > 2L) {
formula <- Formula::Formula(formula(formula, rhs = 1:2))
warning("formula must not have more than two RHS parts")
simple_formula <- FALSE
}
}
mf$formula <- formula
# Evaluate model.frame
mf[[1L]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
# Extract terms, model matrix, response
mt <- terms(formula, data = data)
mtX <- terms(formula, data = data, rhs = 1L)
mtZ <- delete.response(terms(formula, data = data, rhs = 2L))
X <- model.matrix(mtX, mf)
Z <- model.matrix(mtZ, mf)
Y <- model.response(mf, "numeric")
p <- ncol(X)
q <- ncol(Z)
# Sanity check for response variable
if (!(min(Y) > 0 & max(Y) < 1)) {
stop("invalid dependent variable, all observations must be in (0, 1)")
}
# Get family name
family <- match.arg(family, .families)
# Link functions
link <- match.arg(link)
if (is.null(link.theta)) {
link.theta <- if (simple_formula) "identity" else "log"
}
else {
link.theta <- match.arg(link.theta)
}
# The workhorse: unitquantreg.fit()
# Vectorizing in tau (quantile)
names(tau) <- tau
out <- lapply(tau, function(t) {
fit <- unitquantreg.fit(y = Y, X = X, Z = Z, tau = t, family = family,
link = link, link.theta = link.theta, start = start,
control = control)
# Output
fit$call <- cl
fit$formula <- formula
fit$terms <- list(quantile = mtX, shape = mtZ, full = mt)
if (model) fit$model <- mf
if (y) fit$y <- Y
if (x) fit$x <- list(quantile = X, shape = Z)
class(fit) <- "unitquantreg"
fit
})
if (length(tau) == 1L) {
out <- out[[1L]]
} else {
class(out) <- "unitquantregs"
}
out
}
#' @rdname unitquantreg
#' @export
unitquantreg.fit <- function(y, X, Z = NULL, tau, family, link, link.theta,
start = NULL, control = unitquantreg.control()) {
linkobj <- make.link(link)
linkobj.theta <- make.link(link.theta)
ocontrol <- control
method <- control$method
hessian <- control$hessian
gradient <- control$gradient
control$method <- control$hessian <- control$gradient <- NULL
family_name <- .get_family_name(family)
n <- length(y)
if (is.null(Z)) {
Z <- matrix(1, ncol = 1, nrow = n)
colnames(Z) <- "(Intercept)"
rownames(Z) <- rownames(X)
}
p <- ncol(X)
q <- ncol(Z)
theta_const <- (q == 1L) && isTRUE(all.equal(as.vector(Z[, 1L]), rep.int(1, n)))
# Initial guess
if (is.null(start)) {
# For beta
ystar <- linkobj$linkfun(y)
reg_ini <- suppressWarnings(quantreg::rq.fit(X, ystar, tau = tau))
start <- reg_ini$coefficients
names(start) <- colnames(X)
# For theta/gamma
gs <- if (link.theta == "log") 0.1 else 1.1
gamma <- rep(gs, length.out = q)
start <- c(start, gamma)
names(start)[1:q + p] <- colnames(Z)
}
gradfun <- if (gradient) score_unitquantreg else NULL
# Maximization
opt <- optimx::optimx(par = start, fn = loglike_unitquantreg, gr = gradfun,
method = method, hessian = hessian,
control = control, X = X, Z = Z, y = y, tau = tau,
family = family, linkobj = linkobj,
linkobj.theta = linkobj.theta)
par <- stats::coef(opt)
# Check if the optimization converged
if (opt$convcode == 0L) {
converged <- TRUE
} else {
converged <- FALSE
warning("optimization failed to converge")
}
# Get number of iterations
# it <- na.omit(opt$count)
# it <- it[length(it)]
it <- opt$fevals
# Estimated coefficients
mu.coefficients <- par[1:p]
theta.coefficients <- par[1:q + p]
# Fitted values
linear.predictors.mu <- drop(X %*% mu.coefficients)
linear.predictors.theta <- drop(Z %*% theta.coefficients)
fitted.mu <- linkobj$linkinv(linear.predictors.mu)
fitted.theta <- linkobj.theta$linkinv(linear.predictors.theta)
# Parms
parms <- list(par = par, tau = tau, family = family,
linkobj = linkobj, linkobj.theta = linkobj.theta,
X = X, Z = Z, y = y)
# Get the hessian matrix
if (!hessian) {
he <- do.call(hessian_unitquantreg, parms)
} else {
he <- try(numDeriv::jacobian(func = gradfun, x = par,
tau = tau, family = family,
linkobj = linkobj,
linkobj.theta = linkobj.theta,
X = X, Z = Z, y = y), silent = TRUE)
if (inherits(he, "try-error"))
stop("Unable to compute Hessian using numDeriv::jacobin")
}
# Check hessian matrix to compute vcov (Observed Fisher Information) matrix
chol_he <- tryCatch(chol(he), error = function(e) NULL)
if (!is.null(chol_he)) {
vcov <- chol2inv(chol_he)
} else {
warning("Moore-Penrose inverse is used in covariance matrix.\n",
"The final Hessian matrix is full rank but has at least one negative eigenvalue.\n",
"Second-order optimality condition violated.", noBreaks. = TRUE)
vcov <- MASS::ginv(he)
}
# Log-likelihood
ll <- -1L * do.call(loglike_unitquantreg, parms)
# Gradient
grad <- if (!is.null(gradfun)) -1L * do.call(gradfun, parms) else NULL
# Names
if (theta_const) {
if (link.theta == "identity") theta_name <- "theta"
else theta_name <- paste0(link.theta, "(theta)")
} else {
theta_name <- paste0("(theta)", "_", colnames(Z))
}
names(mu.coefficients) <- colnames(X)
names(theta.coefficients) <- theta_name
rownames(vcov) <- colnames(vcov) <- c(colnames(X), theta_name)
if (!is.null(gradfun)) names(grad) <- c(names(mu.coefficients), names(theta.coefficients))
# Output
out <- list(
family = family_name,
coefficients = list(mu = mu.coefficients, theta = theta.coefficients),
fitted.values = list(mu = drop(fitted.mu), theta = drop(fitted.theta)),
linear.predictors = list(mu = drop(linear.predictors.mu),
theta = drop(linear.predictors.theta)),
link = list(mu = linkobj, theta = linkobj.theta),
tau = tau,
loglik = ll,
gradient = grad,
vcov = vcov,
nobs = length(y),
npar = length(par),
df.residual = length(y) - length(par),
theta_const = theta_const,
control = ocontrol,
iterations = it,
converged = converged,
kkt = c("kkt1" = opt$kkt1, "kkt2" = opt$kkt2),
elapsed_time = opt$xtime
)
out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.