R/fitting-unitquantreg.R
In unitquantreg: Parametric Quantile Regression Models for Bounded Data

Documented in unitquantreg unitquantreg.control unitquantreg.fit

#' @name unitquantreg.control
#'
#' @title Control parameters for unit quantile regression
#'
#' @description Auxiliary function that control fitting of unit quantile
#' regression models using \code{\link{unitquantreg}}.
#'
#' @author André F. B. Menezes
#'
#' @param method string. Specify the method argument passed to \code{\link[optimx]{optimx}}.
#' @param hessian logical. Should use the numerically Hessian matrix to compute
#' variance-covariance? Default is \code{FALSE}, i.e., use the analytic
#' Hessian.
#' @param gradient logical. Should use the analytic gradient? Default is
#' \code{TRUE}.
#' @param maxit integer. Specify the maximal number of iterations passed to
#' \code{optimx}.
#' @param factr numeric.Controls the convergence of the \code{"L-BFGS-B"} method.
#' @param reltol numeric. Relative convergence tolerance passed to \code{\link[optimx]{optimx}}.
#' @param trace non-negative integer. If positive, tracing information on the
#' progress of the optimization is produced.
#' @param starttests logical. Should \code{\link[optimx]{optimx}} run tests of the functions
#' and parameters? Default is \code{FALSE}.
#' @param dowarn logical. Show warnings generated by \code{\link[optimx]{optimx}}? Default is
#' \code{FALSE}.
#' @param ... arguments passed to \code{\link[optimx]{optimx}}.
#'
#'
#' @details The \code{control} argument of
#' \code{\link{unitquantreg}} uses the arguments of
#' \code{\link{unitquantreg.control}}. In particular, the
#' parameters in \code{\link{unitquantreg}} are estimated by
#' maximum likelihood using the \code{\link[optimx]{optimx}}, which is a
#' general-purpose optimization wrapper function that calls other R tools for
#' optimization, including the existing \code{\link[stats]{optim}} function.
#' The main advantage of \code{\link[optimx]{optimx}} is to unify the tools
#' allowing a number of different optimization methods and provide sanity checks.
#'
#'
#' @seealso \code{\link[optimx]{optimx}} for more details about control
#' parameters and \code{\link{unitquantreg.fit}} the fitting
#' procedure used by \code{\link{unitquantreg}}.
#'
#' @return A list with components named as the arguments.
#'
#' @examples
#' data(sim_bounded, package = "unitquantreg")
#' sim_bounded_curr <- sim_bounded[sim_bounded$family == "uweibull", ]
#'
#' # Fitting using the analytical gradient
#' fit_gradient <- unitquantreg(formula = y1 ~ x,
#'                              data = sim_bounded_curr, tau = 0.5,
#'                              family = "uweibull",
#'                              control = unitquantreg.control(gradient = TRUE,
#'                                                             trace = 1))
#'
#' # Fitting without using the analytical gradient
#' fit_nogradient <- unitquantreg(formula = y1 ~ x,
#'                              data = sim_bounded_curr, tau = 0.5,
#'                              family = "uweibull",
#'                              control = unitquantreg.control(gradient = FALSE,
#'                                                             trace = 1))
#' # Compare estimated coefficients
#' cbind(gradient = coef(fit_gradient), no_gradient = coef(fit_nogradient))
#'
#' @references Nash, J. C. and Varadhan, R. (2011). Unifying Optimization Algorithms to Aid Software System Users: optimx for R., \emph{Journal of Statistical Software}, \bold{43}(9), 1--14.
#'
#' @rdname unitquantreg.control
#' @export
unitquantreg.control <- function(method = "BFGS", hessian = FALSE,
                                 gradient = TRUE,
                                 maxit = 5000, factr = 1e7,
                                 reltol = sqrt(.Machine$double.eps),
                                 trace = 0L, starttests = FALSE,
                                 dowarn = FALSE,...) {
  out <- list(
    method = method,
    hessian = hessian,
    gradient = gradient,
    maxit = maxit,
    reltol = reltol,
    factr = factr,
    trace = trace,
    dowarn = dowarn,
    starttests = starttests
  )
  out <- c(out, list(...))
  if (method == "L-BFGS-B") out$reltol <- out$abstol <- NULL
  if (!is.null(out$fnscale)) warning("fnscale must not be modified")
  if (!is.null(out$all.method)) {
    out$all.method <- NULL
    warning("it is no possible use all.method control from optimx")
  }
  out$fnscale <- 1
  out
}

#' @name unitquantreg
#'
#' @title Parametric unit quantile regression models
#'
#' @description Fit a collection of parametric unit quantile regression model
#' by maximum likelihood using the log-likelihood function, the score vector
#' and the hessian matrix implemented in \code{C++}.
#'
#'
#' @param formula symbolic description of the quantile model like \code{y ~ x}
#' or \code{y ~ x | z}. See below for details.
#' @param tau numeric vector. The quantile(s) to be estimated, i.e.,
#' number between 0 and 1. If just one quantile is specified an object of class
#' \code{unitquantreg} is returned. If a numeric vector of values between 0 and 1
#' is specified an object of class \code{unitquantregs} is returned. See below for
#' details.
#' @param data data.frame contain the variables in the model.
#' @param subset an optional vector specifying a subset of observations to
#' be used in the fitting process.
#' @param na.action a function which indicates what should happen when the
#' data contain \code{NA}s.
#' @param family character. Specify the distribution family.
#' @param link character. Specify the link function in the quantile model.
#' Currently supported are \code{logit}, \code{probit}, \code{cloglog} and
#' \code{cauchit}. Default is \code{logit}.
#' @param link.theta character. Specify the link function in the shape model.
#' Currently supported are \code{identity}, \code{log} and \code{sqrt}.
#' Default is \code{log}.
#' @param start numeric vector. An optional vector with starting values for all parameters.
#' @param control list. Control arguments specified via \code{\link{unitquantreg.control}}.
#' @param x,y logical. If \code{TRUE} the corresponding components of the fit
#' (model frame, response, model matrix) are returned. For \code{\link{unitquantreg.fit}}
#' \code{y} should be the numeric response vector with values in (0,1).
#' @param X,Z numeric matrix. Regressor matrix for the quantile and shape model,
#' respectively. Default is constant shape model, i.e., \code{Z} is matrix with
#' column of ones.
#' @param model logical. Indicates whether model frame should be included as a
#' component of the returned value.
#'
#'
#' @details
#' The parameter estimation and inference are performed under the frequentist paradigm.
#' The \code{\link[optimx]{optimx}} R package is use, since allows different optimization
#' technique to maximize the log-likelihood function.  The analytical score function are
#' use in the maximization and the standard errors are computed using the
#' analytical hessian matrix, both are implemented in efficient away using \code{C++}.
#'
#'
#' @return \code{\link{unitquantreg}} can return an object of
#' class \code{unitquantreg} if \code{tau} is a scalar, i.e., a list with
#' the following components.
#' \item{family}{the distribution family name.}
#' \item{coefficients}{a list with elements \code{"quantile"} and \code{"shape"}
#' containing the coefficients from the respective models.}
#' \item{fitted.values}{a list with elements \code{"quantile"} and \code{"shape"}
#' containing the fitted parameters from the respective models.}
#' \item{linear.predictors}{a list with elements \code{"quantile"} and \code{"shape"}
#' containing the fitted linear predictors from the respective models.}
#' \item{link}{a list with elements \code{"quantile"} and \code{"shape"}
#' containing the link objects from the respective models.}
#' \item{tau}{the quantile specify.}
#' \item{loglik}{log-likelihood of the fitted model.}
#' \item{gradient}{gradient evaluate at maximum likelihood estimates.}
#' \item{vcov}{covariance matrix of all parameters in the model.}
#' \item{nobs}{number of observations.}
#' \item{npar}{number of parameters.}
#' \item{df.residual}{residual degrees of freedom in the fitted model.}
#' \item{theta_const}{logical indicating if the \eqn{\theta} parameter was treated as nuisance parameter.}
#' \item{control}{the control parameters used to fit the model.}
#' \item{iterations}{number of iterations of optimization method.}
#' \item{converged}{logical, if \code{TRUE} indicates successful convergence.}
#' \item{kkt}{a list of logical  \code{kkt1} and \code{kkt2} provide check on
#' Kuhn-Karush-Tucker conditions, first-order KKT test (\code{kkt1}) checks
#' whether the gradient at the final parameters estimates is "small" and the
#' second-order KKT test (\code{kkte}) checks whether the Hessian at the final
#' parameters estimates is positive definite.}
#' \item{elapsed_time}{time elapsed to fit the model.}
#' \item{call}{the original function call.}
#' \item{formula}{the original model formula.}
#' \item{terms}{a list with elements \code{"quantile"}, \code{"shape"} and
#' \code{"full"} containing the \code{terms} objects for the respective models.}
#' \item{model}{the full model frame, if \code{model = TRUE}.}
#' \item{y}{the response vector, if \code{y = TRUE}.}
#' \item{x}{a list with elements \code{"quantile"} and \code{"shape"}
#' containing the model matrices from the respective models, if \code{x = TRUE}.}
#'
#' While \code{\link{unitquantreg.fit}} returns an unclassed list with
#' components up to \code{elapsed_time}.
#'
#' If \code{tau} is a numeric vector with length greater than one an object of
#' class \code{unitquantregs} is returned, which consist of list of objects of
#' class \code{unitquantreg} for each specified quantiles.
#'
#' @author André F. B. Menezes
#'
#' @importFrom stats make.link model.frame model.matrix model.response na.omit delete.response terms
#' @importFrom Formula as.Formula Formula
#' @importFrom numDeriv jacobian
NULL

#' @rdname unitquantreg
#' @export
unitquantreg <- function(formula, data, subset, na.action, tau, family,
                         link = c("logit", "probit", "cloglog", "cauchit"),
                         link.theta = c("identity", "log", "sqrt"),
                         start = NULL, control = unitquantreg.control(),
                         model = TRUE, x = FALSE, y = TRUE) {

  # Call
  cl <- match.call()
  if (missing(data)) data <- environment(formula)
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "subset", "na.action"), names(mf), 0L)
  mf <- mf[c(1L, m)]
  mf$drop.unused.levels <- TRUE

  # Formula
  formula <- Formula::as.Formula(formula)
  if (length(formula)[2L] < 2L) {
    formula <- Formula::as.Formula(formula(formula), ~ 1)
    simple_formula <- TRUE
  } else {
    if (length(formula)[2L] > 2L) {
      formula <- Formula::Formula(formula(formula, rhs = 1:2))
      warning("formula must not have more than two RHS parts")
      simple_formula <- FALSE
    }
  }
  mf$formula <- formula

  # Evaluate model.frame
  mf[[1L]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())

  # Extract terms, model matrix, response
  mt <- terms(formula, data = data)
  mtX <- terms(formula, data = data, rhs = 1L)
  mtZ <- delete.response(terms(formula, data = data, rhs = 2L))
  X <- model.matrix(mtX, mf)
  Z <- model.matrix(mtZ, mf)
  Y <- model.response(mf, "numeric")

  p <- ncol(X)
  q <- ncol(Z)

  # Sanity check for response variable
  if (!(min(Y) > 0 & max(Y) < 1)) {
    stop("invalid dependent variable, all observations must be in (0, 1)")
  }

  # Get family name
  family <- match.arg(family, .families)

  # Link functions
  link <- match.arg(link)

  if (is.null(link.theta)) {
    link.theta <- if (simple_formula) "identity" else "log"
  }
  else {
    link.theta <- match.arg(link.theta)
  }

  # The workhorse: unitquantreg.fit()

  # Vectorizing in tau (quantile)
  names(tau) <- tau
  out <- lapply(tau, function(t) {
    fit <- unitquantreg.fit(y = Y, X = X, Z = Z, tau = t, family = family,
                            link = link, link.theta = link.theta, start = start,
                            control = control)

    # Output
    fit$call <- cl
    fit$formula <- formula
    fit$terms <- list(quantile = mtX, shape = mtZ, full = mt)
    if (model) fit$model <- mf
    if (y) fit$y <- Y
    if (x) fit$x <- list(quantile = X, shape = Z)
    class(fit) <- "unitquantreg"
    fit
  })
  if (length(tau) == 1L) {
    out <- out[[1L]]
  } else {
    class(out) <- "unitquantregs"
  }

  out
}

#' @rdname unitquantreg
#' @export
unitquantreg.fit <- function(y, X, Z = NULL, tau, family, link, link.theta,
                             start = NULL, control = unitquantreg.control()) {

  linkobj <- make.link(link)
  linkobj.theta <- make.link(link.theta)
  ocontrol <- control
  method <- control$method
  hessian <- control$hessian
  gradient <- control$gradient
  control$method <- control$hessian <- control$gradient <- NULL
  family_name <- .get_family_name(family)

  n <- length(y)
  if (is.null(Z)) {
    Z <- matrix(1, ncol = 1, nrow = n)
    colnames(Z) <- "(Intercept)"
    rownames(Z) <- rownames(X)
  }
  p <- ncol(X)
  q <- ncol(Z)
  theta_const <- (q == 1L) && isTRUE(all.equal(as.vector(Z[, 1L]), rep.int(1, n)))

  # Initial guess
  if (is.null(start)) {
    # For beta
    ystar <- linkobj$linkfun(y)
    reg_ini <- suppressWarnings(quantreg::rq.fit(X, ystar, tau = tau))
    start <- reg_ini$coefficients
    names(start) <- colnames(X)

    # For theta/gamma
    gs <- if (link.theta == "log") 0.1 else 1.1
    gamma <- rep(gs, length.out = q)
    start <- c(start, gamma)
    names(start)[1:q + p] <- colnames(Z)
  }

  gradfun <- if (gradient) score_unitquantreg else NULL

  # Maximization
  opt <- optimx::optimx(par = start, fn = loglike_unitquantreg, gr = gradfun,
                        method = method, hessian = hessian,
                        control = control, X = X, Z = Z, y = y, tau = tau,
                        family = family, linkobj = linkobj,
                        linkobj.theta = linkobj.theta)
  par <- stats::coef(opt)

  # Check if the optimization converged
  if (opt$convcode == 0L) {
    converged <- TRUE
  } else {
    converged <- FALSE
    warning("optimization failed to converge")
  }

  # Get number of iterations
  # it <- na.omit(opt$count)
  # it <- it[length(it)]
  it <- opt$fevals

  # Estimated coefficients
  mu.coefficients <- par[1:p]
  theta.coefficients <- par[1:q + p]

  # Fitted values
  linear.predictors.mu <- drop(X %*% mu.coefficients)
  linear.predictors.theta <- drop(Z %*% theta.coefficients)
  fitted.mu <- linkobj$linkinv(linear.predictors.mu)
  fitted.theta <- linkobj.theta$linkinv(linear.predictors.theta)

  # Parms
  parms <- list(par = par, tau = tau, family = family,
                linkobj = linkobj, linkobj.theta = linkobj.theta,
                X = X, Z = Z, y = y)

  # Get the hessian matrix
  if (!hessian) {
    he <- do.call(hessian_unitquantreg, parms)
  } else {
    he <- try(numDeriv::jacobian(func = gradfun, x = par,
                                 tau = tau, family = family,
                                 linkobj = linkobj,
                                 linkobj.theta = linkobj.theta,
                                 X = X, Z = Z, y = y), silent = TRUE)
    if (inherits(he, "try-error"))
      stop("Unable to compute Hessian using numDeriv::jacobin")

  }

  # Check hessian matrix to compute vcov (Observed Fisher Information) matrix
  chol_he <- tryCatch(chol(he), error = function(e) NULL)
  if (!is.null(chol_he)) {
    vcov <- chol2inv(chol_he)
  } else {
    warning("Moore-Penrose inverse is used in covariance matrix.\n",
            "The final Hessian matrix is full rank but has at least one negative eigenvalue.\n",
            "Second-order optimality condition violated.", noBreaks. = TRUE)
    vcov <- MASS::ginv(he)
  }

  # Log-likelihood
  ll <- -1L * do.call(loglike_unitquantreg, parms)
  # Gradient
  grad <- if (!is.null(gradfun)) -1L * do.call(gradfun, parms) else NULL

  # Names
  if (theta_const) {
    if (link.theta == "identity") theta_name <- "theta"
    else theta_name <- paste0(link.theta, "(theta)")
  } else {
    theta_name <- paste0("(theta)", "_", colnames(Z))
  }
  names(mu.coefficients) <- colnames(X)
  names(theta.coefficients) <- theta_name
  rownames(vcov) <- colnames(vcov) <- c(colnames(X), theta_name)
  if (!is.null(gradfun)) names(grad) <- c(names(mu.coefficients), names(theta.coefficients))


  # Output
  out <- list(
    family = family_name,
    coefficients = list(mu = mu.coefficients, theta = theta.coefficients),
    fitted.values = list(mu = drop(fitted.mu), theta = drop(fitted.theta)),
    linear.predictors = list(mu = drop(linear.predictors.mu),
                             theta = drop(linear.predictors.theta)),
    link = list(mu = linkobj, theta = linkobj.theta),
    tau = tau,
    loglik = ll,
    gradient = grad,
    vcov = vcov,
    nobs = length(y),
    npar = length(par),
    df.residual = length(y) - length(par),
    theta_const = theta_const,
    control = ocontrol,
    iterations = it,
    converged = converged,
    kkt = c("kkt1" = opt$kkt1, "kkt2" = opt$kkt2),
    elapsed_time = opt$xtime
  )

  out
}