R/regL1.R

Defines functions class_to_regL1 class_to_rq print.regL1 summary.regL1 print.summary.regL1

Documented in class_to_regL1 class_to_rq print.regL1 print.summary.regL1 summary.regL1

#main author: Kévin Allan Sales Rodrigues

#' Fitting Linear L1 Models
#'
#' This function fits an L1 regression model using the \code{\link{rq}} function from the 'quantreg' package. L1 regression allows dealing with outliers and non-normal distributions in the data.
#'
#' @param formula a formula object, with the response on the left of a ~ operator, and the terms, separated by + operators, on the right.
#' @param data a data.frame in which to interpret the variables named in the formula, or in the subset and the weights argument. If this is missing, then the variables in the formula should be on the search list. This may also be a single number to handle some special cases – see below for details.
#' @param subset an optional vector specifying a subset of observations to be used in the fitting process.
#' @param weights vector of observation weights; if supplied, the algorithm fits to minimize the sum of the weights multiplied into the absolute residuals. The length of weights must be the same as the number of observations. The weights must be nonnegative and it is strongly recommended that they be strictly positive, since zero weights are ambiguous.
#' @param na.action a function to filter missing data. This is applied to the model.frame after any subset argument has been used. The default (with na.fail) is to create an error if any missing values are found. A possible alternative is na.omit, which deletes observations that contain one or more missing values.
#' @param method the algorithmic method used to compute the fit. There are several options: "br", "fn", "pfn", "sfn", "fnc", "conquer", "pfnb", "qfnb", "ppro" and "lasso". See \code{\link{rq}} for more details.
#' @param model if TRUE then the model frame is returned. This is essential if one wants to call summary subsequently.
#' @param contrasts a list giving contrasts for some or all of the factors default = NULL appearing in the model formula. The elements of the list should have the same name as the variable and should be either a contrast matrix (specifically, any full-rank matrix with as many rows as there are levels in the factor), or else a function to compute such a matrix given the number of levels.
#' @param ... additional arguments for the fitting routines (see \code{\link{rq.fit.br}} and \code{\link{rq.fit.fnb}}, etc. and the functions they call).
#'
#' @returns A fitted L1 linear regression model object.
#'
#' @importFrom methods as
#' @importFrom stats model.weights model.response .getXlevels
#' @import quantreg
#' @importFrom MatrixModels model.Matrix
#' @import Matrix
#'
#' @export
#'
#' @details L1 regression is an important particular case of quantile regression, so this function inherits from the "rq" class of the \code{quantreg} package.
#'
#' @examples
#' set.seed(123)
#' x = matrix(rnorm(100), ncol = 2)
#' y = x[, 1] + x[, 2] + rlaplace(50, 0, 5)
#'
#' # Fits a linear regression L1 model
#' mod1 = regL1(y ~ x)
#'
# @importFrom stats as.formula
# @importFrom stats na.omit
# Define a constructor function for the "regL1" class
regL1 = function (formula, data, subset, weights, na.action, method = "br",
                  model = TRUE, contrasts = NULL, ...){
  tau = 0.5 # to L1 regression
  call <- match.call()
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "subset", "weights", "na.action"), names(mf), 0)
  mf <- mf[c(1,m)]
  mf$drop.unused.levels <- TRUE
  mf[[1]] <- as.name("model.frame")
  mf <- eval.parent(mf)
  if(method == "model.frame")return(mf)
  mt <- attr(mf, "terms")
  weights <- as.vector(stats::model.weights(mf))
  tau <- sort(unique(tau))
  eps <- .Machine$double.eps^(2/3)
  if (any(tau == 0)) tau[tau == 0] <- eps
  if (any(tau == 1)) tau[tau == 1] <- 1 - eps
  Y <- stats::model.response(mf)
  if(method == "sfn"){
    if(requireNamespace("MatrixModels", quietly = TRUE)
       && requireNamespace("Matrix", quietly = TRUE)){
      X <- MatrixModels::model.Matrix(mt, data, sparse = TRUE)
      vnames <- dimnames(X)[[2]]
      X <- methods::as(X ,"matrix.csr")
      mf$x <- X
    }
  }
  else{
    X <- stats::model.matrix(mt, mf, contrasts)
    vnames <- dimnames(X)[[2]]
  }
  Rho <- function(u,tau) u * (tau - (u < 0))
  if (length(tau) > 1) {
    if (any(tau < 0) || any(tau > 1))
      stop("invalid tau:  taus should be >= 0 and <= 1")
    coef <- matrix(0, ncol(X), length(tau))
    rho <- rep(0, length(tau))
    if(!(method %in% c("ppro","qfnb","pfnb"))){
      fitted <- resid <- matrix(0, nrow(X), length(tau))
      for(i in 1:length(tau)){
        z <- {if (length(weights))
          quantreg::rq.wfit(X, Y, tau = tau[i], weights, method, ...)
          else quantreg::rq.fit(X, Y, tau = tau[i], method, ...)
        }
        coef[,i] <- z$coefficients
        resid[,i] <- z$residuals
        rho[i] <- sum(Rho(z$residuals,tau[i]))
        fitted[,i] <- Y - z$residuals
      }
      taulabs <- paste("tau=",format(round(tau,3)))
      dimnames(coef) <- list(vnames, taulabs)
      dimnames(resid)[[2]] <- taulabs
      fit <- z
      fit$coefficients <-  coef
      fit$residuals <- resid
      fit$fitted.values <- fitted
      if(method == "lasso") class(fit) <- c("lassorqs","rqs")
      else if(method == "scad") class(fit) <- c("scadrqs","rqs")
      else class(fit) <- "rqs"
    }
    else if(method == "pfnb"){ # Preprocessing in fortran loop
      fit <- quantreg::rq.fit.pfnb(X, Y, tau)
      class(fit) = "rqs"
    }
    else if(method == "qfnb"){ # simple fortran loop method
      fit <- quantreg::rq.fit.qfnb(X, Y, tau)
      class(fit) = ifelse(length(tau) == 1,"rq","rqs")
    }
    else if(method == "ppro"){ # Preprocessing method in R
      fit <- quantreg::rq.fit.ppro(X, Y, tau, ...)
      class(fit) = ifelse(length(tau) == 1,"rq","rqs")
    }
  }
  else{
    process <- (tau < 0 || tau > 1)
    if(process && method != "br")
      stop("when tau not in [0,1] method br must be used")
    fit <- {
      if(length(weights))
        quantreg::rq.wfit(X, Y, tau = tau, weights, method, ...)
      else quantreg::rq.fit(X, Y, tau = tau, method, ...)
    }
    if(process)
      rho <- list(tau = fit$sol[1,], rho = fit$sol[3,])
    else {
      if(length(dim(fit$residuals)))
        dimnames(fit$residuals) <- list(dimnames(X)[[1]],NULL)
      rho <-  sum(Rho(fit$residuals,tau))
    }
    if(method == "lasso") class(fit) <- c("lassorq","rq")
    else if(method == "scad") class(fit) <- c("scadrq","rq")
    else class(fit) <- ifelse(process, "rq.process", "rq")
  }
  fit$na.action <- attr(mf, "na.action")
  fit$formula <- formula
  fit$terms <- mt
  fit$xlevels <- stats::.getXlevels(mt,mf)
  fit$call <- call
  fit$tau <- tau
  fit$weights <- weights
  fit$residuals <- drop(fit$residuals)
  fit$rho <- rho
  fit$method <- method
  fit$fitted.values <- drop(fit$fitted.values)

  attr(fit, "na.message") <- attr(m, "na.message")
  if(model) fit$model <- mf
  #fit
  ### quantreg::rq END  ### my code START

  fit$call_rq = fit$call
  fit$call = call
  fit$SAE = sum(abs(fit$residuals))
  fit$N = length(fit$y)
  fit$MAE = fit$SAE / fit$N # lambda_mle

  # lambda_ros calculations | begin
  res = cbind(sort(as.numeric(fit$residuals)))
  res1 = matrix(0, nrow(res) -ncol(fit$x),1)
  n = nrow(fit$x) - ncol(fit$x)
  m = (n+1)/2 -sqrt(n)
  j=1
  for(i in 1:nrow(res)) if(res[i] !=0) ((res1[j] = res[i]) & (j = j+1))
  pos1 = n-m +1
  pos2 = m
  e1 = round(pos1)
  e2 = round(pos2)
  if (e1 > n) (e1 = n)
  if(e2 == 0) (e2 = 1)
  lambda_robust = sqrt(n)*(res[e1] -res1[e2])/4
  # lambda_ros calculations | end

  fit$lambda_ros = lambda_robust

  #model = structure(model, class = c('regL1', 'rq')) # alterando a classe
  class(fit) = "regL1"
  # Create an object of class "regL1"

  return(fit)
}


#' Print Linear L1 Regression Summary Object
#'
#' Print summary of linear L1 regression object.
#'
#'
#' @param x This is an object of class "\code{summary.regL1}" produced by a call to \code{summary.regL1()}.
#' @param digits Significant digits reported in the printed table.
#' @param ... Optional arguments.
#' @returns No return value, called for side effects.
#'
#' @export
#'
#' @import greekLetters
#' @method print summary.regL1
#' @rdname print.summary.regL1
print.summary.regL1 = function(x, digits = max(5, .Options$digits - 2), ...)
  {
    cat("\nCall: ")
    dput(x$call)
    coef <- x$coef
    ## df <- x$df
    ## rdf <- x$rdf
    #tau <- x$tau
    #cat("\ntau: ")
    #print(format(round(tau,digits = digits)), quote = FALSE, ...)

    # important statistics
    SAE = x$SAE
    N = x$N
    MAE = x$MAE
    lambda_ROS = x$lambda_ros
    cat("\n", greekLetters::greeks('lambda'),"(MLE) | MAE: ", format(round(MAE,digits = digits)), "    SAE: ", format(round(SAE,digits = digits)), "     N: ", N, sep = "")
    cat("\n", greekLetters::greeks('lambda'),"(ROS)      : ", format(round(lambda_ROS,digits = digits)), sep ="")

    cat("\n\nCoefficients:\n")
    print(format(round(coef, digits = digits)), quote = FALSE, ...)
    invisible(x)
  }


#' Summary methods for L1 Regression
#'
#' Returns a summary list for a L1 regression fit. A null value will be returned if printing is invoked.
#'
#'
#' @param object Object returned from regL1 representing the fit of the L1 model.
#' @param se specifies the method used to compute standard standard errors. There are currently seven available methods: "rank", "iid", "nid", "ker", "boot", "BLB", "conquer" and "extreme".
#' @param covariance logical flag to indicate whether the full covariance matrix of the estimated parameters should be returned.
#' @param hs Use Hall Sheather bandwidth for sparsity estimation If false revert to Bofinger bandwidth.
#' @param U Resampling indices or gradient evaluations used for bootstrap, see \code{\link{summary.rq}} and \code{\link{boot.rq}} for more details.
#' @param gamma parameter controlling the effective sample size of the'bag of little bootstrap samples that will be b = n^gamma where n is the sample size of the original model.
#' @param ... Optional arguments. See \code{\link{summary.rq}} for more details.
#'
#' @returns No return value, called for side effects.
#' @export
#'
#' @import quantreg
#' @importFrom utils modifyList
#' @importFrom stats coefficients dnorm qnorm pt quantile terms var model.matrix
#' @importFrom conquer conquer
#'
#' @method summary regL1
#' @rdname summary.regL1
#'
#' @seealso
#' \code{\link{regL1}} for fitting linear L1 models.
#' \code{\link{summary.rq}} summary methods for Quantile Regression.
#'
# Define a summary method for the "regL1" class
summary.regL1 = function(object, se = NULL, covariance = FALSE, hs = TRUE, U = NULL, gamma = 0.7, ...){
  class(object) = "rq" # voltando a classe do objeto
  if (object$method == "lasso")
    stop("no inference for lasso'd rq fitting: try rqss (if brave, or credulous)")
  if (object$method == "conquer")
    se = "conquer"
  mt <- stats::terms(object)
  m <- model.frame(object)
  y <- model.response(m)
  dots <- list(...)
  method <- object$method
  if (object$method == "sfn") {
    x <- object$model$x
    vnames <- names(object$coef)
    ctrl <- object$control
  }
  else { # can be Matrix:: em vez de stats::
    x <- stats::model.matrix(mt, m, contrasts = object$contrasts)
    vnames <- dimnames(x)[[2]]
  }
  wt <- as.vector(model.weights(object$model))
  tau <- object$tau
  eps <- .Machine$double.eps^(1/2)
  coef <- stats::coefficients(object)
  if (is.matrix(coef))
    coef <- coef[, 1]
  resid <- object$residuals

  n <- length(y)
  p <- length(coef)
  rdf <- n - p
  if (!is.null(wt)) {
    resid <- resid * wt
    x <- x * wt
    y <- y * wt
  }
  if (is.null(se)) {
    if (n < 1001 & covariance == FALSE)
      se <- "rank"
    else se <- "nid"
  }
  if (se == "rank") {
    f <- rq.fit.br(x, y, tau = tau, ci = TRUE, ...)
  }
  if (se == "iid") {
    xxinv <- diag(p)
    xxinv <- backsolve(qr(x)$qr[1:p, 1:p, drop = FALSE],
                       xxinv)
    xxinv <- xxinv %*% t(xxinv)

    pz <- sum(abs(resid) < eps)
    h <- max(p + 1, ceiling(n * bandwidth.rq(tau, n, hs = hs)))
    ir <- (pz + 1):(h + pz + 1)
    ord.resid <- sort(resid[order(abs(resid))][ir])
    xt <- ir/(n - p)

    sparsity <- rq(ord.resid ~ xt)$coef[2]
    cov <- sparsity^2 * xxinv * tau * (1 - tau)
    scale <- 1/sparsity
    serr <- sqrt(diag(cov))
  }
  else if (se == "nid") {
    h <- bandwidth.rq(tau, n, hs = hs)
    while ((tau - h < 0) || (tau + h > 1)) h <- h/2
    bhi <- rq.fit(x, y, tau = tau + h, method = method)$coef
    blo <- rq.fit(x, y, tau = tau - h, method = method)$coef
    dyhat <- x %*% (bhi - blo)
    if (any(dyhat <= 0))
      warning(paste(sum(dyhat <= 0), "non-positive fis"))
    f <- pmax(0, (2 * h)/(dyhat - eps))
    fxxinv <- diag(p)
    if (method == "sfn") {
      D <- t(x) %*% (f * x)
      D <- chol(0.5 * (D + t(D)), nsubmax = ctrl$nsubmax,
                nnzlmax = ctrl$nnzlmax, tmpmax = ctrl$tmpmax)
      fxxinv <- backsolve(D, fxxinv)
    }
    else {
      fxxinv <- backsolve(qr(sqrt(f) * x)$qr[1:p, 1:p,
                                             drop = FALSE], fxxinv)
      fxxinv <- fxxinv %*% t(fxxinv)
    }
    xx <- t(x) %*% x
    cov <- tau * (1 - tau) * fxxinv %*% xx %*% fxxinv
    scale <- mean(f)
    serr <- sqrt(diag(cov))
  }
  else if (se == "ker") {
    h <- bandwidth.rq(tau, n, hs = hs)
    while ((tau - h < 0) || (tau + h > 1)) h <- h/2
    uhat <- c(y - x %*% coef)
    h <- (stats::qnorm(tau + h) - stats::qnorm(tau - h)) * min(sqrt(stats::var(uhat)),
                                                 (stats::quantile(uhat, 0.75) - stats::quantile(uhat, 0.25))/1.34)
    f <- stats::dnorm(uhat/h)/h
    fxxinv <- diag(p)
    fxxinv <- backsolve(qr(sqrt(f) * x)$qr[1:p, 1:p, drop = FALSE],
                        fxxinv)
    fxxinv <- fxxinv %*% t(fxxinv)
    cov <- tau * (1 - tau) * fxxinv %*% crossprod(x) %*%
      fxxinv
    scale <- mean(f)
    serr <- sqrt(diag(cov))
  }
  else if (se == "boot") {
    if ("cluster" %in% names(dots)) {
      bargs <- utils::modifyList(list(x = x, y = y, tau = tau),
                          dots)
      if (length(object$na.action)) {
        cluster <- dots$cluster[-object$na.action]
        bargs <- utils::modifyList(bargs, list(cluster = cluster))
      }
      if (class(bargs$x)[1] == "matrix.csr")
        bargs <- utils::modifyList(bargs, list(control = ctrl))
      B <- do.call(boot.rq, bargs)
    }
    else B <- boot.rq(x, y, tau, coef = coef, ...)
    cov <- cov(B$B)
    serr <- sqrt(diag(cov))
  }
  else if (se == "BLB") {
    n <- length(y)
    b <- ceiling(n^gamma)
    S <- n%/%b
    V <- matrix(sample(1:n, b * S), b, S)
    Z <- matrix(0, NCOL(x), S)
    for (i in 1:S) {
      v <- V[, i]
      B <- boot.rq(x[v, ], y[v], tau, bsmethod = "BLB",
                   blbn = n, ...)
      Z[, i] <- sqrt(diag(cov(B$B)))
    }
    cov <- cov(B$B)
    serr <- apply(Z, 1, mean)
  }
  else if (se == "extreme") {
    tau0 <- tau
    if (tau > 0.5) {
      y <- -y
      tau <- 1 - tau
    }
    if (length(dots$mofn))
      mofn = dots$mofn
    else mofn = floor(n/5)
    if (length(dots$mofn))
      kex = dots$kex
    else kex = 20
    if (length(dots$alpha))
      alpha = dots$alpha
    else alpha = 0.1
    if (length(dots$R))
      R = dots$R
    else R = 200
    m <- (tau * n + kex)/(tau * n)
    taub <- min(tau * n/mofn, tau + (0.5 - tau)/3)
    xbar <- apply(x, 2, mean)
    b0 <- rq.fit(x, y, tau, method = method)$coef
    bm <- rq.fit(x, y, tau = m * tau, method = method)$coef
    An <- (m - 1) * tau * sqrt(n/(tau * (1 - tau)))/c(crossprod(xbar,
                                                                bm - b0))
    bt <- rq.fit(x, y, tau = taub, method = method)$coef
    s <- matrix(sample(1:n, mofn * R, replace = T), mofn,
                R)
    mbe <- (taub * mofn + kex)/(taub * mofn)
    bmbeb <- rq.fit(x, y, tau = mbe * taub, method = method)$coef
    B0 <- boot.rq.pxy(x, y, s, taub, bt, method = method)
    Bm <- boot.rq.pxy(x, y, s, tau = mbe * taub, bmbeb, method = method)
    B <- (mbe - 1) * taub * sqrt(mofn/(taub * (1 - taub))) *
      (B0 - b0)/c((Bm - B0) %*% xbar)
    if (tau0 <= 0.5) {
      bbc <- b0 - apply(B, 2, stats::quantile, 0.5, na.rm = TRUE)/An
      ciL <- b0 - apply(B, 2, stats::quantile, 1 - alpha/2, na.rm = TRUE)/An
      ciU <- b0 - apply(B, 2, stats::quantile, alpha/2, na.rm = TRUE)/An
    }
    else {
      bbc <- -(b0 - apply(B, 2, stats::quantile, 0.5, na.rm = TRUE)/An)
      ciL <- -(b0 - apply(B, 2, stats::quantile, alpha/2, na.rm = TRUE)/An)
      ciU <- -(b0 - apply(B, 2, stats::quantile, 1 - alpha/2,
                          na.rm = TRUE)/An)
    }
    B <- R - sum(is.na(B[, 1]))
    coef <- cbind(b0, bbc, ciL, ciU)
    if (tau0 > 0.5) {
      coef <- -coef
      tau <- tau0
    }
    dimnames(coef) = list(dimnames(x)[[2]], c("coef", "BCcoef",
                                              "ciL", "ciU"))
  }
  else if (se == "conquer") {
    if (length(dots$R))
      R = dots$R
    else R = 200
    Z <- conquer::conquer(x[, -1], y, tau, ci = TRUE, B = R)
    coef <- cbind(Z$coef, Z$perCI)
    cnames <- c("coefficients", "lower bd", "upper bd")
    dimnames(coef) <- list(vnames, cnames)
    resid <- y - x %*% Z$coef
  }
  if (se == "rank") {
    coef <- f$coef
  }
  else if (!(se %in% c("conquer", "extreme"))) {
    coef <- array(coef, c(p, 4))
    dimnames(coef) <- list(vnames, c("Value", "Std. Error",
                                     "t value", "Pr(>|t|)"))
    coef[, 2] <- serr
    coef[, 3] <- coef[, 1]/coef[, 2]
    coef[, 4] <- if (rdf > 0)
      2 * (1 - stats::pt(abs(coef[, 3]), rdf))
    else NA
  }

  # important statistics
  SAE = object$SAE
  N = object$N
  MAE = object$MAE
  lambda_ROS = object$lambda_ros
  object <- object[c("call", "terms")]
  if (covariance == TRUE) {
    if (se != "rank")
      object$cov <- cov
    if (se == "iid")
      object$scale <- scale
    if (se %in% c("nid", "ker")) {
      object$Hinv <- fxxinv
      object$J <- crossprod(x)
      object$scale <- scale
    }
    else if (se == "boot") {
      object$B <- B$B
      object$U <- B$U
    }
  }
  object$coefficients <- coef
  object$residuals <- resid
  object$rdf <- rdf
  object$tau <- tau
  object$SAE = SAE
  object$N = N
  object$MAE = MAE
  object$lambda_ros = lambda_ROS
  class(object) = "summary.regL1" # troquei a classe regL1
  object
}


#' Print an regL1 object
#'
#' Print an object generated by regL1
#'
#'
#' @param x Object returned from regL1 representing the fit of the L1 model.
#' @param ... Optional arguments.
#'
#' @returns No return value, called for side effects.
#'
#' @export
#' @importFrom stats coef residuals
#'
#' @seealso
#' \code{\link{regL1}} for fitting linear L1 models.
#'
#' @method print regL1
#' @rdname print.regL1
#

# Define a summary method for the "regL1" class
print.regL1 = function(x, ...) {
  if (!is.null(cl <- x$call)) {
    cat("Call:\n")
    dput(cl)
  }
  coef <- stats::coef(x)
  cat("\nCoefficients:\n")
  print(coef, ...)
  rank <- x$rank
  nobs <- length(stats::residuals(x))
  if (is.matrix(coef))
    p <- dim(coef)[1]
  else p <- length(coef)
  rdf <- nobs - p
  cat("\nDegrees of freedom:", nobs, "total;", rdf, "residual\n")
  if (!is.null(attr(x, "na.message")))
    cat(attr(x, "na.message"), "\n")
  invisible(x)
}


#' Change object class from "regL1" to "rq"
#'
#' Changing the object's class from "regL1" to "rq" allows you to use functions from the 'quantreg' package normally.
#'
#' @param object Object from "regL1" class.
#' @returns Object with class "rq".
#'
#'
#' @export
#'
#' @seealso
#' \code{\link{regL1}} for fitting linear L1 models.
#' \code{\link{rq}} for fitting linear L1 models.
class_to_rq = function(object) {
  class(object) = "rq"
  return(object)
}

#' Change object class from "rq" to "regL1"
#'
#' Changing the object's class from "rq" to "regL1" allows you to use functions from the 'diagL1' package normally.
#'
#' @param object Object from "rq" class.
#' @returns Object with class "regL1".
#'
#' @export
#'
#' @seealso
#' \code{\link{regL1}} for fitting linear L1 models.
#' \code{\link{rq}} for fitting linear L1 models.
class_to_regL1 = function(object) {
  class(object) = "regL1"
  return(object)
}

Try the diagL1 package in your browser

Any scripts or data that you put into this service are public.

diagL1 documentation built on May 29, 2024, 10:56 a.m.