R/summary-ncvreg.R

Defines functions print.summary.ncvreg summary.ncvreg

Documented in print.summary.ncvreg summary.ncvreg

#' Summary method for ncvreg objects
#' 
#' Inferential summaries for `ncvreg` and `ncvsurv` objects based on local marginal false discovery rates.
#' 
#' @param object   An `ncvreg` or `ncvsurv` object.
#' @param lambda   The regularization parameter value at which inference should be reported.
#' @param which    Alternatively, `lambda` may be specified by index; `which=10` means: report inference for the 10th value of
#'   `lambda` along the regularization path.  If both `lambda` and `which` are specified, `lambda` takes precedence.
#' @param number   By default, `summary` will provide an inferential summary for each variable that has been selected (i.e.,  each
#'   variable with a nonzero coefficient).  Specifying `number=5`, for example, means that the summary table will include the 5
#'   features with the lowest mfdr values, regardless of whether they were selected.  To see all features, `number=Inf`.
#' @param cutoff   Alternatively, specifying for example `cutoff=0.3` will report inference for all features with mfdr under 30%.
#'   If both `number` and `cutoff` are specified, the intersection between both sets of features is reported.
#' @param sort     Should the results be sorted by `mfdr`? (default: TRUE)
#' @param sigma    For linear regression models, users can supply an estimate of the residual standard deviation.
#'   The default is to use RSS / DF, where degrees of freedom are approximated using the number of nonzero coefficients.
#' @param ...      Further arguments; in particular, if you have set `returnX=FALSE`, you will need to supply `X` and `y` in order to calculate local mFDRs.
#' 
#' @return Whether passed an `ncvreg` or `ncvsurv` object, the return value is an object with S3 class `summary.ncvreg`. The class has its own print method and contains the following list elements:
#' * `penalty`: The penalty used by `ncvreg` or `ncvsurv`.
#' * `model`: Either `"linear"`, `"logistic"`, or `"Cox"`.
#' * `n`: Number of instances.
#' * `p`: Number of regression coefficients (not including the intercept).
#' * `lambda`: The `lambda` value at which inference is being reported.
#' * `nvars`: The number of nonzero coefficients (again, not including the intercept) at that value of `lambda`.
#' * `table`: A table containing estimates, normalized test statistics (z), and an estimate of the local mfdr for each coefficient.
#'    The mfdr may be loosely interpreted, in an empirical Bayes sense, as the probability that the given feature is null.
#' * `unpen.table`: If there are any unpenalized coefficients, a separate inferential summary is given for them.  Currently, this is
#'    based on \code{lm}/\code{glm}/\code{coxph} using the penalized coefficients to provide an offset.  This is useful and more or less
#'    accurate, but not ideal; we hope to improve the inferential methods for unpenalized variables in the future.
#'    
#' @author Patrick Breheny <patrick-breheny@@uiowa.edu>
#' @seealso [ncvreg()], [cv.ncvreg()], [plot.cv.ncvreg()], [local_mfdr()]
#' @examples
#' 
#' # Linear regression --------------------------------------------------
#' data(Prostate)
#' fit <- ncvreg(Prostate$X, Prostate$y)
#' summary(fit, lambda=0.08)
#' 
#' # Logistic regression ------------------------------------------------
#' data(Heart)
#' fit <- ncvreg(Heart$X, Heart$y, family="binomial")
#' summary(fit, lambda=0.05)
#' 
#' # Cox regression -----------------------------------------------------
#' data(Lung)
#' fit <- ncvsurv(Lung$X, Lung$y)
#' summary(fit, lambda=0.1)
#' 
#' # Options ------------------------------------------------------------
#' fit <- ncvreg(Heart$X, Heart$y, family="binomial")
#' summary(fit, lambda=0.08, number=3)
#' summary(fit, lambda=0.08, number=Inf)
#' summary(fit, lambda=0.08, cutoff=0.5)
#' summary(fit, lambda=0.08, number=3, cutoff=0.5)
#' summary(fit, lambda=0.08, number=5, cutoff=0.1)
#' summary(fit, lambda=0.08, number=Inf, sort=FALSE)
#' summary(fit, lambda=0.08, number=3, cutoff=0.5, sort=FALSE)
#' 
#' # If X and y are not returned with the fit, they must be supplied
#' fit <- ncvreg(Heart$X, Heart$y, family="binomial", returnX=FALSE)
#' summary(fit, X=Heart$X, y=Heart$y, lambda=0.08)
#' @export

summary.ncvreg <- function(object, lambda, which, number, cutoff, sort=TRUE, sigma, ...) {
  nvars <- predict(object, type="nvars", lambda=lambda, which=which)
  if (length(nvars) > 1) stop("You must specify a single model (i.e., a single value of lambda)", call.=FALSE)
  if (missing(lambda)) lambda <- object$lambda[which]
  custom <- !(missing(number) & missing(cutoff))
  if (inherits(object, 'ncvsurv')) {
    model <- 'Cox'
  } else {
    model <- switch(object$family, gaussian="linear", binomial="logistic", poisson="Poisson")
  }
  local <- local_mfdr(object, lambda, sigma=sigma, ...)
  out <- structure(list(penalty=object$penalty, model=model, n=object$n, p=nrow(object$beta)-1, lambda=lambda,
                        custom=custom, nvars=nvars),
                   class='summary.ncvreg')
  if (is.null(local$unpen)) {
    Tab <- local
  } else {
    Tab <- local$pen.vars
    out$unpenTable <- local$unpen.vars
  }
  
  # Sort/subset table
  if (!missing(number) && number > nrow(Tab)) number <- nrow(Tab)
  if (missing(number) & missing(cutoff)) {
    Tab <- Tab[Tab$Estimate != 0,]
  } else if (missing(cutoff)) {
    Tab <- Tab[Tab$mfdr <= sort(Tab$mfdr)[number],]
  } else if (missing(number)) {
    Tab <- Tab[Tab$mfdr <= cutoff,]
  } else {
    Tab <- Tab[Tab$mfdr <= min(sort(Tab$mfdr)[number], cutoff),]
  }
  if (sort) Tab <- Tab[order(Tab$mfdr),]
  out$table <- Tab
  out
}

#' @param x        A `summary.ncvreg` object.
#' @param digits   Number of digits past the decimal point to print out. Can be a vector specifying different display digits for
#'   each of the five non-integer printed values.
#' 
#' @rdname summary.ncvreg
#' @export

print.summary.ncvreg <- function(x, digits, ...) {
  digits <- if (missing(digits)) digits <- c(4, 2, 2, 3) else rep(digits, length.out=5)
  if ((sys.nframe() >= 2) && (as.character(sys.call(2))[1] == "print.summary.cv.ncvreg")) {
    skip_intro <- TRUE
  } else {
    skip_intro <- FALSE
  }
  if (!skip_intro) {
    cat(x$penalty, "-penalized ", x$model, " regression with n=", x$n, ", p=", x$p, "\n", sep="")
    cat("At lambda=", formatC(x$lambda, digits[1], format="f"), ":\n", sep="")
    cat("-------------------------------------------------\n")
    if (x$custom) {
      cat("  Features satisfying criteria       : ", nrow(x$table), "\n", sep="")
      cat("  Average mfdr among chosen features : ", formatC(mean(x$table$mfdr), digits=3), "\n", sep="")
    } else {
      cat("  Nonzero coefficients         : ", formatC(x$nvars, width=3), "\n", sep="")
    }
  }
  if (!x$custom) {
    if (x$nvars > 0) {
      cat("  Expected nonzero coefficients: ", formatC(sum(x$table$mfdr), digits=2, format='f', width=6), '\n', sep="")
      space <- substr('                                              ', 1, 5-nchar(nrow(x$table)))
      cat("  Average mfdr (", nrow(x$table), " features)", space, ": ", formatC(mean(x$table$mfdr), digits=3, format='f', width=7), '\n', sep="")
    }
  }
  cat("\n")
  x$table$mfdr <- format.pval(x$table$mfdr, eps=1e-4)
  print(x$table, digits=digits)
  if (!is.null(x$unpenTable)) {
    x$unpenTable$p.value <- format.pval(x$unpenTable$p.value, eps=1e-4)
    cat("\nUnpenalized variables:\n")
    print(x$unpenTable, digits=digits)
  }
}
pbreheny/ncvreg documentation built on April 3, 2024, 3:54 p.m.