R/SansSouci-class.R

Defines functions predict.SansSouci plot.SansSouci thresholds.SansSouci thresholds foldChanges.SansSouci foldChanges pValues.SansSouci pValues fit.SansSouci print.SansSouci label.SansSouci label nObs.SansSouci nObs nHyp.SansSouci nHyp SansSouciSim SansSouci

Documented in fit.SansSouci foldChanges foldChanges.SansSouci label label.SansSouci nHyp nHyp.SansSouci nObs nObs.SansSouci plot.SansSouci predict.SansSouci print.SansSouci pValues pValues.SansSouci SansSouci SansSouciSim thresholds thresholds.SansSouci

#' Create an object of class 'SansSouci'
#'
#' @param Y A matrix of \eqn{m} variables (hypotheses) by \eqn{n} observations
#' @param groups A numeric vector of \eqn{n} values in \eqn{0, 1}, the groups of
#'   observations on which to perform two-sample tests
#' @param truth An optional numeric vector of $m$ values in ${0,1}$, the status
#'   of each null hypothesis (0 means H0 is true, 1 means H1 is true). Typically
#'   used in simulations.
#' @return An object of class "SansSouci"
#' @export
#' @examples
#' data(expr_ALL, package = "sanssouci.data")
#' groups <- ifelse(colnames(expr_ALL) == "NEG", 0, 1)
#' table(groups)
#' a <- SansSouci(Y = expr_ALL, groups = groups)
#'
#' res <- fit(a, B = 100, alpha = 0.1)
#' plot(res, xmax = 500)
#' res_beta <- fit(res, B = 100, alpha = 0.1, family = "Beta", K = 10)
#' plot(res_beta, xmax = 500)
#'
SansSouci <- function(Y, groups, truth = NULL) {
  if (missing(groups)) { # one-sample tests
    groups <- rep(1, ncol(Y))
  }
  ugroups <- unique(groups)
  n_groups <- length(ugroups)

  if (n_groups > 1) {
    categCheck(groups, ncol(Y))
  }
  if (!is.null(truth)) {
    all_0 <- identical(truth, rep(0, nrow(Y)))
    all_1 <- identical(truth, rep(1, nrow(Y)))
    if (!all_0 && !all_1) {
      categCheck(truth, nrow(Y))
    }
  }
  input <- list(
    Y = Y,
    groups = groups,
    n_groups = n_groups,
    m = nrow(Y)
  )
  input$truth <- truth
  obj <- structure(list(
    input = input,
    parameters = NULL,
    output = NULL
  ),
  class = "SansSouci"
  )
  obj
}


#' Create an object of class 'SansSouci' from simulations
#'
#' Create a 'SansSouci' object from simulation in the Gaussian
#' equi-correlated model
#'
#' @param ... Parameters to be passed to [gaussianSamples]
#' @seealso [gaussianSamples]
#' @export
#' @examples
#' obj <- SansSouciSim(
#'   m = 543, rho = 0.4, n = 210,
#'   pi0 = 0.8, SNR = 3, prob = 0.5
#' )
#' alpha <- 0.1
#'
#' # Adaptive Simes (lambda-calibration)
#' set.seed(542)
#' res <- fit(obj, B = 100, alpha = alpha, family = "Simes")
#' plot(res)
#' volcanoPlot(res, q = 0.05, r = 0.2)
#'
#' # upper bound on number of signals if the entire data set
#' # (and corresponding lower bound on FDP)
#' predict(res)
#'
#' # confidence curve
#' plot(res)
#'
#' # comparison to other confidence curves
#' # Parametric Simes (no calibration -- assume positive dependence (PRDS))
#' res0 <- fit(obj, B = 0, alpha = alpha, family = "Simes")
#' res0
#'
#' # Oracle
#' oracle <- fit(obj, alpha = alpha, family = "Oracle")
#' oracle
#'
#' confs <- list(
#'   Simes = predict(res0, all = TRUE),
#'   "Simes+calibration" = predict(res, all = TRUE),
#'   "Oracle" = predict(oracle, all = TRUE)
#' )
#' plotConfCurve(confs)
#'
#' \dontrun{
#' # Use wilcoxon tests instead of Welch tests
#' res <- fit(obj, B = 100, alpha = 0.1, rowTestFUN = rowWilcoxonTests)
#' volcanoPlot(res, q = 0.05, r = 0.2)
#' }
#'
SansSouciSim <- function(...) {
  sim <- gaussianSamples(...)
  SansSouci(Y = sim$X, groups = sim$categ, truth = sim$H)
}

#' Basic methods for class `SansSouci`
#'
#' @param object An object of class \code{SansSouci}
#' @name SansSouci-methods
#' @examples
#' data(expr_ALL, package = "sanssouci.data")
#' groups <- ifelse(colnames(expr_ALL) == "NEG", 0, 1)
#' table(groups)
#' a <- SansSouci(Y = expr_ALL, groups = groups)
#' print(a)
#' nHyp(a)
#' nObs(a)
#' label(a)
#'
#' res <- fit(a, B = 100, alpha = 0.1)
#' label(res)
#' print(res)
#' str(pValues(res))
#' str(foldChanges(res))
#' str(thresholds(res))
#' volcanoPlot(res, q = 0.05, r = 0.5)
#'
NULL
#> NULL

#' Generic functions for S3 class SansSouci
#'
#' @name all-generics
#' @param object An object. See individual methods for specifics
NULL
#> NULL

#' @describeIn all-generics ' Get the number of hypotheses
#' @param object An object. See individual methods for specifics
#' @export
nHyp <- function(object) UseMethod("nHyp")

#' `nHyp`: get the number of hypotheses
#'
#' @rdname SansSouci-methods
#' @param object An object of class `SansSouci`
#' @export
nHyp.SansSouci <- function(object) {
  object$input$m
}

#' @describeIn all-generics Get the number of observations
#' @param object An object. See individual methods for specifics
#' @export
nObs <- function(object) UseMethod("nObs")

#' `nObs` Get the number of observations
#'
#' @rdname SansSouci-methods
#' @param object An object of class `SansSouci`
#' @export
nObs.SansSouci <- function(object) {
  ncol(object$input$Y)
}

#' @describeIn all-generics Get the label of hypotheses tested
#' @param object An object. See individual methods for specifics
#' @export
label <- function(object) UseMethod("label")

#' `label` Get the label of a post hoc method
#'
#' @rdname SansSouci-methods
#' @param object An object of class `SansSouci`
#' @export
label.SansSouci <- function(object) {
  param <- object$parameters
  if (is.null(param)) {
    return(NULL)
  }
  lab <- param$family
  if (!(lab %in% c("Simes", "Oracle")) && (!is.null(param$K))) {
    lab <- sprintf("%s(K=%d)", lab, param$K)
  }
  return(lab)
}

#' Print 'SansSouci' objects
#'
#' @rdname SansSouci-methods
#' @param x An object of class `SansSouci`
#' @param ... Not used
#' @param verbose Should detailed output be printed? Defaults to FALSE
#' @importFrom utils str
#' @export
print.SansSouci <- function(x, ..., verbose = FALSE) {
  object <- x
  rm(x)
  cat("'SansSouci' object:\n")
  input <- object$input
  if (!is.null(input)) {
    cat("\tNumber of hypotheses:\t", nHyp(object), "\n")
    if (!is.null(nObs(object))) {
      cat("\tNumber of observations:\t", nObs(object), "\n")
    }
    cat("\t", input$n_group, "-sample data", "\n", sep = "")
    cat("\n")
    if (verbose) {
      cat("Data:")
      cat("\n")
      str(input$Y)
      cat("\n")
    }
    truth <- input$truth
    if (!is.null(truth)) {
      cat("Truth: ")
      cat("\n")
      cat("\t", sum(truth), "false null hypotheses (signals)")
      pi0 <- 1 - mean(truth)
      cat(" out of ", nHyp(object), " (pi0=", round(pi0, 3), ")", sep = "")
      cat("\n")
    }
  }
  params <- object$parameters
  if (!is.null(params)) {
    cat("Parameters:", "\n")
    cat("\tTest function:\t\t", params$funName, "\n", sep = "")
    cat("\tNumber of permutations:\tB=", params$B, "\n", sep = "")
    cat("\tSignificance level:\talpha=", params$alpha, "\n", sep = "")
    cat("\tReference family:\t", params$fam, "\n", sep = "")
    cat("\t\t(of size:\tK=", params$K, ")", "\n", sep = "")
    cat("\n")
  }
  output <- object$output
  if (!is.null(output)) {
    cat("Output:\n")
    if (verbose) {
      cat("\tp-values:\n")
      print(summary(output$p.values))
      cat("\tCalibrated thresholds:")
      if (all(output$thr %in% c(0, 1))) {
        print(table(output$thr))
        cat("\n")
      } else {
        print(summary(output$thr))
        cat("\tPivotal statistic:\n")
        print(summary(output$piv_stat))
      }
      cat("\n")
    }
    cat("\tCalibration parameter:\tlambda=", output$lambda, "\n", sep = "")
  }
  invisible(object)
}

#' @importFrom generics fit
#' @export
generics::fit

#' Fit SansSouci object
#' @param object An object of class `SansSouci`
#' @param alpha A numeric value in `[0,1]`, the target (JER) risk
#' @param B An integer value, the number of permutations to be performed.
#'  Defaults to 1000
#' @param alternative A character string specifying the alternative hypothesis.
#'   Must be one of "two.sided" (default), "greater" or "less".
#' @param rowTestFUN A (vectorized) test function. Defaults to [rowWelchTests].
#' @param family A character value, the name of a threshold family. Should be
#'   one of "Linear", "Beta" and "Simes", or "Oracle". "Linear" and "Simes"
#'   families are identical.
#'
#'   - Simes/Linear: The classical family of thresholds introduced by Simes (1986).
#'   This family yields JER control if the test statistics are positively
#'   dependent (PRDS) under H0.
#'
#'   - Beta: A family of thresholds that achieves marginal kFWER control under
#'   independence
#'
#'   - Oracle A family such that the associated bounds correspond to the true
#'   numbers/proportions of true/false positives. "truth" must be available in
#'   object$input$truth.
#'
#' @param max_steps_down A numeric value, the maximum number of steps down to
#'   perform. Defaults to 10 (but the algorithm generally converges in 1 or 2
#'   steps).
#' @param K An integer value in `[1,m]`, the number of elements in the
#'   reference family. Defaults to m
#' @param force A boolean value: should the permutation p-values and pivotal statistics be re-calculated ? Defaults to `FALSE`
#' @param verbose A boolean value: should extra info be printed? Defaults to `FALSE`
#' @param ... Not used
#' @return A 'fitted' object of class 'SansSouci'. It is a list of three elements
#'  - input: see [SansSouci]
#'  - param: the input parameters, given as a list
#'  - output: the outputs of the calibration, see [calibrate]
#' @export
#' @examples
#' # Generate Gaussian data and perform multiple tests
#' obj <- SansSouciSim(m = 502, rho = 0.5, n = 100, pi0 = 0.8, SNR = 3, prob = 0.5)
#' res <- fit(obj, B = 100, alpha = 0.1)
#'
#' # confidence curve
#' plot(res)
#'
#' # confidence curve for a subset
#' S <- which(pValues(res) < 0.1 & foldChanges(res) > 0.3)
#' plot(res, S = S)
#'
#' # plot two confidence curves
#' res_beta <- fit(res, B = 100, alpha = 0.1, family = "Beta", K = 20)
#'
#' resList <- list("Linear" = res, "Beta" = res_beta)
#' bounds <- lapply(resList, predict, all = TRUE)
#' plotConfCurve(bounds, xmax = 200)
#'
fit.SansSouci <- function(object, alpha, B = 1e3,
                          rowTestFUN = NULL,
                          alternative = c("two.sided", "less", "greater"),
                          family = c("Simes", "Linear", "Beta", "Oracle"),
                          max_steps_down = 10L, K = nHyp(object),
                          force = FALSE,
                          verbose = FALSE, ...) {
  alternative <- match.arg(alternative)
  family <- match.arg(family)
  if (family == "Oracle") {
    alpha <- NA_real_ ## alpha is not used by Oracle
    truth <- object$input$truth
    if (is.null(truth)) {
      stop("'truth' should be available for 'Oracle'. See ?SansSouci")
    }
  } else {
    if (mode(alpha) != "numeric" || alpha < 0 || alpha > 1) {
      stop("Argument 'alpha' should be a numeric value in [0,1]")
    }
  }
  Y <- object$input$Y
  groups <- object$input$groups
  n_groups <- object$input$n_groups
  m <- nHyp(object)
  n <- nObs(object)
  funName <- NA_character_

  if (is.null(rowTestFUN)) {
    if (n_groups == 1) {
      rowTestFUN <- rowZTests
      funName <- "rowZTests"
    } else if (n_groups == 2) {
      rowTestFUN <- rowWelchTests
      funName <- "rowWelchTests"
    } else if (n_groups == n) {
      rowTestFUN <- rowPearsonCorrelationTests
      funName <- "rowPearsonCorrelationTests"
    }
  } else {
    funName <- as.character(substitute(rowTestFUN))
  }

  ## should we re-calculate p0?
  params <- object$parameters
  p0 <- object$output$p0
  do_p0 <- TRUE
  if (!is.null(params) && !force) {
    if (!is.null(p0)) {
      cond_B <- (params$B == B)
      cond_F <- all.equal(params$rowTestFUN, rowTestFUN)
      cond_A <- (params$alternative == alternative)
      do_p0 <- (!cond_B) || (!cond_F) || (!cond_A)
    }
  }

  ## should we re-calculate the (first) pivotal statistic ?
  params <- object$parameters
  pivStat0 <- NULL
  if (!is.null(params) && !force) {
    cond_F <- (params$family == family)
    cond_K <- (params$K == K)
    cond_P <- (!do_p0)
    if (cond_F && cond_K && cond_P) {
      pivStat0 <- object$output$piv_stat
    }
  }
  ttype <- sprintf("%s-sample", n_groups)
  object$parameters <- list(
    alpha = alpha,
    B = B,
    alternative = alternative,
    rowTestFUN = rowTestFUN,
    funName = funName,
    type = ttype,
    family = family,
    max_steps_down = max_steps_down,
    K = K
  )

  if (family == "Beta" && K == m) {
    warning("For the 'Beta' family we recommend choosing K < m")
  }

  cal <- rowTestFUN(Y, groups, alternative = alternative)
  p.values <- cal$p.value
  if (B > 0 && family != "Oracle") {
    t0 <- Sys.time()
    if (verbose) {
      cat("Randomization p-values...")
    }
    if (do_p0) {
      null_groups <- NULL
      if (n_groups == 1) { # sign-flipping
        null_groups <- replicate(B, rbinom(n, 1, 0.5) * 2 - 1)
      } else if (n_groups == 2) { # permutation
        null_groups <- replicate(B, sample(groups))
      } else if (n_groups > 2) { # continuous covariate
        null_groups <- replicate(B, sample(groups))
      }
      rwt0 <- rowTestFUN(Y, null_groups, alternative = alternative)
      p0 <- rwt0$p.value
      if (verbose) {
        dt <- Sys.time() - t0
        cat("done (", format(dt), ")\n", sep = "")
      }
    } else if (verbose) {
      cat("skipped computation (already done)\n")
    }
    t0 <- Sys.time()
    if (verbose) {
      cat("Calibration...")
    }
    calib <- calibrate(
      p0 = p0, m = m, alpha = alpha,
      family = family, K = K,
      p = p.values,
      max_steps_down = max_steps_down,
      piv_stat0 = pivStat0
    )
    if (verbose) {
      dt <- Sys.time() - t0
      cat("done (", format(dt), ")\n", sep = "")
    }

    cal$p0 <- p0
    cal <- c(cal, calib)
  } else { # no calibration!
    cal$lambda <- alpha
    thr <- switch(family,
      "Linear" = t_linear(alpha, seq_len(m), m),
      "Simes" = t_linear(alpha, seq_len(m), m),
      "Beta" = t_beta(alpha, seq_len(m), m),
      "Oracle" = object$input$truth
    )
    cal$thr <- thr
  }
  # }
  object$output <- cal
  object
}

#' @describeIn all-generics Get p-values
#' @param object An object. See individual methods for specifics
#' @export
pValues <- function(object) UseMethod("pValues")

#' `pValues`: get p-values
#'
#' @rdname SansSouci-methods
#' @param object An object of class `SansSouci`
#' @export
pValues.SansSouci <- function(object) {
  object$output$p.value
}

#' @describeIn all-generics Get fold changes
#' @param object An object. See individual methods for specifics
#' @export
foldChanges <- function(object) UseMethod("foldChanges")

#' @rdname SansSouci-methods
#' @param object An object of class `SansSouci`
#' @export
foldChanges.SansSouci <- function(object) {
  object$output$estimate
}

## An attempt with 'setters'
# 'foldChanges<-' <- function(object, value) UseMethod("foldChanges")
# 'foldChanges<-.SansSouci' <- function(object, value) {
#     object$output$estimate <- value
#     object
# }


#' @describeIn all-generics Get thresholds
#' @param object An object. See individual methods for specifics
#' @export
thresholds <- function(object) UseMethod("thresholds")

#' `thresholds`: get thresholds
#'
#' @rdname SansSouci-methods
#' @param object An object of class `SansSouci`
#' @export
thresholds.SansSouci <- function(object) object$output$thr

#' Plot confidence bound on the true/false positives among most significant items
#'
#' @param x An object of class 'SansSouci'
#' @param y Not used
#' @param xmax Right limit of the plot
#' @param ... Further arguments to be passed to \code{bound}
#' @export
#' @examples
#' # Generate Gaussian data and perform multiple tests
#' obj <- SansSouciSim(m = 502, rho = 0.5, n = 100, pi0 = 0.8, SNR = 3, prob = 0.5)
#' res <- fit(obj, B = 100, alpha = 0.1)
#'
#' # confidence curve
#' plot(res)
#'
#' # confidence curve for a subset of hypotheses
#' S <- which(pValues(res) < 0.1 & foldChanges(res) > 0.3)
#' plot(res, S = S)
plot.SansSouci <- function(x, y, xmax = nHyp(x), ...) {
  cb <- predict(x, all = TRUE, ...)
  plotConfCurve(cb, xmax = xmax)
}

#' Post hoc confidence bounds on the true/false positives
#'
#' @param object An object of class 'SansSouci'
#' @param S A subset of indices
#' @param what A character vector, the names of the post hoc bounds to be
#'   computed, among:
#'
#' - FP: Upper bound on the number of false positives in the 'x' most significant items
#' - TP: Lower bound on the number of true positives in the 'x' most significant items
#' - FDP: Upper bound on the proportion of false positives in the 'x' most significant items
#' - TP: Lower bound on the proportion of true positives in the 'x' most significant items
#'
#' Defaults to `c("TP", "FDP")`
#' @param all A logical value: should the bounds for all ordered subsets of `S` be returned? If `FALSE` (the default), only the bound for `S` is returned
#' @param ... Not used
#'
#' @return If `all` is `FALSE` (the default), only the value of the bound is returned. Otherwise, a `data.frame` is return, with |S| rows and 4 columns:
#' - x: Number of most significant items selected
#' - label: Label for the procedure, typically of the form 'family(param)'
#' - bound: Value of the post hoc bound
#' - stat: Type of post hoc bound, as specified by argument `bound`.
#'
#' @importFrom stats predict
#' @export
#' @examples
#'
#' # Generate Gaussian data and perform multiple tests
#' obj <- SansSouciSim(m = 502, rho = 0.5, n = 100, pi0 = 0.8, SNR = 3, prob = 0.5)
#' res <- fit(obj, B = 100, alpha = 0.1)
#'
#' # post hoc bound on the set of all hypotheses
#' predict(res)
#'
#' # idem for all possible subsets (sorted by p-value)
#' bounds <- predict(res, all = TRUE)
#' head(bounds)
#'
#' # post hoc bound on a subset
#' S <- which(pValues(res) < 0.01)
#' predict(res, S)
predict.SansSouci <- function(object, S = seq_len(nHyp(object)),
                              what = c("TP", "FDP"), all = FALSE, ...) {
  p.values <- pValues(object)
  thr <- thresholds(object)
  lab <- label(object)
  bounds <- posthoc_bound(p.values, S = S, thr = thr, lab = lab, what = what, all = all)
  if (!all) {
    bounds <- bounds[, "bound"]
    if (length(bounds) > 1) {
      names(bounds) <- what
    }
  }
  return(bounds)
}
pneuvial/sanssouci documentation built on Feb. 12, 2024, 4:18 a.m.