R/pip.R

Defines functions pip

Documented in pip

#' Compute posterior inclusion probabilities (PIPs)
#'
#' From a set of p-values, computes posterior probabilities that a feature should be truly included.
#' For example, membership inclusion in a given cluster can be improved by filtering low quality members.
#' In using PCA and related methods, it helps select variables that are truly associated with given latent variables.
#'
#' This function requires the Bioconductor \code{qvalue} package to be installed.
#'
#' @param pvalue a vector of p-values.
#' @param group a vector of group indicators (optional).
#' If provided, PIP analysis is stratified.
#' Assumes groups are in 1:k where k is the number of unique groups.
#' @param pi0 a vector of pi0 values (optional).
#' Its length has to be either 1 or equal the number of groups.
#' @param verbose If TRUE, reports information.
#' @param ... optional arguments for \code{\link[qvalue]{lfdr}} to control a local FDR estimation.
#'
#' @return \code{pip} returns a vector of posterior inclusion probabilities
#' @references Chung (2020) Statistical significance of cluster membership for unsupervised evaluation of cell identities. Bioinformatics, 36(10): 3107–3114 \doi{10.1093/bioinformatics/btaa087}
#' @references Chung (2014) "Jackstraw Weighted Shrinkage for Principal Component Analysis and Covariance Matrix" in Statistical Inference of Variables Driving Systematic Variation in High-Dimensional Biological Data. PhD thesis, Princeton University. \url{https://www.proquest.com/openview/e90b562d689cf3a021c35a93c6f346db/1?pq-origsite=gscholar&cbl=18750}
#' @author Neo Christopher Chung \email{nchchung@@gmail.com}
#' John R. Yamamoto-Wilson
#' @export
pip <- function(
                pvalue,
                group = NULL,
                pi0 = NULL,
                verbose = TRUE,
                ...
                ) {
    # check package dependencies!
    if ( !requireNamespace( "qvalue" ) )
        stop( 'The Bioconductor `qvalue` package is required to use function `jackstraw::pip`, please install it manually!' )

    # pvalue is always mandatory
    if ( missing( pvalue ) )
        stop( '`pvalue` is required!' )

    m <- length(pvalue)

    if ( is.null( pi0 ) ) {
        if (verbose)
            message("Using qvalue::pi0est to estimate pi0 values.")
        if ( is.null( group ) ) {
            # pooled PIP
            prob <- 1 - qvalue::lfdr(pvalue, ...)
        } else {
            # stratified PIP
            if ( length( group ) != m )
                stop( '`group` length (', length(group), ') must match `pvalue` length (', m, ')' )
            k <- length(unique(group))
            prob <- vector("numeric", length = m )
            for (i in 1:k) {
                # skip empty groups quietly
                if ( sum(group == i) > 0 )
                    prob[group == i] <- 1 - qvalue::lfdr( pvalue[group == i], ... )
            }
        }
    } else {
        if (verbose)
            message("Using pi0 values:", pi0)
        if (is.null(group)) {
            # pooled PIP
            # NOTE: pi0 not used here
            prob <- 1 - qvalue::lfdr(pvalue, ...)
        } else {
            # stratified PIP
            if ( length( group ) != m )
                stop( '`group` length (', length(group), ') must match `pvalue` length (', m, ')' )
            k <- length(unique(group))
            if ( length( pi0 ) == 1 ) {
                pi0 <- rep(pi0, k)
            } else if (length(pi0) != k) {
                stop("When providing pi0 values, the length of pi0 has to be either 1 or equal the number of groups.")
            }
            prob <- vector("numeric", length = m )
            for (i in 1:k) {
                # skip empty groups quietly
                if ( sum(group == i) > 0 )
                    prob[group == i] <- 1 - qvalue::lfdr( pvalue[group == i], pi0 = pi0[i], ... )
            }
        }
    }

    return(prob)
}
ncchung/jackstraw documentation built on Sept. 17, 2024, 9:20 a.m.