R/0-multtest.R

#' Estimate Number of True Null Hypotheses Using Histogram-based Method
#' (Nettleton 2006)
#'
#' This function estimates the number of true null hypotheses given a vector of
#' p-values using the method of Nettleton et al. (2006) JABES 11, 337-356. The
#' estimate obtained is identical to the estimate obtained by the iterative
#' procedure described by Mosig et al. Genetics 157:1683-1698. The number of
#' p-values falling into B equally sized bins are counted. The count of each bin
#' is compared to the average of all the bin counts associated with the current
#' bins and all bins to its right.  Working from left to right, the first bin
#' that has a count less than or equal to the average is identified. That
#' average is multiplied by the total number of bins to obtain an estimate of
#' m0, the number of tests for which the null hypothesis is true.
#' @param p a numerical vector of p-value.
#' @param B number of bin.
#' @return The function returns an estimate of m0, the number of tests for which
#' the null hypothesis is true.
#' @export
#' @author Dan Nettleton \email{dnett@iastate.edu}
#' @references Dan NETTLETON, J. T. Gene HWANG, Rico A. CALDO, and Roger P. WISE.
#' Estimating the Number of True Null Hypotheses From a Histogram of p Values.
#' Journal of Agricultural, Biological, and Environmental Statistics,
#' Volume 11, Number 3, Pages 337–356.
#' @examples
#' p <- runif(100)
#' m0 <- estimate.m0(p)
#' m0
estimate.m0 <- function(p, B = 20) {
    m <- length(p)
    m0 <- m
    bin <- c(-0.1, (1:B)/B)
    bin.counts = rep(0, B)
    for (i in 1:B) {
        bin.counts[i] <- sum((p > bin[i]) & (p <= bin[i + 1]))
    }
    tail.means <- rev(cumsum(rev(bin.counts))/(1:B))
    temp <- bin.counts - tail.means
    index <- min((1:B)[temp <= 0])
    m0 <- B * tail.means[index]
    return(m0)
}

#' Estimating The threshold lambda in Estimating FDR Using Storey's Formula
#'
#' This function calculate lambda threshold to estimate FDR
#'
#' @inheritParams estimate.m0
#' @return The function returns an estimate of lambdastar, the threshold
#' to calculate FDR based on Storey's formula.
#' @export
#' @author Dan Nettleton \email{dnett@iastate.edu}
#' @references Dan NETTLETON, J. T. Gene HWANG, Rico A. CALDO, and Roger P. WISE.
#' Estimating the Number of True Null Hypotheses From a Histogram of p Values.
#' Journal of Agricultural, Biological, and Environmental Statistics,
#' Volume 11, Number 3, Pages 337–356.
#' @examples
#' p <- runif(100)
#' lambdastar <- estimate.lambda(p)
#' lambdastar
estimate.lambda <- function(p, B = 20) {
    # m <- length(p)
    bin <- c(-0.1, (1:B)/B)
    bin.counts <- rep(0, B)
    for (i in 1:B) {
        bin.counts[i] <- sum((p > bin[i]) & (p <= bin[i + 1]))
    }
    tail.means <- rev(cumsum(rev(bin.counts))/(1:B))
    temp <- bin.counts - tail.means
    index <- min((1:B)[temp <= 0])
    lambdastar <- (index-1)/B
    # lambdastar <- index/B
    return(lambdastar)
}
#' Q-value Using Histogram-based Method (Nettleton 2006)
#'
#' This function computes q-values using the approach of Nettleton et al.
#'(2006) JABES 11, 337-356.  Author: Dan Nettleton
#'@return The function returns a q-value vector of the input p-value vector.
#' @inheritParams estimate.m0
#' @param lambda0 threshold lambda in Storey formula.
#' @export
#' @author Dan Nettleton \email{dnett@iastate.edu}
#' @references Dan NETTLETON, J. T. Gene HWANG, Rico A. CALDO, and Roger P. WISE.
#' Estimating the Number of True Null Hypotheses From a Histogram of p Values.
#' Journal of Agricultural, Biological, and Environmental Statistics,
#' Volume 11, Number 3, Pages 337–356.
#' @examples
#' p <- runif(100)
#' lambda0 <- estimate.lambda(p)
#' q <- jabes.q(p, lambda0)
#' sum(q <= .05)
jabes.q <- function(p, lambda0, B = 20) {
    m <- length(p)
    # lambda0 <- estimate.lambda(p)
    m0 <- pi0(p, lambda0) * m
    # m0 <- estimate.m0(p, B)
    #k = 1:m
    ord = order(p)
    p[ord] <- (p[ord] * m0)/(1:m)
    qval <- p
    qval[ord] <- rev(cummin(rev(qval[ord])))
    return(qval)
}

#'pauc_out.
#'
#'This function is to calculate false discovery proportion, partial area under
#'ROC curve, number of true positive when we know the true status of each gene
#'@param q a vector of p-values
#'@param lab a vector of factor consistings of 0 or 1, indicating
#'if the feater are true null or false null.
#'@return a vector of S, V, R, FDR, pauc
#'@export
#' @examples
#' set.seed(1)
#' EE <- 1000
#' DE <- 100
#' p1 <- runif(EE)
#' p2 <- rbeta(DE, shape1 = .5, shape2 = 1)
#' p <- c(p1, p2)
#' lab <- as.factor(as.character(c(rep(0, EE), rep(1, DE))))
#' pauc_out(p, lab)


pauc_out <- function(q, lab) {
    # lab <- as.factor(c(rep(0, EE), rep(1, DE)))
    if (!is.factor(lab))
        stop("lab has to be  a vector of factor 0 or 1.")
    # DE <- sum(as.numeric(as.character(lab)))
    # EE <- length(lab) - DE
    roc.out <- AUC::roc(1 - q, lab)
    roc.ind <- sum(roc.out$fpr <= 0.05)
    roc.min <- roc.out$cutoffs[roc.ind]
    pauc <- AUC::auc(roc.out, min = roc.min)
    if (length(pauc) == 0)
        pauc <- 0
    R <- sum(q <= 0.05)
    V <- sum(q <= 0.05 & (1 - as.numeric(as.character(lab))))
    FDR <- V/max(1, R)
    S = R - V
    auc <- AUC::auc(roc.out)
    out <- c(S = S, R = R, V = V, FDR = FDR, pauc = pauc, auc = auc)
    return(out)
}
ntyet/sfdr documentation built on May 7, 2019, 1:30 p.m.