TopK: TopK Exact Test

Documented in TopK_PermT.test

#' TopK exact test based on permutation T p-values
#'
#' This function perform TopK exact test on a two group data set (Case-Control study).
#' The univariate p-values based on permutation t-test.
#'
#' @param X a data matrix containing features as rows and samples as columns.
#' @param nA number of samples in group 1
#' @param nB number of samples in group 2
#' @param Kvals a numeric vector indicating how many \code{"K"} we choose.
#' @param B the number of inner permutation, default is "0" for exact test.
#' @param alternative a character string specifying the alternative hypothesis,
#'          must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}.
#'          You can specify just the initial letter.
#' @param ties.method a character string specifying how ties are treated, see ‘Details’; can be abbreviated.
#' @param pval If TRUE, the permutation matrix will be populated by p-values from t-test.
#'             If FALSE, the permutation matrix will be directly populated by t statistics (negative absolute value).
#' @param ReturnType character(1) specify how to return the results.
#'          Could be \code{"list"}, \code{"vector"} or rich format \code{"TopK"}. See detail.
#' @param vrb a logical vector indicating TRUE when show some words while running, FALSE otherwise.
#'
#' @examples TopK_PermT.test(X,7,7)
#'
#' @importFrom broman perm.test
#'
#' @export

TopK_PermT.test=function(X,nA,nB,
                       Kvals=c(1,2,3,4,5,10,25),
                       B=0,# set B=0 for exact test
                       alternative = c("two.sided", "less", "greater"),
                       ties.method = c("random", "min", "max", "average"),
                       pval=FALSE,
                       ReturnType="TopK",vrb=T){
    # hrep=1; SimType="null1"; vrb=T; Kvals=c(1,2,3,4,5,10,25);B=0
    # alternative <- match.arg(alternative)
    N=nA+nB
    PermMat=combn(N,nA)
    nPerm=dim(PermMat)[2]

    # cat("nPerm=",nPerm,fill=T)

    nK=length(Kvals)

    #---------------------------------------------------------------
    # Get the exact permutation T p-values under all permutations
    #---------------------------------------------------------------

    # OPTION 1
    # adapt the intermediate function binary.v in package 'broman'

    # Put the functions outside the parents function

    if(vrb) cat("   Getting permutation t values.",fill=T)

    permScan <- function(i) perm.test(X[i, 1:nA], X[i, (nA+1):(nA+nB)], alternative = alternative, pval=pval)
    Pmat=t(mapply(permScan, 1:nrow(X)))

    if (pval) {
      Pmat = Pmat
    } else {
      Pmat = -abs(Pmat)
    }

    PermPs=t(apply(Pmat,1,rank,ties.method="max")/nPerm)



    # OPTION 2

    # allp <- array(NA, c(nrow(X),nPerm))
    # for (h in 1:nPerm) {
    #     for (i in 1:nrow(X)) {
    #         idx <- PermMat[,h]
    #         allp[i,h] <- t.test(X[i,idx], X[i,setdiff(1:N, idx)], var.equal = T)$p.value
    #     }
    # }

    #--- The code below runs 40% slower
    # tScan <- function(i,idx) t.test(X[i,idx], X[i,setdiff(1:N, idx)], var.equal = T)$p.value
    # tScan_h=function(h) mapply(tScan, 1:nrow(X),MoreArgs=list(idx=PermMat[,h]))
    #
    # res=mapply(tScan_h,1:nPerm)



    #---------------------------------------------------------------
    # Get the TopK  p-values under each permutation
    #---------------------------------------------------------------


    GetTopK=function(K){
        jgetK=function(j) PermPs[order(PermPs[,j])[1:K],j]
        if(K>1) return(mapply(jgetK,1:nPerm))
        if(K==1) return(matrix(mapply(jgetK,1:nPerm),nrow=1))
    }


    TopK=list()
    for(k in Kvals){
        TopK=c(TopK,list(GetTopK(k)))
    }


    #---------------------------------------------------------------
    # Get the eCDF estimate for each TopK value..
    #---------------------------------------------------------------

    # have to doubly correct the the top2
    GetTopKcdf=function(topK){
        K=dim(topK)[1]
        jCDFK=function(j) mean(colSums((topK-topK[,j])<=0)==K)
        return(mapply(jCDFK,1:nPerm))
    }

    TopKcdf=list()
    for(k in 1:nK){
        TopKcdf=c(TopKcdf,list(GetTopKcdf(TopK[[k]])))
    }


    #---------------------------------------------------------------
    # Adjust eCDF estimate for each TopK value, so that we have
    # a legitimate p-value
    #---------------------------------------------------------------

    getTadj=function(ik) mean(TopKcdf[[ik]]<=(TopKcdf[[ik]][1]))
    Tadj=mapply(getTadj,1:nK)



    #---------------------------------------------------------------
    # Get minP adjusted p-value across all considered
    # choices of K
    #---------------------------------------------------------------

    # Let's get the minP across them all

    CDFmat=t(matrix(unlist(TopKcdf),ncol=nK))

    ipval=function(pvec) rank(pvec, ties.method = ties.method)
    Kpvals=apply(CDFmat,1,ipval)
    Kpvals=Kpvals/(dim(Kpvals)[1])
    KminP=apply(Kpvals,1,min)


    #---------------------------------------------------------------
    #  format output
    #---------------------------------------------------------------

    #  p.values
    p.values=c(Tadj, mean(KminP<=min(Tadj)))
    names(p.values)=c(paste("top",Kvals,sep=""),"minP.allk")

    # Tobs
    getTobs = function(i) TopKcdf[[i]][1]
    Tobs = mapply(getTobs, 1:length(Kvals))
    names(Tobs)=c(paste("top",Kvals,sep=""))


    if(vrb) print(round(p.values,4))

    if(ReturnType=="list") return(list(p.values=p.values,Tobs=Tobs,K.values=Kvals,
                                       N=N,nA=nA,nB=nB,nPerm=nPerm)
    )



    if(ReturnType=="vector"){
        return(c(p.values,Tobs,Kvals))
    }

    if(ReturnType=="TopK") {
        res <- list(p.values=p.values,
                    Tobs=Tobs,
                    # K.counts=K.counts,
                    TopK = TopK,
                    TopKcdf = TopKcdf,
                    Tadj = Tadj,
                    K.values=Kvals,N=N,nA=nA,nB=nB,nPerm=nPerm,
                    method = "Permutation t-test"
        )
        class(res) <- c(class(res), "TopK")
        return(res)
    }

}



binary.v <- function (n)
{
    x <- 1:(2^n)
    mx <- max(x)
    digits <- floor(log2(mx))
    ans <- 0:(digits - 1)
    lx <- length(x)
    x <- matrix(rep(x, rep(digits, lx)), ncol = lx)
    (x%/%2^ans)%%2
}



perm.test <- function (x, y, alternative = c("two.sided", "less", "greater"),
                       var.equal = FALSE, pval)
{
    # alternative <- match.arg(alternative)
    kx <- length(x)
    ky <- length(y)
    n <- kx + ky
    X <- c(x, y)
    # z <- rep(1:0, c(kx, ky))

    if(pval){
        pobs <- t.test(x, y, var.equal = var.equal, alternative=alternative)$p.value

        o <- binary.v(n)
        o <- o[, apply(o, 2, sum) == kx]
        nc <- choose(n, kx)
        allp <- 1:nc
        for (i in 1:nc) {
            xn <- X[o[, i] == 1]
            yn <- X[o[, i] == 0]
            allp[i] <- t.test(xn, yn, var.equal = var.equal, alternative = alternative)$p.value
        }

        return(allp)

    } else {

        tobs <- t.test(x, y, var.equal = var.equal, alternative = alternative)$statistic

        o <- binary.v(n)
        o <- o[, apply(o, 2, sum) == kx]
        nc <- choose(n, kx)
        allt <- 1:nc
        for (i in 1:nc) {
            xn <- X[o[, i] == 1]
            yn <- X[o[, i] == 0]
            allt[i] <- t.test(xn, yn, var.equal = var.equal, alternative = alternative)$statistic
        }

        return(allt)
    }
}