R/MultipleTesting_HeteroDist_ModFuncs.r

Defines functions: storeyPi0Est, StoreyFDREst, FDRAdjustedPval, BHFDRApp, HeyseAdjFunc, GenEstProp, pvalOneSideBTSupport, pvalOneSideFETSupport, pvalueByBinoSupport, pvalFETSupport, getcellcountsandmarginals_DE, getcellcountsandmarginals, fulltable, pvalueDist, Div_Appr_Unif, GetDivergenceMatrix, eNetFull, eNetBuilder, CheckIt

####################################################################
###########  function: check basic numeric properties  ############
####################################################################
# small helper: report NA/NaN/Inf, range, and dimensions of a vector or matrix
CheckIt = function(vector_in) {
   # check for a list first: is.infinite() errors on lists
   if(is.list(vector_in)) stop("groupwise data is a list","\t")

   tmp1 = any(is.na(vector_in) | is.nan(vector_in) | is.infinite(vector_in))
   cat("^^any NA, NaN or Inf:",tmp1,"...")

   if (is.vector(vector_in)) {
   cat("Min is:",min(vector_in)," Max is:", max(vector_in),"...")
   cat("length is",length(vector_in),"\t") }
   
   if (is.matrix(vector_in)) {
      nr = nrow(vector_in); nc = ncol(vector_in)
      cat("nrow:",nr,"; ncol:",nc,"\t")
     if (nc == 2) {
          rs1 =  rowSums(vector_in)
         cat("^^min rowSum:",min(rs1),"^^Max rowSum:",max(rs1),"\n") 
         }
      if (nc == 4) {
          rs1 =  rowSums(vector_in[,1:2]) # for FET based on individual marginals
         cat("^^min rowSum:",min(rs1),"^^Max rowSum:",max(rs1),"\n") 
         }    
     }   # end of matrix branch
   }  
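
# Illustrative sketch (not run when sourcing; toy inputs):
if (FALSE) {
  CheckIt(c(0.1, 0.5, NA))        # vector branch: flags the NA, prints range and length
  CheckIt(matrix(1:8, ncol = 2))  # matrix branch: prints dims and the row-sum range
}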
 

###########################################################################
#### function: updated main grouping with specified minimal group size 
#######################################################################
### function
# FOR PRACTICAL PURPOSES, to_merge is always TRUE.

eNetBuilder = function(div_mat = NULL,ngp = NULL, merge_size = NULL,rad = NULL)
  {
  if (!is.matrix(div_mat))
    stop("^^ A matrix with entries as divergences is needed ...","\n")
  
  m_lt = nrow(div_mat)

  ngrptmp = 1; cnt_idx = double(0);  div_mat_in = div_mat
  all_idx = 1:m_lt;  # caution, no longer all_idx = 1:m 
  
  idx_left = all_idx ; itn = length(idx_left)
  centers_id = double(0) ;    grp_idx = vector("list",ngp)

  while(itn & (ngrptmp <= ngp)) {
    # cat("^^Forming group", ngrptmp, "....","\n")
   ball = vector("list",itn);  ball_size = double(itn)

   # compare itn balls
   for (i in 1:itn) {
        ball_id_tmp = which(abs(div_mat_in[i,]) <= rad)
        ball[[i]] = idx_left[ball_id_tmp]    # ids in ball from original 1:nrow(div_mat) indices
        ball_size[i] = length(ball[[i]])    }

    smax_id = which.max(ball_size);  id_mxball = idx_left[smax_id]
    ball_max = ball[[smax_id]];  size_mxball = ball_size[smax_id]

   # cat("^^Ids: e-ball center", id_mxball,". Ball size:", size_mxball, "\n") # "ids in ball:",ball_max,"\n")
    
    centers_id = c(centers_id,id_mxball);     grp_idx[[ngrptmp]] = ball_max
    cnt_idx = c(cnt_idx,ball_max)
    # added check
   # cat("^^# of Id used",length(cnt_idx),"\n")
    
    if (any(cnt_idx <0))     stop("^^negative id in current ids","\n") 
    
    idx_left = all_idx[-cnt_idx]  # remove already-grouped ids from 1:m_lt

    itn = length(idx_left)
   # cat("^^# of ids left currently",itn,"\n") # ". Idx left:", idx_left,"\n")

    # The following finds the best partition, case 1:  ngrptmp <= ngp-1
    if (ngrptmp <= ngp-1) {
       if (itn <= merge_size) {  
         # the condition must be itn <= merge_size (not itn < merge_size) because here ngrptmp <= ngp-1
       #  cat("^^Can not form",ngp,"groups each with cardinality no less than",merge_size,"\n")
         cat("^^---- Regroup with smaller e-net size ---- ","\n")
         rad = rad/2  # adjust this
         idx_left = 1:m_lt; itn = length(idx_left)
         ngrptmp = 0;  cnt_idx = double(0);  div_mat_in = div_mat
         }

       if (itn == merge_size & ngrptmp == ngp-1) {
         cat("^^Merging",itn,"ids.",ngp,"e-balls reached as requested","\n")
         grp_idx[[ngp]] =  idx_left
         itn = 0
        }
       
       if (itn > merge_size) {
         # cat("^# of id's left to group is", itn,",continue grouping","\n")
         div_mat_in = div_mat[idx_left,][,idx_left]
         }
    }  # end  if (ngrptmp <= ngp-1)

    # case 2:  ngrptmp = ngp
    if (ngrptmp == ngp) {
      # nothing left 
      if (itn == 0)
        cat("^^",ngp,"e-balls reached as requested","\n")

      if (itn > merge_size) {
     #  cat("^^---Some ids left for more groups. Regroup with larger e-net size --- ","\n")
        rad = 1.5*rad  # adjust this
        idx_left = 1:m_lt; itn = length(idx_left)
         ngrptmp = 0;  cnt_idx = double(0);  div_mat_in = div_mat
       }  
       
       if (itn <= merge_size) {
           grp_idx[[ngp]] = c(grp_idx[[ngp]],idx_left)
          cat("^^Merging",itn,"ids.",ngp,"e-balls reached as requested","\n")
           }
      } # end if (ngrptmp == ngp)

    ngrptmp = ngrptmp + 1

  } # end of while
      return (grp_idx)
} # end of function

### ###########################
eNetFull = function(metrics_in = NULL, ngrp_in = NULL, merge_size = NULL, rad_in = NULL, mgpsize_in = NULL) {

    if(is.null(mgpsize_in))
     stop("^^Specify minimal group size (MGS)")
    rad_tmp = rad_in;  mgps_tmp = 0  # start below any valid minimal group size so at least one grouping pass runs

    while (mgps_tmp < mgpsize_in) {

      groups_tmp = eNetBuilder(metrics_in,ngrp_in,merge_size,rad_tmp)
      ngp_tmp = length(groups_tmp)
  #    check minimal group size
      mgps_tmp = min(sapply(groups_tmp,length))
      cat("^^Current achieved minimal group size is", mgps_tmp,"\n")
    } # end of while
    return(groups_tmp)
}   # end of function
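
# Illustrative sketch (not run when sourcing): grouping three items with a toy
# symmetric divergence matrix of the kind GetDivergenceMatrix below produces.
if (FALSE) {
  D = matrix(c(0,    0.3,  0.05,
               0.3,  0,    0.3,
               0.05, 0.3,  0), nrow = 3)
  eNetFull(metrics_in = D, ngrp_in = 2, merge_size = 1, rad_in = 0.3, mgpsize_in = 1)
  # the radius adapts until 2 groups form, e.g. list(1, c(2, 3)) here
}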
 


########################## ########################## ##########################  
################## function:  divergence  matrix   ########################## 
########################## ########################## ##########################

GetDivergenceMatrix = function(pvSpList=NULL)  {   
  
  lgA1 = length(pvSpList)     # caution: no longer m but lgA1
  lgt_div = (lgA1-1)*lgA1/2
  cat("^^Computing", lgt_div, "pairwise divergences. Takes time ...","\n")
  
  infNorm_mat = matrix(0,lgA1,lgA1)
  
  # start of computing pairwise quantities
  for (i in 2:lgA1) { 
       sp_pv1 = pvSpList[[i]];  lg1 = length(sp_pv1)     # pvSpList has been formatted
           
     for (j in 1:(i-1)) {       
       sp_pv2 = pvSpList[[j]]; lg2 = length(sp_pv2)    # pvSpList has been formatted
     
       teva = union(sp_pv1,sp_pv2)  # evaluate cdf at union of pvalue supports 
       cdfn1 = pvalueDist(teva,sp_pv1);  cdfn2 = pvalueDist(teva,sp_pv2)
       
       infNorm_mat[i,j] = max(abs(cdfn1-cdfn2)) 
        }
    }   # end of computing pairwise quantities
      
     infNorm_mat_sym = infNorm_mat + t(infNorm_mat)
      
      cat("^^Finished computing matrix of pairwise divergences...","\n")       
      return(infNorm_mat_sym)
      
  }
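
# Illustrative sketch (not run when sourcing): pairwise sup-norm distances
# between the CDFs of three toy p-value supports (each ascending, ending at 1).
if (FALSE) {
  supps = list(c(0.1, 0.5, 1), c(0.2, 0.6, 1), c(0.05, 0.5, 1))
  GetDivergenceMatrix(supps)   # symmetric 3x3 matrix with zero diagonal
}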
  
 ######################### ########################## ##########################   
### Identify approximately uniform p-value supports; returns the input list and their ids
######################### ########################## ########################## 
 
  Div_Appr_Unif = function(pvSpList=NULL,test_used=NULL,appr_unif_tol = NULL) {
      lg3 = length(pvSpList)
      id_appr_unif = double(0)
       
          for (i in 1:lg3) {             
             sp_pv_tmp = c(0,pvSpList[[i]])  # prepend 0 so the first gap is measured from 0
             if (max(abs(diff(sp_pv_tmp))) < appr_unif_tol)      
              id_appr_unif = c(id_appr_unif,i)
          }  
                      
    return(list(pvSpList,id_appr_unif))       
  } # end of func
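
# Illustrative sketch (not run when sourcing): a support whose gaps (measured
# from 0) all fall below the tolerance counts as approximately uniform.
if (FALSE) {
  supps = list(seq(0.1, 1, by = 0.1), c(0.5, 1))
  Div_Appr_Unif(supps, appr_unif_tol = 0.2)[[2]]   # returns 1: only the first qualifies
}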
 
  
  
  #######################################################################
  ####   Distribution of the p-values  (Generally Applicable)  #########
  #######################################################################
  
  pvalueDist <- function(peva,support)
  {
    lgh=length(support)
    lgh3 <- length(peva)
    y <- double(lgh3)
    for (i in 1:lgh3)  {
      cpv <- peva[i] >= support
      # the key is to compare where t falls in the support
      if (sum(cpv)==0) {y[i]=0} # because t is strictly less than minimum of support
      else if (sum(cpv)==lgh) {y[i]=1} # because t is no less than maximum of support
      else # t falls in one interval
      {  y[i] <- support[max(which(cpv))]   }
    }
    return(y)
  }
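
  # Illustrative sketch (not run when sourcing): for a valid discrete p-value,
  # the null CDF at t is the largest support point not exceeding t.
  if (FALSE) {
    pvalueDist(c(0.01, 0.1, 0.2, 0.9), c(0.05, 0.2, 1))
    # returns 0.00 0.05 0.20 0.20
  }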
  
  

       ######################################################################
      ####     Function to get full tables for  association studies #########
      #######################################################################
      fulltable <- function(twocoldata)
      {
        # m is the number of rows in the data
        m <- nrow(twocoldata)
        # construct a 3x3 full table (2x2 cell counts plus margins) per gene
        tables <- array(0, dim=c(3,3,m))

        controlsum<-sum(twocoldata[,1])
        treatsum<-sum(twocoldata[,2])

         # get 2-by-2 table for each gene
          tables[1,1,]=twocoldata[,1]
          tables[1,2,]=twocoldata[,2]
          tables[2,1,]=controlsum - twocoldata[,1]
          tables[2,2,]= treatsum -twocoldata[,2]

         # get 3rd row and column for each gene
         tables[1,3,]= tables[1,1,]+ tables[1,2,]
         tables[2,3,]= tables[2,1,]+ tables[2,2,]

           tables[3,1,]=rep(controlsum,m)
           tables[3,2,]=rep(treatsum,m)
           tables[3,3,]=rep(treatsum+controlsum,m)

         return(tables)
      }
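
      # Illustrative sketch (not run when sourcing): toy two-column count data.
      if (FALSE) {
        tabs = fulltable(cbind(c(5, 0, 2), c(1, 3, 4)))
        tabs[, , 1]   # 3x3 table for the first row, margins in row 3 and column 3
      }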


      ################################################################# ###########################
      #### Function to get marginals and cell counts from generated counts for association studies
      ################################################################# ###########################

      getcellcountsandmarginals <- function(countveccontrol,countvectreat)
      {
        m <- length(countveccontrol)
        twocolsimdata <- cbind(countveccontrol,countvectreat)
        simtables <- fulltable(twocolsimdata)

        simallcellcounts <- vector("list",m)
        for (i in 1:m) {simallcellcounts[[i]] <- simtables[1:2,1:2,i]}

        simallmarginals <- vector("list",m)
        for (i in 1:m) {simallmarginals[[i]] <- c(simtables[1,3,i],simtables[2,3,i],simtables[3,1,i])}

         y2 <- list(simallcellcounts,simallmarginals)
          return(y2)
      }
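
      # Illustrative sketch (not run when sourcing):
      if (FALSE) {
        res = getcellcountsandmarginals(c(5, 0, 2), c(1, 3, 4))
        res[[1]][[1]]   # 2x2 cell counts for the first gene
        res[[2]][[1]]   # its two row totals and the first column total
      }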

      ############################################################# ###########################
      #### Function to Get marginals and cellcounts from generated counts for differential expression
      ################################################################# ###########################
      
      getcellcountsandmarginals_DE <- function(data_in)
      {
        m <-nrow(data_in)
        # construct a 3x3 full table
        tables<- array(0, dim=c(3,3,m))
        
        tables[1,1,] = data_in[,1]
        tables[2,1,] = data_in[,2]
        tables[1,2,] = data_in[,3] - data_in[,1]
        tables[2,2,] = data_in[,4] - data_in[,2]
        tables[1,3,]= data_in[,3]
        tables[2,3,]= data_in[,4]
        tables[3,3,] = data_in[,3] + data_in[,4]
        tables[3,1,] = data_in[,1] + data_in[,2]
        tables[3,2,] = tables[3,3,] - tables[3,1,]
        
        simallcellcounts <- vector("list",m)
        for (i in 1:m) {simallcellcounts[[i]] <- tables[1:2,1:2,i]}
        
        simallmarginals <- vector("list",m)
        for (i in 1:m) {simallmarginals[[i]] <- c(tables[1,3,i],tables[2,3,i],tables[3,1,i])}
        
        y2 <- list(simallcellcounts,simallmarginals)
        return(y2)
      }
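
      # Illustrative sketch (not run when sourcing): columns are count in group 1,
      # count in group 2, total for group 1, total for group 2.
      if (FALSE) {
        de = cbind(c(5, 0), c(1, 3), c(20, 20), c(25, 25))
        getcellcountsandmarginals_DE(de)[[1]][[1]]   # 2x2 cell counts, first gene
      }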
      
      
      
      
 ###################################################################################
  #### Function to get the two-sided p-value and its support for FET
  ###################################################################################

    pvalFETSupport <- function(cellmatrix)
      {

          ns = cellmatrix[1,1]; nw = sum(cellmatrix[,1]); nb = sum(cellmatrix[,2]); nd = sum(cellmatrix[1,])
#          x <- x[1, 1]; m <- sum(x[, 1]);    n <- sum(x[, 2]); k <- sum(x[1, ])
#        lo <- max(0, k - n);   hi <- min(k, m);       support <- lo:hi
#        logdc <- dhyper(support, m, n, k, log = TRUE)
          lo <- max(0, nd - nb);   hi <- min(nd, nw)
          support <- lo:hi
          # all probability masses for this distribution
          mass = dhyper(support, nw, nb, nd)
          # realized mass
          realizedmass = dhyper(ns, nw, nb, nd)

           # two-sided pvalue
           pvalue <- sum(mass[which(mass <= realizedmass)])
            # add: sum_y P(y) over y such that P(y) < P(X)
           lessProb = sum(mass[which(mass < realizedmass)])
           # eqProb: sum_y P(y) over y such that P(y) = P(x)
           eqProb = pvalue - lessProb
           
           if (is.na(pvalue)) print("pvalue is NA or NaN")

          ### compute p value support
          lgh2 = length(mass)
          temp <- double(lgh2)
          for (i in 1:lgh2)
          {
            # temp[i] is the two-sided pvalue for i-th table given marginals
            temp[i] <- sum(mass[which(mass <= mass[i])])
          }
           # sort pvalue support
          psupport <- unique(sort(temp,decreasing=FALSE))

          # randomized p-value (definition from Dickhaus et al., Lemma 2)
        randPval = pvalue - runif(1)*eqProb
        randPval = min(1,randPval)
        
        # mid p-value (definition from Hwang et al. 2001)
        MidPval = lessProb + 0.5*eqProb
        MidPval = min(1,MidPval)
          # return all three p-values
          pvals = c(pvalue,MidPval,randPval)
          
          # prepend the three p-values to the front of the support vector
          support<- c(pvals, psupport)
          return(support)
          }
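
    # Illustrative sketch (not run when sourcing): the first three entries of the
    # returned vector are the (conventional, mid, randomized) two-sided p-values;
    # the rest is the sorted p-value support.
    if (FALSE) {
      cellmat = matrix(c(3, 12, 10, 5), nrow = 2)
      out = pvalFETSupport(cellmat)
      out[1]        # close to fisher.test(cellmat)$p.value (fisher.test adds a tiny tolerance)
      out[-(1:3)]   # the support
    }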


      ################################################################################### 
      ####  Function to get two-sided pvalue and its support for binomial test    #########
      ###################################################################################

      pvalueByBinoSupport <- function(marginal)
      {
        #  under the null that the two Poisson rates are equal, the UMP test follows Binomial with p = 0.5
        spTmp = seq(from=0,to=marginal[2])
        d <- dbinom(spTmp,marginal[2],0.5,TRUE) ### using log of probs provides more accurate values
        ord <- order(d)
        d.ordered <- d[ord]
        
        # the pmf of Binomial(n, 0.5) is symmetric; when marginal[2] is odd each probability appears exactly twice
        relDev <- abs(diff(d.ordered)) ### check differences
        idx <- which(relDev > 0 & relDev <= .Machine$double.eps*2) ### "2" has proven to be a good choice
        # slight numerical correction
        ler = length(idx)  
        if (ler >0) {  
          for (jx in 1:ler) {
            d.ordered[c(idx[jx], idx[jx] + 1)] <- sum(d.ordered[c(idx[jx], idx[jx] + 1)])/2        
          } } 
        
        d.ordered <- exp(d.ordered)
        s <- sum(d.ordered) # check total prob
        if(s != 1) d.ordered <- d.ordered/s     
        d <- d.ordered[order(ord)]              ### restore original order (if necessary)
        #### end of correction
        mass = d      # re-assign

          ## compute pvalue
          realizedmass <- mass[which(spTmp==marginal[1])]

           # two-sided pvalue
           pvalue <- sum(mass[which(mass <= realizedmass)])
           # add: sum of probabilities of y such that P(y) < P(X)
           lessProb = sum(mass[which(mass < realizedmass)])
           # eqProb: sum_y P(y) st P(y) = P(x)
           eqProb = pvalue - lessProb
           
           # check p-value
           if (is.na(pvalue)) print("pvalue is NA or NaN")

          ### compute p value support
          lgh2 = length(mass)
          temp <- double(lgh2)
          for (i in 1:lgh2)
          {
            # temp[i] is the two-sided pvalue for i-th table given marginals
            temp[i] <- sum(mass[which(mass <= mass[i])])
          }
          # sort pvalue support
          psupport <- sort(temp,decreasing=FALSE)
          
          # randomized p-value
        randPval = pvalue - runif(1)*eqProb  
        randPval = min(1,randPval)
        
        # mid p-value
        MidPval = lessProb + 0.5*eqProb
        MidPval = min(1,MidPval)
          # return all three p-values
          pvals = c(pvalue,MidPval,randPval)
           
          # prepend the three p-values to the front of the support vector
          support<- c(pvals, psupport)
          return(support)
          }
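
      # Illustrative sketch (not run when sourcing): x = 2 events out of n = 10,
      # testing Binomial(10, 0.5).
      if (FALSE) {
        out = pvalueByBinoSupport(c(2, 10))
        out[1]   # close to binom.test(2, 10, 0.5)$p.value
      }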

      
      #############################################################
      ######## one-sided p-value and its support for FET ##########
      ########################################################
      pvalOneSideFETSupport <- function(cellmatrix, Side = "Right")
      {
        ns = cellmatrix[1,1]; nw = sum(cellmatrix[,1]); nb = sum(cellmatrix[,2]); nd = sum(cellmatrix[1,]);
        drx = dhyper(ns, nw, nb, nd)
        lo <- max(0, nd - nb);   hi <- min(nd, nw)
        #          x <- x[1, 1]; m <- sum(x[, 1]);    n <- sum(x[, 2]); k <- sum(x[1, ])
        #        lo <- max(0, k - n);   hi <- min(k, m);       support <- lo:hi
        #        logdc <- dhyper(support, m, n, k, log = TRUE)
        
        if (Side == "Right") {
          # get support
          psupport = dhyper(lo:ns, nw, nb, nd) + phyper(lo:ns, nw, nb, nd,lower.tail = FALSE)
          psupport = unique(sort(psupport))
          # tail
          ptail =  phyper(ns, nw, nb, nd,lower.tail = FALSE)
          # type of p-value
          pvalO = ptail + drx
          pvalMid = ptail + 0.5*drx    # definition from Hwang et al. 2001
          pvalRnd = ptail + runif(1)*drx   # definition from Habiger 2015
        } 
        
        if (Side == "Left") {
          # get support
          psupport = phyper(lo:nd, nw, nb, nd)
          psupport = unique(sort(psupport))
          # tail
          ptail =  phyper(ns, nw, nb, nd)
          # type of p-value
          pvalO = ptail
          pvalMid = ptail - drx + 0.5*drx     # by duality
          pvalRnd = ptail - drx + runif(1)*drx  # by duality
        }
        
        # save pval and support
        pvals = c(pvalO,pvalMid,pvalRnd)
        support<- c(pvals,psupport)   
        return(support)
      }
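
      # Illustrative sketch (not run when sourcing):
      if (FALSE) {
        cellmat = matrix(c(3, 12, 10, 5), nrow = 2)
        out = pvalOneSideFETSupport(cellmat, Side = "Right")
        out[1]   # P(X >= 3); equals fisher.test(cellmat, alternative = "greater")$p.value
      }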
      
      ########################################################
      ######## one-sided p-value and its support for Binomial ##########
      ########################################################
      pvalOneSideBTSupport <- function(cellvector, Side = "Right")
      {
        ns = cellvector[1]; nt = cellvector[2]
        drx = dbinom(ns, nt,0.5)
        
        if (Side == "Right") {
          # get support
          psupport = pbinom(0:ns, nt,0.5,lower.tail = FALSE) + dbinom(0:ns, nt,0.5)
          psupport = unique(sort(psupport))
          # tail
          ptail =  pbinom(ns, nt,0.5,lower.tail = FALSE)
          # type of pval
          pvalO = ptail + drx
          pvalMid = ptail + 0.5*drx
          pvalRnd = ptail + runif(1)*drx 
        } 
        
        if (Side == "Left") {
          # get support
          psupport = pbinom(0:nt, nt,0.5)
          psupport = unique(sort(psupport))
          # tail
          ptail =  pbinom(ns, nt,0.5)
          # type of p-value
          pvalO = ptail
          pvalMid = ptail - drx + 0.5*drx     # by duality
          pvalRnd = ptail - drx + runif(1)*drx  # by duality
        }
        
        # save pvalue and support
        pvals = c(pvalO,pvalMid,pvalRnd)
        support<- c(pvals,psupport)   
        return(support)
      }
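
      # Illustrative sketch (not run when sourcing):
      if (FALSE) {
        out = pvalOneSideBTSupport(c(7, 10), Side = "Right")
        out[1]   # P(X >= 7), X ~ Binomial(10, 0.5); equals
                 # binom.test(7, 10, 0.5, alternative = "greater")$p.value
      }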
      

########## generalized estimator of the proportion of true null hypotheses (pi0)
GenEstProp <- function(pvector,psupports,TuningPar=c(0.5,100))
  {
    # m is the number of p-values
    m = length(pvector)
    minSupp = unlist(lapply(psupports, min))
    
    # define smallest guiding value
    maxmin = max(minSupp)
    cat("^^max of min's of supports: ",maxmin,"\n")
    
    singPsupp = which(minSupp == 1)
    NofDirac = length(singPsupp)
      
    if (NofDirac == m) {
      cat("^^All CDF's are Dicac masses ..","\n")
      genest = 1
      } else {  #  case 2: NofDirac < m; this case lasts till the end of estimation
  
        if (NofDirac >0) { # case 2(a): m>NofDirac >0
          maxminA = max(minSupp[-singPsupp])
          cat("^^second max of min's of supports: ",maxminA,"\n")
          schLoc = (1:m)[-singPsupp]
        if (maxminA < 0.5) {
          tunings = seq(maxminA+TuningPar[1]*(0.5-maxminA), 0.5,length.out=TuningPar[2])
        } else{ 
          tunings = c(maxminA)  
          }
        } else {  # case 2(b):   maxmin < 1, i.e., NofDirac = 0, i.e. no Dirac mass
          schLoc = 1:m
          if (maxmin < 0.5) {
            tunings = seq(maxmin+TuningPar[1]*(0.5-maxmin), 0.5,length.out=TuningPar[2])
          } else {  
            tunings = c(maxmin)  
           }
          
        } # end of case 2(b)
      
    # start the search when NofDirac < m
    Lt = length(tunings)
    est = double(Lt)
    cuts = double(m)
    
    for (j in 1:Lt) {
      lamb = tunings[j]
      
      for (i in schLoc) {
        psupp = psupports[[i]]
        if (sum(psupp<=lamb) >0 ) {
          tmp = max(psupp[which(psupp<=lamb)])
          loc = max(which(psupp==tmp))
          cuts[i]= psupp[loc]
        } else  {    cuts[i] = NA }
        
      }
      cutstmp = cuts[!is.na(cuts)]
      
      esttmp = sum(as.numeric(pvector[!is.na(cuts)]>cutstmp)/(1-cutstmp))/m + 
        1/((1-lamb)*m)+ sum(is.na(cuts))/m+length(singPsupp)/m
      
      est[j] = min(1,esttmp)
    }
    # take the average
    genest = mean(est[!is.nan(est)])
   } # end of case 2: NofDirac < m 
    
    return(genest)
  }
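
# Illustrative sketch (not run when sourcing): toy p-values with their discrete
# supports (each ascending with maximum 1).
if (FALSE) {
  supps = list(c(0.02, 0.4, 1), c(0.1, 0.6, 1), c(0.05, 0.3, 1))
  GenEstProp(c(0.02, 0.6, 1), supps, TuningPar = c(0.5, 10))   # estimate of pi0 in (0, 1]
}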

    ###################################################
    ####### Subsection 19: BH FDR procedure        ########
    ######################################################
    # BH procedure requires input of pvalues and their indices
      BHFDRApp <- function(Pvals,FDRlevel)
      {   
        if (any(is.na(Pvals)) | any(is.nan(Pvals))) {
          cat("^^NA or NaN in pvalues...","\n")
          print(Pvals[is.na(Pvals)])
        }
        
        if (any(is.na(FDRlevel)) | any(is.nan(FDRlevel))) cat("^^NA or NaN FDRlevel...","\n")
           
        if (is.vector(Pvals))  lgh3 <- length(Pvals)
        if (is.matrix(Pvals) | is.data.frame(Pvals))  lgh3 <- dim(Pvals)[1]
        PvalAndIdx <- cbind(Pvals,seq_len(lgh3))
        PvalAndIdxOrd <- PvalAndIdx[order(PvalAndIdx[,1]),]
        
     #   cat("^^Dims of PvalAndIdxOrd is:",as.vector(dim(PvalAndIdxOrd)),"\n")
    
        BHstepups <- seq_len(lgh3)*(FDRlevel/lgh3)
        cmp3 <- PvalAndIdxOrd[,1] <= BHstepups
        scmp3 <- sum(cmp3)
    
       # collect rejections if any
        if (scmp3 == 0) {   
        print ("No rejections by BH procedure")        # when there are no rejections
        rejAndTresh <- list(matrix(numeric(0), ncol=2,nrow=0),0)       
        }  else   {  r <- max(which(cmp3))        
            #  cat("^^^ Minimax index in BH is:",r,"\n")
             # cat("^^^ Minimax threshold in BH is:",BHstepups[r],"\n")
                   if (r==1) {
                    # when r == 1 a single row is selected and drops to a vector, so coerce it back to a two-column matrix
                     BHrej = as.matrix(t(PvalAndIdxOrd[1:r,]))  
                   #  print(BHrej)
                   } else {
                      BHrej <- PvalAndIdxOrd[1:r,]
                   }
               rejAndTresh <- list(BHrej,BHstepups[r])    
              }
            return(rejAndTresh)
      }
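
      # Illustrative sketch (not run when sourcing):
      if (FALSE) {
        pv = c(0.001, 0.02, 0.04, 0.3, 0.9)
        res = BHFDRApp(pv, 0.05)
        res[[1]]   # rejected p-values (column 1) with their original indices (column 2)
        res[[2]]   # the BH step-up threshold used
        # cross-check: which(p.adjust(pv, "BH") <= 0.05) gives the same indices
      }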


     #### rejection based on adjusted p-values
      FDRAdjustedPval <- function(Pvals,FDRlevel)
      {   
        if (any(is.na(Pvals)) | any(is.nan(Pvals))) {
          cat("^^NA or NaN in pvalues...","\n")
          print(Pvals[is.na(Pvals)])
        }
        
        if (any(is.na(FDRlevel)) | any(is.nan(FDRlevel))) cat("^^NA or NaN FDRlevel...","\n")
           
        if (is.vector(Pvals))  lgh3 <- length(Pvals)
        if (is.matrix(Pvals) | is.data.frame(Pvals))  lgh3 <- dim(Pvals)[1]
        PvalAndIdx <- cbind(Pvals,seq_len(lgh3))
        PvalAndIdxOrd <- PvalAndIdx[order(PvalAndIdx[,1]),]
        
     #   cat("^^Dims of PvalAndIdxOrd is:",as.vector(dim(PvalAndIdxOrd)),"\n")
        cmp3 <- PvalAndIdxOrd[,1] <= FDRlevel
        scmp3 <- sum(cmp3)
    
       # collect rejections if any
        if (scmp3 == 0) {   
        print ("No rejections by BH")        # when there are no rejections
        rejAndTresh <- list(matrix(numeric(0), ncol=2,nrow=0),0)       
        }  else   {  r <- max(which(cmp3))        
            #  cat("^^^ Minimax index in BH is:",r,"\n")
             # cat("^^^ Minimax threshold in BH is:",BHstepups[r],"\n")
                   if (r==1) {
                    # when r == 1 a single row is selected and drops to a vector, so coerce it back to a two-column matrix
                     BHrej = as.matrix(t(PvalAndIdxOrd[1:r,]))  
                   #  print(BHrej)
                   } else {
                      BHrej <- PvalAndIdxOrd[1:r,]
                   }
               rejAndTresh <- list(BHrej,PvalAndIdxOrd[,1][r])    
              }
            return(rejAndTresh)
      }
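
      # Illustrative sketch (not run when sourcing): same bookkeeping as BHFDRApp, but
      # the inputs are already-adjusted p-values thresholded directly at FDRlevel.
      if (FALSE) {
        adjp = p.adjust(c(0.001, 0.02, 0.04, 0.3, 0.9), "BH")
        FDRAdjustedPval(adjp, 0.05)[[1]]
      }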


 ### Heyse (2011) adjusted p-values
    HeyseAdjFunc = function (pvec, pSupps)
      {
        m = length(pvec)
        if (m ==1)
          stop("^^^At least two p-values and their supports are needed.")
        # pSupps[[i]] is the support of pvec[i]; entries of pSupps[[i]] are ascendingly ordered
        
        # order p-values ascendingly
        pvecAscend =  pvec[order(pvec,decreasing = F)]
        
        # create the BH sequence of p-values in the Heyse (2011) rephrasing: m*p_(k)/k, where p_(m) is the largest
        pBHAscend = m*pvecAscend/(1:m)
        
        # obtain Q(t) = \sum_{j=1}^m F_j(t)
        # obtain matrix F_j(p_(i))
        stepf = lapply(pSupps, function(x) stepfun(x, c(0, x)))
        p.mat = matrix(NA, m, m)
        for (i in 1:m)  {
          p.mat[i, ] = stepf[[i]](pvecAscend)
        }
        # sum of each column; obtain Q(p_(i)) = \sum_{j=1}^m F_j(p_(i))
        p.sums = colSums(p.mat)
        # Q(p_(i))/i
        p.sums = p.sums/(1:m)
        
        # obtain the Heller (2012) adjusted p-values in the original ordering of pvec
        pR = double(m)
        pR[m:1] = cummin(p.sums[m:1])
        op = order(pvec)
        pR = pmin(1,pR[order(op)])
        
        
        ##### obtain the Heyse (2011) adjusted p-values in the original ordering of pvec ####
        pH = double(m)
        pH[m] = pBHAscend[m]
        pH[(m-1):1] = pmin(cummin(pBHAscend[m:2]),p.sums[(m-1):1])
        pH = pmin(1, pH[order(op)])
        
        # return the Heyse (2011) adjusted p-values; pR holds the Heller (2012) version
        return(pH)
      }
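
    # Illustrative sketch (not run when sourcing): two discrete p-values and their supports.
    if (FALSE) {
      pSupps = list(c(0.05, 0.5, 1), c(0.1, 0.6, 1))
      HeyseAdjFunc(c(0.05, 0.6), pSupps)   # Heyse (2011) adjusted p-values
    }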
 
    #######################################################################
    #### Subsection 18:  Storey's procedure               #########
    #######################################################################

      StoreyFDREst <- function (simpvalues,FDRlevel,pi0est)
      {
         m <- length(simpvalues)
        # the step length in the search depends on the minimal difference between p-values
         sortedpvalvec <- sort(simpvalues)
         diffsortedpvals <- diff(sortedpvalvec)
         # since diff does not compute the difference between the minimal p-value and zero,
         # that gap is included as well, so the step length in thresh is the minimum of
         # the minimal positive diff and the minimal p-value
         threshstep <- min(min(diffsortedpvals[diffsortedpvals > 0]),min(sortedpvalvec))

        # the FDR threshold is known to be no less than the Bonferroni threshold but no larger
        # than FDRlevel. the step length should be sensitive to distances between p-values to
        # make a practical change in R(t) as t changes.
        # sometimes threshstep can be so small that R itself cannot handle it

        # Step 1: crude search first; do not start from min(sortedpvalvec) when m is small
        threshveccrude <- seq(from= 0,to=1,by=10^-3)
        ecdfcrude0 <- max(sum(sortedpvalvec <= threshveccrude[1]),1)/m

        istcrude <-1
        stFDPcrude <- pi0est*threshveccrude[1]/ecdfcrude0
         stFDPcrude0 <- stFDPcrude
        while (stFDPcrude <= FDRlevel)
        {
          istcrude <- istcrude+1;
          # check if the max threshold (t = 1) is reached; if so, break
          if (istcrude > length(threshveccrude)){print("max threshold (t=1) in crude search for Storey's estimators exhausted");break}
          steCDFcrude <- max(sum(sortedpvalvec <= threshveccrude[istcrude]),1)/m
          stFDPcrude <- pi0est*threshveccrude[istcrude]/steCDFcrude
          if (is.na(stFDPcrude) | is.nan(stFDPcrude)) print("NA or NaN occurred as FDP of Storey's estimators")

        }
        if (istcrude == 2) { stsupthreshcrude <- threshveccrude[1]
             stFDPcrudeAtSThresh <- stFDPcrude0    }     else {
              stsupthreshcrude <- threshveccrude[istcrude-1]
             ecdfstsupthreshcrude <- max(sum(sortedpvalvec <= stsupthreshcrude),1)/m
        stFDPcrudeAtSThresh <- pi0est*stsupthreshcrude/ecdfstsupthreshcrude  }
      #  cat("stFDPcrudeAtSThresh is",stFDPcrudeAtSThresh,"at sup thresh",stsupthreshcrude,"\n")

        ############################################
        #### step 2: after a crude threshold has been located, do a finer search from the crude sup threshold
        ### chen's fdr estimator
        # break once the max threshold (t = 1) is reached

         # use while loop for storey's
         if (stsupthreshcrude == 1) {  print("max threshold (t=1) for Storey's estimators exhausted, no fine search in second round")
            stsupthresh <- stsupthreshcrude
         stFDPAtSThresh <- stFDPcrudeAtSThresh} else {
        threshvecst <- seq(from = stsupthreshcrude,to=threshveccrude[istcrude],
                               by= min((threshveccrude[istcrude]-stsupthreshcrude)/m,FDRlevel/m))
         ecdf0st <- max(sum(sortedpvalvec <= threshvecst[1]),1)/m
         stFDP <- pi0est*threshvecst[1]/ecdf0st
          stFDP0 <- stFDP
         ist <-1
        while (stFDP <= FDRlevel)
        {
          ist <- ist+1;
          if (ist > length(threshvecst)) break
          steCDF <- max(sum(sortedpvalvec <= threshvecst[ist]),1)/m
          stFDP <- pi0est*threshvecst[ist]/steCDF
          if (is.na(stFDP) | is.nan(stFDP)) print("NA or NaN occurred as FDP for Storey's estimators in fine search")
        }
        if (ist==2) {stsupthresh <- threshvecst[1]
          stFDPAtSThresh <- stFDP0     } else {
        stsupthresh <- threshvecst[ist-1]
         ecfstsupthresh <- max(sum(sortedpvalvec <= stsupthresh),1)/m
        stFDPAtSThresh <- pi0est*stsupthresh/ecfstsupthresh }
       #  cat("stFDPAtSThresh",stFDPAtSThresh,"at sup thresh",stsupthresh,"\n")
      } # end of else


        # get discoveries
        stDiscovery <- which(simpvalues <= stsupthresh)
        if (length(stDiscovery)==0) print("No discoveries found by Storey's procedures")

        StDiscoveries <- list(stDiscovery,c(stsupthresh,stFDPAtSThresh))
        return(StDiscoveries)

      }
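
      # Illustrative sketch (not run when sourcing): mostly uniform p-values plus a
      # few small ones; pi0 is estimated first via storeyPi0Est below.
      if (FALSE) {
        set.seed(1)
        pv = c(runif(90), rbeta(10, 1, 20))
        pi0 = storeyPi0Est(0.5, pv)
        res = StoreyFDREst(pv, 0.05, pi0)
        res[[1]]   # indices of discoveries
        res[[2]]   # c(threshold, estimated FDP at that threshold)
      }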
      
     # Storey: the new qvalue package stops with an error when a p-value equals exactly 1,
     # so the following estimator is used instead
     storeyPi0Est <- function(lambda,pvector)
      {
          # m is the number of p-values
           m <- length(pvector)

          over <- m*(1-lambda)
          stunmin <- sum(pvector > lambda)/over 
          st <- min(1,stunmin)
          if (is.nan(st))  print("Storey's estimator of pi0 is NaN")
          if (is.na(st))  print("Storey's estimator of pi0 is NA")

          return(st)
      }
