R/combine_p_values.R

Defines functions combine combinePValues calculateCovariances transformData pop.sd pop.var

Documented in combine

pop.var <- function(x) var(x) * (length(x)-1) / length(x)
pop.sd <- function(x) sqrt(pop.var(x))

transformData <- function(data_vector) {
  dvm = mean(data_vector)
  dvsd = pop.sd(data_vector)
  s = (data_vector-dvm)/dvsd
  distr = ecdf(s)
  sapply(s, function(a) -2*log(distr(a)))
}

calculateCovariances <- function(data_matrix){
  transformed_data_matrix = apply(data_matrix, MARGIN=1, FUN=transformData)
  covar_matrix = cov(transformed_data_matrix)
  covar_matrix
}


combinePValues <- function(covar_matrix, p_values, extra_info = FALSE){
  N = ncol(covar_matrix) # number of samples
  df_fisher = 2.0*N
  Expected  = 2.0*N
  cov_sum <- (2*sum(covar_matrix[lower.tri(covar_matrix, diag=FALSE)]))
  Var = 4.0*N+cov_sum
  c = Var/(2.0*Expected)
  df_brown = (2.0*Expected^2)/Var
  if (df_brown > df_fisher) {
    df_brown = df_fisher
    c = 1.0
  }
  x = 2.0*sum( -log(p_values) )

  p_brown = pchisq(df=df_brown, q=x/c, lower.tail=FALSE)
  p_fisher = pchisq(df=df_fisher, q=x, lower.tail=FALSE)

  if (extra_info) {
    return(list(P_test=p_brown, P_Fisher=p_fisher, Scale_Factor_C=c, DF=df_brown))
  }
  else {
    return(p_brown)
  }
}

#' Combining P values
#'
#' @param gene_file pathway annotation file/gene-set file
#' @param expression_matrix gene expression matrix
#' @param gnames gene names of expression matrix
#' @param Pval1 P-value matrix obtained using binorm
#' @param thr  Based on threshold provided, those gene sets having number of genes greater than the threshold value will to be considered for covariance matrix calculation and combining of p values
#'
#' @return Combined P-value matrix
#' @export
#'
#' @examples
#' combine()
combine <- function(gene_file,expression_matrix,gnames,Pval1,thr=2){
  combp <- matrix(0,nrow(gene_file),ncol(expression_matrix), dimnames=list(rownames(gene_file),colnames(expression_matrix)))
  gene_file = data.frame(apply(gene_file,2,toupper))
  rownames(expression_matrix) = toupper(rownames(expression_matrix))
  gnames = toupper(gnames)
  for(i in 1:nrow(gene_file))
  {
    pathway <- gene_file[i,]
    genes = pathway[2: ncol(pathway)] ;

    pose = which(genes == "") ;
    if ( dim(as.matrix(pose))[1] > 0){
      genes = genes[-pose] ;
    }
    genes = as.matrix(genes)
    pos = which ( genes %in%  gnames) ;
    pos = as.matrix(pos) ;
    if( nrow(pos) > 5) {
      exprlocal =  log2(100*expression_matrix[ genes[pos], ] +1) ;

      plocal<- Pval1[ genes[pos], ]
      plocal = t(plocal)
      covar_matrix = cov(plocal) ;

      for (j in 1:nrow(plocal)){
        gpos = which(exprlocal[,j] > 0) ;
        if( nrow(as.matrix(gpos)) > thr) {
          lcovar = covar_matrix[ gpos, gpos] ;
          temp = combinePValues( lcovar, p_values=plocal[j,gpos], extra_info=TRUE)
          combp[i,j] = -log2(temp$P_test)

        }
      }

    }
  }
  return(combp)
}
reggenlab/UniPath documentation built on Nov. 26, 2020, 8:09 a.m.