R/EPrnaseq.R

Defines functions .ePrnaseqFunction .fitEPrnaseqModel EPrnaseq

Documented in EPrnaseq .ePrnaseqFunction .fitEPrnaseqModel

#' ExonPointer
#'
#' @description Prediction of cassette exons events by utilizing information of meta-features (flanking junctions, skipping junctions and introns) associated with the exon in context ofs a given gene.
#'
#'
#' @param Gcount list; contains gene-wise  matrix of meta-features read counts times samples, generated by \code{\link{countMatrixGenes}}.
#' @param RMM gene-wise list that represents the association of exons with other meta-features of genes (introns and junctions (skipping/flanking)). It is generated using \code{\link{readMembershipMatrix}}.
#' @param designM design matrix required by limma.
#' @param contrastM contrast matrix required by limma.
#' @param Groups list of sample groups. \cr
#'        Example: If there are two sample groups with three samples each, 'Groups' should be formed as:
#'        \enumerate{
#'        \item numeric: c(1, 1, 1, 2, 2, 2)
#'        \item alphabetical: c('A', 'A', 'A', 'B', 'B', 'B')}
#' @param p number of threads to be used if running in parallel. (default=1)
#' @param threshold minimum number of reads that should map to a meta-feature (default=3). If number of reads<threshold, meta-feature would be discarded.
#' @param annotation matrix; contains annotation of exons and introns, created using \code{\link{readMembershipMatrix}}.
#' @param ... other parameters to be passed to \code{\link[limma]{eBayes}}, \code{\link[limma]{voom}}, \code{\link[edgeR]{calcNormFactors}} and \code{\link[limma]{lmFit}} .
#'
#' @details ExonPointer algorithm finds cassette exon events using metafeatures (exons, introns and junctions). The read counts of meta-features are present in Gcount and the association of an exon with introns and junctions (skipping/flanking) is given by Read Membership Matrix (RMM).\cr
#'     In order to find a cassette exon event, one-tailed p-values of metafeatures are summarized using Irwin-Hall method to find the equivalent P-value (EqP). EqP determines if an event is differentially alternatively spliced. For more details, please refer: S. S. Tabrez, R. D. Sharma, V. Jain, A. A. Siddiqui & A. Mukhopadhyay. Differential alternative splicing coupled to nonsense-mediated decay of mRNA ensures dietary restriction-induced longevity. Nature Communications volume 8, Article number: 306 (2017).
#'
#' @import limma
#' @import edgeR
#' @import matrixStats
#' @import Biobase
#' @import parallel
#'
#' @return ExonPointer gives a list of ranked cassette exon events with equivalent p-value and t-statistic.
#'
#' @references \enumerate{
#'  \item S. S. Tabrez, R. D. Sharma, V. Jain, A. A. Siddiqui & A. Mukhopadhyay. Differential alternative splicing coupled to nonsense-mediated decay of mRNA ensures dietary restriction-induced longevity. Nature Communications volume 8, Article number: 306 (2017).
#'  \item Robinson, M. D., McCarthy, D. J. & Smyth, G. K. edgeR: A Bioconductor package for differential expression analysis of digital gene expression data. Bioinformatics 26, 139–140 (2009).
#'  \item Ritchie, M. E., Phipson, B., Wu, D., Hu, Y., Law, C. W., Shi, W., & Smyth, G. K. limma powers differential expression analyses for RNA-sequencing and microarray studies. Nucleic acids research, 43(7), e47 (2015).
#'  \item Henrik Bengtsson (2017). matrixStats: Functions that Apply to Rows and Columns of Matrices (and to Vectors). R package version 0.52.2. https://github.com/HenrikBengtsson/matrixStats
#'  \item https://git.bioconductor.org/packages/Biobase
#'  \item Huber W, Carey VJ, Gentleman R, Anders S, Carlson M, Carvalho BS, Bravo HC, Davis S, Gatto L, Girke T, Gottardo R, Hahne F, Hansen KD, Irizarry RA, Lawrence M, Love MI, MacDonald J, Obenchain V, Ole's AK, Pag'es H, Reyes A, Shannon P, Smyth GK, Tenenbaum D, Waldron L, Morgan M (2015). “Orchestrating high-throughput genomic analysis with Bioconductor.” Nature Methods, 12(2), 115–121.
#'  \item https://CRAN.R-project.org/view=HighPerformanceComputing
#'  }
#'
#' @export

EPrnaseq <- function(Gcount, RMM, designM, contrastM, Groups=NULL, p=1,threshold=3, annotation, ... ) {

  #
  #Check length of Gcount and length of RMM
  index<- match(names(Gcount),names(RMM))
  RMM<- RMM[index];

  geneNames <- names(Gcount);
  n <- c(1:length(Gcount))
  
  if(p==1) {

    fit <- base::lapply(n, function(x) .fitEPrnaseqModel(gene=geneNames[x],counts=Gcount[[x]], RMmeber=RMM[[x]],contrastM=contrastM,designM=designM,Groups=Groups,threshold=threshold))
    
  } else {
    
   
    fit <- mclapply(n, function(x) .fitEPrnaseqModel(gene=geneNames[x],counts=Gcount[[x]], RMmeber=RMM[[x]],contrastM=contrastM,designM=designM,Groups=Groups,threshold=threshold),mc.cores=p);


  }

  fit<- fit[!is.na(fit)]
  #
  if(length(fit)==0) return(NA)
  newfit<- lapply(fit,function(x) ncol(x))
  fit<- fit[newfit==4]
  fit<- do.call('rbind',fit)
  attr(fit, "name") <- "EPrnaSeq";

  fit <- addAnnotationRnaSeq(fit=fit, annotation=annotation)

  return(fit)
}



#' EP model fitting
#'
#' @description Internal function of \code{\link{EPrnaseq}}, not to be called separately.
#'
#' @return
#'

.fitEPrnaseqModel <- function(gene, counts,RMmeber, contrastM,designM,Groups,threshold){
  #

  #Remove short genes
  if (is.null(rownames(counts)) || nrow(counts)< 3 ) { cat('Skipping short Gene',gene,'\n',sep=''); return(fit <- NA)  }

  #i think i should make aux matrix here and then check the other criteria also make simiar
  counts<- .prepareCounts(counts, designM, Groups, threshold-1)
  if (is.null(rownames(counts)) || nrow(counts)< 3 ) { cat('Skipping short Gene',gene,'\n',sep=''); return(fit <- NA)  }
  if (is.null(rownames(RMmeber)) || nrow(RMmeber)< 3 ) { cat('Skipping short Gene RMM',gene,'\n',sep=''); return(fit <- NA)  }

  #Match dimenstion to count matrix
  index <- match(rownames(counts), rownames(RMmeber))
  RMmeber<- RMmeber[index,,drop=FALSE]
  try(fit <- .ePrnaseqFunction(counts=counts,RMM= RMmeber,contrastM=contrastM,designM=designM,Groups=Groups),silent=TRUE)
  if(!exists("fit",inherits=FALSE))  return(NA) ##This is not working may be some problem of space
  if( class(fit)=='try-error') {
    cat('Skipping Gene:',gene,'\n',sep='');
    fit <- 'error'
    return(fit)
  } else cat(gene,'\n',sep='');
  fit<- fit[!is.na(fit)]
  if (length(fit)==0) return(fit <- NA)
  fit<- do.call('rbind',fit)
  fit<- cbind(fit,gene)
  return(fit)
}


#' EP function
#'
#' @description Internal function of \code{\link{EPrnaseq}}, not to be called separately.
#'
#'
#' @return
#'

.ePrnaseqFunction <- function(counts=y,RMM,contrastM,designM, Groups){

  ##require(combinat)


  if (nrow(counts) != nrow(RMM) ) {
    cat('Dimension missmatch?','\n',sep='');
    return(NA)	     }


  if (nrow(RMM)< 3 ) { cat('Skipping short Gene','\n',sep='');
    return(NA)  }


  if (is.null(rownames(counts)) || nrow(counts)< 3 ) {
    cat('Skipping short Gene','\n',sep='');
    return(NA) 	        }

  #counts<- getNumericCount(counts)
  #------------------------------------------
  #Running Limma
  #------------------------------------------
  ucounts <- counts
  counts <- DGEList(counts=counts,group=Groups)
  counts <- calcNormFactors(counts)
  try( fit<- voom(counts,designM),silent=TRUE)
  if(!exists("fit",inherits=FALSE))  return(NA)


  ##removing zero counts false expression
  E <- ((ucounts !=0)*1)* fit$E

  ##Check low expressed reads #remove that have expression less than 6.32
  expression<- .removeLECounts(E,designM,Groups)


  #Run limma
  # fit <- lmFit(medpolish(expression,trace.iter = FALSE)$residual, designM) # since the lmFit works directly i am not taking log values into account
  #Just to try with it
  fit <- lmFit(expression, designM) # since the lmFit works directly i am not taking log values into account
  fit2 <- contrasts.fit(fit, contrastM)
  fit2 <- eBayes(fit2)


  #Match dimenstion to count matrix
  index <- match(rownames(expression), rownames(RMM))
  RMM<- RMM[index,,drop=FALSE]

  #only valid if information of skipping junction obtained before
  # and not running p2auxMatrix function since it causing sometime false positives for no reason
  indexToremove<- match(colnames(RMM),rownames(RMM))
  RMM<- RMM[,!is.na(indexToremove),drop=FALSE]


  #To make the auxMatrix before removing the rows of RMM; in case already done- not required
  #auxMatrix<- p2auxMatrix(RMM)
  auxMatrix<- (RMM ==0.5 )* 0.5
  RMMij<- (RMM > 1) *1
  RMM<- (RMM > 0.5) *1 #RMM excluded from skipping junction now


  #Check the dimension of the matrices
  if (!any(match(rownames(expression),rownames(RMM)),na.rm=TRUE)) {
    cat('Dimension missmatch?','\n',sep='');
    return(NA)	     }


  #------------------------------------------
  #Run by contrast
  #------------------------------------------
  nbrOfcontrast<- ncol(contrastM)
  contrasts<- colnames(contrastM)
  FAP<- vector('list',length=nbrOfcontrast)

  #for (j in 1:nbrOfcontrast)
  FAP <- lapply(1:nbrOfcontrast, function(j)
  {
    #Preparing one tailed pvalues
    t <- data.frame(fit2$t[,j])
    pvalue2tail <- data.frame(fit2$p.value[,j])
    pvalue1tail <- pvalue2tail * .5 * (t>0) + (1 -pvalue2tail * .5) * (t<=0)
    SumOfPvalues <- t(as.matrix(pvalue1tail)) %*% ( (1 * (RMM > 0)) + ((-1)*(auxMatrix ==0.5))) + colSums(1*(auxMatrix ==0.5))
    NumberOfPvalues <- colSums((auxMatrix + RMM) !=0)
    EqPvalues<- as.matrix(unlist(lapply(1:length(SumOfPvalues), function(x) .sumPvalsMethod(SumOfPvalues[x],NumberOfPvalues[x]))))

    #Apply Constraints ; atleast two junction one exon and other side must be skipping junction or intron
    constraints <- (colSums((RMMij > 0)* 1) >1)  &  (colSums((auxMatrix>0)*1) >0) # for flanking intron or Junctions
    EqPvalues <- as.matrix(constraints * (( EqPvalues < 0.05 ) * EqPvalues ))
    rownames(EqPvalues)<- colnames(SumOfPvalues)
    EqPvalues <- as.matrix( EqPvalues[which(EqPvalues != 0),,drop=FALSE])
    EqPvalues <- cbind(EqPvalues,rep(contrasts[j],nrow(EqPvalues)))

    #include t statistic
    tGE<- as.matrix(t(as.matrix(t)) %*% ((RMM +auxMatrix)> 0)*1) # now it sums up the t-statistics of term that are used for summing up pvalues
    EqPvalues<- cbind(EqPvalues,tGE[,rownames(EqPvalues)])

    #Saving Pvalues
    if(nrow(EqPvalues)==0) return(NA) else {
      colnames(EqPvalues)<- c('EqPvalues','Contrast','t-statistics')
      return(EqPvalues) }
  })
  return(FAP);
}
########################################################################################
harshsharma-cb/FASE documentation built on Aug. 6, 2023, 1:37 a.m.