#' Intron Pointer
#'
#'
#' @description Prediction of intron retention events by utilizing information of meta-features (flanking junctions, skipping junctions and introns) associated with the intron in context for a given gene.
#'
#'
#' @param Gcount list; contains gene-wise matrix of meta-features read counts times samples, generated by \code{\link{countMatrixGenes}}.
#' @param iMM gene-wise list that represents the association of intron with other meta-features of genes (exons and junctions (skipping/flanking)). It is generated using \code{\link{intronMembershipMatrix}}.
#' @param designM design matrix required by limma.
#' @param contrastM contrast matrix required by limma.
#' @param Groups list of sample groups. \cr
#' Example: If there are two sample groups with three samples each, 'Groups' should be formed as:
#' \enumerate{
#' \item numeric: c(1, 1, 1, 2, 2, 2)
#' \item alphabetical: c('A', 'A', 'A', 'B', 'B', 'B')}
#' @param p number of threads to be used if running in parallel. (default=1)
#' @param threshold minimum number of reads that should map to a meta-feature (default=3). If number of reads<threshold, meta-feature would be discarded.
#' @param annotation matrix; contains annotation of exons and introns, created using \code{\link{readMembershipMatrix}}.
#' @param ... other parameters to be passed to \code{\link[limma]{eBayes}}, \code{\link[limma]{voom}}, \code{\link[edgeR]{calcNormFactors}} and \code{\link[limma]{lmFit}} .
#'
#' @details IntronPointer algorithm finds intron retention events using metafeatures (exons, introns and junctions). The read counts of meta-features are present in Gcount and the association of an intron with exons and junctions is given by Intron Membership Matrix (iMM).\cr
#' In order to find an intron retention event, one-tailed p-values of metafeatures are summarized using Irwin-Hall method to find the equivalent P-value (EqP). EqP determines if an event is differentially alternatively spliced.For more details, please refer: S. S. Tabrez, R. D. Sharma, V. Jain, A. A. Siddiqui & A. Mukhopadhyay. Differential alternative splicing coupled to nonsense-mediated decay of mRNA ensures dietary restriction-induced longevity. Nature Communications volume 8, Article number: 306 (2017).
#'
#'
#' @import limma
#' @import edgeR
#' @import matrixStats
#' @import Biobase
#' @import parallel
#'
#' @return IntronPointer gives a list of ranked intron retention events with equivalent p-value and t-statistics. The output of iPrnaseq can be passed to \code{\link{addAnnotationRnaSeq}} to add annotation to the ranked intron retention events.
#'
#' @references \enumerate{
#' \item S. S. Tabrez, R. D. Sharma, V. Jain, A. A. Siddiqui & A. Mukhopadhyay. Differential alternative splicing coupled to nonsense-mediated decay of mRNA ensures dietary restriction-induced longevity. Nature Communications volume 8, Article number: 306 (2017).
#' \item Robinson, M. D., McCarthy, D. J. & Smyth, G. K. edgeR: A Bioconductor package for differential expression analysis of digital gene expression data. Bioinformatics 26, 139–140 (2009).
#' \item Ritchie, M. E., Phipson, B., Wu, D., Hu, Y., Law, C. W., Shi, W., & Smyth, G. K. limma powers differential expression analyses for RNA-sequencing and microarray studies. Nucleic acids research, 43(7), e47 (2015).
#' \item Henrik Bengtsson (2017). matrixStats: Functions that Apply to Rows and Columns of Matrices (and to Vectors). R package version 0.52.2. https://github.com/HenrikBengtsson/matrixStats
#' \item https://git.bioconductor.org/packages/Biobase
#' \item Huber W, Carey VJ, Gentleman R, Anders S, Carlson M, Carvalho BS, Bravo HC, Davis S, Gatto L, Girke T, Gottardo R, Hahne F, Hansen KD, Irizarry RA, Lawrence M, Love MI, MacDonald J, Obenchain V, Ole's AK, Pag'es H, Reyes A, Shannon P, Smyth GK, Tenenbaum D, Waldron L, Morgan M (2015). “Orchestrating high-throughput genomic analysis with Bioconductor.” Nature Methods, 12(2), 115–121.
#' \item https://CRAN.R-project.org/view=HighPerformanceComputing
#' }
#'
#' @export
#'
iPrnaseq <- function(Gcount, iMM, designM, contrastM, Groups=NULL, p=1,threshold=3, annotation, ... ) {
index<- match(names(Gcount),names(iMM))
iMM<- iMM[index]
geneNames <- names(Gcount)
n <- c(1:length(Gcount))
if(p==1) {
# contrastM <- lapply(1:length(Gcount),function(x) contrastM)
# designM <- lapply(1:length(Gcount),function(x) designM)
# Groups <-lapply(1:length(Gcount),function(x) Groups)
# threshold <-lapply(1:length(Gcount),function(x) threshold)
fit <- base::lapply(n, function(x) .fitiPrnaseqModel(gene=geneNames[x],counts=Gcount[[x]], iMMeber=iMM[[x]],contrastM=contrastM,designM=designM,Groups=Groups,threshold=threshold))
#fit <- base::mapply(gene=geneNames,counts=Gcount,iMMeber=iMM,contrastM=contrastM,designM=designM,Groups=Groups,threshold=threshold, FUN=.fitiPrnaseqModel, SIMPLIFY=FALSE);
} else {
#require(parallel) #check if you want to have parallel
fit <- mclapply(n, function(x) .fitiPrnaseqModel(gene=geneNames[x],counts=Gcount[[x]], iMMeber=iMM[[x]],contrastM=contrastM,designM=designM,Groups=Groups,threshold=threshold),mc.cores=p);
#pkg <- c("package:parallel")
#lapply(pkg, detach, character.only = TRUE, unload = TRUE)
}
#browser()
fit<- fit[!is.na(fit)]
#
if(length(fit)==0) return(NA)
newfit<- lapply(fit,function(x) ncol(x))
fit<- fit[newfit==4]
fit<- do.call('rbind',fit)
attr(fit, "name") <- "iPrnaSeq";
fit <- addAnnotationRnaSeq(fit=fit, annotation=annotation)
return(fit)
}
#' fitiPrnaseqModel
#'
#' @description Internal function of \code{\link{iPrnaseq}}, not to be called separately.
#'
#' @return
#'
.fitiPrnaseqModel <- function(gene, counts, iMMeber, contrastM,designM,Groups,threshold){
#if(gene=='AT1G01040') browser()
##Remove short genes
if (is.null(rownames(counts)) || nrow(counts)< 3 ) { cat('Skipping short Gene',gene,'\n',sep=''); return(fit <- NA) }
counts<- .prepareCounts(counts, designM, Groups, threshold-1)
if (is.null(rownames(counts)) || nrow(counts)< 3 ) { cat('Skipping short Gene',gene,'\n',sep=''); return(fit <- NA) }
if (is.null(rownames(iMMeber)) || nrow(iMMeber)< 3 ) { cat('Skipping short Gene iMM',gene,'\n',sep=''); return(fit <- NA) }
##No need of this function anymore added to iMM preperation
#prepare skipping Junction
#iMMeber<- addSkippingJI(iMMeber)
#Match dimenstion to count matrix
index <- match(rownames(counts), rownames(iMMeber))
iMMeber<- iMMeber[index,,drop=FALSE]
try(fit <- .iPrnaseqFunction(counts=counts,iMM= iMMeber,contrastM=contrastM,designM=designM,Groups=Groups),silent=TRUE)
if(!exists("fit",inherits=FALSE)) return(NA) ##This is not working may be some problem of space
if( class(fit)=='try-error') {
cat('Skipping Gene:',gene,'\n',sep='');
fit <- 'error'
return(fit)
} else cat(gene,'\n',sep='');
fit<- fit[!is.na(fit)]
if (length(fit)==0) return(fit <- NA)
fit<- do.call('rbind',fit)
fit<- cbind(fit,gene)
return(fit)
}
#' iPrnaseqFunction
#'
#' @description Internal function of \code{\link{iPrnaseq}}, not to be called separately.
#'
#'
#' @import edgeR
#' @import limma
#'
#' @return
#'
.iPrnaseqFunction <- function(counts=y,iMM,contrastM,designM,Groups){
#require(limma)
#require(edgeR) #edgeR is also dependent on Limma
## Later i should make some arrangement to use quantile and TMM normalizaition seperatly
#require(combinat)
if (nrow(counts) != nrow(iMM) ) {
cat('Dimension missmatch?','\n',sep='');
return(NA) }
if (nrow(iMM)< 3 ) { cat('Skipping short Gene','\n',sep='');
return(NA) }
if (is.null(rownames(counts)) || nrow(counts)< 3 ) {
cat('Skipping short Gene','\n',sep='');
return(NA) }
#------------------------------------------
#Running Limma
#------------------------------------------
ucounts <- counts
counts <- DGEList(counts=counts,group=Groups)
counts <- calcNormFactors(counts)
try( fit<- voom(counts,designM),silent=TRUE)
if(!exists("fit", inherits=FALSE)) return(NA)
##removing zero counts false expression
E <- ((ucounts !=0)*1)* fit$E
##Check low expressed reads #remove that have expression less than 6.32
expression<- .removeLECounts(E,designM,Groups)
#Run limma
# fit <- lmFit(medpolish(expression,trace.iter = FALSE)$residual, designM) # since the lmFit works directly i am not taking log values into account
#Just to try with it
fit <- lmFit(expression, designM) # since the lmFit works directly i am not taking log values into account
fit2 <- contrasts.fit(fit, contrastM)
fit2 <- eBayes(fit2)
#Match dimenstion to count matrix
index <- match(rownames(expression), rownames(iMM))
iMM<- iMM[index,,drop=FALSE]
#only valid if information of skipping junction obtained before
# and not running p2auxMatrix function since it causing sometime false positives for no reason
indexToremove<- match(colnames(iMM),rownames(iMM))
iMM<- iMM[,!is.na(indexToremove),drop=FALSE]
#To make the auxMatrix before removing the rows of iMM
auxMatrix<- (iMM > 2 )* 0.5
iMMix<- (iMM ==2 ) *1
#Check the dimenstion of the matrices
if (!any(match(rownames(expression),rownames(iMM)),na.rm=TRUE)) {
cat('Dimension missmatch?','\n',sep='');
# return(list(FAP=NA)) }
return(NA) }
#------------------------------------------
#Run by contrast
#------------------------------------------
nbrOfcontrast<- ncol(contrastM)
contrasts<- colnames(contrastM)
#FAP<- vector('list',length=nbrOfcontrast) # no need of this in here
#for (j in 1:nbrOfcontrast)
FAP <- lapply(1:nbrOfcontrast, function(j)
{
#Preparing one tailed pvalues
t <- data.frame(fit2$t[,j])
pvalue2tail <- data.frame(fit2$p.value[,j])
pvalue1tail <- pvalue2tail * .5 * (t>0) + (1 -pvalue2tail * .5) * (t<=0)
SumOfPvalues <- t(as.matrix(pvalue1tail)) %*% ( (1 * (iMMix > 0)) + ((-1)*(auxMatrix ==0.5))) + colSums(1*(auxMatrix ==0.5))
NumberOfPvalues <- colSums((auxMatrix + iMMix) !=0)
##i found there is a problem in iMM when intron is not there in iMM matrix so iMM==2 doesnot work and a NumberOfPvalues become get 1 (only for skipping junction)
# I am not sure, that i took into account the min pvalue or either side #i think i took care before the code was changed actually upside
#EqPvalues <- pmin(t(SumOfPvalues^NumberOfPvalues)/fact(NumberOfPvalues), t((NumberOfPvalues - SumOfPvalues)^NumberOfPvalues)/fact(NumberOfPvalues))
EqPvalues<- as.matrix(unlist(lapply(1:length(SumOfPvalues), function(x) .sumPvalsMethod(SumOfPvalues[x],NumberOfPvalues[x]))))
#Apply Constraints ; atleast two junction one exon and other side must be skipping junction or intron
constraints <- (colSums((iMMix > 0)* 1) >0) & (colSums((auxMatrix>0)*1) >0) # there should be intron and atleast skipping Junctions
#constraints <- (colSums((iMM == 1)* 1) >1) & (colSums((auxMatrix>0)*1) >0) # led to false positive even if there is no count for that intron
# at least there should be 2 exon flanking and a skipping Junctions
EqPvalues <- as.matrix(constraints * (( EqPvalues < 0.05 ) * EqPvalues ))
rownames(EqPvalues)<- colnames(SumOfPvalues)
EqPvalues <- as.matrix( EqPvalues[which(EqPvalues != 0),,drop=FALSE])
EqPvalues <- cbind(EqPvalues,rep(contrasts[j],nrow(EqPvalues)))
#include t statistic
#tGE<- as.matrix(t(as.matrix(t)) %*% (iMMix > 0)*1) #In case of introns are being reported ##Found a problem here m not reporting right t values
tGE<- as.matrix(t(as.matrix(t)) %*% ((iMMix+auxMatrix) > 0)*1) # now it sums up the t-statistics of term that are used for summing up pvalue
EqPvalues<- cbind(EqPvalues,tGE[,rownames(EqPvalues)])
#Saving Pvalues
if(nrow(EqPvalues)==0) return(NA) else {
colnames(EqPvalues)<- c('EqPvalues','Contrast','t-statistics')
return(EqPvalues) }
})
return(FAP);
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.