R/DEJ.R

Defines functions cpmCountsDEJ addAnnotationDEJ DEJ

Documented in addAnnotationDEJ cpmCountsDEJ DEJ

#' Differentially Expressed Junctions
#'
#' @description Find differentially expressed junctions, provided a junction matrix as input (generated by \code{\link{getJunctionCountMatrix}}). This function uses standard limma package (\code{\link[limma]{eBayes}}) to find differentially expressed junctions.
#'
#' @param JunctionMatrix matrix containing read counts for junctions, obtained using \code{\link{getJunctionCountMatrix}} .
#' @param designM design matrix required by limma
#' @param contrastM contrast matrix required by limma.
#' @param Groups list of sample groups. \cr
#'        Example: If there are two sample groups with three samples each, 'Groups' should be formed as:
#'        \enumerate{
#'        \item numeric: c(1, 1, 1, 2, 2, 2)
#'        }
#'
#' @import edgeR
#'
#' @return The output is ranked differentially expressed junctions. Meta-data is saved as fit2.Rdata in folderSRA directory. The ranking can be annotated using \code{\link{addAnnotationDEJ}}. The annotated and ranked differentially expressed junctions for a given contrast (as given in contrast matrix) can be saved using \code{\link{cpmCountsDEJ}}
#'
#' @references \enumerate{
#' \item Robinson, M. D., McCarthy, D. J. & Smyth, G. K. edgeR: A Bioconductor package for differential expression analysis of digital gene expression data. Bioinformatics 26, 139–140 (2009)
#' }
#' @export
#'

DEJ<- function(JunctionMatrix, designM, contrastM, Groups)
{
        rownames(JunctionMatrix) <- JunctionMatrix[,5]
        counts<- JunctionMatrix[,6:ncol(JunctionMatrix)] #saving raw counts for all junctions
        write.csv(counts,file='RawJunctionCounts.csv')
        #load('DCmatrix.Rdata')
        ncounts <- DGEList(counts=counts, group=Groups)
        ncounts <- calcNormFactors(ncounts)
        fit<- voom(ncounts,designM)

        E <- ((counts !=0)*1)* fit$E
        write.csv(E,file='RawJunctionCounts_log2cpm.csv')

        fit <- lmFit(fit, designM)
        fit2 <- contrasts.fit(fit, contrastM)
        fit2 <- eBayes(fit2)
        return(fit2)

}


#' Annotation of differentially expressed junctions
#'
#' @description Add annotation to ranked differentially expressed junctions of given contrast returned by \code{\link{DEJ}}.
#'
#' @param JunctionMatrixA annotated junction matrix containing junction read counts, produced by \code{\link{JunctionMatrixAnnotation}}.
#' @param fit output of \code{\link{DEJ}}, that contains ranking of differentially expressed junctions.
#' @param contrast contrast from contrast matrix, whose ranking is required.
#'
#' @return Annotated ranking of differentially expressed junctions of a given contrast. The output can be saved using write.csv or \code{\link[openxlsx]{write.xlsx}}.
#' @export
#'

addAnnotationDEJ<-function(JunctionMatrixA,fit,contrast)
{
        #load('JunctionMatrixAnnotation.Rdata')
        test<- topTable(fit, coef=contrast, n = Inf, sort = "p")
        #load('counts_genes.Rdata')
        annotation <- JunctionMatrixA[,c(1:5,ncol(JunctionMatrixA))]
        index<- match(as.vector(rownames(test)),as.vector(annotation$UniqueJun))
        test<- cbind(test,annotation[index,,drop=FALSE])
        return(test)
}


#' cpmCountsDEJ
#'
#' @description Save read counts and log2cpm expression of differentially expressed junctions.
#'
#' @param JunctionMatrix matrix containing read counts for junctions.
#' @param filename filename in which output of \code{\link{addAnnotationDEJ}} is saved.
#' @param designM design matrix required by limma
#' @param contrastM contrast matrix required by limma.
#' @param Groups list of sample groups. \cr
#'        Example: If there are two sample groups with three samples each, 'Groups' should be formed as:
#'        \enumerate{
#'        \item numeric: c(1, 1, 1, 2, 2, 2)
#'        }
#' 
#' @import edgeR
#'
#' @return Read counts and logcpm expression of given contrast.
#'
#' @references \enumerate{
#' \item Robinson, M. D., McCarthy, D. J. & Smyth, G. K. edgeR: A Bioconductor package for differential expression analysis of digital gene expression data. Bioinformatics 26, 139–140 (2009)
#' }
#' @export
#'

cpmCountsDEJ<- function(JunctionMatrix,filename, designM=designM, contrastM=contrastM, Groups=Groups)
{
        #load('DCmatrix.Rdata')
        test <- read.csv(filename)
        filename<- strsplit(filename,'.',fixed=TRUE)[[1]][1]

        eventName<- as.vector(test$X)

        counts<- JunctionMatrix[,6:ncol(JunctionMatrix)] #saving raw counts for all genes

        index <- match(eventName,rownames(counts))
        counts <- counts[index,,drop=FALSE]

        countfilename <- paste(filename,'_count.csv',sep="",collapse="")
        write.csv(counts,file=countfilename)

        ncounts <- DGEList(counts=counts, group=Groups)
        ncounts <- calcNormFactors(ncounts)
        fit<- voom(ncounts,designM)
        E <- ((counts !=0)*1)* fit$E
        l_filename <- paste(filename,'_log2cpm.csv',sep="",collapse="")
        write.csv(E,file=l_filename)


}
harshsharma-cb/FASE documentation built on Aug. 6, 2023, 1:37 a.m.