R/countMatrixGenes.R

Defines functions .getEIJcounts .getNumericCount countMatrixGenes

Documented in countMatrixGenes .getEIJcounts .getNumericCount

#' Count Matrix Genes
#'
#' @description This function creates association of metafeatures such as exons, introns and junctions times sample for each gene. It requires a junction matrix, annotation matrix (generated by default using \code{\link{readMembershipMatrix}}) and summarized exon and intron read counts. It should be run only after running \code{\link{readMembershipMatrix}} and \code{\link{intronMembershipMatrix}}.
#'
#'
#' @param JunctionMatrix matrix of junction read counts times samples, generated by \code{\link{getJunctionCountMatrix}}.
#' @param annotation gene features and meta-featues annotation file generated by \code{\link{readMembershipMatrix}}, saved by default as Annotation.Rdata.
#' @param intronList intron read counts per gene generated by Rsubread package and saved in counts_introns.Rdata file, as per preprocessing instructions.
#' @param exonList exon read counts per gene generated by Rsubread package and saved in counts_exons.Rdata file, as per preprocessing instructions.
#'
#' @return Gcount list contains gene-wise read count summarization of meta-features times samples in the study. The output is saved as Gcount.Rdata.
#' @export

countMatrixGenes<- function(JunctionMatrix,annotation,intronList=c(intron_A,intron_B,intron_C), exonList= c(out_A,out_B,out_C)){
  ##-------------------------------------------
  ## Get the CountMatrix
  ##-------------------------------------------

  tstamp <- paste("[",system("date",intern=TRUE),"]",sep="",collapse="")
  cat(paste(tstamp," Preprocessing...",sep="",collapse=""))


  rownames(intronList$counts) <- rownames(intronList$annotation)
  intronCounts<- cbind(intronList$annotation,intronList$counts)
  rownames(exonList$counts) <- rownames(exonList$annotation)
  exonCounts<- cbind(exonList$annotation,exonList$counts)

  rm(exonList,intronList); gc();

  genes<- as.character(annotation[,6])
  genes<- genes[!is.na(genes)]
  ugenes<- unique(genes)
  lgene<- length(ugenes)

  cat('done','\n',sep='')
  tstamp <- paste("[",system("date",intern=TRUE),"]",sep="",collapse="")
  cat(paste(tstamp," Running...",sep="",collapse=""))
  if(lgene < 5000) ntimes <-  seq(1, lgene, (lgene-1)/2) else ntimes <- seq(5000, lgene, 5000)
  Gcount<- lapply(1:lgene,function(i) {
    if(any(ntimes==i)) cat(i,' ',sep='');
    indexGeneAnnotation<- match(as.vector(annotation[,6]) ,ugenes[i])
    Gannotation<- annotation[(!is.na(indexGeneAnnotation)),,drop=FALSE]
    if(nrow(Gannotation) <2) return(NA)
    reads<- as.vector(Gannotation[,5])

    indexGeneExon <- match(as.vector(exonCounts[,1]) ,ugenes[i])
    GexonCount <- exonCounts[!is.na(indexGeneExon),,drop=FALSE]
    indexGeneIntron <- match(as.vector(intronCounts[,1]) ,ugenes[i])
    GintronCount <- intronCounts[!is.na(indexGeneIntron),,drop=FALSE]
    indexGjunction <-grep('JUN',reads)
    GjunctionCount<- match(as.vector(Gannotation[indexGjunction,5]), as.vector(JunctionMatrix[,5]))
    GjunctionCount<- JunctionMatrix[GjunctionCount,,drop=FALSE]
    .getEIJcounts(Gannotation,GjunctionCount,GintronCount,GexonCount)
  })
  names(Gcount) <- ugenes
  cat('\n',sep='')
  tstamp <- paste("[",system("date",intern=TRUE),"]",sep="",collapse="")
  cat(paste(tstamp," Running...Done",sep="",collapse=""))
  Gcount <- Gcount[!is.na(Gcount)]
  save(Gcount, file='Gcount.Rdata')
  return(Gcount)
}

#' .getNumericCount
#'
#' @description Internal function of \code{\link{countMatrixGenes}}, not to be run separately.
#'
#'
#' @return
.getNumericCount<- function(counts){
  newCount<- lapply(1:ncol(counts),function(x) as.numeric(counts[,x]))
  newCount<- do.call('cbind',newCount)
  rownames(newCount) <- rownames(counts)
  colnames(newCount) <- colnames(counts)
  return(newCount)
}


#' .getEIJcounts
#'
#' @description Internal function of \code{\link{countMatrixGenes}}, not to be called separately.
#'
#' @return
#'

.getEIJcounts<- function(Gannotation,GjunctionCount,GintronCount,GexonCount)
{

  GannT<- paste(Gannotation[,1],Gannotation[,2],Gannotation[,3],Gannotation[,4],sep='')
  GinT <- paste(GintronCount[,2],GintronCount[,3],GintronCount[,4],GintronCount[,5],sep='')
  GexT <- paste(GexonCount[,2],GexonCount[,3],GexonCount[,4],GexonCount[,5],sep='')

  indexIn<- match(GannT,GinT)
  GintronCount<- cbind(Gannotation[!is.na(indexIn),5,drop=FALSE],GintronCount[indexIn[!is.na(indexIn)],7:ncol(GintronCount),drop=FALSE])
  indexEx<- match(GannT,GexT)
  GexonCount<- cbind( Gannotation[!is.na(indexEx),5,drop=FALSE],GexonCount[indexEx[!is.na(indexEx)],7:ncol(GexonCount),drop=FALSE])
  GjunctionCount<- GjunctionCount[,5:ncol(GjunctionCount)]

  rownames(GjunctionCount) <- as.vector(GjunctionCount[,1])
  rownames(GexonCount)<- as.vector(GexonCount[,1])
  rownames(GintronCount) <- as.vector(GintronCount[,1])

  GjunctionCount<- GjunctionCount[,-1,drop=FALSE]
  GexonCount<- GexonCount[,-1,drop=FALSE]
  GintronCount<- GintronCount[,-1,drop=FALSE]

  colnames(GjunctionCount) <- colnames(GjunctionCount)
  colnames(GexonCount) <- colnames(GjunctionCount)
  colnames(GintronCount) <- colnames(GjunctionCount)
  #correcting the problem of character counts
  counts<- rbind(GexonCount,GintronCount,GjunctionCount)
  return(counts)
}
harshsharma-cb/FASE documentation built on Aug. 6, 2023, 1:37 a.m.