R/get.gene.anno.R

Defines functions .get.gene.anno

Documented in .get.gene.anno

#' Summarise the annotation of a single gene
#'
#' Extracts the annotation rows of the gene named in \code{PARAMETERS$GENE},
#' removes duplicated intervals, and builds lookup vectors for converting
#' between genomic (DNA) coordinates and exon-only (RNA) coordinates.
#'
#' @param PARAMETERS A PARAMETERS list with the parameters indicated in the DPDE4PM function.
#'   Only the \code{GENE} element is read here.
#' @param ANNOTATION A data frame describing the annotation of genes in the GTF file, generated by the read.gtf function.
#'   Columns are accessed by position (1 and 3 to 6), except \code{gene},
#'   which is also looked up by name to select the rows.
#' \describe{
#'  \item{chr}{chromosome}
#'  \item{feature}{genomic feature}
#'  \item{start}{start coordinate, base 1}
#'  \item{end}{stop coordinate, base 1}
#'  \item{strand}{strand}
#'  \item{gene}{gene id in the GTF file}
#'  \item{transcript}{transcript id in the GTF file}
#' }
#'
#' @return A list of gene characteristics including the following
#' \describe{
#'  \item{anno}{annotation of the gene (unique rows of chr, start, end, strand, gene)}
#'  \item{gene}{gene id}
#'  \item{chr}{chromosome}
#'  \item{strand}{strand}
#'  \item{left}{start position of the gene}
#'  \item{right}{end position of the gene}
#'  \item{DNA2RNA}{a vector of length \code{dna_length}; positions covered by an
#'    annotated interval hold their 1-based index within the exome, all other
#'    positions hold 0}
#'  \item{RNA2DNA}{a vector of length \code{exome_length} holding the genomic
#'    coordinate of every covered position}
#'  \item{dna_length}{distance in bps between start and end genomic coordinates}
#'  \item{exome_length}{length of region covered by exons}
#' }
.get.gene.anno <- function(PARAMETERS, ANNOTATION) {

  # Select the rows of the requested gene. which() discards comparisons that
  # evaluate to NA, so rows with a missing gene id are not carried along as
  # all-NA rows.
  gene_rows <- which(ANNOTATION$gene == PARAMETERS$GENE)
  if (length(gene_rows) == 0) {
    stop("No annotation found for gene: ", PARAMETERS$GENE, call. = FALSE)
  }

  # Keep chr, start, end, strand, gene (columns 1 and 3 to 6) and drop
  # intervals that are repeated, e.g. across transcripts.
  anno <- ANNOTATION[gene_rows, c(1, 3:6)]
  anno_unique <- unique(anno)

  # Coordinates are read by position rather than by name so the function does
  # not depend on whether the end column is called "end" or "stop".
  starts <- anno_unique[[2]]
  ends <- anno_unique[[3]]
  if (anyNA(starts) || anyNA(ends)) {
    stop("Missing start/end coordinates in the annotation of gene: ",
         PARAMETERS$GENE, call. = FALSE)
  }

  # Gene-level information, taken from the first annotation row.
  chr <- as.character(anno_unique[[1]][1])
  strand <- as.character(anno_unique[[4]][1])
  gene <- as.character(anno_unique[[5]][1])
  left <- min(starts)
  right <- max(ends)
  dna_length <- right - left + 1

  # Interval coordinates relative to the left end of the gene (base 1).
  rel_start <- starts - left + 1
  rel_end <- ends - left + 1

  # Mark every genomic position covered by at least one interval;
  # overlapping intervals are merged because the mask is only ever set to 1.
  DNA2RNA <- rep(0, dna_length)
  for (i in seq_len(nrow(anno_unique))) {
    DNA2RNA[rel_start[i]:rel_end[i]] <- 1
  }
  exome_length <- sum(DNA2RNA)

  # Turn the 0/1 mask into a running exome index: covered positions receive
  # 1, 2, ..., exome_length and uncovered positions stay 0.
  DNA2RNA <- cumsum(DNA2RNA) * DNA2RNA

  # Genomic coordinate of each covered position, in increasing order.
  RNA2DNA <- (left:right)[DNA2RNA > 0]

  # Summarize result
  list(
    anno = anno_unique, gene = gene, chr = chr, strand = strand,
    left = left, right = right,
    DNA2RNA = DNA2RNA, RNA2DNA = RNA2DNA, dna_length = dna_length,
    exome_length = exome_length
  )
}
helen-zhu/DPDE4PM documentation built on Feb. 17, 2021, 9:46 a.m.