R/matchGeneID.R

Defines functions matchGeneID

Documented in matchGeneID

#' Match gene ids in the novel assembly to a reference 
#' @param novel A GRanges object of the novel assembly
#' @param ref A GRanges object of the reference annotation
#' @details Both the novel and ref GRanges objects must be from the same reference
#' genome. 
#' @return A data frame of match. 

matchGeneID <- function(novel,ref){
  if('type' %in% colnames(mcols(ref))){
    ref <- ref[ref$type=='exon']
  } else if('feature' %in% colnames(mcols(ref))){
    ref <- ref[ref$feature=='exon']
  } else {
    stop('The reference annotation has no feature information.')
  }
  
  ref_gr <- getGeneRange(ref)
  novel_gr <- getGeneRange(novel)
  hits <- findOverlaps(novel_gr,ref_gr)
  
  novel_gr_o <- novel_gr[queryHits(hits)]
  ref_gr_o <- ref_gr[subjectHits(hits)]
  overlaps <- pintersect(novel_gr_o,ref_gr_o)
  cover <- width(overlaps)/width(novel_gr_o)
  mapping <- data.frame(chr=seqnames(novel_gr_o),
                        strand=strand(novel_gr_o),
                        novel_gene=novel_gr_o$gene_id,
                        novel_stat=start(novel_gr_o),
                        novel_end=end(novel_gr_o),
                        cover_by_ref=cover,
                        ref_gene=ref_gr_o$gene_id,
                        ref_stat=start(ref_gr_o),
                        ref_end=end(ref_gr_o))
  return(mapping)
}
wyguo/RTDBox documentation built on Jan. 31, 2023, 1:19 a.m.