R/locateMTvariants.R

Defines functions locateMTvariants

Documented in locateMTvariants

#' Locates the variant and determines local bounds within gene
#' Primarily meant to be used within injectMTvarients
#' 
#' @param query      A single variant in MVRanges form 
#' @param coding     Look only at coding regions? (TRUE) 
#'
#' @return           An MVRanges with start/end codons (overlaps return copies)
#'
#' @export
locateMTvariants <- function(query, coding=TRUE) {

  # Checks if the function has been run before
  if ("gene" %in% names(mcols(query)) &
      "region" %in% names(mcols(query)) &
      "localEnd" %in% names(mcols(query)) & 
      "localStart" %in% names(mcols(query)) &
      "startCodon " %in% names(mcols(query)) &
      "endCodon" %in% names(mcols(query))) {
    return(query) # done 
  }
  
  # No input was given
  if (length(query) == 0) return(NULL)
  
  #run serially
  if (is(query, "MVRangesList")) MVRangesList(lapply(query, locateVariants))
  
  # For now only support rCRS
  stopifnot(genome(query) == "rCRS")
  data("mtAnno.rCRS", package="MTseeker")
  metadata(query)$annotation <- mtAnno
  
  # Localized genic coordinates
  # Ben said he was interested in looking at tRNA regions 
  ### Figure this out later
  #anno <- subset(mtAnno, region %in% c("tRNA", "coding"))

  # decomposeAndCalc throws me an error when I try to use tRNA
  anno <- mtAnno
  if (coding) anno <- subset(mtAnno, region == "coding")
  ol <- findOverlaps(query, anno, ignore.strand=TRUE)
  
  # Initialize
  query$gene <- NA_character_
  query$overlapGene <- NA_character_
  query$region <- NA_character_
  query$localStart <- NA_integer_
  query$localEnd <- NA_integer_
  query$startCodon <- NA_integer_
  query$endCodon <- NA_integer_
  
  
  if (length(ol) == 0) {
    message("No overlapping genes for variant: ", names(query))
    return(query) # allows iterating over an MVRanges this way 
  }
  
  if (length(subjectHits(ol)) > 2) {
    message("More than 2 overlapping genes in locateVariants")
    message("locateMTvariants is meant to run on individual variants.")
    stop("Exiting.")
  }
  

  # If there are multiple genes the variant is located within
  # Create multiple copies of the variant to handle each gene 
  # that it is located within
  if (length(subjectHits(ol)) > 1) {

    query <- rep(query, length(subjectHits(ol)))
    
    # Assume that there are only 2 overlapping genes
    query[1]$gene <- names(anno)[subjectHits(ol)][1]
    query[2]$gene <- names(anno)[subjectHits(ol)][2]
    overlappedGenes <- names(anno)[subjectHits(ol)]
    query$overlapGene <- paste(unique(overlappedGenes), collapse = ",")

  } else {
    
    # Otherwise there is no overlap
    query$gene <- names(anno)[subjectHits(ol)]

  }
  
  for (i in seq_along(query)) {
    
    subHit <- subjectHits(ol)[i]
    
    # Does the variant fall into a coding, tRNA, rRNA, or D-loop region
    query[i]$region <- anno[subHit]$region
    
    # Local start and end (relative to gene of interest)
    # 1-indexed
    query[i]$localStart <- start(query[i]) - start(anno[subHit]) + 1
    query[i]$localEnd <- end(query[i]) - start(anno[subHit]) + 1
    
    ######################### 
    #if (genome(query) == "rCRS" && names(anno[subHit]) == "MT-ND6") {
    #  
    #  subir <- IRanges(query[i]$localStart, query[i]$localEnd)
    #  data(rCRSeq, package="MTseeker")
    #  anno$refDNA <- getSeq(rCRSeq, anno)
    #  refDNA <- anno["MT-ND6"]$refDNA
    #  refSeq <- as.character(unlist(unname(extractAt(anno["MT-ND6"]$refDNA, subir))))
    #  
    #  show(refSeq)
    #  show(ref(query[i]))
    #  
    #  
    #  query[i]$localStart <- query[i]$localStart + 54
    #  query[i]$localEnd <- query[i]$localEnd + 54
    #  
    #  subir <- IRanges(query[i]$localStart, query[i]$localEnd)
    #  refSeq <- as.character(unlist(unname(extractAt(anno["MT-ND6"]$refDNA, subir))))
    #  
    #  show(refSeq)
    #  show(ref(query[i]))
    #  
    #}
    #########################
    
    # Local codon starts and ends
    # Taking a conservative approach
    query[i]$startCodon <- floor(query[i]$localStart / 3)
    query[i]$endCodon <- ceiling(query[i]$localEnd / 3)

    # Everything is 1-indexed (if variant is in the first codon)
    if (query[i]$startCodon == 0) query[i]$startCodon <- 1
    if (query[i]$endCodon == 0) query[i]$endCodon <- 1
  }

  return(query)
}
trichelab/MTseeker documentation built on March 8, 2021, 6:20 p.m.