R/MVRangesList.R

Defines functions .extractBamFiles .extractCoverageRles .extractCommonElts MVRangesList

Documented in MVRangesList

#' like a VRangesList, but for mitochondria
#' 
#' @import VariantAnnotation
#' @import VariantTools
#' @import Biostrings
#' @import S4Vectors
#' 
#' @exportClass MVRangesList
setClass("MVRangesList", contains="SimpleVRangesList")


#' Wrap a VRangesList for mitochondrial use.
#'
#' Usually an MVRangesList will be created by callMT.
#' 
#' If the elements were generated by pileupMT(), a coverageRles DataFrame
#' will be generated and placed into the metadata() list for the result. 
#' Similarly, if an element named 'bam' is located in the metadata of the
#' supplied arguments, it will be combined into a vector named bamFiles. 
#'
#' @param   ...           The MVRanges elements to turn into an MVRangesList
#' @param   bamFiles      (Optional) BAM filenames for each MVRanges element
#' @param   coverageRles  (Optional) coverage Rles for each MVRanges element
#' @param   verbose       Be verbose? (default is FALSE) 
#' 
#' @rdname                MVRangesList-methods
#' 
#' @return                the MVRangesList
#' 
#' @examples
#' 
#' library(MTseekerData)
#' BAMdir <- system.file("extdata", "BAMs", package="MTseekerData")
#' pdxBAMs <- paste0(BAMdir, "/", list.files(BAMdir, pattern="^PDX.*.bam$"))
#' (mvrl <- pileupXenograft(pdxBAMs[1]))
#'
#' @export
MVRangesList <- function(..., bamFiles=NULL, coverageRles=NULL, verbose=FALSE) {

  if (is(..., "MVRangesList")) {
    if (verbose) message("Argument is an MVRangesList")
    res <- (...) 
  } else if (is(..., "VRangesList")) {
    if (verbose) message("Argument is a VRangesList")
    res <- new("MVRangesList", ..., elementType="MVRanges")
  } else { 
    if (verbose) message("Argument is not a VRangesList or an MVRangesList")
    res <- new("MVRangesList", VRangesList(...), elementType="MVRanges")
  }
  
  if (!is.null(coverageRles)) metadata(res)$coverageRles <- coverageRles
  if (!is.null(bamFiles)) metadata(res)$bamFiles <- bamFiles
  if (verbose) message("elementType is ", elementType(res))

  return(res) 

}


#' MVRangesList methods (centralized).
#'
#' @section Utility methods:
#' 
#' `genomeCoverage`       returns estimated mitochondrial read coverage depth
#' `coverage`             returns an RleList of coverage for each sample's chrM
#' `filt`                 removes variants where PASS != TRUE for each element 
#'
#' @section Annotation methods:
#'
#' `genes`                returns an annotated GRanges of mitochondrial genes 
#' `metadata`             get or set elements in the metadata() of an MVRL 
#' `getAnnotations`       returns a GRanges of annotated mitochondrial features
#' `genome`               returns the genome (or, perhaps, genomes) in an MVRL
#' `encoding`             returns mutations in coding regions for each element
#' `granges`              returns mildly annotated aggregates of variant sites
#' `snpCall`              retrieves single nucleotide variant polymorphisms 
#' `locateVariants`       locates variants within genes, tRNA, rRNA, or D-loop
#' `summarizeVariants`    attempts mass functional annotation of variant sites
#' `consensusString`      creates consensus genotypes from rCRS for eg Haplogrep
#' 
#' @section Visualization methods:
#'
#' `plot`                 creates circular plot of mitochondrial variant calls
#'
#' @param x             an MVRangesList (for some methods)
#' @param query         an MVRangesList (for predictCoding)
#' @param object        an MVRangesList (for other methods)
#' @param annotations   an MVRangesList (for getAnnotations)
#' @param filterLowQual opt. for `granges`/`summarizeVariants`
#' @param y             another MVRangesList
#' @param varAllele     variant alleles
#' @param subject       a GRanges, usually 
#' @param seqSource     a BSgenome, usually 
#' @param ...           miscellaneous args, passed through
#'
#' @return              depends on the method invoked.
#' 
#' @name  MVRangesList-methods
NULL


#' @rdname    MVRangesList-methods
#' @export
setMethod("genomeCoverage", signature(x="MVRangesList"), 
          function(x) sapply(x, genomeCoverage))


#' @rdname    MVRangesList-methods
#' @export
setMethod("genes", signature(x="MVRangesList"), 
          function(x) subset(getAnnotations(x), region == "coding"))


#' @rdname    MVRangesList-methods
#' @export
setMethod("snpCall", signature(object="MVRangesList"),
          function(object) endoapply(object, snpCall))


#' @rdname    MVRangesList-methods
#' @export
setMethod("getAnnotations", signature(annotations="MVRangesList"), 
          function(annotations) getAnnotations(annotations[[1]]))


#' @rdname    MVRangesList-methods
#' @export
setMethod("encoding", signature(x="MVRangesList"), 
          function(x) MVRangesList(lapply(x, encoding)))


#' @rdname    MVRangesList-methods
#' @export
setMethod("coverage", signature(x="MVRangesList"), 
          function(x) RleList(lapply(x, coverage)))


#' @rdname    MVRangesList-methods
#' @export
setMethod("predictCoding", # mitochondrial annotations kept internally
          signature(query="MVRangesList", "missing", "missing", "missing"), 
          function(query, ...) sapply(query, predictCoding))


#' @rdname    MVRangesList-methods
#'
#' @export
setMethod("show", signature(object="MVRangesList"),
          function(object) {
            callNextMethod() # not sure why it has to be done like this... 
          })


# helper; deprecated 
# setAs(from="MVRangesList", to="GRangesList",
#      function(from) {
#        message("Coercing from MVRangesList to GRangesList...") 
#        GRangesList(lapply(from, granges))
#      })


# helper
setAs(from="MVRangesList", to="GRanges",
      function(from) {
        message("Coercing from MVRangesList to GRanges...") 
        granges(from)
      })


# helper
# setAs(from="MVRangesList", to="VRangesList",
#       function(from) {
#         message("Coercing from MVRangesList to VRangesList...") 
#         VRangesList(lapply(from, as, "VRanges"))
#       })


#' @rdname    MVRangesList-methods
#' @export
setMethod("filt", signature(x="MVRangesList"),
          function(x) {
            message("Filtering out low-quality calls...")
            MVRangesList(sapply(x, subset, PASS))
          })


#' @rdname    MVRangesList-methods
#' @export
setMethod("granges", signature(x="MVRangesList"),
          function(x, filterLowQual=TRUE) {
            
            # if cached...
            if (filterLowQual == TRUE & 
                "granges.filtered" %in% names(metadata(x))) {
              return(metadata(x)$granges.filtered)
            } else if (filterLowQual == FALSE & 
                       "granges.unfiltered" %in% names(metadata(x))) {
              return(metadata(x)$granges.unfiltered)
            }

            # if not...
            #if (filterLowQual == TRUE) x <- filt(x) 
            anno <- suppressMessages(getAnnotations(x))
            
            message("Aggregating variants...")
            gr <- GRanges(unlist(x))
            gr <- keepSeqlevels(gr, "chrM", pruning.mode="coarse")
            gr <- reduce(gr)
            ol <- findOverlaps(gr, anno)
            metadata(gr)$annotation <- anno
            metadata(gr)$sampleNames <- names(x)
            
            message("Annotating variants by region...")
            gr$gene <- NA_character_
            gr[queryHits(ol)]$gene <- names(anno)[subjectHits(ol)] 
            gr$region <- NA_character_
            gr[queryHits(ol)]$region <- anno[subjectHits(ol)]$region
            
            message("Annotating variants by sample...") 
            hitMat <- matrix(0, ncol=length(x), nrow=length(gr),
                             dimnames=list(NULL, names(x)))
            xgrl <- GRangesList(lapply(x, GRanges))
            varHits <- findOverlaps(xgrl, gr)
            bySample <- split(subjectHits(varHits), queryHits(varHits))
            for (s in seq_along(bySample)) hitMat[bySample[[s]], s] <- 1
            mcols(gr)$overlaps <- hitMat 
            return(gr)
          })


#' @rdname    MVRangesList-methods
#' @export
setMethod("summarizeVariants", 
          signature(query="MVRangesList","missing","missing"),
          function(query, filterLowQual=TRUE, ...) {
            
            # code duplication! refactor
            getRangedImpact <- function(pos) {
              url <- paste("http://mitimpact.css-mendel.it", "api", "v2.0",
                           "genomic_position", pos, sep="/")
              res <- as.data.frame(read_json(url, simplifyVector=TRUE)$variants)
              if (nrow(res) > 0) {
                res$genomic <- with(res, paste0("m.", Start, Ref, ">", Alt))
                res$protein <- with(res, paste0("p.",AA_ref,AA_position,AA_alt))
                res$Consequence <- with(res, 
                                        paste(Gene_symbol, 
                                              paste0(AA_ref,
                                                     AA_position,
                                                     AA_alt)))
                res[, c("genomic","protein","Start",
                        "Ref","Alt","Codon_substitution","dbSNP_150_id",
                        "Mitomap_Phenotype","Mitomap_Status",
                        "Gene_symbol","OXPHOS_complex",
                        "Consequence","APOGEE_boost_consensus","MToolBox")]
              } else {
                return(NULL)
              }
            }
            
            gr <- granges(query, filterLowQual=filterLowQual, ...)
            names(gr) <- as.character(gr)
            message("Retrieving functional annotations for variants...")
            hits <- lapply(as.character(ranges(gr)), getRangedImpact)
            rsv <- do.call(rbind, hits[which(sapply(hits, length) > 0)])
            names(rsv) <- sub("Start", "start", names(rsv)) # grrr
            rsv$chrom <- "chrM"
            rsv$end <- rsv$start # FIXME
            res <- makeGRangesFromDataFrame(rsv, keep.extra.columns=TRUE)
            seqinfo(res) <- seqinfo(gr)
            return(res)
            
          })


#' @rdname    MVRangesList-methods
#' @export
setMethod("genome", signature(x="MVRangesList"),
          function(x) unique(sapply(lapply(x, genome), unique)))


#' @rdname    MVRangesList-methods
#' @export
setMethod("locateVariants", 
          signature(query="MVRangesList","missing","missing"),
          function(query, filterLowQual=TRUE, ...) {
            
            stop("Don't use this method for now. It has bugs!")
            if (filterLowQual == TRUE) query <- filt(query)
            MVRangesList(lapply(query, locateVariants))
            
          })


#' @rdname    MVRangesList-methods
#' @export
setMethod("plot", signature(x="MVRangesList"),
          function(x, ...) MTcircos(x, ...))


#' @rdname    MVRangesList-methods
#' @export
setMethod("consensusString", signature(x="MVRangesList"), 
          function(x, ...) {
            actual <- unique(genome(x))
            res <- DNAStringSet(lapply(lapply(x, consensusString), `[[`, 1))
            names(res) <- paste(sapply(x, function(y) 
              as.character(unique(sampleNames(y)))), 
              actual, sep=".")
            return(res)
          })


# helper fn
.extractCommonElts <- function(...) { 
  Reduce(intersect, lapply(lapply(..., metadata), names))
}


# helper fn
.extractCoverageRles <- function(...) { 
  if ("coverageRle" %in% .extractCommonElts(...)) {
    df <- lapply(lapply(lapply(..., metadata), `[[`, "coverageRle"), DataFrame)
    return(df)
  } else { 
    return(NULL)
  }
}


# helper fn
.extractBamFiles <- function(...) { 
  if ("bam" %in% .extractCommonElts(...)) {
    return(sapply(lapply(..., metadata), `[[`, "bam"))
  } else { 
    return(NULL)
  }
}
trichelab/MTseeker documentation built on March 8, 2021, 6:20 p.m.