R/MVRangesList.R

Defines functions MVRangesList

Documented in MVRangesList

#' like a VRangesList, but for mitochondria
#' 
#' @import VariantAnnotation
#' @import VariantTools
#' @import Biostrings
#' @import S4Vectors
#' 
#' @exportClass MVRangesList
setClass("MVRangesList", contains="SimpleVRangesList")


#' Wrap a VRangesList for mitochondrial use.
#'
#' Usually an MVRangesList will be created by callMT.
#'
#' @rdname        MVRangesList-methods
#' 
#' @return        the MVRangesList
#' 
#' @examples
#'
#' library(MTseekerData)
#' BAMdir <- system.file("extdata", "BAMs", package="MTseekerData")
#' BAMs <- paste0(BAMdir, "/", list.files(BAMdir, pattern=".bam$"))
#' targets <- data.frame(BAM=BAMs, stringsAsFactors=FALSE) 
#' rownames(targets) <- sapply(strsplit(basename(BAMs), "\\."), `[`, 1)
#' (mall <- getMT(targets))
#'
#' if (requireNamespace("GmapGenome.Hsapiens.rCRS", quietly=TRUE)) {
#'   (mvrl <- callMT(mall))
#'   filt(mvrl$pt1_cell1)
#' } else { 
#'   message("You have not yet installed an rCRS reference genome.")
#'   message("Consider running the indexMTgenome() function to do so.")
#'   message("An example MVRangesList is RONKSvariants from MTseekerData.")
#' }
#'
#' @export
MVRangesList <- function(...) {
  new("MVRangesList", GenomicRangesList(...), elementType = "MVRanges")
}


#' MVRangesList methods (centralized).
#'
#' @section Utility methods:
#' 
#' `genomeCoverage`       returns estimated mitochondrial read coverage depth
#' `coverage`             returns an RleList of coverage for each sample's chrM
#' `filt`                 removes variants where PASS != TRUE for each element 
#'
#' @section Annotation methods:
#'
#' `genes`                returns an annotated GRanges of mitochondrial genes 
#' `getAnnotations`       returns a GRanges of annotated mitochondrial features
#' `genome`               returns the genome (or, perhaps, genomes) in an MVRL
#' `encoding`             returns mutations in coding regions for each element
#' `granges`              returns mildly annotated aggregates of variant sites
#' `snpCall`              retrieves single nucleotide variant polymorphisms 
#' `locateVariants`       locates variants within genes, tRNA, rRNA, or D-loop
#' `summarizeVariants`    attempts mass functional annotation of variant sites
#' `consensusString`      creates consensus genotypes from rCRS for eg Haplogrep
#' 
#' @section Visualization methods:
#'
#' `plot`                 creates circular plot of mitochondrial variant calls
#'
#' @param x             an MVRangesList (for some methods)
#' @param query         an MVRangesList (for predictCoding)
#' @param object        an MVRangesList (for other methods)
#' @param annotations   an MVRangesList (for getAnnotations)
#' @param filterLowQual opt. for `granges`/`summarizeVariants`
#' @param y             another MVRangesList
#' @param varAllele     variant alleles
#' @param subject       a GRanges, usually 
#' @param seqSource     a BSgenome, usually 
#' @param ...           miscellaneous args, passed through
#'
#' @return              depends on the method invoked.
#' 
#' @name  MVRangesList-methods
NULL


#' @rdname    MVRangesList-methods
#' @export
setMethod("genomeCoverage", signature(x="MVRangesList"), 
          function(x) sapply(x, genomeCoverage))


#' @rdname    MVRangesList-methods
#' @export
setMethod("genes", signature(x="MVRangesList"), 
          function(x) subset(getAnnotations(x), region == "coding"))


#' @rdname    MVRangesList-methods
#' @export
setMethod("snpCall", signature(object="MVRangesList"),
          function(object) endoapply(object, snpCall))


#' @rdname    MVRangesList-methods
#' @export
setMethod("getAnnotations", signature(annotations="MVRangesList"), 
          function(annotations) getAnnotations(annotations[[1]]))


#' @rdname    MVRangesList-methods
#' @export
setMethod("encoding", signature(x="MVRangesList"), 
          function(x) MVRangesList(lapply(x, encoding)))


#' @rdname    MVRangesList-methods
#' @export
setMethod("coverage", signature(x="MVRangesList"), 
          function(x) RleList(lapply(x, coverage)))


#' @rdname    MVRangesList-methods
#' @export
setMethod("predictCoding", # mitochondrial annotations kept internally
          signature(query="MVRangesList", "missing", "missing", "missing"), 
          function(query, ...) sapply(query, predictCoding))


#' @rdname    MVRangesList-methods
#' @export
setMethod("show", signature(object="MVRangesList"),
          function(object) {
            callNextMethod()
            covgs <- paste0(round(unname(sapply(object, genomeCoverage))), "x")
            cat(S4Vectors:::labeledLine("genomeCoverage", covgs))
            if ("counts" %in% names(metadata(object))) {
              peaks <- nrow(metadata(object)$counts)
              cat(ifelse("bias" %in% names(rowData(counts(object))),
                  "Bias-corrected ", "Raw "))
              cat("fragment counts at", peaks, "peaks are available from",
                  "counts(object).\n")
            }
          })


# helper
setAs(from="MVRangesList", to="GRangesList",
      function(from) GRangesList(lapply(from, granges)))


#' @rdname    MVRangesList-methods
#' @export
setMethod("filt", signature(x="MVRangesList"),
          function(x) {
            message("Filtering out low-quality calls...")
            MVRangesList(sapply(x, subset, PASS))
          })


#' @rdname    MVRangesList-methods
#' @export
setMethod("granges", signature(x="MVRangesList"),
          function(x, filterLowQual=TRUE) {

            # if cached...
            if (filterLowQual == TRUE & 
                "granges.filtered" %in% names(metadata(x))) {
              return(metadata(x)$granges.filtered)
            } else if (filterLowQual == FALSE & 
                       "granges.unfiltered" %in% names(metadata(x))) {
              return(metadata(x)$granges.unfiltered)
            }

            # if not...
            if (filterLowQual == TRUE) x <- filt(x) 
            anno <- suppressMessages(getAnnotations(x))
            message("Aggregating variants...")
            gr <- unlist(as(x, "GRangesList")) 
            gr <- keepSeqlevels(gr, "chrM", pruning.mode="coarse")
            gr <- reduce(gr)
            ol <- findOverlaps(gr, anno)
            metadata(gr)$annotation <- anno
            metadata(gr)$sampleNames <- names(x)
            message("Annotating variants by region...")
            gr$gene <- NA_character_
            gr[queryHits(ol)]$gene <- names(anno)[subjectHits(ol)] 
            gr$region <- NA_character_
            gr[queryHits(ol)]$region <- anno[subjectHits(ol)]$region
            message("Annotating variants by sample...") 
            hitMat <- matrix(0, ncol=length(x), nrow=length(gr),
                             dimnames=list(NULL, names(x)))
            varHits <- findOverlaps(as(x, "GRangesList"), gr)
            bySample <- split(subjectHits(varHits), queryHits(varHits))
            for (s in seq_along(bySample)) hitMat[bySample[[s]], s] <- 1
            mcols(gr)$overlaps <- hitMat 
            return(gr)
          })


#' @rdname    MVRangesList-methods
#' @export
setMethod("summarizeVariants", 
          signature(query="MVRangesList","missing","missing"),
          function(query, filterLowQual=TRUE, ...) {
            
            # code duplication! refactor
            getRangedImpact <- function(pos) {
              url <- paste("http://mitimpact.css-mendel.it", "api", "v2.0",
                           "genomic_position", pos, sep="/")
              res <- as.data.frame(read_json(url, simplifyVector=TRUE)$variants)
              if (nrow(res) > 0) {
                res$genomic <- with(res, paste0("m.", Start, Ref, ">", Alt))
                res$protein <- with(res, paste0("p.",AA_ref,AA_position,AA_alt))
                res$Consequence <- with(res, 
                                        paste(Gene_symbol, 
                                              paste0(AA_ref,
                                                     AA_position,
                                                     AA_alt)))
                res[, c("genomic","protein","Start",
                        "Ref","Alt","Codon_substitution","dbSNP_150_id",
                        "Mitomap_Phenotype","Mitomap_Status",
                        "Gene_symbol","OXPHOS_complex",
                        "Consequence","APOGEE_boost_consensus","MToolBox")]
              } else {
                return(NULL)
              }
            }

            gr <- granges(query, filterLowQual=filterLowQual, ...)
            names(gr) <- as.character(gr)
            message("Retrieving functional annotations for variants...")
            hits <- lapply(as.character(ranges(gr)), getRangedImpact)
            rsv <- do.call(rbind, hits[which(sapply(hits, length) > 0)])
            names(rsv) <- sub("Start", "start", names(rsv)) # grrr
            rsv$chrom <- "chrM"
            rsv$end <- rsv$start # FIXME
            res <- makeGRangesFromDataFrame(rsv, keep.extra.columns=TRUE)
            seqinfo(res) <- seqinfo(gr)
            return(res)

          })


#' @rdname    MVRangesList-methods
#' @export
setMethod("genome", signature(x="MVRangesList"),
          function(x) unique(sapply(lapply(x, genome), unique)))


#' @rdname    MVRangesList-methods
#' @export
setMethod("locateVariants", 
          signature(query="MVRangesList","missing","missing"),
          function(query, filterLowQual=TRUE, ...) {

            stop("Don't use this method for now. It has bugs!")
            if (filterLowQual == TRUE) query <- filt(query)
            MVRangesList(lapply(query, locateVariants))

          })


#' @rdname    MVRangesList-methods
#' @export
setMethod("plot", signature(x="MVRangesList"),
          function(x, ...) MTcircos(x, ...))


#' @rdname    MVRangesList-methods
#' @export
setMethod("consensusString", signature(x="MVRangesList"), 
          function(x, ...) {
            actual <- unique(genome(x))
            res <- DNAStringSet(lapply(lapply(x, consensusString), `[[`, 1))
            names(res) <- paste(sapply(x, function(y) 
                                as.character(unique(sampleNames(y)))), 
                                actual, sep=".")
            return(res)
          })

Try the MTseeker package in your browser

Any scripts or data that you put into this service are public.

MTseeker documentation built on Oct. 31, 2019, 3:20 a.m.