R/get_merged_gr.R

Defines functions get_merged_gr

Documented in get_merged_gr

#' get a GRanges of (by default, standard) chromosomes from seqinfo
#' 
#' refactored from scan_spiked_bam to clarify information flow
#' 
#' @param si        seqinfo, usually from a BAM/CRAM file with spike contigs
#' @param spike     database of spike-in standard sequence features (spike)
#' @param standard  trim to standard chromosomes? (TRUE) 
#' 
#' @return          GRanges with two genomes: the organism assembly and "spike"
#'
#' @examples
#' sb <- system.file("extdata", "example.spike.bam", package="spiky", 
#'                   mustWork=TRUE) 
#' si <- seqinfo_from_header(sb) 
#' genome(si) <- "spike" # no genomic contigs
#' data(spike, package="spiky")
#' get_merged_gr(si, spike=spike) # note canonicalized spikes
#' 
#' @details
#' By default, get_merged_gr will return a GRanges with "standardized" 
#' genomic and spike contig names (i.e. genomic chr1-22, X, Y, M, and 
#' the canonical spike names in data(spike, package="spiky")).
#' 
#' The constraint to "standard" chromosomes on genomic contigs can be 
#' removed by setting `standard` to FALSE in the function arguments.
#'
#' @import          GenomeInfoDb
#' 
#' @export
get_merged_gr <- function(si, spike, standard=TRUE) { 

  # assembly contigs 
  if (standard) {
    chrom_contigs <- seqlevels(keepStandardChromosomes(si))
  } else {
    chrom_contigs <- seqlevels(si)
  }

  # spike contigs using the names found in the merged BAM/CRAM file
  orig_spike_contigs <- subset(seqlevels(si), genome(si) == "spike")

  # the collection of contigs we want to keep for downstream analysis
  merged_contigs <- sortSeqlevels(si[c(chrom_contigs, orig_spike_contigs)])

  # coerce this to a GRanges object 
  gr <- as(merged_contigs, "GRanges")

  # use standardized element names, but keep the seqlevels found in the BAM:
  names(gr) <- as.character(seqnames(rename_spike_seqlevels(gr, spike=spike)))
  return(gr)

} 
trichelab/spiky documentation built on Sept. 17, 2022, 8:44 a.m.