##########################################################################/**
# @RdocClass BamDataFile
#
# @title "The BamDataFile class"
#
# \description{
# @classhierarchy
#
# A BamDataFile object represents a BAM file.
# }
#
# @synopsis
#
# \arguments{
# \item{...}{Arguments passed to @see "R.filesets::GenericDataFile".}
# }
#
# \section{Fields and Methods}{
# @allmethods "public"
# }
#
# @author "HB"
#
# \references{
# [1] The SAM Format Specification Working Group,
# \emph{The SAM Format Specification}, Sept 7, 2011.\cr
# }
#
# \seealso{
# An object of this class is typically part of an
# @see "BamDataSet".
# }
#*/###########################################################################
setConstructorS3("BamDataFile", function(...) {
extend(GenericDataFile(...), c("BamDataFile", uses("AromaSeqDataFile"), uses("SequenceContigsInterface")))
})
setMethodS3("as.character", "BamDataFile", function(x, ...) {
this <- x
s <- NextMethod("as.character")
if (isFile(this)) {
s <- c(s, sprintf("Has index file (*.bai): %s", hasIndex(this)))
s <- c(s, sprintf("Is sorted: %s", isSorted(this)))
if (hasIndex(this)) {
s <- c(s, getSeqGenericSummary(x, ...))
counts <- getReadCounts(this)
counts <- c(counts, total=sum(counts, na.rm=TRUE))
frac <- 100*counts/counts["total"]
counts <- pi3(counts)
s <- c(s, sprintf("Number of mapped reads: %s (%.1f%%) out of %s", counts["mapped"], frac["mapped"], counts["total"]))
s <- c(s, sprintf("Number of unmapped reads: %s (%.1f%%) out of %s", counts["unmapped"], frac["unmapped"], counts["total"]))
}
s <- c(s, sprintf("Generated by: %s", getProgramString(this)))
}
s
}, protected=TRUE)
setMethodS3("buildIndex", "BamDataFile", function(this, ..., skip=!overwrite, overwrite=FALSE) {
pathname <- getPathname(this)
pathnameBAI <- sprintf("%s.bai", pathname)
if (hasIndex(this)) {
if (skip) {
return(invisible(getIndexFile(this, create = FALSE)))
}
if (!overwrite) {
throw("Cannot build index file (*.bai). File already exists: ", pathnameBAI)
}
}
pathname <- Arguments$getReadablePathname(pathname)
pathnameT <- Rsamtools::indexBam(pathname)
getIndexFile(this, create = FALSE)
})
setMethodS3("getIndexFile", "BamDataFile", function(this, create = TRUE, clean = TRUE, ...) {
pathname <- getPathname(this)
pathnameIDX <- sprintf("%s.bai", pathname)
pathnameIDX <- Arguments$getReadablePathname(pathnameIDX, mustExist = FALSE)
if (isFile(pathnameIDX)) {
## If index file is outdated, deleted
if (clean && file_test("-ot", pathnameIDX, pathname)) {
warning("Detected outdated index file and recreated it: ", pathnameIDX)
file.remove(pathnameIDX)
## Will try to recreate it
create <- TRUE
}
}
if (!isFile(pathnameIDX)) {
if (!create) return(NULL)
pathnameT <- Rsamtools::indexBam(pathname)
}
BamIndexDataFile(pathnameIDX)
})
setMethodS3("hasIndex", "BamDataFile", function(this, ...) {
!is.null(getIndexFile(this, create = FALSE))
})
setMethodS3("getIndexStats", "BamDataFile", function(this, ..., force=FALSE) {
stats <- this$.idxStats
if (force || is.null(stats)) {
pathname <- getPathname(this)
if (!hasIndex(this)) {
throw("Cannot get index statistics. Index does not exists: ", pathname)
}
# Quote pathnames
bfr <- systemSamtools("idxstats", shQuote(pathname), stdout=TRUE, ...)
bfr <- strsplit(bfr, split="\t", fixed=TRUE)
seqName <- sapply(bfr, FUN=.subset, 1L)
seqLength <- as.integer(sapply(bfr, FUN=.subset, 2L))
countMapped <- as.numeric(sapply(bfr, FUN=.subset, 3L))
countUnmapped <- as.numeric(sapply(bfr, FUN=.subset, 4L))
# NOTE: samtools idxstats can return ridicolously(!) large read
# counts. Let's assume they are errors and set to NAs. /HB 2010-10-02
countMapped[countMapped >= .Machine$integer.max] <- NA
countUnmapped[countUnmapped >= .Machine$integer.max] <- NA
countMapped <- as.integer(countMapped)
countUnmapped <- as.integer(countUnmapped)
stats <- data.frame(length=seqLength, mapped=countMapped, unmapped=countUnmapped)
rownames(stats) <- seqName
this$.idxStats <- stats
}
stats
})
setMethodS3("getReadCounts", "BamDataFile", function(this, ...) {
if (isCapableOf(aroma.seq, "samtools")) {
# Alt 1. samtools indexstats
stats <- getIndexStats(this, ...)
counts <- colSums(stats[,c("mapped", "unmapped")], na.rm=TRUE)
} else {
# Alt 1. Rsamtools
use("Rsamtools")
pathname <- getPathname(this)
stats <- captureOutput({
quickBamFlagSummary(pathname, index=pathname, main.groups.only=FALSE)
})
stats <- grep("record is", stats, value=TRUE)
.stop_if_not(length(stats) == 2L)
stats <- strsplit(stats, split="|", fixed=TRUE)
stats <- sapply(stats, FUN=function(x) x[2L])
counts <- as.integer(stats)
names(counts) <- c("mapped", "unmapped")
}
# Sanity check
.stop_if_not(length(counts) == 2L)
counts
})
setMethodS3("nbrOfReads", "BamDataFile", function(this, ...) {
## Does always work. /HB 2014-04-18
## Maybe 'samtools flagstat' is better?
counts <- getReadCounts(this, ...)
## Very slow?!? /HB 2014-04-19
## counts <- Rsamtools::countBam(getPathname(this))$records
sum(counts, na.rm=TRUE)
})
setMethodS3("nbrOfMappedReads", "BamDataFile", function(this, ...) {
counts <- getReadCounts(this, ...)
counts["mapped"]
})
setMethodS3("nbrOfUnmappedReads", "BamDataFile", function(this, ...) {
counts <- getReadCounts(this, ...)
counts["unmapped"]
})
# \details{
# BAM headers typically contain an \code{"@HD VN:1.0 SO:<value>"} entry,
# where \code{<value>} indicates whether the aligned reads are sorted
# or not. Unfortunately, this entry is neither enforced nor has it to
# be correct [1,2].
#
# Instead, we consider a BAM file to be sorted if and only if it has
# an index file. The rationale is that it is not possible to index
# a BAM file unless it is sorted first.
# }
#
# \references{
# [1] Question: is my BAM file sorted?, Biostar, 2011,
# \url{http://www.biostars.org/post/show/5256/is-my-bam-file-sorted/}\cr
# [2] Asking for suggestiona on samtools bug fixing, SEQanswers, 2010,
# \url{http://seqanswers.com/forums/showthread.php?t=3739}\cr
# }
setMethodS3("isSorted", "BamDataFile", function(this, ...) {
isTRUE(hasIndex(this))
})
# Argument '...' must be 2nd to match the generic base::sort() function.
setMethodS3("sort", "BamDataFile", function(x, ..., force=FALSE) {
# To please R CMD check
this <- x
# Nothing todo?
if (!force && isSorted(this)) {
return(this)
}
throw("Not yet implemented!")
})
setMethodS3("getTargets", "BamDataFile", function(this, ...) {
hdr <- getHeader(this)
targets <- hdr$targets
targets
})
setMethodS3("getSeqLengths", "BamDataFile", function(this, ...) {
getTargets(this)
})
## BEGIN: DEPRECATE?
setMethodS3("nbrOfTargets", "BamDataFile", function(this, ...) {
nbrOfSeqs(this, ...)
})
setMethodS3("getTargetNames", "BamDataFile", function(this, ...) {
getSeqNames(this, ...)
})
setMethodS3("getTargetLengths", "BamDataFile", function(this, ...) {
getSeqLengths(this, ...)
})
setMethodS3("getTotalTargetLength", "BamDataFile", function(this, ...) {
getTotalSeqLength(this, ...)
})
## END: DEPRECATE?
setMethodS3("getHeader", "BamDataFile", function(this, force=FALSE, ...) {
header <- this$.header
if (force || is.null(header)) {
header <- readHeader(this, ...)
this$.header <- header
}
header
})
setMethodS3("readHeader", "BamDataFile", function(this, ...) {
pathname <- getPathname(this)
pathname <- Arguments$getReadablePathname(pathname)
bf <- Rsamtools::BamFile(pathname, index=character(0L))
hdr <- Rsamtools::scanBamHeader(bf)
hdr
}, private=TRUE)
setMethodS3("getPrograms", "BamDataFile", function(this, ...) {
hdr <- getHeader(this, ...)
pgs <- hdr$text[names(hdr$text) == "@PG"]
pgs <- lapply(pgs, FUN=function(pg) {
pg <- trim(pg)
pg <- strsplit(pg, split=":", fixed=TRUE)
keys <- sapply(pg, FUN=function(x) x[1L])
values <- sapply(pg, FUN=function(x) x[-1L])
names(values) <- keys
values
})
ids <- sapply(pgs, FUN=function(x) x[[1L]])
pgs <- lapply(pgs, FUN=function(x) x[-1L])
names(pgs) <- ids
pgs
}, protected=TRUE)
setMethodS3("getProgram", "BamDataFile", function(this, ...) {
pgList <- getPrograms(this, ...)
if (length(pgList) >= 1L) {
pgList <- pgList[[1L]]
} else {
pgList <- list()
}
pgList
}, protected=TRUE)
setMethodS3("getProgramString", "BamDataFile", function(this, full=FALSE, ...) {
pgs <- getPrograms(this, ...)
res <- c()
for (kk in seq_along(pgs)) {
pg <- pgs[[kk]]
name <- names(pgs)[kk]
resK <- sprintf("(%d)", kk)
# Program name
pn <- c()
if (!is.null(name)) pn <- c(pn, gsub("[.][0-9]+$", "", name))
if (is.element("PN", names(pg))) pn <- c(pn, pg["PN"])
if (length(pn) == 2L) {
pattern <- sprintf("^%s", pn[2])
pn[1] <- gsub(pattern, "", pn[1])
pn <- pn[nzchar(pn)]
}
pn <- paste(pn, collapse=" ")
resK <- c(resK, pn)
if (is.element("VN", names(pg))) resK <- c(resK, sprintf("v%s", pg["VN"]))
if (full) {
if (is.element("CL", names(pg))) resK <- c(resK, sprintf("(call: %s)", pg["CL"]))
if (is.element("DS", names(pg))) resK <- c(resK, sprintf("[%s]", pg["DS"]))
}
resK <- paste(resK, collapse=" ")
res <- c(res, resK)
}
res <- paste(res, collapse="; ")
res
}, protected=TRUE)
# \section{Read Groups (RG) specification}{
# The RG fields populated are ID, SM, PL and LB, where
# ID=identifier, SM=sample, PL=platform/technology, and
# LB=library (DNA library prep identifier).
# ID: Each RG must have a unique ID.
# SM: Sample. Use pool name where a pool is being sequenced.
# PM: Platform/technology used to produce the reads. Valid values:
# CAPILLARY, HELICOS, ILLUMINA, IONTORRENT, LS454, PACBIO, and SOLID.
# LB: Library.
#
# Note that tools such as GATK (Broad Institute) requires read groups
# 'ID' and 'SM'. [3].
# }
#
# \references{
# [1] The SAM Format Specification Working Group,
# \emph{The SAM Format Specification}, Sept 7, 2011.\cr
# [2] "What is '@RG ID' versus '@RG SM'"
# \url{http://seqanswers.com/forums/showthread.php?t=9784}\cr
# [3] Geraldine van der Auwera,
# \emph{Collected FAQs about BAM files},
# The Broad Institute, Aug 2012.
# \url{http://gatkforums.broadinstitute.org/discussion/1317/collected-faqs-about-bam-files}\cr
# }
setMethodS3("getReadGroups", "BamDataFile", function(this, ...) {
hdr <- getHeader(this, ...)
SamReadGroup$byScanBamHeader(hdr, ...)
})
setMethodS3("getReadGroup", "BamDataFile", function(this, ...) {
rgList <- getReadGroups(this, ...)
if (length(rgList) >= 1L) {
rgList <- rgList[[1L]]
} else {
rgList <- SamReadGroup()
}
rgList
})
# @RdocMethod replaceAllReadGroups
# @title "Writes a new BAM file with all existing read groups replaced by one new read group"
#
# \description{
# @get "title".
# }
#
# \arguments{
# \item{sample}{Specifies the \code{SM} read group.}
# \item{library}{Specifies the \code{LB} read group.}
# \item{platform}{Specifies the \code{PL} read group.}
# \item{platformUnit}{Specifies the \code{PU} read group.}
# }
setMethodS3("replaceAllReadGroups", "BamDataFile", function(this, rg="*", ..., validate=TRUE, skip=!overwrite, overwrite=FALSE, verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'rg':
if (is.character(rg) && rg == "*") {
rg <- SamReadGroup()
keys <- names(asSamList(rg, drop=FALSE))
for (key in keys) rg[[key]] <- "*"
} else {
rg <- Arguments$getInstanceOf(rg, "SamReadGroup")
}
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Updating Read Groups")
pathname <- getPathname(this)
# Output pathname
pathnameD <- gsub("[.]bam$", ",RG.bam", pathname)
pathnameD <- Arguments$getWritablePathname(pathnameD, mustNotExist=(!skip && !overwrite))
if (isFile(pathnameD)) {
if (skip) {
verbose && cat(verbose, "Already exists. Skipping.")
bf <- newInstance(this, pathnameD)
buildIndex(bf, skip=TRUE, verbose=less(verbose, 10))
verbose && exit(verbose)
return(bf)
}
if (!overwrite) {
throw("Cannot replace SAM read groups. File already exists: ", pathnameD)
}
}
verbose && cat(verbose, "Arguments:")
verbose && print(verbose, rg)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Use default values from existing read groups of the input file?
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rgList <- asSamList(rg)
keep <- sapply(rgList, FUN=function(x) identical(x, "*"))
keys <- names(rgList)[keep]
if (length(keys) > 0L) {
rgDefault <- getReadGroups(this)
rgDefault <- Reduce(merge, rgDefault)
# Sanity check
.stop_if_not(inherits(rgDefault, "SamReadGroup"))
for (key in keys) {
rg[[key]] <- rgDefault[[key]]
}
} # for (key ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
validate(rg)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Assert mandatory fields
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rgList <- asSamList(rg)
mandatory <- c("SM", "LB", "PL", "PU")
missing <- setdiff(mandatory, names(rgList))
if (length(missing)) {
throw("Cannot write read groups. Mandatory fields are missing: ", hpaste(missing))
}
verbose && cat(verbose, "Arguments:")
verbose && str(verbose, rgList)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Write new BAM file
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
args <- list("AddOrReplaceReadGroups", I=pathname, O=pathnameD)
args <- c(args, asString(rg, fmtstr="RG%s=%s"))
verbose && str(verbose, args)
args$verbose <- less(verbose, 10)
res <- do.call(systemPicard, args)
status <- attr(res, "status"); if (is.null(status)) status <- 0L
verbose && cat(verbose, "Results:")
verbose && str(verbose, res)
verbose && cat(verbose, "Status:")
verbose && str(verbose, status)
bf <- newInstance(this, pathnameD)
buildIndex(bf, overwrite=TRUE, verbose=less(verbose, 10))
verbose && exit(verbose)
bf
}) # replaceAllReadGroups()
# See help("scanBam", package="Rsamtools") for definition of 'pos'.
setMethodS3("extractReadStartPositions", "BamDataFile", function(this, param=ScanBamParam(what=c("rname", "pos")), flag=scanBamFlag(isUnmappedQuery=FALSE, isDuplicate=FALSE), ..., verbose=FALSE) {
use("Rsamtools")
pathname <- getPathname(this)
data <- scanBam(pathname, flag=flag, param=param, ...)
data <- data[[1L]]
data <- as.data.frame(data)
data$rname <- as.character(data$rname)
data
})
setMethodS3("readDataFrame", "BamDataFile", function(this, fields=NULL, flag=scanBamFlag(isUnmappedQuery=FALSE, isDuplicate=FALSE), which=which, ..., verbose=FALSE) {
use("Rsamtools")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'fields':
knownFields <- c("qname", "flag", "rname", "strand", "pos", "qwidth", "mapq", "cigar", "mrnm", "mpos", "isize")
if (is.null(fields)) {
fields <- knownFields
}
fields <- match.arg(fields, choices=knownFields, several.ok=TRUE)
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Reading data frame")
pathname <- getPathname(this)
verbose && cat(verbose, "Pathname: ", pathname)
param <- ScanBamParam(what=fields, which=which)
verbose && cat(verbose, "scanBam() parameters:")
verbose && print(verbose, param)
data <- scanBam(pathname, param=param, ...)
# Sanity check
.stop_if_not(is.list(data) && length(data) == 1L)
data <- data[[1L]]
.stop_if_not(is.list(data))
verbose && str(verbose, data)
verbose && enter(verbose, "Coercing list to data frame")
# Sanity check
ns <- sapply(data, FUN=length)
.stop_if_not(all(ns == ns[1L]))
data <- as.data.frame(data, stringsAsFactors=TRUE)
verbose && str(verbose, data)
verbose && exit(verbose)
verbose && exit(verbose)
data
})
setMethodS3("readReadPositions", "BamDataFile", function(this, ..., flag=scanBamFlag(isUnmappedQuery=FALSE, isDuplicate=FALSE), verbose=FALSE) {
use("Rsamtools")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Reading (rname,pos)")
verbose && cat(verbose, "scanBam() flags:")
verbose && print(verbose, flag)
data <- readDataFrame(this, fields=c("rname", "pos"), flag=flag, ..., verbose=less(verbose, 5))
verbose && exit(verbose)
data
}) # readReadPositions()
###########################################################################/**
# @RdocMethod validate
# @alias validate.SamDataFile
#
# @title "Validates a BAM (or SAM) file"
#
# \description{
# @get "title".
# }
#
# @synopsis
#
# \arguments{
# \item{method}{A @character string specifying which validation method to use.}
# \item{...}{Additional arguments passed to the internal validation method.}
# \item{verbose}{See @see "R.utils::Verbose".}
# }
#
# \value{
# Returns (invisibly) what the internal validation method returns,
# or throws an exception if an error is detected.
# }
#
# \seealso{
# Internally \code{picardValidateSamFile()} is used.
# }
#
# @author "HB"
#
# @keyword internal
#*/###########################################################################
setMethodS3("validate", "BamDataFile", function(this, method=c("picard"), ..., verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'method':
method <- match.arg(method)
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, sprintf("Validating %s", class(this)[1L]))
verbose && cat(verbose, "Validation method: ", method)
if (method == "picard") {
.stop_if_not(isCapableOf(aroma.seq, "picard"))
res <- picardValidateSamFile(getPathname(this), ..., verbose=verbose)
}
verbose && exit(verbose)
invisible(res)
}, protected=TRUE)
setMethodS3("writeSample", "BamDataFile", function(this, pathname, n, seed=NULL, ..., verbose=FALSE) {
use("Rsamtools")
# Argument 'pathname':
pathname <- Arguments$getWritablePathname(pathname, mustNotExist=TRUE)
# Argument 'n':
n <- Arguments$getInteger(n, range=c(1,Inf))
nMax <- nbrOfReads(this)
if (n > nMax) {
throw("Requested more reads than available. Cannot sample with replacement: ", n, " > ", nMax)
}
# Argument 'seed':
if (!is.null(seed)) seed <- Arguments$getInteger(seed)
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, sprintf("Writing sample of %s", class(this)[1L]))
verbose && print(verbose, this)
verbose && cat(verbose, "Sample size: ", n)
verbose && cat(verbose, "seed: ", seed)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Set the random seed
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (!is.null(seed)) {
verbose && enter(verbose, "Setting (temporary) random seed")
oldRandomSeed <- NULL
if (exists(".Random.seed", mode="integer")) {
oldRandomSeed <- get(".Random.seed", mode="integer")
}
on.exit({
if (!is.null(oldRandomSeed)) {
.Random.seed <<- oldRandomSeed
}
}, add=TRUE)
verbose && cat(verbose, "The random seed will be reset to its original state afterward.")
verbose && cat(verbose, "Seed: ", seed)
set.seed(seed)
verbose && exit(verbose)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Sample what to keep
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
yieldSize <- 1e6
progress <- isVisible(verbose)
verbose && cat(verbose, "Displaying progress: ", progress)
cprogress <- 0L
dprogress <- ceiling(nMax/yieldSize/100)
sprogress <- paste(rep(".", times=max(min(100/dprogress, 100),1)), collapse="")
verbose && enter(verbose, "Sampling read indexed to keep")
if (n < nMax) {
# FIXME: For a BAM file with, say, 2*10^9 reads, here we
# allocate a vector of 8GB just to keep track of which
# reads to keep. /HB 2014-04-19
verbose && printf(verbose, "Allocating logical vector of length %d: %g GB\n", nMax, (4*nMax)/1024^3)
keep <- logical(length=nMax)
keep[sample(nMax, size=n, replace=FALSE)] <- TRUE
## verbose && print(verbose, table(keep))
# Offset
offset <- 0L
# TODO: Added ram/buffer size option. /HB 2013-07-01
filter <- FilterRules(list(sampler=function(x) {
# Display progress?
if (progress) {
if (cprogress %% dprogress == 0L) {
message(sprogress, appendLF=FALSE)
cprogress <<- cprogress + 1L
}
}
n <- nrow(x)
## message(sprintf("n=%d",n))
# Nothing to do?
if (n == 0L) return(logical(0L))
# Available indices
res <- keep[offset + seq_len(n)]
## message(sprintf("n2=%d",length(res)))
# Sanity check
.stop_if_not(length(res) == n)
# Update offset
offset <<- offset + n
if (progress) {
if (cprogress == 100L) {
message(" [100%]", appendLF=TRUE)
cprogress <<- 0L
}
}
res
}))
} else {
verbose && cat(verbose, "Nothing to filter; keeping all reads")
# Nothing to filter; keep everything
filter <- NULL
}
verbose && exit(verbose)
verbose && enter(verbose, "Filtering")
# Input file
pathnameBAM <- getPathname(this)
# Index file
pathnameI <- sprintf("%s.bai", pathname)
# Write to temporary file
pathnameT <- pushTemporaryFile(pathname)
pathnameIT <- sprintf("%s.bai", pathnameT)
bam <- BamFile(pathnameBAM, yieldSize=yieldSize)
if (progress) message("[0%] ", appendLF=FALSE)
pathname2 <- filterBam(bam, destination=pathnameT, filter=filter, indexDestination=TRUE)
keep <- NULL # Not needed anymore
if (progress && cprogress < 100L) message(" [100%]", appendLF=TRUE)
verbose && cat(verbose, "Created: ", pathname2)
bam2 <- newInstance(this, pathname2)
.stop_if_not(nbrOfReads(bam2) == n)
# Undo temporary files
file.rename(pathnameIT, pathnameI)
pathname <- popTemporaryFile(pathnameT)
verbose && exit(verbose)
bam <- newInstance(this, pathname)
verbose && print(verbose, bam)
verbose && exit(verbose)
bam
}, protected=TRUE)
setMethodS3("tview", "BamDataFile", function(this, reference=NULL, chromosome=NULL, position=1L, ..., verbose=FALSE) {
# Argument 'reference':
if (!is.null(reference)) {
if (inherits(reference, "FastaReferenceFile")) {
pathnameR <- getPathname(reference)
pathnameR <- Arguments$getReadablePathname(pathnameR)
} else {
pathnameR <- Arguments$getReadablePathname(reference)
}
}
# Argument 'chromosome' & 'position':
if (!is.null(chromosome)) {
chromosome <- Arguments$getCharacter(chromosome)
position <- Arguments$getInteger(position, range=c(1L, Inf))
}
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Interactive display of BAM file using samtools tview")
verbose && print(verbose, this)
pathname <- getPathname(this)
pathname <- Arguments$getReadablePathname(pathname)
args <- list("tview", pathname)
if (!is.null(reference)) {
verbose && cat(verbose, "Using reference:")
verbose && print(verbose, reference)
args <- c(args, pathnameR)
if (inherits(reference, "FastaReferenceFile")) {
buildIndex(reference, verbose=verbose)
}
}
# Initial genomic position
if (!is.null(chromosome)) {
pos <- sprintf("%s:%d", chromosome, position)
verbose && cat(verbose, "Initial position: %s", pos)
args <- c(args, sprintf("-p %s", pos))
}
# Additional options
args <- c(args, list(...))
verbose && enter(verbose, "Calling samtools tview")
verbose && str(verbose, args)
res <- do.call(systemSamtools, args=args)
verbose && exit(verbose)
verbose && exit(verbose)
invisible(res)
})
setMethodS3("getFlagStat", "BamDataFile", function(this, ..., force=FALSE) {
stats <- this$.flagStat
if (force || is.null(stats)) {
pathname <- getPathname(this)
if (!hasIndex(this)) {
throw("Cannot get index statistics. Index does not exists: ", pathname)
}
bfr <- systemSamtools("flagstat", shQuote(pathname), stdout=TRUE, ...)
bfr <- trim(bfr)
# Parse
pattern <- "([0-9]+)[ ]*[+][ ]*([0-9]+)[ ]+(.*)"
stats <- lapply(bfr, FUN=function(x) {
counts <- c(gsub(pattern, "\\1", x), gsub(pattern, "\\2", x))
counts <- as.integer(counts)
description <- gsub(pattern, "\\3", x)
data.frame(description=description, passed=counts[1], failed=counts[2], stringsAsFactors=FALSE)
})
stats <- Reduce(rbind, stats)
this$.flagStat <- stats
}
stats
})
setMethodS3("isPaired", "BamDataFile", function(this, ..., force=FALSE) {
isPaired <- this$.isPaired
if (is.null(isPaired) || force) {
bai <- buildIndex(this)
isPaired <- testPairedEndBam(getPathname(this), index = getPathname(bai))
this$.isPaired <- isPaired
}
isPaired
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.