###########################################################################/**
# @RdocClass BwaAlignment
#
# @title "The BwaAlignment class"
#
# \description{
# @classhierarchy
#
# ...
# }
#
# @synopsis
#
# \arguments{
# \item{...}{Arguments passed to @see "AbstractAlignment".}
# \item{indexSet}{An @see "BwaIndexSet".}
# \item{flavor}{A @character string specifying the type of
# BWA algorithm to use for alignment.}
# }
#
# \section{Fields and Methods}{
# @allmethods "public"
# }
#
# \references{
# [1] Li H. and Durbin R., \emph{Fast and accurate short read alignment
# with Burrows-Wheeler Transform}. Bioinformatics, 2009.\cr
# [2] Li H. and Durbin R., \emph{Fast and accurate long-read alignment
# with Burrows-Wheeler Transform}. Bioinformatics, 2010.\cr
# }
#
# \author{Henrik Bengtsson}
#*/###########################################################################
setConstructorS3("BwaAlignment", function(..., indexSet=NULL, flavor=c("backtracking")) {
# Validate arguments
if (!is.null(indexSet)) {
indexSet <- Arguments$getInstanceOf(indexSet, "BwaIndexSet")
}
# Arguments '...':
args <- list(...)
# Arguments 'flavor':
flavor <- match.arg(flavor)
extend(AbstractAlignment(..., indexSet=indexSet, flavor=flavor), "BwaAlignment")
})
setMethodS3("getAsteriskTags", "BwaAlignment", function(this, collapse=NULL, ...) {
tags <- NextMethod("getAsteriskTags", collapse=NULL)
# Drop BWA index 'method' tag, e.g. 'is' or 'bwtsw'
idx <- which(tags == "bwa") + 1L
if (is.element(tags[idx], c("is", "bwtsw"))) tags <- tags[-idx]
# Append flavor
flavor <- getFlavor(this)
if (!identical(flavor, "backtracking")) tags <- c(tags, flavor)
paste(tags, collapse=collapse)
})
setMethodS3("getParameterSets", "BwaAlignment", function(this, ...) {
paramsList <- NextMethod("getParameterSets")
paramsList$method <- list(flavor=getFlavor(this))
paramsList$aln <- getOptionalArguments(this, ...)
paramsList
}, protected=TRUE)
###########################################################################/**
# @RdocMethod process
#
# @title "Runs the BWA aligner"
#
# \description{
# @get "title" on all input files.
# The generated BAM files are all sorted and indexed.
# }
#
# @synopsis
#
# \arguments{
# \item{...}{Not used.}
# \item{skip}{If @TRUE, already processed files are skipped.}
# \item{verbose}{See @see "R.utils::Verbose".}
# }
#
# \value{
# Returns a @see "BamDataSet".
# }
#
# @author
#*/###########################################################################
setMethodS3("process", "BwaAlignment", function(this, ..., skip=TRUE, force=FALSE, verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Local functions
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
asBwaParameter <- function(rg, default=list(), ...) {
if (isEmpty(rg)) {
return(NULL)
}
if (!hasSample(rg)) {
sm <- default$SM
if (is.null(sm)) {
throw("No default value for required read group 'SM' is specified.")
}
rg$SM <- sm
}
# Validate
if (!hasID(rg)) {
id <- default$ID
if (!is.null(id)) {
msg <- "Using default ID that was specified"
} else {
id <- rg$SM
msg <- "No default ID was specified, so will that of the SM read group"
}
msg <- sprintf("BWA 'samse/sampe' requires that the SAM read group has an ID. %s, i.e. ID=%s: %s", msg, id, paste(as.character(rg), collapse=", "))
warning(msg)
rg$ID <- id
}
rgArg <- asString(rg, fmtstr="%s:%s", collapse="\t")
rgArg <- sprintf("@RG\t%s", rgArg)
# Don't forget to put within quotation marks
rgArg <- sprintf("\"%s\"", rgArg)
rgArg <- list(r=rgArg)
} # asBwaParameter()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "BWA alignment")
ds <- getInputDataSet(this)
verbose && cat(verbose, "Input data set:")
verbose && print(verbose, ds)
if (force) {
todo <- seq_along(ds)
} else {
todo <- findFilesTodo(this, verbose=less(verbose, 1))
# Already done?
if (length(todo) == 0L) {
verbose && cat(verbose, "Already done. Skipping.")
res <- getOutputDataSet(this, onMissing="error", verbose=less(verbose, 1))
verbose && exit(verbose)
return(invisible(res))
}
}
is <- getIndexSet(this)
verbose && cat(verbose, "Aligning using index set:")
verbose && print(verbose, is)
indexPrefix <- getIndexPrefix(is)
rgSet <- this$.rgSet
if (!is.null(rgSet)) {
verbose && cat(verbose, "Assigning SAM read group:")
verbose && print(verbose, rgSet)
validate(rgSet)
}
paramsList <- getParameterSets(this)
verbose && printf(verbose, "Additional BWA arguments: %s\n", getParametersAsString(this))
verbose && cat(verbose, "Number of files: ", length(ds))
method <- paramsList$method
if (method != "backtracking") {
throw("Non-supported BWA algorithm. Currently only 'backtracking' is supported: ", method)
}
isPaired <- isPaired(this)
if (isPaired) {
pairs <- getFilePairs(ds)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Apply aligner to each of the FASTQ files
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
isPaired <- isPaired(this)
path <- getPath(this)
res <- listenv()
for (ii in todo) {
df <- ds[[ii]]
name <- getFullName(df)
verbose && enter(verbose, sprintf("BWA alignment of sample #%d ('%s') of %d", ii, name, length(todo)))
# The FASTQ to be aligned
if (isPaired) {
dfList <- list(R1=df, R2=getMateFile(df))
verbose && cat(verbose, "Paired-end read data:")
verbose && print(verbose, dfList)
} else {
dfList <- list(df)
verbose && cat(verbose, "Single-end read data:")
verbose && print(verbose, df)
}
pathnameFQs <- sapply(dfList, FUN=getPathname)
verbose && cat(verbose, "FASTQ pathname(s): ", hpaste(pathnameFQs))
# The SAIs to be generated
fullnames <- sapply(dfList, FUN=getFullName, paired=FALSE)
filenames <- sprintf("%s.sai", fullnames)
pathnameSAIs <- sapply(filenames, FUN=Arguments$getWritablePathname, path=path)
verbose && cat(verbose, "SAI pathname(s): ", hpaste(pathnameSAIs))
# The SAM file to be generated
fullname <- getFullName(dfList[[1L]])
filename <- sprintf("%s.sam", fullname)
pathnameSAM <- Arguments$getWritablePathname(filename, path=path)
verbose && cat(verbose, "SAM pathname: ", pathnameSAM)
# The BAM file to be generated
filename <- sprintf("%s.bam", fullname)
pathnameBAM <- Arguments$getWritablePathname(filename, path=path)
verbose && cat(verbose, "BAM pathname: ", pathnameBAM)
# Nothing to do?
done <- (skip && isFile(pathnameBAM))
if (done) {
verbose && cat(verbose, "Already aligned. Skipping")
res[[ii]] <- pathnameBAM
verbose && exit(verbose)
next
}
res[[ii]] %<-% {
verbose && print(verbose, dfList)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# (a) Generate SAI file via BWA aln
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
verbose && enter(verbose, "Generating SAI")
for (kk in seq_along(pathnameSAIs)) {
pathnameSAI <- pathnameSAIs[kk]
if (!isFile(pathnameSAI)) {
fq <- dfList[[kk]]
pathnameFQ <- pathnameFQs[kk]
.stop_if_not(pathnameFQ == getPathname(fq))
args <- list("aln", f=shQuote(pathnameSAI), shQuote(indexPrefix))
if (inherits(fq, "BamDataFile")) args <- c(args, "-b")
args <- c(args, shQuote(pathnameFQ))
args$verbose <- less(verbose, 10)
args <- c(args, paramsList$aln)
res <- do.call(systemBWA, args = args)
verbose && cat(verbose, "System result code: ", res)
}
} # for (kk ...)
# Sanity check
Arguments$getReadablePathnames(pathnameSAIs)
verbose && exit(verbose)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# (b) Generate SAM via BWA samse/sampe
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (!isFile(pathnameSAM)) {
verbose && enter(verbose, "Generating SAM")
# Extract sample-specific read group
rgII <- getSamReadGroup(df)
if (length(rgSet) > 0L) {
rgII <- merge(rgSet, rgII)
}
verbose && cat(verbose, "Writing SAM Read Groups:")
verbose && print(verbose, rgII)
verbose && cat(verbose, "BWA 'samse' parameter:")
fullname <- getFullName(df)
rgArg <- asBwaParameter(rgII, default=list(ID=fullname, SM=fullname))
verbose && print(verbose, rgArg)
args <- list(pathnameSAI=pathnameSAIs, pathnameFQ=pathnameFQs,
indexPrefix=indexPrefix, pathnameD=pathnameSAM)
args <- c(args, rgArg)
args$verbose <- less(verbose, 5)
if (isPaired) {
res <- do.call(bwaSampe, args=args)
} else {
res <- do.call(bwaSamse, args=args)
}
verbose && cat(verbose, "System result code: ", res)
# BWA 'samse/sampe' can generate empty SAM files
if (isFile(pathnameSAM)) {
if (file.info(pathnameSAM)$size == 0L) {
verbose && cat(verbose, "Removing empty SAM file falsely created by BWA: ", pathnameSAM)
file.remove(pathnameSAM)
}
}
verbose && print(verbose, pathnameSAM)
verbose && exit(verbose)
}
# Sanity check
Arguments$getReadablePathname(pathnameSAM)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# (c) Generates a (sorted and indexed) BAM file from SAM file
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (!isFile(pathnameBAM)) {
verbose && enter(verbose, "Generating BAM")
sf <- SamDataFile(pathnameSAM)
bf <- convertToBam(sf, verbose=less(verbose, 5))
verbose && print(verbose, pathnameBAM)
verbose && exit(verbose)
}
# Sanity check
pathnameBAM <- Arguments$getReadablePathname(pathnameBAM)
pathnameBAM
} ## %<-%
verbose && exit(verbose)
} ## for (ii ...)
## Resolve futures
res <- as.list(res)
bams <- getOutputDataSet(this, onMissing="error", verbose=less(verbose, 1))
verbose && exit(verbose)
invisible(bams)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.