R/BwaAlignment.R

###########################################################################/**
# @RdocClass BwaAlignment
#
# @title "The BwaAlignment class"
#
# \description{
#  @classhierarchy
#
#  ...
# }
#
# @synopsis
#
# \arguments{
#  \item{...}{Arguments passed to @see "AbstractAlignment".}
#  \item{indexSet}{An @see "BwaIndexSet".}
#  \item{flavor}{A @character string specifying the type of
#    BWA algorithm to use for alignment.}
# }
#
# \section{Fields and Methods}{
#  @allmethods "public"
# }
#
# \references{
#   [1] Li H. and Durbin R., \emph{Fast and accurate short read alignment
#       with Burrows-Wheeler Transform}. Bioinformatics, 2009.\cr
#   [2] Li H. and Durbin R., \emph{Fast and accurate long-read alignment
#       with Burrows-Wheeler Transform}. Bioinformatics, 2010.\cr
# }
#
# \author{Henrik Bengtsson}
#*/###########################################################################
setConstructorS3("BwaAlignment", function(..., indexSet=NULL, flavor=c("backtracking")) {
  # Validate arguments
  if (!is.null(indexSet)) {
    indexSet <- Arguments$getInstanceOf(indexSet, "BwaIndexSet")
  }

  # Arguments '...':
  args <- list(...)

  # Arguments 'flavor':
  flavor <- match.arg(flavor)

  extend(AbstractAlignment(..., indexSet=indexSet, flavor=flavor), "BwaAlignment")
})


setMethodS3("getAsteriskTags", "BwaAlignment", function(this, collapse=NULL, ...) {
  tags <- NextMethod("getAsteriskTags", collapse=NULL)
  # Drop BWA index 'method' tag, e.g. 'is' or 'bwtsw'
  idx <- which(tags == "bwa") + 1L
  if (is.element(tags[idx], c("is", "bwtsw"))) tags <- tags[-idx]

  # Append flavor
  flavor <- getFlavor(this)
  if (!identical(flavor, "backtracking")) tags <- c(tags, flavor)

  paste(tags, collapse=collapse)
})


setMethodS3("getParameterSets", "BwaAlignment", function(this, ...) {
  paramsList <- NextMethod("getParameterSets")
  paramsList$method <- list(flavor=getFlavor(this))
  paramsList$aln <- getOptionalArguments(this, ...)
  paramsList
}, protected=TRUE)



###########################################################################/**
# @RdocMethod process
#
# @title "Runs the BWA aligner"
#
# \description{
#   @get "title" on all input files.
#   The generated BAM files are all sorted and indexed.
# }
#
# @synopsis
#
# \arguments{
#  \item{...}{Not used.}
#  \item{skip}{If @TRUE, already processed files are skipped.}
#  \item{verbose}{See @see "R.utils::Verbose".}
# }
#
# \value{
#   Returns a @see "BamDataSet".
# }
#
# @author
#*/###########################################################################
setMethodS3("process", "BwaAlignment", function(this, ..., skip=TRUE, force=FALSE, verbose=FALSE) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  asBwaParameter <- function(rg, default=list(), ...) {
    if (isEmpty(rg)) {
      return(NULL)
    }

    if (!hasSample(rg)) {
      sm <- default$SM
      if (is.null(sm)) {
        throw("No default value for required read group 'SM' is specified.")
      }
      rg$SM <- sm
    }

    # Validate
    if (!hasID(rg)) {
      id <- default$ID
      if (!is.null(id)) {
        msg <- "Using default ID that was specified"
      } else {
        id <- rg$SM
        msg <- "No default ID was specified, so will that of the SM read group"
      }
      msg <- sprintf("BWA 'samse/sampe' requires that the SAM read group has an ID. %s, i.e. ID=%s: %s", msg, id, paste(as.character(rg), collapse=", "))
      warning(msg)
      rg$ID <- id
    }

    rgArg <- asString(rg, fmtstr="%s:%s", collapse="\t")
    rgArg <- sprintf("@RG\t%s", rgArg)
    # Don't forget to put within quotation marks
    rgArg <- sprintf("\"%s\"", rgArg)

    rgArg <- list(r=rgArg)
  } # asBwaParameter()


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'verbose':
  verbose <- Arguments$getVerbose(verbose)
  if (verbose) {
    pushState(verbose)
    on.exit(popState(verbose))
  }


  verbose && enter(verbose, "BWA alignment")

  ds <- getInputDataSet(this)
  verbose && cat(verbose, "Input data set:")
  verbose && print(verbose, ds)

  if (force) {
    todo <- seq_along(ds)
  } else {
    todo <- findFilesTodo(this, verbose=less(verbose, 1))
    # Already done?
    if (length(todo) == 0L) {
      verbose && cat(verbose, "Already done. Skipping.")
      res <- getOutputDataSet(this, onMissing="error", verbose=less(verbose, 1))
      verbose && exit(verbose)
      return(invisible(res))
    }
  }

  is <- getIndexSet(this)
  verbose && cat(verbose, "Aligning using index set:")
  verbose && print(verbose, is)
  indexPrefix <- getIndexPrefix(is)

  rgSet <- this$.rgSet
  if (!is.null(rgSet)) {
    verbose && cat(verbose, "Assigning SAM read group:")
    verbose && print(verbose, rgSet)
    validate(rgSet)
  }

  paramsList <- getParameterSets(this)
  verbose && printf(verbose, "Additional BWA arguments: %s\n", getParametersAsString(this))

  verbose && cat(verbose, "Number of files: ", length(ds))

  method <- paramsList$method
  if (method != "backtracking") {
    throw("Non-supported BWA algorithm. Currently only 'backtracking' is supported: ", method)
  }

  isPaired <- isPaired(this)
  if (isPaired) {
    pairs <- getFilePairs(ds)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Apply aligner to each of the FASTQ files
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  isPaired <- isPaired(this)
  path <- getPath(this)

  res <- listenv()
  for (ii in todo) {
    df <- ds[[ii]]
    name <- getFullName(df)

    verbose && enter(verbose, sprintf("BWA alignment of sample #%d ('%s') of %d", ii, name, length(todo)))

    # The FASTQ to be aligned
    if (isPaired) {
      dfList <- list(R1=df, R2=getMateFile(df))
      verbose && cat(verbose, "Paired-end read data:")
      verbose && print(verbose, dfList)
    } else {
      dfList <- list(df)
      verbose && cat(verbose, "Single-end read data:")
      verbose && print(verbose, df)
    }

    pathnameFQs <- sapply(dfList, FUN=getPathname)
    verbose && cat(verbose, "FASTQ pathname(s): ", hpaste(pathnameFQs))

    # The SAIs to be generated
    fullnames <- sapply(dfList, FUN=getFullName, paired=FALSE)
    filenames <- sprintf("%s.sai", fullnames)
    pathnameSAIs <- sapply(filenames, FUN=Arguments$getWritablePathname, path=path)
    verbose && cat(verbose, "SAI pathname(s): ", hpaste(pathnameSAIs))

    # The SAM file to be generated
    fullname <- getFullName(dfList[[1L]])
    filename <- sprintf("%s.sam", fullname)
    pathnameSAM <- Arguments$getWritablePathname(filename, path=path)
    verbose && cat(verbose, "SAM pathname: ", pathnameSAM)

    # The BAM file to be generated
    filename <- sprintf("%s.bam", fullname)
    pathnameBAM <- Arguments$getWritablePathname(filename, path=path)
    verbose && cat(verbose, "BAM pathname: ", pathnameBAM)

    # Nothing to do?
    done <- (skip && isFile(pathnameBAM))
    if (done) {
      verbose && cat(verbose, "Already aligned. Skipping")
      res[[ii]] <- pathnameBAM
      verbose && exit(verbose)
      next
    }

    res[[ii]] %<-% {
      verbose && print(verbose, dfList)

      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      # (a) Generate SAI file via BWA aln
      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      verbose && enter(verbose, "Generating SAI")
      
      for (kk in seq_along(pathnameSAIs)) {
        pathnameSAI <- pathnameSAIs[kk]
        if (!isFile(pathnameSAI)) {
	  fq <- dfList[[kk]]
          pathnameFQ <- pathnameFQs[kk]
	  .stop_if_not(pathnameFQ == getPathname(fq))

          args <- list("aln", f=shQuote(pathnameSAI), shQuote(indexPrefix))
	  if (inherits(fq, "BamDataFile")) args <- c(args, "-b")
	  args <- c(args, shQuote(pathnameFQ))
	  args$verbose <- less(verbose, 10)
          args <- c(args, paramsList$aln)
          res <- do.call(systemBWA, args = args)
          verbose && cat(verbose, "System result code: ", res)
        }
      } # for (kk ...)

      # Sanity check
      Arguments$getReadablePathnames(pathnameSAIs)

      verbose && exit(verbose)

      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      # (b) Generate SAM via BWA samse/sampe
      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      if (!isFile(pathnameSAM)) {
        verbose && enter(verbose, "Generating SAM")
	
        # Extract sample-specific read group
        rgII <- getSamReadGroup(df)
        if (length(rgSet) > 0L) {
          rgII <- merge(rgSet, rgII)
        }
        verbose && cat(verbose, "Writing SAM Read Groups:")
        verbose && print(verbose, rgII)
        verbose && cat(verbose, "BWA 'samse' parameter:")
        fullname <- getFullName(df)
        rgArg <- asBwaParameter(rgII, default=list(ID=fullname, SM=fullname))
        verbose && print(verbose, rgArg)

        args <- list(pathnameSAI=pathnameSAIs, pathnameFQ=pathnameFQs,
                     indexPrefix=indexPrefix, pathnameD=pathnameSAM)
        args <- c(args, rgArg)
        args$verbose <- less(verbose, 5)

        if (isPaired) {
          res <- do.call(bwaSampe, args=args)
        } else {
          res <- do.call(bwaSamse, args=args)
        }
        verbose && cat(verbose, "System result code: ", res)

        # BWA 'samse/sampe' can generate empty SAM files
        if (isFile(pathnameSAM)) {
          if (file.info(pathnameSAM)$size == 0L) {
            verbose && cat(verbose, "Removing empty SAM file falsely created by BWA: ", pathnameSAM)
            file.remove(pathnameSAM)
          }
        }
	
        verbose && print(verbose, pathnameSAM)
        verbose && exit(verbose)
      }
      
      # Sanity check
      Arguments$getReadablePathname(pathnameSAM)


      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      # (c) Generates a (sorted and indexed) BAM file from SAM file
      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      if (!isFile(pathnameBAM)) {
        verbose && enter(verbose, "Generating BAM")
        sf <- SamDataFile(pathnameSAM)
        bf <- convertToBam(sf, verbose=less(verbose, 5))
        verbose && print(verbose, pathnameBAM)
        verbose && exit(verbose)
      }
      # Sanity check
      pathnameBAM <- Arguments$getReadablePathname(pathnameBAM)

      pathnameBAM
    } ## %<-%

    verbose && exit(verbose)
  } ## for (ii ...)

  ## Resolve futures
  res <- as.list(res)

  bams <- getOutputDataSet(this, onMissing="error", verbose=less(verbose, 1))

  verbose && exit(verbose)

  invisible(bams)
})
HenrikBengtsson/aroma.seq documentation built on Feb. 15, 2021, 2:21 a.m.