R/app-mapping.R

Defines functions getBismarkReference ezMethodBismark getBWAReference ezMethodBWATrimmomatic ezMethodBWA getSTARReference ezMethodSTAR getBowtieReference ezMethodBowtie getBowtie2Reference ezMethodBowtie2

Documented in getBowtie2Reference getBowtieReference getBWAReference getSTARReference

# Functional Genomics Center Zurich
# This code is distributed under the terms of the GNU General
# Public License Version 3, June 2007.
# The terms are available here: http://www.gnu.org/licenses/gpl.html
# www.fgcz.ch

ezMethodBowtie2 <- function(input = NA, output = NA, param = NA) {
  ref <- getBowtie2Reference(param)
  bamFile <- output$getColumn("BAM")
  sampleName <- sub(".bam", "", basename(bamFile))
  trimmedInput <- ezMethodFastpTrim(input = input, param = param)
  defOpt <- paste("-p", param$cores)
  readGroupOpt <- paste0(
    "--rg-id ", sampleName, " --rg SM:", sampleName,
    " --rg LB:RGLB_", sampleName,
    " --rg PL:illumina", " --rg PU:RGPU_", sampleName
  )
  cmd <- paste(
    "bowtie2", param$cmdOptions, defOpt, readGroupOpt,
    "-x", ref, if (param$paired) "-1", trimmedInput$getColumn("Read1"),
    if (param$paired) paste("-2", trimmedInput$getColumn("Read2")),
    "2>", paste0(sampleName, "_bowtie2.log"), "|",
    "samtools", "view -S -b -", " > bowtie.bam"
  )
  ezSystem(cmd)
  file.remove(trimmedInput$getColumn("Read1"))
  if (param$paired) {
    file.remove(trimmedInput$getColumn("Read2"))
  }

  if (!is.null(param$markDuplicates) && param$markDuplicates) {
    ezSortIndexBam("bowtie.bam", "sorted.bam",
      ram = param$ram, removeBam = TRUE,
      cores = param$cores
    )
    dupBam(inBam = "sorted.bam", outBam = basename(bamFile), operation = "mark", ram = param$ram)
    file.remove("sorted.bam")
  } else {
    ezSortIndexBam("bowtie.bam", basename(bamFile),
      ram = param$ram,
      removeBam = TRUE, cores = param$cores
    )
  }

  ## write an igv link
  if (param$writeIgvLink) {
    if ("IGV" %in% output$colNames) {
      writeIgvHtml(param, output)
    }
  }
  
  if (param$generateBigWig) {
    bam2bw(file = basename(bamFile), paired = param$paired, method = "Bioconductor", cores = param$cores)
  }
  return("Success")
}

##' @template getref-template
##' @templateVar methodName Bowtie2
##' @param param a list of parameters:
##' \itemize{
##'   \item{ezRef@@refIndex}{ a character specifying the location of the index that is used in the alignment.}
##'   \item{ezRef@@refBuildDir}{ a character specifying the directory of the reference build.}
##'   \item{ezRef@@refFastaFile}{ a character specifying the file path to the fasta file.}
##' }
getBowtie2Reference <- function(param) {
  refBase <- ifelse(param$ezRef["refIndex"] == "",
    file.path(param$ezRef["refBuildDir"], "Sequence/BOWTIE2Index/genome"),
    param$ezRef["refIndex"]
  )
  ## check the ref
  lockFile <- file.path(dirname(refBase), "lock")
  if (!file.exists(dirname(refBase))) {
    ## no lock file and no refFiles, so we build the reference
    dir.create(dirname(refBase))
    ezWrite(Sys.info(), con = lockFile)
    wd <- getwd()
    setwd(dirname(refBase))

    fastaFile <- param$ezRef["refFastaFile"]
    ezSystem(paste("ln -s", fastaFile, "."))
    cmd <- paste("bowtie2-build", "--threads", as.numeric(param$cores), "-f", basename(fastaFile), basename(refBase))
    ezSystem(cmd)
    # ezWriteElapsed(job, "done")
    setwd(wd)
    file.remove(lockFile)
  }
  stopifnot(file.exists(dirname(refBase)))
  i <- 0
  while (file.exists(lockFile) && i < INDEX_BUILD_TIMEOUT) {
    ### somebody else builds and we wait
    Sys.sleep(60)
    i <- i + 1
  }
  if (file.exists(lockFile)) {
    stop(paste("reference building still in progress after", INDEX_BUILD_TIMEOUT, "min"))
  }
  ## there is no lock file
  refFiles <- list.files(dirname(refBase), basename(refBase))
  if (length(refFiles) < 3) {
    ## we assume the index is built and complete
    stop(paste("index not available: ", refBase))
  }
  return(refBase)
}

##' @template app-template
##' @templateVar method ezMethodBowtie2(input=NA, output=NA, param=NA)
##' @description Use this reference class to run
##' @seealso \code{\link{getBowtie2Reference}}
##' @seealso \code{\link{ezMethodFastpTrim}}
EzAppBowtie2 <-
  setRefClass("EzAppBowtie2",
    contains = "EzApp",
    methods = list(
      initialize = function() {
        "Initializes the application using its specific defaults."
        runMethod <<- ezMethodBowtie2
        name <<- "EzAppBowtie2"
        appDefaults <<- rbind(
          writeIgvSessionLink = ezFrame(Type = "logical", DefaultValue = "TRUE", Description = "should an IGV link be generated"),
          markDuplicates = ezFrame(Type = "logical", DefaultValue = "TRUE", Description = "should duplicates be marked"),
          generateBigWig = ezFrame(Type = "logical", DefaultValue = "FALSE", Description = "should a bigwig file be generated")
        )
      }
    )
  )

ezMethodBowtie <- function(input = NA, output = NA, param = NA) {
  ref <- getBowtieReference(param)
  bamFile <- output$getColumn("BAM")
  trimmedInput <- ezMethodFastpTrim(input = input, param = param)
  defOpt <- paste("--chunkmbs 256", "--sam", "-p", param$cores)
  cmd <- paste(
    "bowtie", param$cmdOptions, defOpt,
    ref, if (param$paired) "-1", trimmedInput$getColumn("Read1"),
    if (param$paired) paste("-2", trimmedInput$getColumn("Read2")),
    "2> bowtie.log", "|", "samtools", "view -S -b -", " > bowtie.bam"
  )
  ezSystem(cmd)
  ezSortIndexBam("bowtie.bam", basename(bamFile),
    ram = param$ram, removeBam = TRUE,
    cores = param$cores
  )

  ## write an igv link
  if (param$writeIgvLink) {
    if ("IGV" %in% output$colNames) {
      writeIgvHtml(param, output)
    }
  }
  return("Success")
}

##' @template getref-template
##' @templateVar methodName Bowtie
##' @inheritParams getBowtie2Reference
getBowtieReference <- function(param) {
  refBase <- ifelse(param$ezRef["refIndex"] == "",
    file.path(param$ezRef["refBuildDir"], "Sequence/BOWTIEIndex/genome"),
    param$ezRef["refIndex"]
  )
  ## check the ref
  lockFile <- file.path(dirname(refBase), "lock")
  if (!file.exists(dirname(refBase))) {
    ## no lock file and no refFiles, so we build the reference
    dir.create(dirname(refBase))
    ezWrite(Sys.info(), con = lockFile)
    wd <- getwd()
    setwd(dirname(refBase))

    fastaFile <- param$ezRef["refFastaFile"]
    # job = ezJobStart("bowtie index")
    ezSystem(paste("ln -s", fastaFile, "."))
    buildOpts <- ""
    if (any(grepl("--threads", system("bowtie-build --help", intern = T)))) {
      buildOpts <- paste("--threads", param$cores)
    }
    cmd <- paste("bowtie-build", buildOpts, "-f", basename(fastaFile), basename(refBase))
    ezSystem(cmd)
    # ezWriteElapsed(job, "done")
    setwd(wd)
    file.remove(lockFile)
  }
  stopifnot(file.exists(dirname(refBase)))
  i <- 0
  while (file.exists(lockFile) && i < INDEX_BUILD_TIMEOUT) {
    ### somebody else builds and we wait
    Sys.sleep(60)
    i <- i + 1
  }
  if (file.exists(lockFile)) {
    stop(paste("reference building still in progress after", INDEX_BUILD_TIMEOUT, "min"))
  }
  ## there is no lock file
  refFiles <- list.files(dirname(refBase), basename(refBase))
  if (length(refFiles) < 3) {
    ## we assume the index is built and complete
    stop(paste("index not available: ", refBase))
  }
  return(refBase)
}

##' @template app-template
##' @templateVar method ezMethodBowtie(input=NA, output=NA, param=NA)
##' @description Use this reference class to run
##' @seealso \code{\link{getBowtieReference}}
##' @seealso \code{\link{ezMethodFastpTrim}}
EzAppBowtie <-
  setRefClass("EzAppBowtie",
    contains = "EzApp",
    methods = list(
      initialize = function() {
        "Initializes the application using its specific defaults."
        runMethod <<- ezMethodBowtie
        name <<- "EzAppBowtie"
        appDefaults <<- rbind(writeIgvSessionLink = ezFrame(Type = "logical", DefaultValue = "TRUE", Description = "should an IGV link be generated"))
      }
    )
  )

ezMethodSTAR <- function(input = NA, output = NA, param = NA) {
  refDir <- getSTARReference(param)
  bamFile <- output$getColumn("BAM")
  trimmedInput <- ezMethodFastpTrim(input = input, param = param)

  if (!str_detect(param$cmdOptions, "outSAMattributes")) {
    param$cmdOptions <- str_c(param$cmdOptions, "--outSAMattributes All",
      sep = " "
    )
  }

  ## we use the --genomeFastaFiles option only if we have spliced sequences; 
  ## if we have .fa and .gtf we build a separate index
  if (ezIsSpecified(param$secondRef)) {
    stopifnot(file.exists(param$secondRef))
    secondGtf <- sub(".fa", ".gtf", param$secondRef)
    if (!file.exists(secondGtf)){
      genomeFastaFileOption <- str_c("--genomeFastaFiles", param$secondRef, sep = " ")
    }
  } else {
    genomeFastaFileOption <- ""
  }
  

  cmd <- str_c(
    "STAR", "--genomeDir", refDir,
    "--readFilesIn", trimmedInput$getColumn("Read1"),
    if (param$paired) trimmedInput$getColumn("Read2"),
    "--twopassMode", if_else(param$twopassMode, "Basic", "None"),
    "--runThreadN", param$cores, param$cmdOptions,
    genomeFastaFileOption,
    "--outStd BAM_Unsorted --outSAMtype BAM Unsorted",
    "--outSAMattrRGline", str_c("ID:", trimmedInput$getNames()), str_c("SM:", trimmedInput$getNames()),
    if_else(str_detect(trimmedInput$getColumn("Read1"), "\\.gz$"), "--readFilesCommand zcat", ""),
    ">  Aligned.out.bam",
    sep = " "
  )
  ezSystem(cmd)

  nSortThreads <- min(param$cores, 8)
  ## if the index is loaded in shared memory we have to use only 10% of the scheduled RAM
  if (str_detect(param$cmdOptions, "--genomeLoad Load")) {
    sortRam <- param$ram / 10
  } else {
    sortRam <- param$ram
  }

  file.rename(
    from = "Log.final.out",
    to = basename(output$getColumn("STARLog"))
  )

  if (!is.null(param$markDuplicates) && param$markDuplicates) {
    ezSortIndexBam("Aligned.out.bam", "sorted.bam",
      ram = sortRam, removeBam = TRUE,
      cores = nSortThreads
    )
    dupBam(inBam = "sorted.bam", outBam = basename(bamFile), operation = "mark", ram = param$ram)
    file.remove("sorted.bam")
  } else {
    ezSortIndexBam("Aligned.out.bam", basename(bamFile),
      ram = sortRam,
      removeBam = TRUE, cores = nSortThreads
    )
  }

  if (param$getJunctions) {
    file.rename(
      from = "SJ.out.tab",
      to = basename(output$getColumn("Junctions"))
    )
    file.rename(
      from = "Chimeric.out.junction",
      to = basename(output$getColumn("Chimerics"))
    )
  }

  ## check the strandedness
  ezSystem(str_c(
    "infer_experiment.py", "-r", getReferenceFeaturesBed(param),
    "-i", basename(bamFile), "-s 1000000",
    sep = " "
  ),
  stopOnFailure = FALSE
  )

  ## write an igv link
  if (param$writeIgvLink && "IGV" %in% output$colNames) {
    writeIgvHtml(param, output)
  }
  return("Success")
}

##' @template getref-template
##' @templateVar methodName STAR
##' @param param a list of parameters:
##' \itemize{
##'   \item{ezRef@@refIndex}{ a character specifying the location of the index that is used in the alignment.}
##'   \item{ezRef@@refFeatureFile}{ a character specifying the file path to the annotation feature file (.gtf).}
##'   \item{ram}{ an integer specifying how many gigabytes of RAM to use.}
##'   \item{ezRef@@refFastaFile}{ a character specifying the file path to the fasta file.}
##' }
getSTARReference <- function(param) {
  if (ezIsSpecified(param$ezRef["refIndex"])) {
    refDir <- param$ezRef["refIndex"]
    stopifnot(file.exists(file.path(refDir, "SAindex")))
    return(refDir)
  }
  if (!ezIsSpecified(param$ezRef["refFeatureFile"])) {
    stop("refFeatureFile not defined")
  }

  ## default values  
  gtfFile <- param$ezRef["refFeatureFile"]
  genomeFastaFiles <- param$ezRef["refFastaFile"]
  refDir <- sub(".gtf$", "_STARIndex", param$ezRef["refFeatureFile"])
  ## update if second .fa and .gtf exists
  if (ezIsSpecified(param$secondRef)) {
    stopifnot(file.exists(param$secondRef))
    secondGtf <- sub(".fa", ".gtf", param$secondRef)
    if (file.exists(secondGtf)) {
      gtfFile <- tempfile(
        pattern = "genes", tmpdir = getwd(),
        fileext = ".gtf"
      )
      gtf1 <- ezRead.table(param$ezRef["refFeatureFile"], quote="", sep="\t", comment.char = "#", row.names = NULL, header = FALSE)
      gtf2 <- ezRead.table(secondGtf, quote="", sep="\t", comment.char = "#", row.names = NULL, header = FALSE)
      ## if the same chromosome name shows up in both GTF files, we can't concatenate them
      stopifnot(length(intersect(gtf1$V1, gtf2$V1)) == 0) ## first column holds the seqid
      ezSystem(paste("cp", param$ezRef["refFeatureFile"], gtfFile))
      ezSystem(paste("cat", secondGtf, ">>", gtfFile))
      genomeFastaFiles <- paste(param$ezRef["refFastaFile"], param$secondRef)
      refDir <- file.path(getwd(), "Custom_STARIndex")
    }
  }
  
  ## random sleep to avoid parallel ref building
  Sys.sleep(runif(1, max = 20))
  
  lockFile <- paste0(refDir, ".lock")
  i <- 0
  while (file.exists(lockFile) && i < INDEX_BUILD_TIMEOUT) {
    ### somebody else builds and we wait
    Sys.sleep(60)
    i <- i + 1
  }
  if (i >= INDEX_BUILD_TIMEOUT) {
    stop(paste("reference building still in progress after", INDEX_BUILD_TIMEOUT, "min"))
  }
  ## there is no lock file
  if (file.exists(file.path(refDir, "SAindex"))) {
    ## we assume the index is built and complete
    return(refDir)
  }

  ## no lock file and no refFiles, so we build the reference
  ezWrite(Sys.info(), con = lockFile)
  dir.create(refDir)

  fai <- ezRead.table(paste0(param$ezRef["refFastaFile"], ".fai"), header = FALSE)
  colnames(fai) <- c("LENGTH", "OFFSET", "LINEBASES", "LINEWDITH")
  if (nrow(fai) > 50) {
    binOpt <- "--genomeChrBinNbits 16"
  } else {
    binOpt <- ""
  }

  genomeLength <- sum(fai$LENGTH)
  readLength <- 150 ## assumption
  indexNBasesOpt <- paste("--genomeSAindexNbases", min(13, floor(log2(genomeLength) / 2 - 1)))
  if (binOpt == "") {
    genomeChrBinNbits <- paste("--genomeChrBinNbits", floor(min(
      18,
      log2(max(genomeLength / nrow(fai), readLength))
    )))
  } else {
    genomeChrBinNbits <- ""
  }
  
  job <- ezJobStart("STAR genome build")
  cmd <- paste(
    "STAR", "--runMode genomeGenerate --genomeDir", refDir, binOpt, indexNBasesOpt, genomeChrBinNbits,
    "--limitGenomeGenerateRAM", format(param$ram * 1e9, scientific = FALSE),
    "--genomeFastaFiles", genomeFastaFiles,
    "--sjdbGTFfile", gtfFile, "--sjdbOverhang 150", "--runThreadN", param$cores, "--genomeSAsparseD 2"
  )
  ezSystem(cmd)
  file.remove(lockFile)
  ezWriteElapsed(job, "done")
  file.remove("Log.out")
  return(refDir)
}

##' @template app-template
##' @templateVar method ezMethodSTAR(input=NA, output=NA, param=NA)
##' @description Use this reference class to run
##' @seealso \code{\link{getSTARReference}}
##' @seealso \code{\link{ezMethodFastpTrim}}
EzAppSTAR <-
  setRefClass("EzAppSTAR",
    contains = "EzApp",
    methods = list(
      initialize = function() {
        "Initializes the application using its specific defaults."
        runMethod <<- ezMethodSTAR
        name <<- "EzAppSTAR"
        appDefaults <<- rbind(
          getJunctions = ezFrame(Type = "logical", DefaultValue = "FALSE", Description = "should junctions be returned"),
          writeIgvLink = ezFrame(Type = "logical", DefaultValue = "TRUE", Description = "should an IGV link be generated"),
          markDuplicates = ezFrame(Type = "logical", DefaultValue = "TRUE", Description = "should duplicates be marked with picard"),
          twopassMode = ezFrame(Type = "logical", DefaultValue = "TRUE", Description = "1-pass mapping or basic 2-pass mapping")
        )
      }
    )
  )

ezMethodBWA <- function(input = NA, output = NA, param = NA) {
  refIdx <- getBWAReference(param)
  bamFile <- output$getColumn("BAM")
  trimmedInput <- ezMethodFastpTrim(input = input, param = param)
  if (param$algorithm == "aln") {
    cmd <- paste(
      "bwa", param$algorithm, param$cmdOptions, "-t", param$cores,
      refIdx, trimmedInput$getColumn("Read1"), ">", "read1.sai", "2> bwa.log"
    )
    ezSystem(cmd)
    if (param$paired) {
      cmd <- paste(
        "bwa", param$algorithm, param$cmdOptions, "-t", param$cores,
        refIdx, trimmedInput$getColumn("Read2"), ">", "read2.sai", "2> bwa.log"
      )
      ezSystem(cmd)
      cmd <- paste(
        "bwa", "sampe", refIdx, "read1.sai", "read2.sai",
        trimmedInput$getColumn("Read1"), trimmedInput$getColumn("Read2"),
        "2> bwa.log", "|",
        "samtools", "view -S -b -", " > aligned.bam"
      )
      ezSystem(cmd)
    } else {
      cmd <- paste(
        "bwa", "samse", refIdx, "read1.sai",
        trimmedInput$getColumn("Read1"), "2> bwa.log", "|",
        "samtools", "view -S -b -", " > aligned.bam"
      )
      ezSystem(cmd)
    }
  } else {
    if (param$algorithm == "bwasw" && param$paired) {
      stop("paired is not supported for algorithm bwasw")
    }
    cmd <- paste(
      "bwa", param$algorithm, param$cmdOptions, "-t", param$cores,
      refIdx, trimmedInput$getColumn("Read1"),
      if (param$paired) trimmedInput$getColumn("Read2"),
      "2> bwa.log", "|", "samtools", "view -S -b -", " > aligned.bam"
    )
    ezSystem(cmd)
  }
  file.remove(trimmedInput$getColumn("Read1"))
  if (param$paired) {
    file.remove(trimmedInput$getColumn("Read2"))
  }

  if (!is.null(param$markDuplicates) && param$markDuplicates) {
    ezSortIndexBam("aligned.bam", "sorted.bam",
      ram = param$ram, removeBam = TRUE,
      cores = param$cores
    )
    dupBam(inBam = "sorted.bam", outBam = basename(bamFile), operation = "mark", ram = param$ram)
    file.remove("sorted.bam")
  } else {
    ezSortIndexBam("aligned.bam", basename(bamFile),
      ram = param$ram,
      removeBam = TRUE, cores = param$cores
    )
  }


  ## write an igv link
  if (param$writeIgvLink) {
    if ("IGV" %in% output$colNames) {
      writeIgvHtml(param, output)
    }
  }
  return("Success")
}

ezMethodBWATrimmomatic <- function(input = NA, output = NA, param = NA) { # Perform BWA using fastp for read pre-processing

  refIdx <- getBWAReference(param)
  bamFile <- output$getColumn("BAM")
  trimmedInput <- ezMethodTrim(input = input, param = param)
  if (param$algorithm == "aln") {
    cmd <- paste(
      "bwa", param$algorithm, param$cmdOptions, "-t", param$cores,
      refIdx, trimmedInput$getColumn("Read1"), ">", "read1.sai", "2> bwa.log"
    )
    ezSystem(cmd)
    if (param$paired) {
      cmd <- paste(
        "bwa", param$algorithm, param$cmdOptions, "-t", param$cores,
        refIdx, trimmedInput$getColumn("Read2"), ">", "read2.sai", "2> bwa.log"
      )
      ezSystem(cmd)
      cmd <- paste(
        "bwa", "sampe", refIdx, "read1.sai", "read2.sai",
        trimmedInput$getColumn("Read1"), trimmedInput$getColumn("Read2"),
        "2> bwa.log", "|",
        "samtools", "view -S -b -", " > aligned.bam"
      )
      ezSystem(cmd)
    } else {
      cmd <- paste(
        "bwa", "samse", refIdx, "read1.sai",
        trimmedInput$getColumn("Read1"), "2> bwa.log", "|",
        "samtools", "view -S -b -", " > aligned.bam"
      )
      ezSystem(cmd)
    }
  } else {
    if (param$algorithm == "bwasw" && param$paired) {
      stop("paired is not supported for algorithm bwasw")
    }
    cmd <- paste(
      "bwa", param$algorithm, param$cmdOptions, "-t", param$cores,
      refIdx, trimmedInput$getColumn("Read1"),
      if (param$paired) trimmedInput$getColumn("Read2"),
      "2> bwa.log", "|", "samtools", "view -S -b -", " > aligned.bam"
    )
    ezSystem(cmd)
  }
  file.remove(trimmedInput$getColumn("Read1"))
  if (param$paired) {
    file.remove(trimmedInput$getColumn("Read2"))
  }

  ezSortIndexBam("aligned.bam", basename(bamFile),
    ram = param$ram, removeBam = TRUE,
    cores = param$cores
  )

  ## write an igv link
  if (param$writeIgvLink) {
    if ("IGV" %in% output$colNames) {
      writeIgvHtml(param, output)
    }
  }
  return("Success")
}

##' @template getref-template
##' @templateVar methodName BWA
##' @inheritParams getBowtie2Reference
getBWAReference <- function(param) {
  refPath <- ifelse(param$ezRef["refIndex"] == "",
    file.path(param$ezRef["refBuildDir"], "Sequence/BWAIndex/genome.fa"),
    param$ezRef["refIndex"]
  )
  ## check the ref
  lockFile <- file.path(dirname(refPath), "lock")
  i <- 0
  while (file.exists(lockFile) && i < INDEX_BUILD_TIMEOUT) {
    ### somebody else builds and we wait
    Sys.sleep(60)
    i <- i + 1
  }
  if (file.exists(lockFile)) {
    stop(paste("reference building still in progress after", INDEX_BUILD_TIMEOUT, "min"))
  }
  if (!file.exists(dirname(refPath))) {
    ## no lock file and no refFiles, so we build the reference
    dir.create(dirname(refPath))
    ezWrite(Sys.info(), con = lockFile)
    wd <- getwd()
    setwd(dirname(refPath))

    fastaFile <- param$ezRef["refFastaFile"]
    file.symlink(from = fastaFile, to = ".")
    cmd <- paste("bwa", "index", "-a", "bwtsw", basename(fastaFile))
    ezSystem(cmd)
    setwd(wd)
    file.remove(lockFile)
  }
  stopifnot(file.exists(dirname(refPath)))
  if (!file.exists(paste0(refPath, ".sa"))) {
    stop(paste("sa index not found for:", refPath))
  }
  return(refPath)
}

##' @template app-template
##' @templateVar method ezMethodBWA(input=NA, output=NA, param=NA)
##' @description Use this reference class to run
##' @seealso \code{\link{getBWAReference}}
##' @seealso \code{\link{ezMethodFastpTrim}}
EzAppBWA <-
  setRefClass("EzAppBWA",
    contains = "EzApp",
    methods = list(
      initialize = function() {
        "Initializes the application using its specific defaults."
        runMethod <<- ezMethodBWA
        name <<- "EzAppBWA"
        appDefaults <<- rbind(
          algorithm = ezFrame(Type = "character", DefaultValue = "mem", Description = "bwa's alignment algorithm. One of aln, bwasw, mem."),
          writeIgvSessionLink = ezFrame(Type = "logical", DefaultValue = "TRUE", Description = "should an IGV link be generated"),
          markDuplicates = ezFrame(Type = "logical", DefaultValue = "TRUE", Description = "should duplicates be marked")
        )
      }
    )
  )

EzAppBWATrimmomatic <-
  setRefClass("EzAppBWATrimmomatic",
    contains = "EzApp",
    methods = list(
      initialize = function() {
        "Initializes the application using its specific defaults."
        runMethod <<- ezMethodBWATrimmomatic
        name <<- "EzAppBWATrimmomatic"
        appDefaults <<- rbind(
          algorithm = ezFrame(Type = "character", DefaultValue = "mem", Description = "bwa's alignment algorithm. One of aln, bwasw, mem."),
          writeIgvSessionLink = ezFrame(Type = "logical", DefaultValue = "TRUE", Description = "should an IGV link be generated")
        )
      }
    )
  )






ezMethodBismark <- function(input = NA, output = NA, param = NA) {

  ref <- getBismarkReference(param)
  bamFile <- output$getColumn("BAM")
  trimmedInput <- ezMethodFastpTrim(input = input, param = param)
  defOpt <- paste("-p", max(2, param$cores / 2))
  if (param$paired) {
    cmd <- paste(
      "bismark", param$cmdOptions,
      "--path_to_bowtie", paste0("$Bowtie2", "/bin"), defOpt, ref,
      "-1", trimmedInput$getColumn("Read1"),
      if (param$paired) paste("-2", trimmedInput$getColumn("Read2")),
      "2> bismark.log"
    )
  } else {
    cmd <- paste(
      "bismark", param$cmdOptions,
      "--path_to_bowtie", paste0("$Bowtie2", "/bin"), defOpt, ref,
      trimmedInput$getColumn("Read1"),
      "2> bismark.log"
    )
  }
  ezSystem(cmd)
  bamFileNameBismark <- list.files(".", pattern = "bam$")
  reportFileNameBismark <- list.files(".", pattern = "report.txt$")
  reportFile <- paste0(names(bamFile), ".report.txt")
  ezSystem(paste("mv ", reportFileNameBismark, reportFile))
  if (param$deduplicate) {
    cmd <- paste("deduplicate_bismark", ifelse(param$paired, "-p", "-s"), bamFileNameBismark)
    ezSystem(cmd)
    bamFileNameBismark <- list.files(".", pattern = "deduplicated.bam$")
    deduplicationReportFile <- list.files(".", pattern = "deduplication_report.txt$")
    ezSystem(paste("cat ", deduplicationReportFile, ">>", reportFile))
  }

  cmd <- paste("bismark_methylation_extractor", ifelse(param$paired, "-p", "-s"), "--comprehensive", bamFileNameBismark)
  ezSystem(cmd)
  cmd <- paste("samtools", "view -S -b ", bamFileNameBismark, " > bismark.bam")
  ezSystem(cmd)
  ezSortIndexBam("bismark.bam", basename(bamFile),
    ram = param$ram, removeBam = TRUE,
    cores = param$cores
  )
  
  if (param$generateBigWig) {
      destination = sub("\\.bam$", "_Cov.bw", basename(bamFile), ignore.case = TRUE)
      bam2bw(file = basename(bamFile), destination = destination, paired = param$paired, method = "Bioconductor", cores = param$cores)
  }
  
  mBiasImages <- list.files(".", pattern = "png$")
  if ((length(mBiasImages)) > 0) {
    for (i in 1:length(mBiasImages)) {
      ezSystem(paste("mv ", mBiasImages[i], paste0(names(bamFile), ".M-bias_R", i, ".png")))
    }
  } else {
    ezSystem(paste("touch", paste0(names(bamFile), ".M-bias_R1.png")))
    ezSystem(paste("touch", paste0(names(bamFile), ".M-bias_R2.png")))
  }

  CpGFile <- list.files(".", pattern = "^CpG.*txt$")
  cmd <- paste("bismark2bedGraph --scaffolds", CpGFile, "-o", names(bamFile))
  ezSystem(cmd)
  ezSystem(paste("mv ", CpGFile, paste0(names(bamFile), ".CpG_context.txt")))

  splittingReportFile <- list.files(".", pattern = "splitting_report.txt$")
  ezSystem(paste("cat ", splittingReportFile, ">>", reportFile))
  ## write an igv link
  # if (param$writeIgvSessionLink){
  #  writeIgvSession(genome = getIgvGenome(param), refBuild=param$ezRef["refBuild"], file=basename(output$getColumn("IGV Session")),
  #                  bamUrls = paste(PROJECT_BASE_URL, bamFile, sep="/") )
  #  writeIgvJnlp(jnlpFile=basename(output$getColumn("IGV Starter")), projectId = sub("\\/.*", "", bamFile),
  #               sessionUrl = paste(PROJECT_BASE_URL, output$getColumn("IGV Session"), sep="/"))
  # }
  return("Success")
}


##' @template getref-template
##' @templateVar methodName Bismark
##' @param param a list of parameters:
##' \itemize{
##'   \item{ezRef@@refIndex}{ a character specifying the location of the index that is used in the alignment.}
##'   \item{ezRef@@refBuildDir}{ a character specifying the directory of the reference build.}
##'   \item{ezRef@@refFastaFile}{ a character specifying the file path to the fasta file.}
##' }
getBismarkReference <- function(param) {
  refBase <- ifelse(param$ezRef["refIndex"] == "",
    file.path(param$ezRef["refBuildDir"], "Sequence/WholeGenomeFasta/Bisulfite_Genome/CT_conversion"),
    param$ezRef["refIndex"]
  )
  ## check the ref
  lockFile <- file.path(dirname(refBase), "lock")
  if (!file.exists(dirname(refBase))) {
    ## no lock file and no refFiles, so we build the reference
    dir.create(dirname(refBase))
    ezWrite(Sys.info(), con = lockFile)
    wd <- getwd()
    cmd <- paste(
      "bismark_genome_preparation", dirname(param$ezRef["refFastaFile"]),
      "2> bismarkGenomePrep.log"
    )
    ezSystem(cmd)

    # ezWriteElapsed(job, "done")
    setwd(wd)
    file.remove(lockFile)
  }
  stopifnot(file.exists(dirname(refBase)))
  i <- 0
  while (file.exists(lockFile) && i < INDEX_BUILD_TIMEOUT) {
    ### somebody else builds and we wait
    Sys.sleep(60)
    i <- i + 1
  }
  if (file.exists(lockFile)) {
    stop(paste("reference building still in progress after", INDEX_BUILD_TIMEOUT, "min"))
  }
  ## there is no lock file
  refFiles <- list.files(dirname(refBase), basename(refBase))
  if (length(refFiles) < 1) {
    ## we assume the index is built and complete
    stop(paste("index not available: ", refBase))
  }
  return(dirname(param$ezRef@refFastaFile))
}



##' @template app-template
##' @templateVar method ezMethodBismark(input=NA, output=NA, param=NA)
##' @description Use this reference class to run
EzAppBismark <-
  setRefClass("EzAppBismark",
    contains = "EzApp",
    methods = list(
      initialize = function() {
        "Initializes the application using its specific defaults."
        runMethod <<- ezMethodBismark
        name <<- "EzAppBismark"
        appDefaults <<- rbind(
            generateBigWig = ezFrame(Type = "logical", DefaultValue = "FALSE", Description = "should a bigwig coverage file be generated")
        )
      }
    )
  )
uzh/ezRun documentation built on April 29, 2024, 10:31 a.m.