#' GATK binding as an R6 class
#'
#' @description
#' A GATK binding in an object-oriented form as a R6 class.
#'
#' @details
#' The GATK is predominantly used in a pipeline by chaining individual GATK function calls
#' to clean a bam file and prepare it for further analysis. This GATK class utilize
#' the R6 method chaining to facilitate this usage.
#'
#' @param output **optional** a path for an output bam file
#'
#' @examples
#' \dontrun{
#' bam = GATKR6$new("foo.bam", "bar.fas", "baz.vcf")
#' bam$SortSam()$SplitNCigarReads()$Recalibrate()
#' bam$bam
#' }
#'
#' @seealso [GATK] a group of functions that partially bind to the GATK toolkit,
#' [gatk_prepare] and [gatk_snv] for a convenience functions utilizing the GATK calls
#' @export
GATKR6 = R6::R6Class("GATK",
public = list(
#' @field bam a path to a current bam file. Modified during each method call to point to
#' the result of the last GATK action.
bam = NULL,
#' @field original a path to the original (unmodified) bam file
original = NULL,
#' @field reference a path to reference genome fasta (`.fas`) file
reference = NULL,
#' @field vcf a path to the Variant Coding File (VCF)
vcf = NULL,
#' @field outdir an output directory for individual methods
outdir = NULL,
#' @field remake whether to remake already existing files
remake = NULL,
#' @field history a method call history
history = list(),
#' @description
#' Create a new GATK object
#'
#' @param bam an input bam file
#' @param outdir **optional** an output directory
#' @template reference
#' @template vcf
#' @template remake
#'
#' @return a new `GATK` object
initialize = function(bam, reference, vcf, outdir=NULL, remake=FALSE){
stopifnot(is.character(bam), length(bam) == 1)
stopifnot(is.character(reference), length(reference) == 1)
stopifnot(is.character(vcf), length(vcf) == 1)
if(is_nn(outdir))
outdir = dirname(bam)
self$bam = bam
self$original = bam
self$reference = reference
self$vcf = vcf
self$outdir = outdir
self$remake = remake
# index vcf file
gatk_IndexFeatureFile(self$vcf, remake=self$remake)
mkdir(outdir)
invisible(self)
},
#' @description
#' Sort the SAM/BAM file
#'
SortSam = function(output=NULL){
if(is.null(output)) output = private$get_outfile("sorted")
gatk_SortSam(self$bam, output, self$remake)
private$add_to_history()
self$bam = output
invisible(self)
},
#' @description
#' Mark duplicates
#'
MarkDuplicates = function(output=NULL){
if(is.null(output)) output = private$get_outfile("dedup")
gatk_MarkDuplicates(self$bam, output, self$remake)
private$add_to_history()
self$bam = output
invisible(self)
},
#' @description
#' Split the reads around the N in their CIGAR string.
#'
SplitNCigarReads = function(output=NULL){
if(is.null(output)) output = private$get_outfile("split")
gatk_SplitNCigarReads(self$bam, output, self$reference, self$remake)
private$add_to_history()
self$bam = output
invisible(self)
},
#' @description
#' Recalibrate the base quality score
#'
#' @param table **optional** an output path for a new base quality scores
Recalibrate = function(output=NULL, table=NULL){
if(is.null(output)) output = private$get_outfile("recal")
if(is.null(table)) table =
file.path(self$outdir, paste0(basename(self$bam), ".recal.txt"))
gatk_BaseRecalibrator(self$bam, self$reference, self$vcf, table, self$remake)
gatk_ApplyBQSR(self$bam, self$reference, table, output, self$remake)
private$add_to_history("table"=table)
self$bam = output
invisible(self)
},
#' @description
#' Filter the sam/bam file according to tag and its values.
#'
#' @param tag a name of the tag that will be used for filtering reads
#' @param values one or multiple values of a chosen tag
FilterSamReadsTag = function(tag, values, output=NULL){
if(is.null(output)) output = private$get_outfile("filtered")
gatk_FilterSamReadsTag(self$bam, output, tag, values, self$remake)
private$add_to_history("tag"=tag, "values"=values)
self$bam = output
invisible(self)
},
#' @description
#' Clean all intermediate files
#'
#' Intermediate files are all files except the original bam file used during
#' the initialization of the `GATKR6` object and the current bam file.
clean = function(){
# Select only these types of files
file_types = c("input", "output", "table")
files = unlist(lapply(self$history, "[", file_types))
# Do not remove the original or the last bam file
files = files[!files %in% c(self$original, self$bam)]
# Also remove .bai files (.bam.bai)
bai = files[endsWith(files, ".bam")]
bai = tools::file_path_sans_ext(bai)
bai = paste0(bai, ".bai")
files = c(files, bai)
# Select only files that exists
files = files[file.exists(files)]
# Remove files
file.remove(files)
# Add this to history
self$history = c(self$history, list("call"=sys.call()))
invisible(self)
}
),
private = list(
get_outfile = function(add){
ext = tools::file_ext(self$bam)
if(tolower(ext) %in% c("sam", "bam")){
outfile = paste(corename(self$bam), add, ext, sep=".")
} else {
outfile = paste(basename(self$bam), add, sep=".")
}
file.path(self$outdir, outfile)
},
add_to_history = function(...){
call = sys.call(-1)
env = parent.frame()
history = list(
"call" = call,
"input" = parent.env(env)$self$bam,
"output" = env$output
)
self$history = c(self$history, list(c(history, list(...))))
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.