R/coverage.R

Defines functions bin_coverage

Documented in bin_coverage

# Copyright 2018 Christian Diener <mail[at]cdiener.com>
#
# Apache license 2.0. See LICENSE for more information.
#
# Tools for getting reference coverage from bam files.

#' Build a configuration for the coverage workflow.
#'
#' This can be saved and passed on to others to ensure reproducibility.
#'
#' @param ... Any arguments are used to update the default configuration. See
#'  the example below. Optional.
#' @return A list with the parameters used in the long read alignment
#'  workflow.
#' @export
#' @examples
#'  config <- config_coverage(bin_width = 10000)
config_coverage <- config_builder(list(
    bin_width = 100,
    threads = getOption("mc.cores", 1),
    min_coverage = 1,
    min_length = 2000
))


#' Reads the coverage from a set of alignments and calculates binned
#' averages.
#'
#' @param object An experiment data table as returned by any alignment method
#'  like \code{\link{align_short_reads}} or \code{\link{align_long_reads}}.
#' @param ... A configuration as generated by \code{\link{config_coverage}}.
#' @return A list with the
#' @export
#' @examples
#'  NULL
#' @importFrom GenomicAlignments coverage
#' @importFrom zoo rollapply
bin_coverage <- function(object, ...) {
    config <- config_parser(list(...), config_coverage)
    alignments <- get_alignments(object)
    apfun <- parse_threads(config$threads)

    flog.info(paste("Estimating read lengths from a sample of",
                    "100 reads per alignment."))
    rlens <- apfun(alignments$alignment, read_length) %>% as.numeric()
    names(rlens) <- alignments$id

    rows <- lapply(1:nrow(alignments), function(i) {
        a <- alignments[i, ]
        c(a$id, a$alignment)
    })
    flog.info(paste0(
        "Calculating coverage profiles with bin width of %dbp. ",
        "Minimum average coverage is %g and minimum contig length is %d."),
        config$bin_width, config$min_coverage, config$min_length)
    coverage <- lapply(rows, function(r) {
        id <- r[1]
        aln <- r[2]
        cv <- coverage(aln)
        means <- sapply(cv, mean)
        lengths <- sapply(cv, length)
        cv <- cv[means > config$min_coverage & lengths > config$min_length]
        binned <- apfun(cv, function(x) {
            b <- rollapply(
                as.numeric(x), mean, width = config$bin_width,
                by = config$bin_width, partial = TRUE, align = "left")
            return(b)
        })
        flog.info(paste0(
            "Finished coverage profile for sample %s. ",
            "%d/%d contigs passed the average coverage threshold %g."),
            id, length(cv), length(means), config$min_coverage)

        return(data.table(
            id = id,
            bin_width = config$bin_width,
            coverage = binned,
            contig = names(cv),
            length = lengths[names(cv)]
        ))
    }) %>% rbindlist()
    coverage[, "read_length" := rlens[id]]
    coverage <- coverage[!sapply(coverage, function(x) is.null(x[[1]]))]

    artifact <- list(
        alignments = alignments,
        coverage = coverage,
        steps = c(object[["steps"]], "binned_coverage")
    )

    return(artifact)
}
Gibbons-Lab/mbtools documentation built on Jan. 28, 2024, 11:08 a.m.