R/get_read_entropy.R

Defines functions get_read_entropy

#' @importFrom Biostrings width
# nocov start
get_read_entropy <- function(bam_path, sample = fs::path_file(bam_path)) {
    # helper functions ----
    count_crossings <- function(numbers) {
        crossings <- sum(((numbers[-1] - 127.5) * (numbers[-length(numbers)] - 127.5)) < 0)
        return(crossings)
    }

    parse_chunk <- function(reads, sample) {
        reads <- reads[!is.null(reads$seq)]
        tibble::tibble(
            sample = factor(sample),
            read_name = reads$qname,
            chr = factor(reads$rname),
            pos = reads$pos,
            width = Biostrings::width(reads$seq),
            center = as.integer(.data$pos + .data$width / 2),
            cg_count = purrr::map_int(as.character(reads$seq), count_cg_cpp),
            crossings = purrr::map_int(reads$tag$ML, count_crossings),
            entropy = .data$crossings / .data$cg_count
        )
    }

    # function body ----
    fname <- fs::path_file(bam_path)
        cli::cli_progress_bar(
            glue::glue("Parsing file: {fname}"),
            total = get_bam_total_reads(bam_path),
            format_done = paste0(
                "{.alert-success Data parsed: ", fname, " {.timestamp {cli::pb_elapsed}}}"),
            format_failed = paste0(
                "{.alert-danger Data parsing failed: ", fname, " {.timestamp {cli::pb_elapsed}}}"),
            clear = FALSE
        )

    bam_file <- Rsamtools::BamFile(bam_path, yieldSize = 15000)

    i <- 1
    df_list <- list()
    open(bam_file)
    while (Rsamtools::isIncomplete(bam_file)) {
        reads <- read_bam(bam_file)[[1]]
        df_list[[i]] <- parse_chunk(reads, sample)
        i <- i + 1
        cli::cli_progress_update(length(reads[[1]]))
    }
    close(bam_file)

    bind_rows(df_list)
}
# nocov end
Shians/NanoMethViz documentation built on June 8, 2024, 10:48 p.m.