nanopoRe: Accessory Methods for the Analysis of Oxford Nanopore Technologies DNA Sequence Data

Documented in enrichmentCoverageTypeOverChromosomes enrichmentExecutiveSummary enrichmentGetTargetList enrichmentMappingByGenomicSegment enrichmentMultiGeneCoveragePanel enrichmentOffTargetKaryogram enrichmentOffTargetTable enrichmentSingleTargetPlot enrichmentStrandedTargetPlot enrichmentTargetPerformanceTable enrichmentWriteExcelOffTarget enrichmentWriteExcelOnTarget ggbiosave importEnrichmentResults isEnrichmentAnalysisComplete

enrichmentEnvironment <- "ENRICHMENT"

#' imports the R target output generated by the ont_tutorial_cas9 workflow
#'
#' a method for streamlining the import of R format data from the pre-calculated
#' content generated within the ont_tutorial_cas9 workflow. This is an accessory
#' method to abstract some of the code that looks out-of-control
#'
#' @param resultsDir the location of the folder where files should be pulled
#' from - this should be determined automatically from config.yaml in most cases
#' @return None
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' importEnrichmentResults(system.file("extdata", package="nanopoRe"))
#'
#' @export
importEnrichmentResults <- function(resultsDir=NULL) {

    # target 1 : <study>_mapping_results.Rdata
    # target 2 : <study>_aggregated_coverage.Rdata
    # target 3 : <study>.unmapped.rcounts.Rdata

    study <- getCachedYAMLValue(field="study_name")
    if (is.null(resultsDir)) {
        resultsDir <- getRpath()
    }

    # Create an environment for storing the results ...
    setCachedObject(enrichmentEnvironment, new.env())

    # load the general mapping results and analysis ...
    mappingResultsFile <- file.path(
        resultsDir, paste0(study, "_mapping_results", ".Rdata"))
    load(mappingResultsFile, envir = getCachedObject(enrichmentEnvironment))

    # load the aggregated coverage file - used for plotting coverage at finer
    # resolution for the pre-defined targets
    aggregatedCovFile <- file.path(
        resultsDir, paste0(study, "_aggregated_coverage", ".Rdata"))
    load(aggregatedCovFile, envir = getCachedObject(enrichmentEnvironment))

    # load the sequence metadata for the unmapped sequence reads
    chromosomeFile <- file.path(
        resultsDir, paste0(study, ".unmapped.rcounts", ".Rdata"))
    assign("unmappedReads", readRDS(
        file = chromosomeFile), envir = getCachedObject(enrichmentEnvironment))
}






#' accessory method to test whether the cas9 data indexing
#' has been performed
#'
#' The cas9 workflow involves a deep dive into the experimental data and
#' produces a couple of Rdata files that contain the experimental context and
#' information relating to on-target, off-target and other effects. This method
#' tests whether the pre-process analysis has been performed
#'
#' @param resultsDir the location of the folder where files should be pulled
#' from - this should be determined automatically from config.yaml
#' @return TRUE or FALSE depending
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' # some demo data is stored with the package - import the data by supply the
#' # non-standard path
#' isEnrichmentAnalysisComplete(system.file("extdata", package="nanopoRe"))
#'
#' @export
isEnrichmentAnalysisComplete <- function(resultsDir=NULL) {

    if (is.null(resultsDir)) {
        resultsDir <- getRpath()
    }

    mappingResultsFile <- file.path(resultsDir, paste0(
        getCachedYAMLValue(field ="study_name"), "_mapping_results", ".Rdata"))
    aggregatedCovFile <- file.path(resultsDir, paste0(
        getCachedYAMLValue(
            field="study_name"), "_aggregated_coverage", ".Rdata"))

    return (file.exists(mappingResultsFile) && file.exists(aggregatedCovFile))
}


#' prepare executive summary infographic for the cas9 tutorial
#'
#' prepare an infographic style executive summary for key cas9 enrichment
#' measurements
#'
#' @import emojifont
#' @importFrom Hmisc wtd.quantile
#' @return path to a file that has been created
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' importEnrichmentResults(system.file("extdata", package="nanopoRe"))
#' library(emojifont)
#' enrichmentExecutiveSummary()
#'
#' @export
enrichmentExecutiveSummary <- function() {

    study <- getCachedYAMLValue(field="study_name")

    backgroundUniverse <- getCachedObject(
        "backgroundUniverse", enrichmentEnvironment)
    unmappedReads <- getCachedObject("unmappedReads", enrichmentEnvironment)
    offtargetUniverse <- getCachedObject(
        "offtargetUniverse", enrichmentEnvironment)
    targetproximalUniverse <- getCachedObject(
        "targetproximalUniverse", enrichmentEnvironment)
    ontargetUniverse <- getCachedObject(
        "ontargetUniverse", enrichmentEnvironment)

    cas9Throughput <- sum(backgroundUniverse$basesstart) +
        sum(unmappedReads$width) + sum(offtargetUniverse$basesstart) +
        sum(targetproximalUniverse$basesstart) +
        sum(ontargetUniverse$basesstart)
    cas9Throughput <- paste(round(cas9Throughput/1e+09, digits = 2), "Gb")

    ontargetLabel <- paste0(round(sum(ontargetUniverse$rstart)/
        (sum(ontargetUniverse$rstart) + length(unmappedReads) +
        sum(offtargetUniverse$rstart) + sum(targetproximalUniverse$rstart) +
        sum(backgroundUniverse$rstart)) * 100, digits = 2), "%")
    meanCovLabel <- paste0(round(mean(rep(ontargetUniverse$dmean,
        IRanges::width(ontargetUniverse))), digits = 1), "X")

    depletionLabel = paste0(round(Hmisc::wtd.quantile(
        ontargetUniverse$dmean, probs = c(0.5), weight = IRanges::width(
            ontargetUniverse))/Hmisc::wtd.quantile(
                as.numeric(backgroundUniverse$dmean), probs = c(0.5), weight =
            as.numeric(IRanges::width(backgroundUniverse))), digits = 1), " X")

    execsummPlot <- infoGraphicPlot4(
        identifier = paste0(study, "_enrichment_info"),
        panelA = c(key = "Throughput", value = cas9Throughput,
            icon = "fa-calculator"),
        panelB = c(key = "reads on target", value = ontargetLabel,
            icon = "fa-cut"),
        panelC = c(key = "mean target coverage", value = meanCovLabel,
            icon = "fa-map"),
        panelD = c(key = "non-target depletion", value = depletionLabel,
            icon = "fa-code-fork"))

    return(execsummPlot)
}


addRow <- function(df, metric, count, percentage = "") {
    return(df %>% add_row(metric = metric, count = count,
        percentage = percentage))
}


collateMappingCharacteristics <- function(
    bamFile, effectiveGenomeSize, unmappedBamFile = NA) {
    suppressWarnings(if (!methods::is(unmappedBamFile, "data.frame") &&
        is.na(unmappedBamFile)) {
        unmappedBamFile <- data.frame(width = numeric(), quality = numeric())
    })
    # basic counts for #s of reads
    mappedSeqs <- sum(bamFile$rstart)
    unmappedSq <- nrow(unmappedBamFile)
    totalReads <- mappedSeqs + unmappedSq
    # basic counts for #s of nucleotides
    mappedNts <- sum(bamFile$basesstart)
    unmappedNts <- sum(unmappedBamFile$width)
    fastqNts <- mappedNts + unmappedNts
    mappedClippedNts <- sum(bamFile$cigarmapped)

    # reference genome characteristics
    refSize <- paste0(round(sum(as.numeric(IRanges::width(
        bamFile)))/effectiveGenomeSize * 100, digits = 3), "%")

    meanCov <- sum(bamFile$dmean * IRanges::width(bamFile), na.rm = TRUE)/sum(
        IRanges::width(bamFile), na.rm = TRUE)

    summary.df <- data.frame(metric = character(), count = character(),
        percentage = character(), stringsAsFactors = FALSE)
    summary.df <- addRow(summary.df, "total sequence reads", (
        scales::comma_format())(totalReads))
    summary.df <- addRow(summary.df, "mapped reads (primary)", (
        scales::comma_format())(mappedSeqs))
    summary.df <- addRow(summary.df, "bases sequenced", (
        scales::comma_format())(fastqNts))
    summary.df <- addRow(summary.df, "bases mapped", (
        scales::comma_format())(mappedNts))
    summary.df <- addRow(summary.df, "Fraction of genome (%)", refSize)
    summary.df <- addRow(summary.df, "Mean coverage (primary)",
        round(meanCov, digits = 2))

    rownames(summary.df) <- summary.df[, 1]
    summary.df <- summary.df[, -1]
    return(summary.df)
}



#' prepare a table of cas9 target mapping types
#'
#' the method prepares a table that defines the mapping characteristics by
#' on-target, target-proximal, off-target and background - parses the content
#' produced within the broader on_tutorial_cas9 workflow
#'
#' @importFrom IRanges width
#' @import knitr
#' @import kableExtra
#' @importFrom methods is
#' @param format the format of the returned object - html default
#' @return kable table in specified format
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' importEnrichmentResults(system.file("extdata", package="nanopoRe"))
#' enrichmentMappingByGenomicSegment()
#'
#' @export
enrichmentMappingByGenomicSegment <- function(format="html") {
    backgroundUniverse <- getCachedObject(
        "backgroundUniverse", enrichmentEnvironment)
    unmappedReads <- getCachedObject("unmappedReads", enrichmentEnvironment)
    offtargetUniverse <- getCachedObject(
        "offtargetUniverse", enrichmentEnvironment)
    targetproximalUniverse <- getCachedObject(
        "targetproximalUniverse", enrichmentEnvironment)
    ontargetUniverse <- getCachedObject(
        "ontargetUniverse", enrichmentEnvironment)

    effectiveGenomeSize <- sum(IRanges::width(backgroundUniverse)) +
        sum(IRanges::width(offtargetUniverse)) +
        sum(IRanges::width(targetproximalUniverse)) +
        sum(IRanges::width(ontargetUniverse))

    summary.df <- as.data.frame(cbind(collateMappingCharacteristics(
        backgroundUniverse, effectiveGenomeSize, unmappedReads),
        collateMappingCharacteristics(offtargetUniverse, effectiveGenomeSize),
        collateMappingCharacteristics(targetproximalUniverse,
        effectiveGenomeSize), collateMappingCharacteristics(ontargetUniverse,
        effectiveGenomeSize)))

    summary.df <- summary.df[, -c(2, 4, 6, 8)]
    summary.df[summary.df == "NaN"] <- ""


    if (is.na(format)) {
        return(summary.df)
    } else if (format=="html") {

        row.names(summary.df)[1] <- paste0(row.names(summary.df)[1],
                                           footnote_marker_symbol(1, "html"))
        row.names(summary.df)[2] <- paste0(row.names(summary.df)[2],
                                           footnote_marker_symbol(2, "html"))
        row.names(summary.df)[6] <- paste0(row.names(summary.df)[6],
                                           footnote_marker_symbol(3, "html"))

    targettable <- kable(summary.df, format = format,
        col.names = rep(" ", ncol(summary.df)), caption = paste0(
        "Table summarising global mapping characteristics ranked by on-target",
        ", target-flanking and off-target"),
        booktabs = TRUE, table.envir = "table*", linesep="", escape=FALSE) %>%
        add_header_above(c(" ", Background = 1, `Off-Target` = 1,
        `Target-flanking` = 1, `On-Target` = 1)) %>%
        kable_styling(c("striped", "condensed")) %>% footnote(symbol=c(paste0(
        "fastq bases are calculated from the qwidth field of the mapped ",
        "sequences and from the sequence length of unmapped sequences"),
        "this table presents only primary sequence mappings",
        "depth of coverage based only on primary mapping reads"),
        symbol_title = "please note: ", footnote_as_chunk = TRUE)
    return(targettable)

    } else {


    simpletable <- kable(summary.df, format = format, col.names =
                             c("Background", "Off-Target", "Target-flanking", "On-Target"),
                         booktabs = TRUE, table.envir = "table*", linesep="", escape=FALSE)  %>%
        kable_styling(c("striped", "condensed"))

    return(paste(as.character(simpletable), collapse="\n"))
    }
}



#' summarise cas9 target performance
#'
#' prepare tabular data showing the enrichment performance for the cas9 targets
#' included within an enrichment study
#'
#' @importFrom IRanges width
#' @import knitr
#' @import kableExtra
#' @return kable table in html format
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' importEnrichmentResults(system.file("extdata", package="nanopoRe"))
#' enrichmentTargetPerformanceTable()
#'
#' @export
enrichmentTargetPerformanceTable <- function(format="html") {
    ontargetUniverse <- getCachedObject(
        "ontargetUniverse", enrichmentEnvironment)

    bygene <- cbind(names(ontargetUniverse), (scales::comma_format())
        (IRanges::width(ontargetUniverse)),
        round(ontargetUniverse$dmean, digits = 2), (scales::comma_format())
        (ontargetUniverse$rstart),(scales::comma_format())
        (ontargetUniverse$basesstart), (scales::comma_format())
        (round(ontargetUniverse$meanreadlen, digits = 2)),
        round(ontargetUniverse$readq, digits = 2),
        round(ontargetUniverse$mapq, digits = 2),
        round(ontargetUniverse$strandp/ontargetUniverse$rstart*100, digits=2))

    colnames(bygene) <- seq(1, ncol(bygene))

    colnames(bygene)[1] <- paste0("Target Gene")
    colnames(bygene)[2] <- paste0("Target size (nt)")
    colnames(bygene)[3] <- paste0("Mean coverage")
    colnames(bygene)[4] <- paste0("Read count",footnote_marker_symbol(1,"html"))
    colnames(bygene)[5] <- paste0("Bases", footnote_marker_symbol(2, "html"))
    colnames(bygene)[6] <- paste0("Mean readLength")
    colnames(bygene)[7] <- paste0("Mean readQuality")
    colnames(bygene)[8] <- paste0("Mean mapQuality")
    colnames(bygene)[9] <- paste0(
        "Reads on FWD(%)",footnote_marker_symbol(3,"html"))

    if (format=="raw") {
        return(bygene)
    } else if (format=="html") {

    candidate_table <- kable(bygene, format = format, caption =
        "Table summarising target mapping for pre-defined regions of interest",
        booktabs = TRUE, table.envir = "table*", linesep="", escape=FALSE) %>%
        kable_styling(c("striped", "condensed")) %>% footnote(symbol = c(paste0(
        "Reads are counted as all sequence reads where the SAM start location ",
        "is located within the target interval. This does not correct for ",
        "sequences on the reverse strand."),
        paste0("Bases are counted as the sum of nucleotides from all reads ",
        "where the SAM start location is within target region; some of these ",
        "bases will overlap the flanking region"),
        paste0("reads are assessed for strand of mapping; here reads on + ",
        "strand are summarised as percentage of all")),
        symbol_title = "please note: ", footnote_as_chunk = TRUE)

    return(candidate_table)

    } else {
        simpleTable = kable(bygene, format = format, booktabs = TRUE, table.envir = "table*", linesep="", escape=FALSE)  %>%
            kable_styling(c("striped", "condensed"))

        return(paste(as.character(simpleTable), collapse="\n"))
    }
}



#' return list of defined cas9 target regions
#'
#' return list of defined cas9 target regions
#'
#' @return vector of target ids
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' importEnrichmentResults(system.file("extdata", package="nanopoRe"))
#' enrichmentGetTargetList()
#'
#' @export
enrichmentGetTargetList <- function() {
    aggregatedGR <- getCachedObject("aggregatedGR", enrichmentEnvironment)
    return(unique(as.data.frame(aggregatedGR)$gene))
}



#' prepare a plot of sequence coverage over a cas9 target region
#'
#' prepare a plot of depth-of-coverage over and around a specified target region
#'
#' @import ggplot2
#' @import RColorBrewer
#' @param geneName is the qualuified name for the target selected
#' @param delta is a scaling value to zoom in beyond the target-proximal region
#' @return ggplot2 figure
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' importEnrichmentResults(system.file("extdata", package="nanopoRe"))
#' enrichmentSingleTargetPlot(enrichmentGetTargetList()[[1]])
#'
#' @export
enrichmentSingleTargetPlot <- function(geneName, delta = 0) {

    aggregatedGR <- getCachedObject("aggregatedGR", enrichmentEnvironment)
    target_proximity <- getCachedYAMLValue(field="target_proximity")

    wga.cov <- getCachedObject("wga.cov", enrichmentEnvironment)
    offtarget_level <- getCachedYAMLValue(field="offtarget_level")

    delta = min(delta, target_proximity)
    covData <- as.data.frame(aggregatedGR)
    covData <- covData[which(covData$gene == geneName), ]
    offset <- covData[covData$pos == 1, ]$start
    suppressWarnings(plot <- ggplot(covData) +
        geom_hline(yintercept=(wga.cov * offtarget_level), colour="#E69F00") +
            geom_line(aes_string(x = "start", y = "binned_cov"), size = 0.5,
            colour = brewer.pal(6, "Paired")[2]) +
            xlab(paste("Position on chromosome", unique(covData$seqnames))) +
            ylab("Depth of Coverage (X)") + labs(title = paste(
            "Plot showing depth of coverage vs position for target",geneName))+
            geom_vline(xintercept=(offset+target_proximity), colour="red",
            alpha = 0.4) + geom_vline(xintercept = (offset + (max(covData$pos) -
            target_proximity)), colour = "red", alpha = 0.4) +
            scale_x_continuous(limits=c(
                offset+delta,offset+max(covData$pos)-delta)))
    plot <- ggplot2handler(plot)
    return(plot)
}



#' prepare a plot of sequence coverage shaded by strand over a cas9 target
#' region
#'
#' prepare a plot of depth-of-coverage shaded by strand over and around a
#' specified target region
#'
#' @import ggplot2
#' @import reshape2
#' @import RColorBrewer
#' @param geneName is the qualuified name for the target selected
#' @param delta is a scaling value to zoom in beyond the target-proximal region
#' @return ggplot2 figure
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' importEnrichmentResults(system.file("extdata", package="nanopoRe"))
#' enrichmentStrandedTargetPlot(enrichmentGetTargetList()[[1]])
#'
#' @export
enrichmentStrandedTargetPlot <- function(geneName, delta = 0) {

    aggregatedGR <- getCachedObject("aggregatedGR", enrichmentEnvironment)
    target_proximity <- getCachedYAMLValue(field="target_proximity")

    wga.cov <- getCachedObject("wga.cov", enrichmentEnvironment)
    offtarget_level <- getCachedYAMLValue(field="offtarget_level")

    delta = min(delta, target_proximity)
    covData <- as.data.frame(aggregatedGR)
    covData <- covData[which(covData$gene == geneName), ]
    offset <- covData[covData$pos == 1, ]$start
    mdata <- melt(covData[, c("gene", "start", "fwd_cov", "rev_cov")],
        id.vars = c("gene", "start"))
    suppressWarnings(plot <- ggplot(mdata, aes_string("start", "value")) +
        geom_hline(yintercept = (wga.cov*offtarget_level), colour="#E69F00") +
            geom_area(aes_string(fill = "variable")) +
            xlab(paste("Position on chromosome", unique(covData$seqnames))) +
            ylab("Depth of Coverage (X)") +
            labs(title = paste(
            "Plot showing depth of coverage vs position for target",geneName)) +
            geom_vline(xintercept = (offset + target_proximity), colour="red",
            alpha = 0.4) + geom_vline(xintercept = (offset + (max(covData$pos) -
            target_proximity)), colour = "red", alpha = 0.4) +
            scale_x_continuous(limits = c(offset + delta, offset + max(
            covData$pos) - delta)) + scale_fill_manual(values = c(brewer.pal(5,
            "Paired")[1], brewer.pal(5, "Paired")[2]), name = "Strand mapped",
            breaks = c("fwd_cov", "rev_cov"), labels = c("Forward", "Reverse")))
    plot <- ggplot2handler(plot)
    return(plot)
}




#' write summary of on-target mapping results to an excel format
#' result file
#'
#' write summary of on-target mapping results to an excel format result file
#'
#' @importFrom writexl write_xlsx
#' @return path to file created
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' importEnrichmentResults(system.file("extdata", package="nanopoRe"))
#' enrichmentWriteExcelOnTarget()
#'
#' @export
enrichmentWriteExcelOnTarget <- function() {
    ontargetUniverse <- getCachedObject("ontargetUniverse",
        enrichmentEnvironment)
    study <- getCachedYAMLValue(field="study_name")
    ontarget.meta <- file.path(
        "Analysis", "OnTarget", paste0(study, "_ontarget.xlsx"))
    writexl::write_xlsx(
        as.data.frame(ontargetUniverse)[, c(1, 2, 3, 4, 6, 8, 14, 16, 23)],
        path = ontarget.meta)
    return(ontarget.meta)
}




#' write summary of off-target mapping results to an excel format
#' result file
#'
#' write summary of off-target mapping results to an excel format result file
#'
#' @importFrom writexl write_xlsx
#' @return path to file created
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' importEnrichmentResults(system.file("extdata", package="nanopoRe"))
#' enrichmentWriteExcelOffTarget()
#'
#' @export
enrichmentWriteExcelOffTarget <- function() {
    offtargetUniverse <- getCachedObject("offtargetUniverse",
        enrichmentEnvironment)

    study <- getCachedYAMLValue(field="study_name")
    offtarget.meta <- file.path("Analysis", "OffTarget", paste0(
        study, "_offtarget.xlsx"))

    # small reporting issue - reads are not double counted that means we can
    # have blocks of background where there is sufficient depth, but no reads
    # originate in the block - this leads to Na mean quality and mapq

    writexl::write_xlsx(
        as.data.frame(offtargetUniverse)[, c(1, 2, 3, 4, 6, 8, 14, 16, 23)],
        path = offtarget.meta)
    return(offtarget.meta)
}


#' prepare a plot of depth-of-coverage across each of the cas9-enrichment
#' targets included in study
#'
#' prepares a plot of depth-of-coverage across each of the cas9-enrichment
#' targets included in study. This is rendered as a multi-tile plot; simple for
#' quick visualisation of the target regions
#'
#' @import ggplot2
#' @import RColorBrewer
#' @param colMax defines the maximum number of columns to use within the tiled
#' matrix
#' @return ggplot2 format graph
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' importEnrichmentResults(system.file("extdata", package="nanopoRe"))
#' enrichmentMultiGeneCoveragePanel()
#'
#' @export
enrichmentMultiGeneCoveragePanel <- function(colMax = 4) {
    aggregatedGR <- getCachedObject("aggregatedGR", enrichmentEnvironment)
    target_proximity <- getCachedYAMLValue(field="target_proximity")
    wga.cov <- getCachedObject("wga.cov", enrichmentEnvironment)
    offtarget_level <- getCachedYAMLValue(field="offtarget_level")
    br <- getCachedObject("br", enrichmentEnvironment)
    covData <- as.data.frame(aggregatedGR)
    suppressWarnings(posMatrix <- matrix(gtools::mixedsort(names(br)), ncol =
        colMax, byrow = TRUE))
    # data may be recycled ... remove duplicate values ...
    posMatrix[which(
        duplicated(posMatrix[seq(nrow(posMatrix) * ncol(posMatrix))]))] <- NA

    plotLegend <- paste0("t::", gtools::mixedsort(names(br)))
    plotCols <- ceiling(length(plotLegend)/colMax)
    legendDF <- data.frame(x = Inf, y = Inf, lab = plotLegend, row = unlist(
        lapply(seq_len(plotCols), rep, times = colMax))[seq_along(plotLegend)],
        col = rep(seq_len(colMax), length.out = length(plotLegend)))
    # add row and column data to covData
    covData$row <- 0
    covData$col <- 0
    for (gid in unique(covData$gene)) {
        matrixpos <- which(posMatrix == gid, arr.ind = TRUE)
        matchrows <- which(covData$gene == gid)
        covData$row[matchrows] <- matrixpos[[1]]
        covData$col[matchrows] <- matrixpos[[2]]
    }

    covData$start <- (as.numeric(covData$start))/1000
    suppressWarnings(megadepthplot <- ggplot(covData,
        aes_string("pos", "binned_cov")) + geom_hline(yintercept = (wga.cov *
        offtarget_level), colour = "#E69F00") + geom_line(colour = brewer.pal(
        6, "Paired")[2]) + facet_grid(rows = vars(row), cols = vars(col)) +
        theme(axis.text.x = element_text(angle = 90, hjust = 1), strip.text.y =
        element_blank(), strip.text.x = element_blank()) +
        xlab("Position across target region (kb)") +
        ylab("Depth of Coverage (X)") + labs(title =
        "Plot showing depth of coverage vs position for target regions") +
        geom_text(aes_string("x", "y", label = "lab"), data = legendDF,
        vjust = 1, hjust = 1, size = 3.5) +
        theme(plot.title = element_text(size = 11)))

    megadepthplot <- ggplot2handler(megadepthplot)

    return(megadepthplot)
}



#' prepare a barchart showing number of bases per chromosome coloured by
#' mapping-type
#'
#' prepare a barchart showing number of bases per chromosome coloured by
#' mapping-type; enables looking for unexpected distributions of off-target,
#' background etc per chromosome
#'
#' @import ggplot2
#' @import RColorBrewer
#' @return ggplot2 format barchart showing fraction of mapping types per
#' chromosome
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' importEnrichmentResults(system.file("extdata", package="nanopoRe"))
#' enrichmentCoverageTypeOverChromosomes()
#'
#' @export
enrichmentCoverageTypeOverChromosomes <- function() {
    setLogFile()
    backgroundUniverse <- getCachedObject(
        "backgroundUniverse", enrichmentEnvironment)
    offtargetUniverse <- getCachedObject(
        "offtargetUniverse", enrichmentEnvironment)
    targetproximalUniverse <- getCachedObject(
        "targetproximalUniverse", enrichmentEnvironment)
    ontargetUniverse <- getCachedObject(
        "ontargetUniverse", enrichmentEnvironment)
    targetMap <- data.frame(chromosome = gtools::mixedsort(levels(
        GenomeInfoDb::seqnames(backgroundUniverse))), stringsAsFactors = FALSE)
    targetMap <- cbind(targetMap, offtarget = unlist(lapply(
        targetMap$chromosome, function(x) {
        sum(offtargetUniverse[which(levels(
            GenomeInfoDb::seqnames(offtargetUniverse)) == x)]$basesstart)
    })))
    targetMap <- cbind(targetMap, background = unlist(lapply(
        targetMap$chromosome, function(x) {
        sum(backgroundUniverse[which(levels(
            GenomeInfoDb::seqnames(backgroundUniverse)) == x)]$basesstart)
    })))
    targetMap <- cbind(targetMap, ontarget = unlist(lapply(
        targetMap$chromosome, function(x) {
        sum(ontargetUniverse[which(levels(
            GenomeInfoDb::seqnames(ontargetUniverse)) == x)]$basesstart)
    })))
    targetMap[is.na(targetMap)] <- 0
    # targetMap
    targetMelt <- melt(targetMap)
    targetMelt$variable <- factor(as.character(targetMelt$variable), c(
        "background", "ontarget", "offtarget"))
    targetMelt$chromosome <- factor(targetMelt$chromosome, gtools::mixedsort(
        unique(targetMelt$chromosome)))

    suppressWarnings(
        plot <- ggplot(targetMelt, aes_string("chromosome", "value")) +
            geom_col(aes_string(fill = "variable")) +
            scale_y_continuous(labels = scales::comma) +
            ylab("Number of bases (nt)") +
            labs(title = paste0("Barchart showing number of references bases ",
            "assigned as ontarget,\nofftarget or background")) +
        scale_fill_brewer(direction = -1, palette = "Spectral"))
    unsetLog()
    plot <- ggplot2handler(plot)
    return(plot)
}

default_device <- function(filename) {
    pieces <- strsplit(filename, "\\.")[[1]]
    ext <- tolower(pieces[length(pieces)])
    match.fun(ext)
}
convert_to_inches <- function(x, units) {
    x <- switch(units, `in` = x, cm = x/2.54, mm = x/2.54/10)
}
convert_from_inches <- function(x, units) {
    x <- switch(units, `in` = x, cm = x * 2.54, mm = x * 2.54 * 10)
}


#' save a ggbio plot to file
#'
#' @usage ggbiosave(filename,
#'     plot = last_plot(), device = default_device(filename), path = NULL,
#'     scale = 1, width = par('din')[1], height = par('din')[2],
#'     units = c('in', 'cm', 'mm'), dpi = 300, limitsize = TRUE, ...)
#' @importFrom grDevices dev.off
#' @importFrom graphics par
#' @importFrom utils capture.output
#' @param filename filename
#' @param plot plot
#' @param device device
#' @param path path
#' @param scale scale
#' @param width width
#' @param height height
#' @param units units
#' @param dpi dpi
#' @param limitsize limitsize
#' @param ... to downstream methods
#' @return path to image
ggbiosave <- function(filename, plot = last_plot(), device =
    default_device(filename), path = NULL,
    scale = 1, width = par("din")[1], height = par("din")[2],
    units = c("in", "cm", "mm"), dpi = 300,
    limitsize = TRUE, ...) {

    units <- match.arg(units)

    pdf <- function(..., version = "1.4")
        grDevices::pdf(..., version = version)
    png <- function(..., width, height)
        grDevices::png(..., width = width, height=height, res=dpi,units= "in")
    jpg <- jpeg <- function(..., width, height)
        grDevices::jpeg(..., width = width, height=height, res=dpi, units="in")


    if (!missing(width)) {
        width <- convert_to_inches(width, units)
    }
    if (!missing(height)) {
        height <- convert_to_inches(height, units)
    }
    if (missing(width) || missing(height)) {
        message("Saving ", prettyNum(convert_from_inches(width * scale, units),
            digits = 3), " x ", prettyNum(convert_from_inches(height * scale,
            units), digits = 3), " ", units, " image")
    }
    width <- width * scale
    height <- height * scale
    if (limitsize && (width >= 50 || height >= 50)) {
        stop(paste0("Dimensions exceed 50 inches (height and width are ",
        "specified in inches/cm/mm, not pixels)."),
        " If you are sure you want these dimensions, use 'limitsize=FALSE'.")
    }
    if (!is.null(path)) {
        filename <- file.path(path, filename)
    }
    device(file = filename, width = width, height = height, ...)
    on.exit(capture.output(dev.off()))
    print(plot)
    invisible()
}





#' prepare a karyogram shaded with regions of off-target mapping
#'
#' prepare a karyogram shaded with regions of off-target mapping
#'
#' @importFrom GenomeInfoDb seqlevels
#' @importFrom GenomeInfoDb seqnames
#' @importFrom GenomeInfoDb seqlengths
#' @importFrom gtools mixedsort
#' @importFrom ggbio autoplot
#' @param fill - if TRUE will populate chromosome lengths and ids for human
#' @return path to png format figure of plot
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' importEnrichmentResults(system.file("extdata", package="nanopoRe"))
#' enrichmentOffTargetKaryogram()
#'
#' @export
enrichmentOffTargetKaryogram <- function(fill=FALSE) {
    setLogFile()
    offtargetUniverse <- getCachedObject(
        "offtargetUniverse", enrichmentEnvironment)
    GenomeInfoDb::seqlevels(offtargetUniverse) <- unique(gtools::mixedsort(
        as.character(GenomeInfoDb::seqnames(offtargetUniverse))))
    if (fill) {
            GenomeInfoDb::seqlevels(offtargetUniverse) <- append(
                seq(1, 22), c("X", "Y"))

            GenomeInfoDb::seqlengths(offtargetUniverse) <- c(248956422,
                242193529, 198295559, 190214555, 181538259, 170805979,
                159345973, 145138636, 138394717, 133797422, 135086622,
                133275309, 114364328, 107043718, 101991189, 90338345, 83257441,
                80373285, 58617616, 64444167, 46709983, 50818468, 156040895,
                57227415)

    }

    plot <- ggbio::autoplot(offtargetUniverse, layout = "karyogram")

    dest <- tempfile(pattern = "", tmpdir = getRpath(), fileext = ".png")
    dim <- getPlotDimensions()
    unsetLog()
    ggbiosave(filename = dest, plot, width = dim$width, height = dim$height,
        units = dim$units, dpi = dim$dpi)

    return(dest)

}




#' prepare a table of cas9 off-target mapping locations
#'
#' prepare a table of cas9 off-target mapping locations
#'
#' @import knitr
#' @import kableExtra
#' @importFrom scales comma_format
#' @importFrom tibble add_column
#' @param maxRows the maximum number of rows to display in the table
#' @param format output format
#' @return table table
#'
#' @examples
#' yamlFile <- system.file("extdata", "cas9_demo.yaml", package = "nanopoRe")
#' importConfigYAML(yamlFile=yamlFile)
#' importEnrichmentResults(system.file("extdata", package="nanopoRe"))
#' enrichmentOffTargetTable()
#'
#' @export
enrichmentOffTargetTable <- function(maxRows=10, format="html") {
    offtargetUniverse <- getCachedObject(
        "offtargetUniverse", enrichmentEnvironment)
    offtargtop <- as.data.frame(offtargetUniverse[order(
        offtargetUniverse$dmean,decreasing=TRUE)])[,c(1,2,3,23,6,8,10,11,16,14)]
    if (nrow(offtargtop)>maxRows) {
        offtargtop <- offtargtop[seq_len(maxRows),]
    }

    offtargtop$strandp <- round(as.numeric(offtargtop$strandp)/(as.numeric(
        offtargtop$strandp) + as.numeric(offtargtop$strandn)) * 100, digits = 2)
    offtargtop <- add_column(offtargtop, width = (
        offtargtop$end - offtargtop$start + 1), .after = 3)
    offtargtop$start <- (scales::comma_format())(offtargtop$start)
    offtargtop$end <- (scales::comma_format())(offtargtop$end)
    offtargtop$dmean <- round(offtargtop$dmean, digits = 0)
    offtargtop$meanreadlen <- (scales::comma_format())(offtargtop$meanreadlen)
    offtargtop$mapq <- round(offtargtop$mapq, digits = 2)
    offtargtop$readq <- round(offtargtop$readq, digits = 2)
    offtargtop <- offtargtop[, -which(colnames(offtargtop) == "strandn")]

    colnames(offtargtop) <- c("chrId", "start", "end", "width", "mean coverage",
        "reads in segment", "mean read length", "%FWD reads", "mean readQ",
        "mean MAPQ")

    if (is.na(format)) {
        return(offtargtop)
    } else if (format=="html") {

    offttable <- kable(offtargtop, format = format, caption = paste0(
        "Table summarising the location and characteristics for the off-target",
        " regions with the highest depth-of-coverage"),
        booktabs = TRUE, table.envir = "table*", linesep = "",escape=FALSE) %>%
        kable_styling(c("striped", "condensed")) %>% footnote(symbol = c(
        paste0("This table has been prepared using only read mapping ",
        "information that corresponds to a primary map"), paste0(
        "The reads in segment column describes the number of sequences that ",
        "start within this genomic interval (using SAM start coordinate only)"),
        paste0("mean read length is the mean sequence read length for the ",
        "mapping reads identified; their strandedness is summarised in %FWD ",
        "reads (the number of sequences that appear on the forward strand) and",
        " the mapping quality is summarised in mapq")),
        symbol_title = "please note: ", footnote_as_chunk = TRUE)


    return(offttable)

    } else {
        simpleTable <- kable(offtargtop, format = format,
            booktabs = TRUE, table.envir = "table*", linesep = "",escape=FALSE) %>%
            kable_styling(c("striped", "condensed"))

        return(paste(as.character(simpleTable), collapse="\n"))
    }

}
sagrudd/nanopoRe documentation built on June 7, 2020, 10:20 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
sagrudd/nanopoRe
Accessory Methods for the Analysis of Oxford Nanopore Technologies DNA Sequence Data

R/EnrichmentPresentation.R
In sagrudd/nanopoRe: Accessory Methods for the Analysis of Oxford Nanopore Technologies DNA Sequence Data

R Package Documentation

Browse R Packages

We want your feedback!

sagrudd/nanopoRe Accessory Methods for the Analysis of Oxford Nanopore Technologies DNA Sequence Data

R/EnrichmentPresentation.R In sagrudd/nanopoRe: Accessory Methods for the Analysis of Oxford Nanopore Technologies DNA Sequence Data

R Package Documentation

Browse R Packages

We want your feedback!

sagrudd/nanopoRe
Accessory Methods for the Analysis of Oxford Nanopore Technologies DNA Sequence Data

R/EnrichmentPresentation.R
In sagrudd/nanopoRe: Accessory Methods for the Analysis of Oxford Nanopore Technologies DNA Sequence Data