In UofABioinformaticsHub/fastqcReports: Load FastqQC reports and other NGS related files

knitr::opts_chunk$set(
    echo = FALSE,
    results = "asis",
    message = FALSE, 
    warning = FALSE,
    error = FALSE
    )

library(ngsReports)
library(magrittr)
library(stringr)
library(dplyr)
library(readr)
library(tibble)
library(ggplot2)
library(scales)
library(DT)
library(pander)

globals <- list(
    usePlotly = params$usePlotly,
    cluster = TRUE,
    dendrogram = TRUE,
    theoreticalGC = TRUE,
    gcType = params$gcType,
    species = params$species
    )

fastqcFiles <- list.files(pattern = "(fastqc.zip|fastqc)$")
stopifnot(length(fastqcFiles) > 1)
message("FastQC files found. Loading FastQC data")
fastqcData <- tryCatch(FastqcDataList(fastqcFiles))
plotLabels <- structure(
    gsub(".(fastq|fastq.gz|bam)", "", fqName(fastqcData)),  
    names = fqName(fastqcData)
)

n <- length(fastqcData)
fh <- max(0.25*n, 6)

message("Checking for ", params$tgtsFile)
if (file.exists(params$tgtsFile)) {
    message("Found targets.csv...checking columns")
    targets <- read_csv(params$tgtsFile)
    fCol <- grep("[Ff]ile[Nn]ame", colnames(targets))
    lCol <- grep("[Ll]abel", colnames(targets))
    if (length(fCol) == 1 && length(lCol) == 1) {
        stopifnot(all(fqName(fastqcData) %in% targets[[fCol]]))
        message("Alternate labels found")
        plotLabels <- structure(targets[[lCol]], names = targets[[fCol]])
    }
    else{
        message("No valid labels found")
    }
}
if (!file.exists(params$tgtsFile)) {
    message(params$tgtsFile, " not found. Using default labels")
}

FastQC Summary

bs <- getModule(fastqcData, "Basic_Statistics")
bs %>%
    mutate(
        Sequence_length = paste(Shortest_sequence, Longest_sequence, sep = "-"),
        `%GC` = as.numeric(`%GC`) / 100
    ) %>%
    dplyr::select(Filename, contains("sequence"), `%GC`, File_type, Encoding, -contains("est")) %>%
    set_names(gsub("_", " ", names(.))) %>%
    rename_all(str_to_title) %>%
    set_names(str_remove_all(names(.), "[Ss]equences")) %>%
    rename_all(str_trim) %>%
    dplyr::rename(`%GC` = `%Gc`) %>%
    datatable(
        caption = "Summary statistics for all libraries",
        rownames = FALSE,
        options = list(
            pageLength = 25
        ),
        class = "stripe"
    ) %>%
    formatPercentage(
        columns = "%GC"
    ) %>%
    formatRound(
        columns = c("Total", "Flagged As Poor Quality"),
        digits = 0,
        mark = ","
    )

Read Totals

Library Sizes ranged between r pander(comma(range(readTotals(fastqcData)$Total_Sequences))) reads.

plotReadTotals(fastqcData, labels = plotLabels, usePlotly = globals$usePlotly)

FastQC Summary

plotSummary(fastqcData, labels = plotLabels, usePlotly = globals$usePlotly)

Per Base Sequence Quality

plotBaseQuals(fastqcData, labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)

Per Sequence Quality Scores

plotSeqQuals(fastqcData, labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)

Per Base Sequence Content

plotSeqContent(fastqcData, labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)

Per Sequence GC Content

plotGcContent(fastqcData, labels = plotLabels, theoreticalGC = globals$theoreticalGC, gcType = globals$gcType, species = globals$species, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)

plotGcContent(fastqcData, labels = plotLabels, theoreticalGC = globals$theoreticalGC, gcType = globals$gcType, species = globals$species, plotType = "line", usePlotly = globals$usePlotly)

Sequence Length Distribution

plotSeqLengthDistn(fastqcData, labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)

plotSeqLengthDistn(fastqcData, plotType = "cdf", labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)

Sequence Duplication Levels

plotDupLevels(fastqcData, labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)

Adapter Content

plotAdapterContent(fastqcData, labels = plotLabels, cluster = globals$cluster, dendrogram = globals$dendrogram, usePlotly = globals$usePlotly)

Overrepresented Summary

plotOverrep(fastqcData, labels = plotLabels, usePlotly = globals$usePlotly)

Overrepresented Sequences

os <- getModule(fastqcData, "Overrepresented_sequences") 
if (length(os)) {
    os %>% 
        mutate(Filename = plotLabels[Filename]) %>%
        group_by(Sequence, Possible_Source) %>%
        summarise(
            Total = sum(Count),
            Files = n(),
            Max_Percentage = max(Percentage/100)
        ) %>%
        ungroup() %>%
        dplyr::arrange(desc(Total)) %>%
        dplyr::slice(1:params$nOver) %>%
        mutate(
            `% Across All Files` = Total / sum(bs$Total_Sequences)
        ) %>%
        dplyr::select(
            Sequence, Total, `Present In` = Files, `% Across All Files`, 
            `Max Individual %` = Max_Percentage, Possible_Source
        ) %>%
        set_names(gsub("_", " ", names(.))) %>%
        rename_all(str_to_title) %>%
        datatable(
            caption = paste(
                "Summary of the most overrepresented sequences in all files.",
                "A maximum of", params$nOver, "sequences are given"
            ),
            rownames = FALSE,
            options = list(
                pageLength = params$nOver,
                autoWidth = TRUE
            )
        ) %>%
        formatPercentage(
            columns = c("% Across All Files", "Max Individual %"),
            digits = 2
        ) %>%
        formatRound(
            columns = "Total",
            digits = 0,
            mark = ","
        )
}
if (length(os) == 0) {
    message("No overrepresented sequences were detected by FastQC")
}

Session Information

sessionInfo() %>% pander()

UofABioinformaticsHub/fastqcReports documentation built on May 8, 2024, 12:44 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

UofABioinformaticsHub/fastqcReports
Load FastqQC reports and other NGS related files

In UofABioinformaticsHub/fastqcReports: Load FastqQC reports and other NGS related files

FastQC Summary

Read Totals

FastQC Summary

Per Base Sequence Quality

Per Sequence Quality Scores

Per Base Sequence Content

Per Sequence GC Content

Sequence Length Distribution

Sequence Duplication Levels

Adapter Content

Overrepresented Summary

Overrepresented Sequences

Session Information

R Package Documentation

Browse R Packages

We want your feedback!

UofABioinformaticsHub/fastqcReports Load FastqQC reports and other NGS related files

In UofABioinformaticsHub/fastqcReports: Load FastqQC reports and other NGS related files

FastQC Summary

Read Totals

FastQC Summary

Per Base Sequence Quality

Per Sequence Quality Scores

Per Base Sequence Content

Per Sequence GC Content

Sequence Length Distribution

Sequence Duplication Levels

Adapter Content

Overrepresented Summary

Overrepresented Sequences

Session Information

R Package Documentation

Browse R Packages

We want your feedback!

UofABioinformaticsHub/fastqcReports
Load FastqQC reports and other NGS related files