# TidyMultiqc.R
# In TidyMultiqc: Converts 'MultiQC' Reports into Tidy Data Frames

## ---- include = FALSE, setup--------------------------------------------------
# Chunk options applied to every chunk of the rendered vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  cols.print = 3
)

# Path of the example MultiQC report looked up inside the installed
# TidyMultiqc package. `mustWork = TRUE` makes a missing file fail right here
# instead of handing an empty string to every later `load_multiqc()` call.
multiqc_data_path <- system.file(
  "extdata", "wgs/multiqc_data.json",
  package = "TidyMultiqc",
  mustWork = TRUE
)

## ---- eval=FALSE--------------------------------------------------------------
#  install.packages("TidyMultiqc")

## ---- eval=FALSE--------------------------------------------------------------
#  library(TidyMultiqc)

## ----paged.print=TRUE---------------------------------------------------------
# Load the example report without a `sections` argument (so the package's
# defaults apply), keep the result as `df` for the chunks below, and print it.
df <- TidyMultiqc::load_multiqc(multiqc_data_path)
df

## -----------------------------------------------------------------------------
# Same report, but requesting only the "raw" section.
TidyMultiqc::load_multiqc(
  multiqc_data_path,
  sections = "raw"
)

## -----------------------------------------------------------------------------
# Request the "raw" and "general" sections together, then report how many
# columns the combined data frame has.
df_both <- TidyMultiqc::load_multiqc(
  multiqc_data_path,
  sections = c("raw", "general")
)
ncol(df_both)

## -----------------------------------------------------------------------------
library(magrittr)

# Bar chart: one column per sample ID, with the general-stats duplication
# percentage as the bar height.
ggplot2::ggplot(
  df,
  ggplot2::aes(x = metadata.sample_id, y = general.percent_duplication)
) +
  ggplot2::geom_col()

## -----------------------------------------------------------------------------
# One-sample t-test of the `general.percent_gc` column of `df` against a
# hypothesised mean of 41 (the `mu` argument).
t.test(df$general.percent_gc, mu=41)

## -----------------------------------------------------------------------------
# Custom metadata callback: derive two metadata fields from the sample ID.
TidyMultiqc::load_multiqc(
  multiqc_data_path,
  find_metadata = function(sample, parsed) {
    # Break the sample ID apart on underscores; `[[1]]` unwraps the single
    # list element that str_split() returns for one input string.
    id_parts <- stringr::str_split(sample, "_")[[1]]
    # The first piece is reported as the batch, the second as the sample.
    batch_id <- id_parts[[1]]
    sample_name <- id_parts[[2]]
    c(batch = batch_id, sample = sample_name)
  }
)

## -----------------------------------------------------------------------------
# Custom metadata callback: derive metadata from the name of the FastQC
# output file recorded for each sample in the report's data sources.
TidyMultiqc::load_multiqc(
  multiqc_data_path,
  find_metadata = function(sample, parsed) {
    # This gives us the path to the fastqc output file
    filepath <- parsed$report_data_sources$FastQC$all_sections[[sample]]
    # Split into path segments
    path_segments <- stringr::str_split(filepath, "/")[[1]]
    # The filename is the last path segment
    filename <- dplyr::last(path_segments)
    # Split the filename using dots and underscores
    name_segments <- stringr::str_split(filename, "[_\\.]")[[1]]
    # Arbitrarily assign names for the outputs: "A", "B", "C", ...
    # seq_along() is used instead of 1:length() so the index can never count
    # backwards, and set_names() is called directly so the callback does not
    # depend on a pipe operator being attached.
    purrr::set_names(name_segments, LETTERS[seq_along(name_segments)])
  }
)

## -----------------------------------------------------------------------------
# Custom metadata callback: ignore the sample ID and return two report-level
# entries taken from `parsed` (presumably the parsed MultiQC JSON -- confirm).
TidyMultiqc::load_multiqc(
  multiqc_data_path,
  find_metadata = function(sample, parsed) {
    # Single-bracket subsetting keeps the selected entries together with
    # their names.
    report_fields <- c("config_creation_date", "config_version")
    parsed[report_fields]
  }
)

## ----message=FALSE, warning=FALSE---------------------------------------------
# Keep only the columns of `df_both` whose name contains "quality".
dplyr::select(df_both, dplyr::contains("quality"))

## ---- eval = FALSE------------------------------------------------------------
#  TidyMultiqc::list_plots(multiqc_data_path)

## ---- echo = FALSE------------------------------------------------------------
# List the plots available in the report, shortening every column's values to
# at most 50 characters so the table stays readable when printed.
dplyr::mutate(
  TidyMultiqc::list_plots(multiqc_data_path),
  dplyr::across(
    dplyr::everything(),
    function(column) stringr::str_trunc(column, 50)
  )
)

## -----------------------------------------------------------------------------
# Load only the "plot" section, restricted to the per-sequence quality scores
# plot. This rebinds `df`, which the chunks below operate on.
df <- TidyMultiqc::load_multiqc(
  multiqc_data_path,
  sections = "plot",
  plots = "fastqc_per_sequence_quality_scores_plot"
)
df

## -----------------------------------------------------------------------------
# Show the plot data stored for the first row of `df`; the plot column is a
# list column, so `[[1]]` extracts that row's entry.
df[["plot.fastqc_per_sequence_quality_scores_plot"]][[1]]

## -----------------------------------------------------------------------------
# Expand the nested plot column so each entry's rows become rows of `df`.
tidyr::unnest(df, cols = plot.fastqc_per_sequence_quality_scores_plot)

## -----------------------------------------------------------------------------
# Total reads per sample, computed on the unnested plot data: group the
# expanded rows by sample ID and add up the `y` values within each group.
tidyr::unnest(df, cols = plot.fastqc_per_sequence_quality_scores_plot) %>%
  dplyr::group_by(metadata.sample_id) %>%
  dplyr::summarise(total_reads = sum(y))

## -----------------------------------------------------------------------------
# Same total as a per-row computation: sum the `y` column of each nested plot
# data frame, then drop the nested column by assigning it NULL.
dplyr::mutate(
  df,
  total_reads = purrr::map_dbl(
    plot.fastqc_per_sequence_quality_scores_plot,
    function(plot_df) sum(plot_df$y)
  ),
  plot.fastqc_per_sequence_quality_scores_plot = NULL
)

## -----------------------------------------------------------------------------
# Per-sample summary statistics via HistDat, working on the unnested data.
# NOTE(review): the output columns are named "*_coverage" although the
# histogram is built from the per-sequence quality scores plot -- confirm the
# intended naming.
df %>%
  tidyr::unnest(cols = plot.fastqc_per_sequence_quality_scores_plot) %>%
  dplyr::group_by(metadata.sample_id) %>%
  # Build one HistDat object per sample from its x (values) and y (counts);
  # wrapping it in list() stores it in a list column, and .keep = "unused"
  # drops the x and y columns that were consumed.
  dplyr::mutate(hist = list(HistDat::HistDat(vals = x, counts = y)), .keep = "unused") %>%
  dplyr::mutate(
    # Every row of a group carries the same HistDat object, so take the first.
    mean_coverage = hist %>% dplyr::first() %>% mean(),
    median_coverage = hist %>% dplyr::first() %>% median(),
    max_coverage = hist %>% dplyr::first() %>% max(),
    hist = NULL
  ) %>%
  # The statistics are repeated on every row of a group; keep one row each.
  dplyr::slice(1) %>%
  # Drop the grouping so the result does not stay grouped by sample ID.
  dplyr::ungroup()

## -----------------------------------------------------------------------------
# Same statistics without unnesting: map over the nested plot data frames,
# compute one row of statistics per sample, and let mutate() add the
# resulting columns to `df`. The nested column is then dropped.
df %>%
  dplyr::mutate(
    purrr::map_dfr(
      plot.fastqc_per_sequence_quality_scores_plot,
      function(score_plot) {
        # x holds the values and y the counts of the histogram.
        score_hist <- HistDat::HistDat(
          vals = score_plot$x,
          counts = score_plot$y
        )
        list(
          mean_coverage = mean(score_hist),
          median_coverage = median(score_hist),
          max_coverage = max(score_hist)
        )
      }
    ),
    plot.fastqc_per_sequence_quality_scores_plot = NULL
  )


## -----------------------------------------------------------------------------
# Demonstrate a custom plot parser supplied through `plot_parsers`.
TidyMultiqc::load_multiqc(
  multiqc_data_path,
  sections = "plot",
  plots = "fastqc_per_sequence_quality_scores_plot",
  plot_parsers = list(
    # Fake parser registered under the name "xy_line": it ignores both of its
    # arguments and always returns the iris dataset, wrapped in a list, as
    # the "plot_name" entry of a single sample called "sample_1".
    xy_line = function(plot_data, name) {
      fake_columns <- list(plot_name = list(iris))
      list(sample_1 = fake_columns)
    }
  )
)

# Any scripts or data that you put into this service are public.

# TidyMultiqc documentation built on Sept. 25, 2022, 9:05 a.m.

# rdrr.io home R language documentation Run R code online

# CRAN packages Bioconductor packages R-Forge packages GitHub packages

# Note that we can't provide technical support on individual packages. You should contact the package authors for that.

# TidyMultiqc
# Converts 'MultiQC' Reports into Tidy Data Frames

# inst/doc/TidyMultiqc.R
# In TidyMultiqc: Converts 'MultiQC' Reports into Tidy Data Frames

# Try the TidyMultiqc package in your browser

# R Package Documentation

# Browse R Packages

# We want your feedback!

# TidyMultiqc Converts 'MultiQC' Reports into Tidy Data Frames

# inst/doc/TidyMultiqc.R In TidyMultiqc: Converts 'MultiQC' Reports into Tidy Data Frames

# Try the TidyMultiqc package in your browser

# R Package Documentation

# Browse R Packages

# We want your feedback!

# TidyMultiqc
# Converts 'MultiQC' Reports into Tidy Data Frames

# inst/doc/TidyMultiqc.R
# In TidyMultiqc: Converts 'MultiQC' Reports into Tidy Data Frames