knitr::opts_chunk$set(fig.width = 6, fig.height = 5, cache = FALSE) library("happyCompare") library("tidyverse") library("ggplot2") theme_set(theme_bw())
Loading demo data from a happyCompare
samplesheet creates a happy_compare
object:
samplesheet_path <- "vignettes/pcrfree_vs_nano.subset.csv" happy_compare <- read_samplesheet(samplesheet_path, lazy = TRUE)
happy_compare <- readRDS("pcrfree_vs_nano.subset.Rds")
That contains the following fields:
samplesheet
: the original samplesheethappy_results
: a list of happy_result
objects as defined in happyR
build_metrics
: a list of data.frames
with custom metricsids
: a vector of build idssapply(happy_compare, class)
hap.py results and samplesheet metadata can be accessed with extract_metrics()
, leaving them ready for downstream analysis:
e <- extract_metrics(happy_compare, table = "summary") class(e)
# extract performance metrics and tabulate mean plus/minus SD per group and variant type extract_metrics(happy_compare, table = "summary") %>% filter(Filter == "PASS") %>% hc_summarise_metrics(df = ., group_cols = c("Group.Id", "Type")) %>% knitr::kable()
# extract ROC metrics and plot a precision-recall curves, e.g. for PASS INDEL extract_metrics(happy_compare, table = "pr.indel.pass") %>% hc_plot_roc(happy_roc = ., type = "INDEL", filter = "PASS")
# link build metrics to hap.py results summary <- extract_metrics(happy_compare, table = "summary") build_metrics <- extract_metrics(happy_compare, table = "build.metrics") merged_df <- summary %>% inner_join(build_metrics) merged_df %>% filter(Type == "INDEL", Filter == "PASS") %>% ggplot(aes(x = mean_coverage, y = METRIC.Precision, group = Group.Id)) + geom_point(aes(color = Group.Id)) + ylab("INDEL_precision")
# extract stratified counts and visualise highest density intervals for recall in level 0 subsets hdi <- extract_metrics(happy_compare, table = "extended") %>% filter(Subtype == "*", Filter == "PASS", Subset.Level == 0, Subset %in% c("high.at", "high.gc")) %>% estimate_hdi(successes_col = "TRUTH.TP", totals_col = "TRUTH.TOTAL", group_cols = c("Group.Id", "Subset", "Type"), aggregate_only = FALSE) hdi %>% mutate(Subset = factor(Subset, levels = rev(unique(Subset)))) %>% filter(replicate_id == ".aggregate") %>% ggplot(aes(x = estimated_p, y = Subset, group = Subset)) + geom_point(aes(color = Group.Id), size = 2) + geom_errorbarh(aes(xmin = lower, xmax = upper, color = Group.Id), height = 0.4) + facet_grid(. ~ Type) + scale_colour_manual(values = c("#E69F00", "#56B4E9")) + theme(legend.position = "bottom") + ggtitle("Recall estimates across L0 subsets") + xlab("Recall") + ylab("") + xlim(0.7, 1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.