knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) options(tibble.print_min = 4L, tibble.print_max = 4L) library(JACUSA2helper) library(magrittr) library(plyranges) library(dplyr) library(ggplot2) set.seed(777)
A complementary genetic approach is an extension of the TRIBE technique called DART-seq @Meyer2019.
Meyer applied DART-seq on HEK293 cells where the APOBEC domain was fused to the YTH domain from human YTHDF2 (WT and mutated).
In essence, new C-to-U editing events that are significantly enriched in the YTHDF2-WT, but not in the binding domain mutant are bona fide candidates for m6A RNA modification.
JACUSA2helper supports the analysis of DARTseq data too (see below). Input is typically read in via read_result
.
data("DARTseq") filtered <- DARTseq # calculate number of unique base calls per site GenomicRanges::mcols(filtered)[["bc"]] <- lapply(filtered$bases, Reduce, f = "+") %>% Reduce(f="+") %>% base_count() filtered <- filtered %>% dplyr::filter(score >= 2) %>% dplyr::filter(All(cov$cond1 >= 10) & All(cov$cond2 >= 10)) %>% dplyr::filter(bc <= 2) %>% dplyr::filter(robust(bases))
The above code sequence filters the input to include only sites, which have a call-2 score higher than 2, a coverage across all replicates of at least 10. Moreover, the number of distinct base calls should be less or equal 2 (preferably C and U/T) and the base substitution should be visible in all replicates of at least one condition.
# sum base call counts of condition / RNA replicates rna_bases <- Reduce("+", filtered$bases$cond2) # we don't need lapply_repl, because we don't operate on all replicates from all # conditions - only condition 2 / RNA ref2YTHmut <- base_sub(rna_bases, filtered$ref) table(ref2YTHmut)
rnaref <- Reduce("+", filtered$bases$cond1) ref2YTH <- base_sub(rnaref, filtered$ref) table(ref2YTH, ref2YTHmut)
The following bar plot summarizes the number of sites by characteristic base substitutions relative to the reference sequence. We observe a lot more C-to-U transitions in the active YTH binding domain as compared to the inactivated binding domain.
tidyr::tibble( base_sub = c(ref2YTH, ref2YTHmut), Source = c( rep("YTH",length(ref2YTH)), rep("YTHmut",length(ref2YTHmut))) ) %>% # ggplot requires a data frame ggplot2::ggplot(ggplot2::aes(x = base_sub, fill = Source)) + ggplot2::geom_bar(position = "dodge") + ggplot2::xlab("Base substitution") + ggplot2::scale_fill_discrete(name = "Base substitution") + ggplot2::theme( legend.position = "bottom", axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1) )
tempo <- lapply_cond( lapply_cond(filtered$bases, function(b) { Reduce("+", b) } ), base_ratio ) GenomicRanges::mcols(filtered)[["YTH.t.ratio"]] <- tempo$cond1$T GenomicRanges::mcols(filtered)[["YTHmut.t.ratio"]] <- tempo$cond2$T GenomicRanges::mcols(filtered)[["pred"]] <- ifelse( tempo$cond1$T > tempo$cond2$T, "Positives", "Negatives" )
The following scatter plot summarizes the T/(C+T) frequencies and labels them.
GenomicRanges::mcols(filtered)[c("YTHmut.t.ratio", "YTH.t.ratio", "pred")] %>% as.data.frame() %>% ggplot2::ggplot(ggplot2::aes(x=YTHmut.t.ratio, y=YTH.t.ratio, color=pred)) + ggplot2::geom_point() + ggplot2::xlab("Mutated YTH binding domain (T/(C+T))") + ggplot2::ylab("Active YTH binding domain (T/(C+T))") + ggplot2::scale_color_manual(values=c('#999999', '#E69F00'), name = "") + ggplot2::annotate(geom="text", x=0.8, y=0.25, label="Negatives", color="#999999", size=6) + ggplot2::annotate(geom="text", x=0.25, y=0.8, label="Positives", color="#E69F00", size=6) + ggplot2::theme_classic() + ggplot2::theme(legend.position = "none")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.