Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.dim = c(6, 4)
)
## ----setup--------------------------------------------------------------------
library(bakR)
# Packages that are NOT automatically installed when bakR is installed
library(DESeq2)
library(pheatmap)
# Packages which are installed when bakR is installed
library(dplyr)
library(magrittr)
library(ggplot2)
library(stats)
# Set the seed for reproducibility
set.seed(123)
## -----------------------------------------------------------------------------
### Only differential synthesis
# Simulate a nucleotide recoding dataset
sim_data <- Simulate_relative_bakRData(1000, 1000000,
num_ks_DE = c(0, 300),
num_kd_DE = c(0, 0))
# This will simulate 1000 features, 1 million reads, 2 experimental conditions
# and 3 replicates for each experimental condition. 300 features will be differentially
# transcribed, and there will be no differential stability
# See ?Simulate_bakRData for details regarding tunable parameters
# Extract simulated bakRData object
bakRData <- sim_data$bakRData
## Run the efficient model
Fit_s <- bakRFit(bakRData)
### Only differential stability
# Simulate a nucleotide recoding dataset
sim_data <- Simulate_relative_bakRData(1000, 1000000,
num_ks_DE = c(0, 0),
num_kd_DE = c(0, 300))
# This will simulate 1000 features, 1 million reads, 2 experimental conditions
# and 3 replicates for each experimental condition. 300 features will be differentially
# transcribed, and there will be no differential stability
# See ?Simulate_bakRData for details regarding tunable parameters
# Extract simulated bakRData object
bakRData <- sim_data$bakRData
## Run the efficient model
Fit_d <- bakRFit(bakRData)
## -----------------------------------------------------------------------------
### Only differential synthesis dataset
# Get the count matrix from bakR
Counts <- Fit_s$Data_lists$Count_Matrix
# Experimental conditions for each sample
# There are 6 s4U treated samples (3 replicates of each condition)
# In addition, there are 2 -s4U control samples (1 for each condition)
## s4U conditions
# 1st three samples are reference (ref) samples
# Next three samples are experimental (exp) samples
conditions_s4U <- as.factor(rep(c("ref", "exp"), each = 3))
## -s4U control conditions
# 1st sample is reference, next is experimental
conditions_ctl <- as.factor(c("ref", "exp"))
# Combined s4U and -s4U control conditions
conditions <- c(conditions_s4U, conditions_ctl)
# Make the colData input for DESeq2
colData <- data.frame(conditions = conditions)
rownames(colData) <- colnames(Counts)
# Make DESeq2 data object
dds_s <- DESeqDataSetFromMatrix(countData = Counts,
colData = colData,
design = ~conditions)
# Fit DESeq2 model
ddso_s <- DESeq(dds_s)
# Extract results of experimental vs. reference comparison
reso_s <- results(ddso_s, contrast = c("conditions", "exp", "ref"))
### Only differential stability dataset
# Get the other count matrix from bakR
Counts <- Fit_d$Data_lists$Count_Matrix
# Make DESeq2 data object
dds_d <- DESeqDataSetFromMatrix(countData = Counts,
colData = colData,
design = ~conditions)
# Fit DESeq2 model
ddso_d <- DESeq(dds_d)
# Extract results of experimental vs. reference comparison
reso_d <- results(ddso_d, contrast = c("conditions", "exp", "ref"))
## ----results = 'hide'---------------------------------------------------------
### Only differential synthesis:
# Convert to data frame
reso_s <- as.data.frame(reso_s)
# Make data frame
DE_df_s <- data.frame(XF = row.names(reso_s),
L2FC_RNA = reso_s$log2FoldChange,
DE_score = reso_s$stat,
DE_se = reso_s$lfcSE,
DE_pval = reso_s$pval,
DE_padj = reso_s$padj)
### Only differential degradation:
# Convert to data frame
reso_d <- as.data.frame(reso_d)
# Make data frame
DE_df_d <- data.frame(XF = row.names(reso_d),
L2FC_RNA = reso_d$log2FoldChange,
DE_score = reso_d$stat,
DE_se = reso_d$lfcSE,
DE_pval = reso_d$pval,
DE_padj = reso_d$padj)
## ----results = 'hide'---------------------------------------------------------
# Decreasing sims parameter to speed up; wouldn't normally suggest this if you
# want higher precision mechanism p-values, discussed later
Mechs_s <- DissectMechanism(Fit_s, DE_df_s,
sims = 1000000)
Mechs_d <- DissectMechanism(Fit_d, DE_df_d,
sims = 1000000)
## ----fig.align='center'-------------------------------------------------------
# Nice red to blue color gradient
# Feel free to use any coloring your heart desires
col <- c("#053061", "#2166AC", "#4393C3", "#92C5DE",
"#D1E5F0", "#F7F7F7", "#FDDBC7", "#F4A582",
"#D6604D", "#B2182B", "#67001F")
# Heatmap for differential synthesis only dataset
pheatmap(Mechs_s$Heatmap_df, cluster_cols = FALSE, show_rownames = FALSE, color = col)
# Heatmap for differential degradation only dataset
pheatmap(Mechs_d$Heatmap_df, cluster_cols = FALSE, show_rownames = FALSE, color = col)
## ----fig.align='center'-------------------------------------------------------
# Scatter plot of L2FC(kdeg) vs. L2FC(RNA) colored by mechanism test stat
# Gotta transform the mech_stat because it spans many orders of magnitude
ggplot(Mechs_s$Mechanism_df, aes(x = L2FC_kdeg, y = L2FC_RNA, color = log10(abs(mech_stat) + 1)*sign(mech_stat))) +
geom_point() +
theme_classic() +
scale_color_viridis_c() +
xlab("L2FC(kdeg) from bakR") +
ylab("L2FC(RNA) from DESeq2") +
labs(color = "Mechanism")
## ----fig.align='center'-------------------------------------------------------
# Scatter plot of L2FC(kdeg) vs. L2FC(RNA) colored by mechanism test stat
# Gotta transform the mech_stat because it spans many orders of magnitude
ggplot(Mechs_d$Mechanism_df, aes(x = L2FC_kdeg, y = L2FC_RNA, color = log10(abs(mech_stat) + 1)*sign(mech_stat))) +
geom_point() +
theme_classic() +
scale_color_viridis_c() +
xlab("L2FC(kdeg) from bakR") +
ylab("L2FC(RNA) from DESeq2") +
labs(color = "Mechanism")
## ----fig.align='center'-------------------------------------------------------
## Grid to plot on
# Grid size in each axis
n <- 300
# DE z-scores
DE_score <- rep(seq(from = -10, to = 10, length.out = n), times = n)
# bakR z-scores
bakR_score <- rep(seq(from = -10, to = 10, length.out = n), each = n)
grid_df <- tibble(DE = DE_score,
bakR = bakR_score)
# Mechanism score
grid_df <- grid_df %>%
mutate(Mech = ifelse(DE > 0,
(bakR + 2)*DE,
(bakR - 2)*DE))
# Calculate p-value
grid_df <- grid_df %>%
mutate(Mech_pval = stats::df(abs(Mech), df1 = 2, df2 = 2, ncp = 2))
# Visualize test statistic
ggplot(grid_df, aes(x = bakR, y = DE, z = 0.03*Mech)) +
geom_contour_filled() +
theme_classic() +
xlab("bakR score") +
ylab("DE score")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.