Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.dim = c(6, 4)
)
## ----setup--------------------------------------------------------------------
library(bakR)
# Packages that are NOT automatically installed when bakR is installed
library(DESeq2)
# Packages which are installed when bakR is installed
library(dplyr)
library(magrittr)
library(ggplot2)
library(stats)
# Set the seed for reproducibility
set.seed(123)
## -----------------------------------------------------------------------------
# Simulate a nucleotide recoding dataset
sim_data <- Simulate_bakRData(1000,
num_kd_DE = c(0, 0),
num_ks_DE = c(0, 200))
# This will simulate 500 features, 2 experimental conditions
# and 3 replicates for each experimental condition
# See ?Simulate_bakRData for details regarding tunable parameters
# Extract simulated bakRData object
bakRData <- sim_data$bakRData
# Extract simualted ground truths
sim_truth <- sim_data$sim_list
## Run the efficient model
# We'll tell it what the mutation rates are just for efficiency's sake
Fit <- bakRFit(bakRData, pnew = rep(0.05, times = 6), pold = 0.001)
## ----results = 'hide'---------------------------------------------------------
# Get the count matrix from bakR
Counts <- Fit$Data_lists$Count_Matrix
# Experimental conditions for each sample
# There are 6 s4U treated samples (3 replicates of each condition)
# In addition, there are 2 -s4U control samples (1 for each condition)
## s4U conditions
# 1st three samples are reference (ref) samples
# Next three samples are experimental (exp) samples
conditions_s4U <- as.factor(rep(c("ref", "exp"), each = 3))
## -s4U control conditions
# 1st sample is reference, next is experimental
conditions_ctl <- as.factor(c("ref", "exp"))
# Combined s4U and -s4U control conditions
conditions <- c(conditions_s4U, conditions_ctl)
# Make the colData input for DESeq2
colData <- data.frame(conditions = conditions)
rownames(colData) <- colnames(Counts)
# Take a look at the colData object
print(t(colData))
## ----echo = FALSE, warning = FALSE--------------------------------------------
knitr::kable(t(colData))
## -----------------------------------------------------------------------------
dds <- DESeqDataSetFromMatrix(countData = Counts,
colData = colData,
design = ~conditions)
## -----------------------------------------------------------------------------
ddso <- DESeq(dds)
# Extract results of experimental vs. reference comparison
reso <- results(ddso, contrast = c("conditions", "exp", "ref"))
# Look at the column names of the reso object
colnames(as.data.frame(reso))
## -----------------------------------------------------------------------------
ksyn_df <- data.frame(L2FC = reso$log2FoldChange + Fit$Fast_Fit$Effects_df$L2FC_kdeg,
Gene = Fit$Fast_Fit$Effects_df$XF)
## -----------------------------------------------------------------------------
ksyn_df$se <- sqrt(reso$lfcSE^2 + (Fit$Fast_Fit$Effects_df$se*log2(exp(1)))^2 )
## -----------------------------------------------------------------------------
# Calculate p-value using asymptotic Wald test
ksyn_df <- ksyn_df %>%
mutate(pval = 2*pnorm(-abs(L2FC/se)),
padj = p.adjust(pval, method = "BH"))
## ----fig.align='center'-------------------------------------------------------
# Add conclusion at 0.01 FDR control
ksyn_df <- ksyn_df %>%
mutate(conclusion = as.factor(ifelse(padj < 0.01,
ifelse(L2FC < 0, "Decreased txn", "Increased txn"),
"Not sig.")))
# Make volcano plot
ksyn_volc <- ggplot(ksyn_df, aes(x = L2FC, y = -log10(padj), color = conclusion)) +
theme_classic() +
geom_point(size = 1) +
xlab("L2FC(ksyn)") +
ylab("-log10(padj)") +
scale_color_manual(values = c("blue", "orange", "gray"))
# Observe volcano plot
ksyn_volc
## ----fig.align='center'-------------------------------------------------------
# Add conclusion at 0.01 FDR control
ksyn_df <- ksyn_df %>%
mutate(result = as.factor(ifelse(padj < 0.01,
ifelse(as.integer(Gene) <= 800, "FP", "TP"),
ifelse(as.integer(Gene) <= 800, "TN", "FN"))))
# Make volcano plot
ksyn_results <- ggplot(ksyn_df, aes(x = L2FC, y = -log10(padj), color = result)) +
theme_classic() +
geom_point(size = 1) +
xlab("L2FC(ksyn)") +
ylab("-log10(padj)") +
scale_color_manual(values = c("black", "gray", "forestgreen", "blue"))
# Observe volcano plot
ksyn_results
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.