Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----eval=FALSE---------------------------------------------------------------
# # The simulator
# install.packages("remotes")
# remotes::install_github("tgbrooks/dependentsimr")
#
# # DESeq2 - optional dependency only needed if doing RNA-seq data sets
# # Needed for type = "DESeq2"
# install.packages("BiocManager")
# BiocManager::install("DESeq2")
#
# # Two more dependencies that are used in some simulation modes
# # Needed for method = "corpcor"
# install.packages("corpcor")
#
# # Needed for method = "spiked Wishart"
# install.packages("sparsesvd")
#
# # Extras needed for this vignette
# install.packages("tidyverse")
## -----------------------------------------------------------------------------
library(dependentsimr)
suppressWarnings(suppressMessages(library(tidyverse, DESeq2)))
## -----------------------------------------------------------------------------
set.seed(0)
## -----------------------------------------------------------------------------
head(read_counts)
## -----------------------------------------------------------------------------
count_matrix <- as.matrix(read_counts[,2:ncol(read_counts)])
rownames(count_matrix) <- read_counts$gene_id
## -----------------------------------------------------------------------------
rs_pca <- get_random_structure(list(data=count_matrix), method="pca", rank=2, types="DESeq2")
## -----------------------------------------------------------------------------
rs_wishart <- get_random_structure(list(data=count_matrix), rank=11, method="spiked Wishart", types="DESeq2")
## -----------------------------------------------------------------------------
rs_corpcor <- get_random_structure(list(data=count_matrix), method="corpcor", types="DESeq2")
## -----------------------------------------------------------------------------
rs_indep <- remove_dependence(rs_pca)
## -----------------------------------------------------------------------------
actual_library_sizes <- count_matrix |> apply(2, sum)
N_SAMPLES <- 6
library_sizes <- sample(actual_library_sizes / mean(actual_library_sizes), size=N_SAMPLES, replace=TRUE)
## -----------------------------------------------------------------------------
control_sim_data <- draw_from_multivariate_corr(rs_pca, n_samples=N_SAMPLES, size_factors=library_sizes)$data
## -----------------------------------------------------------------------------
# fraction of all genes to modify in treatment
FRAC_DE <- 0.1
# Min and max value for the log fold change (LFC) for each DE gene chosen
# non-DE genes will have a LFC of 0
LFC_MIN <- 0.5
LFC_MAX <- 4.0
N_DE <- floor(FRAC_DE * nrow(control_sim_data))
# Pick which genes are DE
de_genes <- sample(nrow(control_sim_data), N_DE)
# Pick the LFCs of each of those genes
de_lfc <- runif(N_DE, LFC_MIN, LFC_MAX) * sample(c(-1, 1), size=N_DE, replace=TRUE)
## -----------------------------------------------------------------------------
rs_treatment <- rs_pca
rs_treatment$marginals$data$q[de_genes] <- 2^(de_lfc) * rs_treatment$marginals$data$q[de_genes]
## -----------------------------------------------------------------------------
treatment_sim_data <- draw_from_multivariate_corr(rs_treatment, n_samples=N_SAMPLES, size_factors=library_sizes)$data
## -----------------------------------------------------------------------------
sim_data <- cbind(control_sim_data, treatment_sim_data)
pca <- prcomp(t(sim_data), center=TRUE, scale.=TRUE, rank.=2)
pca_points <- predict(pca, t(sim_data))
pca_data <- tibble(x=pca_points[,1], y=pca_points[,2]) |>
mutate(group = c(rep("control", N_SAMPLES), rep("treatment", N_SAMPLES)))
ggplot(pca_data, aes(x, y, color=group)) +
geom_point() +
labs(x = "PC1", y = "PC2")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.