Nothing
## ----setup, echo=FALSE, message=FALSE, warning=FALSE--------------------------
knitr::opts_chunk$set(echo = TRUE)
## ----install_bioc, eval=FALSE-------------------------------------------------
# BiocManager::install("scPCA")
## ----install_github, eval=FALSE-----------------------------------------------
# remotes::install_github("PhilBoileau/scPCA")
## ----load_libs, message=FALSE, warning=FALSE----------------------------------
library(dplyr)
library(ggplot2)
library(ggpubr)
library(elasticnet)
library(scPCA)
library(microbenchmark)
## ----PCA_sim------------------------------------------------------------------
# set seed for reproducibility
set.seed(1742)
# load data
data(toy_df)
# perform PCA
pca_sim <- prcomp(toy_df[, 1:30])
# plot the 2D rep using first 2 components
df <- as_tibble(list("PC1" = pca_sim$x[, 1],
"PC2" = pca_sim$x[, 2],
"label" = as.character(toy_df[, 31])))
p_pca <- ggplot(df, aes(x = PC1, y = PC2, colour = label)) +
ggtitle("PCA on Simulated Data") +
geom_point(alpha = 0.5) +
theme_minimal()
p_pca
## ----sPCA_sim-----------------------------------------------------------------
# perform sPCA on toy_df for a range of L1 penalty terms
penalties <- exp(seq(log(10), log(1000), length.out = 6))
df_ls <- lapply(penalties, function(penalty) {
spca_sim_p <- elasticnet::spca(toy_df[, 1:30], K = 2, para = rep(penalty, 2),
type = "predictor", sparse = "penalty")$loadings
spca_sim_p <- as.matrix(toy_df[, 1:30]) %*% spca_sim_p
spca_out <- list("SPC1" = spca_sim_p[, 1],
"SPC2" = spca_sim_p[, 2],
"penalty" = round(rep(penalty, nrow(toy_df))),
"label" = as.character(toy_df[, 31])) %>%
as_tibble()
return(spca_out)
})
df <- dplyr::bind_rows(df_ls)
# plot the results of sPCA
p_spca <- ggplot(df, aes(x = SPC1, y = SPC2, colour = label)) +
geom_point(alpha = 0.5) +
ggtitle("SPCA on Simulated Data for Varying L1 Penalty Terms") +
facet_wrap(~ penalty, nrow = 2) +
theme_minimal()
p_spca
## ----cPCA_sim-----------------------------------------------------------------
cpca_sim <- scPCA(target = toy_df[, 1:30],
background = background_df,
penalties = 0,
n_centers = 4)
# create a dataframe to be plotted
cpca_df <- cpca_sim$x %>%
as_tibble() %>%
mutate(label = toy_df[, 31] %>% as.character)
colnames(cpca_df) <- c("cPC1", "cPC2", "label")
# plot the results
p_cpca <- ggplot(cpca_df, aes(x = cPC1, y = cPC2, colour = label)) +
geom_point(alpha = 0.5) +
ggtitle("cPCA of Simulated Data") +
theme_minimal()
p_cpca
## ----scPCA_sim----------------------------------------------------------------
# run scPCA for using 40 logarithmically seperated contrastive parameter values
# and possible 20 L1 penalty terms
scpca_sim <- scPCA(target = toy_df[, 1:30],
background = background_df,
n_centers = 4,
penalties = exp(seq(log(0.01), log(0.5), length.out = 10)),
alg = "var_proj")
# create a dataframe to be plotted
scpca_df <- scpca_sim$x %>%
as_tibble() %>%
mutate(label = toy_df[, 31] %>% as.character)
colnames(scpca_df) <- c("scPC1", "scPC2", "label")
# plot the results
p_scpca <- ggplot(scpca_df, aes(x = scPC1, y = scPC2, colour = label)) +
geom_point(alpha = 0.5) +
ggtitle("scPCA of Simulated Data") +
theme_minimal()
p_scpca
# create the loadings comparison plot
load_diff_df <- bind_rows(
cpca_sim$rotation %>% as.data.frame,
scpca_sim$rotation %>% as.data.frame) %>%
mutate(
sparse = c(rep("0", 30), rep("1", 30)),
coef = rep(1:30, 2)
)
colnames(load_diff_df) <- c("comp1", "comp2", "sparse", "coef")
p1 <- load_diff_df %>%
ggplot(aes(y = comp1, x = coef, fill = sparse)) +
geom_bar(stat = "identity") +
xlab("") +
ylab("") +
ylim(-1.4, 1.4) +
ggtitle("First Component") +
scale_fill_discrete(name = "Method", labels = c("cPCA", "scPCA")) +
theme_minimal()
p2 <- load_diff_df %>%
ggplot(aes(y = comp2, x = coef, fill = sparse)) +
geom_bar(stat = "identity") +
xlab("") +
ylab("") +
ylim(-1.4, 1.4) +
ggtitle("Second Component") +
scale_fill_discrete(name = "Method", labels = c("cPCA", "scPCA")) +
theme_minimal()
# build full plot via ggpubr
annotate_figure(
ggarrange(p1, p2, nrow = 1, ncol = 2,
common.legend = TRUE, legend = "right"),
top = "Leading Loadings Vectors Comparison",
left = "Loading Weights",
bottom = "Variable Index"
)
## ----cPCA_sim_cv--------------------------------------------------------------
cpca_cv_sim <- scPCA(target = toy_df[, 1:30],
background = background_df,
penalties = 0,
n_centers = 4,
cv = 3)
# create a dataframe to be plotted
cpca_cv_df <- cpca_cv_sim$x %>%
as_tibble() %>%
dplyr::mutate(label = toy_df[, 31] %>% as.character)
colnames(cpca_cv_df) <- c("cPC1", "cPC2", "label")
# plot the results
p_cpca_cv <- ggplot(cpca_cv_df, aes(x = cPC1, y = cPC2, colour = label)) +
geom_point(alpha = 0.5) +
ggtitle("cPCA of Simulated Data") +
theme_minimal()
## ----scPCA_sim_cv-------------------------------------------------------------
scpca_cv_sim <- scPCA(target = toy_df[, 1:30],
background = background_df,
n_centers = 4,
cv = 3,
penalties = exp(seq(log(0.01), log(0.5), length.out = 10)),
alg = "var_proj")
# create a dataframe to be plotted
scpca_cv_df <- scpca_cv_sim$x %>%
as_tibble() %>%
mutate(label = toy_df[, 31] %>% as.character)
colnames(scpca_cv_df) <- c("scPC1", "scPC2", "label")
# plot the results
p_scpca_cv <- ggplot(scpca_cv_df, aes(x = -scPC1, y = -scPC2, colour = label)) +
geom_point(alpha = 0.5) +
ggtitle("scPCA of Simulated Data") +
theme_minimal()
## ----plot_cv_cPCA_scPCA, echo=FALSE-------------------------------------------
ggarrange(
p_cpca_cv, p_scpca_cv,
nrow = 1,
common.legend = TRUE,
legend = "right"
)
## ----microbenchmark_comparison, warning=FALSE, message=FALSE------------------
timing_scPCA <- microbenchmark(
"Zou et al." = scPCA(target = toy_df[, 1:30],
background = background_df,
n_centers = 4,
alg = "iterative"),
"Erichson et al. SPCA" = scPCA(target = toy_df[, 1:30],
background = background_df,
n_centers = 4,
alg = "var_proj"),
"Erichson et al. RSPCA" = scPCA(target = toy_df[, 1:30],
background = background_df,
n_centers = 4,
alg = "rand_var_proj"),
times = 3
)
autoplot(timing_scPCA, log = TRUE) +
ggtitle("Running Time Comparison") +
theme_minimal()
## ----sce_setup, message=FALSE-------------------------------------------------
library(splatter)
library(SingleCellExperiment)
# Simulate the three groups of cells. Mask cell heterogeneity with batch effect
params <- newSplatParams(
seed = 6757293,
nGenes = 500,
batchCells = c(150, 150),
batch.facLoc = c(0.05, 0.05),
batch.facScale = c(0.05, 0.05),
group.prob = rep(1/3, 3),
de.prob = c(0.1, 0.05, 0.1),
de.downProb = c(0.1, 0.05, 0.1),
de.facLoc = rep(0.2, 3),
de.facScale = rep(0.2, 3)
)
sim_sce <- splatSimulate(params, method = "groups")
## ----make_sce_subs, message=FALSE---------------------------------------------
# rank genes by variance
n_genes <- 250
vars <- assay(sim_sce) %>%
log1p %>%
rowVars
names(vars) <- rownames(sim_sce)
vars <- sort(vars, decreasing = TRUE)
# subset SCE to n_genes with highest variance
sce_sub <- sim_sce[names(vars[seq_len(n_genes)]),]
sce_sub
# split the subsetted SCE into target and background SCEs
tg_sce <- sce_sub[, sce_sub$Group %in% c("Group1", "Group3")]
bg_sce <- sce_sub[, sce_sub$Group %in% c("Group2")]
## ----perform_dimred, message=FALSE--------------------------------------------
# for both cPCA and scPCA, we'll set the penalties and contrasts arguments
contrasts <- exp(seq(log(0.1), log(100), length.out = 5))
penalties <- exp(seq(log(0.001), log(0.1), length.out = 3))
# first, PCA
pca_out <- prcomp(t(log1p(counts(tg_sce))), center = TRUE, scale. = TRUE)
# next, cPCA
cpca_out <- scPCA(t(log1p(counts(tg_sce))),
t(log1p(counts(bg_sce))),
n_centers = 2,
n_eigen = 2,
contrasts = contrasts,
penalties = 0,
center = TRUE,
scale = TRUE)
# finally, scPCA
scpca_out <- scPCA(t(log1p(counts(tg_sce))),
t(log1p(counts(bg_sce))),
n_centers = 2,
n_eigen = 2,
contrasts = contrasts,
penalties = penalties,
center = TRUE,
scale = TRUE,
alg = "var_proj")
# store new representations in the SingleCellExperiment object
reducedDims(tg_sce) <- SimpleList(PCA = pca_out$x[, 1:2],
cPCA = cpca_out$x,
scPCA = scpca_out$x)
tg_sce
## ----plot_red_dims, echo=FALSE, fig.asp=.25, out.width="1600px"---------------
# plot the 2D representations
# prepare PCA
pca_df <- data.frame(
PC1 = pca_out$x[, 1],
PC2 = pca_out$x[, 2],
group = tg_sce$Group,
batch = tg_sce$Batch
)
pca_p <- pca_df %>%
ggplot(aes(x = PC1, y = PC2, colour = group, shape = batch)) +
geom_point(alpha = 0.75) +
ggtitle("PCA") +
scale_colour_viridis_d(name = "Target Group", labels = c("1", "2"),
begin = 0.1, end = 0.9) +
scale_shape_discrete(name = "Batch") +
theme_minimal()
# prepare cPCA
cpca_df <- data.frame(
cPC1 = cpca_out$x[, 1],
cPC2 = cpca_out$x[, 2],
group = tg_sce$Group,
batch = tg_sce$Batch
)
cpca_p <- cpca_df %>%
ggplot(aes(x = cPC1, y = cPC2, colour = group, shape = batch)) +
geom_point(alpha = 0.75) +
ggtitle("cPCA") +
scale_colour_viridis_d(name = "Target Group", labels = c("1", "2"),
begin = 0.1, end = 0.9) +
scale_shape_discrete(name = "Batch") +
theme_minimal()
# prepare scPCA
scpca_df <- data.frame(
scPC1 = scpca_out$x[, 1],
scPC2 = scpca_out$x[, 2],
group = tg_sce$Group,
batch = tg_sce$Batch
)
scpca_p <- scpca_df %>%
ggplot(aes(x = scPC1, y = scPC2, colour = group, shape = batch)) +
geom_point(alpha = 0.75) +
ggtitle("scPCA") +
scale_colour_viridis_d(name = "Target Group", labels = c("1", "2"),
begin = 0.1, end = 0.9) +
scale_shape_discrete(name = "Batch") +
theme_minimal()
# combine the plots
ggarrange(
pca_p, cpca_p, scpca_p,
nrow = 1,
common.legend = TRUE,
legend = "right"
)
## ----bpscpca, eval=FALSE------------------------------------------------------
# # perform the same operations in parallel using BiocParallel
# library(BiocParallel)
# param <- MulticoreParam()
# register(param)
#
# # perfom cPCA
# cpca_bp <- scPCA(
# target = toy_df[, 1:30],
# background = background_df,
# contrasts = exp(seq(log(0.1), log(100), length.out = 10)),
# penalties = 0,
# n_centers = 4,
# parallel = TRUE
# )
#
# # perform scPCA
# scpca_bp <- scPCA(
# target = toy_df[, 1:30],
# background = background_df,
# contrasts = exp(seq(log(0.1), log(100), length.out = 10)),
# penalties = seq(0.1, 1, length.out = 9),
# n_centers = 4,
# parallel = TRUE
# )
## ----session_info, echo=FALSE-------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.