knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, results = "hold", fig.align = "center") reticulate::use_virtualenv("~/Desktop/Python/science/venv/", required = TRUE)
In 2017, the Tuveson Lab at Cold Spring Harbor Cancer Center published a paper written by Elyada et al that detailed the discovery of cancer-associated fibroblasts (CAFs) in mice. The subtypes were then validated in human samples affected with PDAC in a subsequent paper released in 2019. Here we will use SCISSORS to identify the CAF subtypes within the larger stroma population, fine-grained immune cell types within broadly-defined immune clusters, and ductal & PDAC subtypes within the ductal group. You can install SCISSORS from our GitHub repository.
library(VAM) # single cell GSEA library(dplyr) # tidy data library(Seurat) # single cell infrastructure library(ggplot2) # pretty plots library(SingleR) # cell type assignment library(janitor) # clean data library(magrittr) # pipes library(decoderr) # de novo deconvolution library(SCISSORS) # our package library(mixtools) # Gaussian mixture model estimation library(ggsignif) # significance bars library(patchwork) # align plots library(latex2exp) # LaTeX library(paletteer) # color palettes library(CONICSmat) # CNV estimation library(reticulate) # Python interface library(kableExtra) # pretty tables library(wesanderson) # more color palettes
import numpy as np from openTSNE import TSNEEmbedding from openTSNE import initialization from openTSNE.affinity import Multiscale from openTSNE.affinity import PerplexityBasedNN
raw_counts <- Read10X(data.dir = "~/Desktop/Data/Tuveson_v6/count/raw_feature_bc_matrix/") pdac <- CreateSeuratObject(raw_counts, project = "Elyada", min.cells = 3, min.features = 500) pdac@meta.data %<>% mutate(sample = case_when(substr(rownames(.), 17, 18) == "-1" ~ "SRR9274536", substr(rownames(.), 17, 18) == "-2" ~ "SRR9274537", substr(rownames(.), 17, 18) == "-3" ~ "SRR9274538", substr(rownames(.), 17, 18) == "-4" ~ "SRR9274539", substr(rownames(.), 17, 18) == "-5" ~ "SRR9274541", substr(rownames(.), 17, 18) == "-6" ~ "SRR9274542", substr(rownames(.), 17, 18) == "-7" ~ "SRR9274543", substr(rownames(.), 17, 18) == "-8" ~ "SRR9274544"), sex = case_when(sample == "SRR9274536" ~ "F", sample == "SRR9274537" ~ "M", sample == "SRR9274538" ~ "M", sample == "SRR9274539" ~ "M", sample == "SRR9274541" ~ "F", sample == "SRR9274542" ~ "F", sample == "SRR9274543" ~ "M", sample == "SRR9274544" ~ "M"), condition = case_when(sample == "SRR9274536" ~ "PDAC", sample == "SRR9274537" ~ "PDAC", sample == "SRR9274538" ~ "PDAC", sample == "SRR9274539" ~ "PDAC", sample == "SRR9274541" ~ "AdjNorm", sample == "SRR9274542" ~ "PDAC", sample == "SRR9274543" ~ "AdjNorm", sample == "SRR9274544" ~ "PDAC"))
rm(raw_counts) gc(verbose = FALSE)
Next, we read in a dataset of basal-like and classical PDAC marker genes that we'll use later on to perform enrichment analysis.
load("~/Desktop/Data/cmbSubtypes.211014.RData") basal_genes <- subtypeGeneList[[4]][subtypeGeneList[[4]]$BasalLike, ]$geneSymbol classical_genes <- subtypeGeneList[[4]][!subtypeGeneList[[4]]$BasalLike, ]$geneSymbol
We run the typical single cell pre-processing steps on our cells - normalization, dimension reduction, and clustering.
pdac <- PrepareData(seurat.object = pdac, n.HVG = 4000, n.PC = 20, regress.mt = FALSE, regress.cc = FALSE, which.dim.reduc = "tsne", initial.resolution = 0.4, k.val = 125, random.seed = 629)
Let's check the default t-SNE embedding & Louvain clustering.
p0 <- DimPlot(pdac, reduction = "tsne", pt.size = 0.75) + scale_color_paletteer_d("ggthemes::Classic_20") + labs(x = "t-SNE 1", y = "t-SNE 2") + theme_yehlab() + theme(legend.text = element_text(size = 10)) + guides(color = guide_legend(nrow = 1, override.aes = list(size = 4))) p0
We compute the distribution of the silhouette scores for each cluster.
sil_df <- ComputeSilhouetteScores(pdac, avg = FALSE) p1 <- ggplot(sil_df, aes(x = Cluster, y = Score, fill = Cluster)) + geom_violin(draw_quantiles = .5, color = "black", scale = "width") + scale_fill_paletteer_d("ggthemes::Classic_20") + labs(y = "Silhouette Score", x = "Louvain Clusters", fill = NULL) + theme_minimal() + theme(panel.grid = element_blank(), panel.border = element_rect(fill = NA, size = 1), legend.position = "none", axis.title.x = element_text(size = 22), axis.title.y = element_text(size = 22), axis.text = element_text(size = 16), axis.ticks = element_line(), plot.subtitle = element_text(face = "italic", size = 10))
Some of our clusters have less-than-ideal fits.
p1
Lastly, we'll identify marker genes for each of the clusters.
pdac_markers <- FindAllMarkers(pdac, logfc.threshold = 1, test.use = "wilcox", only.pos = TRUE, verbose = FALSE, random.seed = 312) %>% filter(p_val_adj < 0.05) top5_pdac_markers <- pdac_markers %>% group_by(cluster) %>% arrange(desc(avg_log2FC)) %>% slice_head(n = 5) p2a <- DotPlot(pdac, features = unique(top5_pdac_markers$gene), dot.scale = 15) + scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) + labs(color = "Expression", size = "% Expressed", y = "Louvain Cluster") + theme(axis.text.x = element_text(angle = 90, size = 16, vjust = 0.5), legend.position = "right", legend.justification = "center", panel.border = element_rect(fill = NA, size = 1, color = "black"), axis.line = element_blank(), legend.title = element_text(size = 18), axis.title.x = element_blank(), axis.title.y = element_text(size = 20), axis.text.y = element_text(size = 18)) + guides(color = guide_colorbar(title.position = "top", barheight = unit(3, units = "cm"), title.hjust = 0.5), size = guide_legend(title.position = "top", title.hjust = 0.5)) p2a
Next we'll repeat the process, excluding marker genes for each cluster that are highly expressed in other clusters.
pdac_markers_specific <- FindSpecificMarkers(seurat.object = pdac, ident.use = "seurat_clusters", de.method = "wilcox", perc.cutoff = 0.9, log2fc.cutoff = 0.1) top5_pdac_markers_specific <- pdac_markers_specific %>% group_by(cluster) %>% arrange(desc(avg_log2FC)) %>% slice_head(n = 5) p2b <- DotPlot(pdac, features = unique(top5_pdac_markers_specific$gene), dot.scale = 15) + scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) + labs(color = "Expression", size = "% Expressed", y = "Louvain Cluster") + theme(axis.text.x = element_text(angle = 90, size = 16, vjust = 0.5), legend.position = "right", legend.justification = "center", panel.border = element_rect(fill = NA, size = 1, color = "black"), axis.line = element_blank(), legend.title = element_text(size = 18), axis.title.x = element_blank(), axis.title.y = element_text(size = 20), axis.text.y = element_text(size = 18)) + guides(color = guide_colorbar(title.position = "top", barheight = unit(3, units = "cm"), title.hjust = 0.5), size = guide_legend(title.position = "top", title.hjust = 0.5)) p2b
For information on how to install the openTSNE
implementation of Fit-SNE and how to run the algorithm, please visit the excellent GitHub repository of Pavlin Policar.
First we'll run a simple, standard Fit-SNE embedding. It's necessary to make the PCA embeddings accessible by Python.
pc_df <- Embeddings(pdac, reduction = "pca")
# import data pc_df = np.array(r.pc_df) # run Fit-SNE affin = PerplexityBasedNN(pc_df, perplexity=30, metric='cosine', random_state=629) init = initialization.pca(pc_df, random_state=629) tsne1 = TSNEEmbedding(init, affin, negative_gradient_method='fft') embed1 = tsne1.optimize(n_iter=350, exaggeration=12, momentum=0.6) embed2 = embed1.optimize(n_iter=750, momentum=0.8)
The embedding looks good, so we'll use it going forwards.
embed <- as.matrix(py$embed2) rownames(embed) <- colnames(pdac) pdac@reductions$fitsne <- CreateDimReducObject(embeddings = embed, key = "FitSNE_", assay = "SCT", global = TRUE) p3 <- DimPlot(pdac, reduction = "fitsne", pt.size = 0.75) + scale_color_paletteer_d("ggthemes::Classic_20") + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme_yehlab() + theme(legend.text = element_text(size = 10)) + guides(color = guide_legend(nrow = 1, override.aes = list(size = 4))) p3
Due to the way Seurat
accesses cell embeddings, we'll need to replace our original t-SNE dimension reduction in our Seurat
object with the new Fit-SNE version. We'll keep the original Barnes-Hut t-SNE embedding under a separate name.
pdac@reductions$bh_tsne <- pdac@reductions$tsne pdac@reductions$tsne <- pdac@reductions$fitsne
rm(embed, pc_df) gc(verbose = FALSE)
Here we use the SingleR
package to identify broad cell types. The reference dataset we load is an sctransform
-normalized version of the raw counts available in celldex::BaronPancreasData()
, which consists of normal pancreas cells that were sequenced and annotated by the researchers.
sc_ref <- readRDS("/Volumes/labs/Home/Jen Jen Yeh Lab/Jack/scRNAseq/Seurat/single_cell_ref_normalized.Rds") sc_preds <- SingleR(test = data.frame(pdac@assays$SCT@data), ref = sc_ref, labels = sc_ref$label, clusters = pdac$seurat_clusters, de.method = "wilcox") pdac[["SingleR.labels.sc"]] <- sc_preds$labels[match(pdac[[]][["seurat_clusters"]], rownames(sc_preds))] pdac$SingleR.labels.sc <- case_when(pdac$SingleR.labels.sc == "acinar" ~ "Acinar", pdac$SingleR.labels.sc == "activated_stellate" ~ "Activated Stellate", pdac$SingleR.labels.sc == "quiescent_stellate" ~ "Quiescent Stellate", pdac$SingleR.labels.sc == "ductal" ~ "Ductal", pdac$SingleR.labels.sc == "macrophage" ~ "Macrophage", pdac$SingleR.labels.sc == "t_cell" ~ "T")
We can see that there's a large immune population, as well as smaller ductal, fibroblast (denoted activated stellate in the reference dataset), stromal (denoted quiescent stellate in the reference dataset), and acinar groups. These broad cell types line up with what we expected to see given the authors' original cell cluster annotations.
p4 <- DimPlot(pdac, reduction = "tsne", group.by = "SingleR.labels.sc", pt.size = 0.75) + scale_color_manual(values = paletteer_d("ggsci::default_nejm")) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme(plot.title = element_blank()) + theme_yehlab() + guides(color = guide_legend(nrow = 2, override.aes = list(size = 4))) p4
This dataset is composed of labeled & log-normalized bulk RNA-seq samples from the Human Primary Cell Atlas.
bulk_ref <- celldex::HumanPrimaryCellAtlasData() bulk_preds <- SingleR(test = data.frame(pdac@assays$SCT@data), ref = bulk_ref, labels = bulk_ref$label.main, clusters = pdac$seurat_clusters, de.method = "wilcox") pdac[["SingleR.labels.bulk"]] <- bulk_preds$labels[match(pdac[[]][["seurat_clusters"]], rownames(bulk_preds))] pdac$SingleR.labels.bulk <- case_when(pdac$SingleR.labels.bulk == "B_cell" ~ "B", pdac$SingleR.labels.bulk == "Epithelial_cells" ~ "Epithelial", pdac$SingleR.labels.bulk == "Endothelial_cells" ~ "Endothelial", pdac$SingleR.labels.bulk == "MSC" ~ "Mesenchymal Stem Cell", pdac$SingleR.labels.bulk == "Pre-B_cell_CD34-" ~ "Pre-B CD34-", pdac$SingleR.labels.bulk == "T_cells" ~ "T", TRUE ~ pdac$SingleR.labels.bulk)
The bulk reference gives us somewhat more granular labels for the immune cells, and confirms the identities of the epithelial and stromal clusters.
p5 <- DimPlot(pdac, reduction = "tsne", group.by = "SingleR.labels.bulk", pt.size = 0.75) + scale_color_manual(values = paletteer_d("ggsci::default_nejm")) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme(plot.title = element_blank()) + theme_yehlab() + guides(color = guide_legend(nrow = 2, override.aes = list(size = 4))) p5
rm(sc_preds, bulk_preds, bulk_ref, sc_ref) gc(verbose = FALSE)
One shouldn't use SingleR
as the final authority for cell types, but we were able to confirm the identities of the ductal and fibroblast clusters, which is important for this dataset as the cells we are most interested in are the cancer-associated fibroblasts (CAFs).
Next we'll attempt to identify malignant cells using single-cell copy number variation estimation as implemented in the CONCISmat
package. Details of the GMM methodology used can be found at the Diaz Lab's GitHub repository. Note: this step is memory-intensive because 1) it requires the sparse counts matrix to be cast to an in-memory dense matrix and 2) a lot of Gaussian mixture models get estimated. If your machine doesn't have a lot of RAM it might be best to skip this and manually annotate the malignant cells instead through the usage of canonical markers, GSEA, etc.
chrom_regions <- read.table("/Volumes/labs/Home/Jen Jen Yeh Lab/Jack/scRNAseq/chrom_arm_positions.txt", sep = "\t", row.names = 1, header = TRUE) gene_pos <- getGenePositions(rownames(pdac)) cpm <- t(t(as.matrix(pdac@assays$SCT@counts)) / colSums(as.matrix(pdac@assays$SCT@counts))) * 1e5 cpm <- log2(cpm + 1) norm_factor <- calcNormFactors(cpm) cnv_est <- plotAll(mat = cpm, normFactor = norm_factor, regions = chrom_regions, gene_pos = gene_pos, fname = "Elyada")
gc(verbose = FALSE)
After estimating CNVs, we cluster the cells into $k = 3$ clusters, with the hope of finding one large cluster of normal cells and two smaller clusters composed of CAFs and PDAC cells.
bic_table <- read.table("./Elyada_BIC_LR.txt", sep = "\t", row.names = 1, header = TRUE, check.names = FALSE) cand_regions <- rownames(bic_table[bic_table$`BIC difference` > 1000 & bic_table$`LRT adj. p-val` < .01, ]) pdf("~/Desktop/R/SCISSORS/vignettes/figures_supp/Elyada/CONICSmat_Heatmap_new.pdf", width = 12, height = 6) hist1 <- plotHistogram(cnv_est[, cand_regions], cpm, clusters = 3, zscoreThreshold = 3, celltypes = pdac$SingleR.labels.sc, patients = pdac$sample) dev.off()
We add the CNV-based annotations to our Seurat
object.
normal <- which(hist1 == 1) malignant <- which(hist1 != 1) pdac@meta.data$malig <- ifelse(rownames(pdac@meta.data) %in% names(normal), "Normal", "Malignant")
We add the normal vs. malignant cell labels in to our Seurat
object's metadata, then visualize the results. As expected, the malignant cells are located in the clusters identified by SingleR
as ductal cells and fibroblasts. This indicates that CONICSmat
did a solid job of estimating the CNVs - no easy feat with sparse, noisy single-cell data.
p6 <- DimPlot(pdac, reduction = "tsne", group.by = "malig", pt.size = 0.75) + scale_color_manual(values = wes_palette("Zissou1", n = 5)[c(5, 2)]) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme(plot.title = element_blank()) + theme_yehlab() + guides(color = guide_legend(nrow = 1, override.aes = list(size = 4))) p6
When looking at the SingleR
labels from the bulk reference, we see that the Ductal and Stellate cell (Fibroblast) clusters have the highest proportions of malignant cells, which is exactly what we expected to see.
pdac@meta.data %>% group_by(SingleR.labels.sc) %>% summarise(MeanMalig = mean(case_when(malig == "Malignant" ~ 1, TRUE ~ 0))) %>% mutate(MeanMalig = formattable::percent(MeanMalig)) %>% kbl(booktabs = TRUE, col.names = c("Single Cell Reference SingleR Label", "% Malignant Cells")) %>% kable_minimal(full_width = FALSE)
rm(cpm, bic_table, chrom_regions, cnv_est, gene_pos, cand_regions, hist1, normal, norm_factor, malignant, sil_df) gc(verbose = FALSE)
Next, we use Dr. Xianlu Peng's DECODER in order to deconvolve the dataset and assign weights to each cell using non-negative matrix factorization. We calculate basal & classical PDAC, normal & activated stroma, immune, and endocrine & exocrine pancreas compartment weights.
sample_wts_unscaled <- Decon_single_sample(refSet = "TCGA_RNAseq_PAAD", data = pdac@assays$SCT@data, geneIDType = "geneSymbol") sample_wts <- Norm_PDAC_weights(sample_wts_unscaled) pdac <- AddMetaData(pdac, sample_wts$Immune, "immune") pdac <- AddMetaData(pdac, sample_wts$bcRatio, "bc_ratio") pdac <- AddMetaData(pdac, sample_wts$Exocrine, "exocrine") pdac <- AddMetaData(pdac, sample_wts$Endocrine, "endocrine") pdac <- AddMetaData(pdac, sample_wts$BasalTumor, "basal_like") pdac <- AddMetaData(pdac, sample_wts$ClassicalTumor, "classical") pdac <- AddMetaData(pdac, sample_wts$NormalStroma, "norm_stroma") pdac <- AddMetaData(pdac, sample_wts$ActivatedStroma, "act_stroma")
rm(sample_wts, sample_wts_unscaled) gc(verbose = FALSE)
The basal weights are highest in a subcluster of the ductal group identified through SingleR
. This is interesting as the authors did not find evidence of the basal subtype in their paper.
p7 <- FeaturePlot(pdac, reduction = "tsne", features = "basal_like", pt.size = 0.75) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme(plot.title = element_blank()) + theme_yehlab() + NoLegend() p7
The classical weights are highest in another subcluster of the ductal cluster, and cells with high classical weights do not collocate with those that have high basal weights. The putative classical and basal PDAC cells also align closely with the cells identified through CONCISmat
as malignant.
p8 <- FeaturePlot(pdac, reduction = "tsne", features = "classical", pt.size = 0.75) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme(plot.title = element_blank()) + theme_yehlab() + NoLegend() p8
Part of the cluster identified through SingleR
as acinar cells is the only cell group with high exocrine pancreas weights.
p9 <- FeaturePlot(pdac, reduction = "tsne", features = "exocrine", pt.size = 0.75) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme(plot.title = element_blank()) + theme_yehlab() + NoLegend() p9
No cells have high endocrine pancreas weights.
p10 <- FeaturePlot(pdac, reduction = "tsne", features = "endocrine", pt.size = 0.75) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme(plot.title = element_blank()) + theme_yehlab() + NoLegend() p10
Once again, we confirm the largeness of the immune cell population in this dataset.
p11 <- FeaturePlot(pdac, reduction = "tsne", features = "immune", pt.size = 0.75) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme(plot.title = element_blank()) + theme_yehlab() + NoLegend() p11
Cells with high normal stroma stroma weights are located in the cluster identified by SingleR
as being stromal cells.
p12 <- FeaturePlot(pdac, reduction = "tsne", features = "norm_stroma", pt.size = 0.75) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme(plot.title = element_blank()) + theme_yehlab() + NoLegend() p12
Cells with high activated stroma weights are also located in the fibroblast cluster, and do not intersect with the cells that have high normal stroma scores. This indicates that SCISSORS
will most likely perform well on the fibroblast cluster and be able to quickly tease out the cell subtypes.
p13 <- FeaturePlot(pdac, reduction = "tsne", features = "act_stroma", pt.size = 0.75) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme(plot.title = element_blank()) + theme_yehlab() + NoLegend() p13
The fibroblast marker genes provided by Elyada et al match the SingleR
results defining cluster 8 as containing fibroblasts.
p14 <- FeaturePlot(pdac, reduction = "tsne", features = "COL1A1", pt.size = 0.75) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "COL1A1") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p15 <- FeaturePlot(pdac, reduction = "tsne", features = "COL3A1", pt.size = 0.75) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "COL3A1") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p16 <- FeaturePlot(pdac, reduction = "tsne", features = "LUM", pt.size = 0.75) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "LUM") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p17 <- FeaturePlot(pdac, reduction = "tsne", features = "DCN", pt.size = 0.75) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "DCN") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) (p14 | p15) / (p16 | p17)
Here's the cells we'll be reclustering.
fibro_cells <- rownames(pdac@meta.data[pdac@meta.data$seurat_clusters == 8, ]) p18 <- DimPlot(pdac, reduction = "tsne", cells.highlight = fibro_cells, cols.highlight = "navy", pt.size = 0.75) + labs(x = NULL, y = NULL) + theme_yehlab() + NoLegend() p18 / p3
Here we use ReclusterCells()
to identify subclusters within cluster 8. We find five distinct subclusters.
fibro <- ReclusterCells(pdac, which.clust = c(8, 10), merge.clusters = TRUE, n.HVG = 4000, n.PC = 20, resolution.vals = c(.05, .1, .2, .3, .4), k.vals = c(10, 20, 30, 40), redo.embedding = TRUE) fibro_pc <- Embeddings(fibro, "pca")
We'll again run Fit-SNE on the reclustered cells, for consistencies sake.
# import data fibro_pc = r.fibro_pc # run Fit-SNE affin_fibro = PerplexityBasedNN(fibro_pc, perplexity=50, metric='cosine', random_state=629) init = initialization.pca(fibro_pc, random_state=629) tsne_f = TSNEEmbedding(init, affin_fibro, negative_gradient_method='fft') embed_f1 = tsne_f.optimize(n_iter=150, exaggeration=2, momentum=0.6) embed_f2 = embed_f1.optimize(n_iter=750, exaggeration=1, momentum=0.8)
Pulling the results back into R and visualizing them shows clear separation between the subclusters. There's some noise in subcluster 0, but other than that the re-embedding looks solid.
embed_fibro <- as.matrix(py$embed_f2) rownames(embed_fibro) <- colnames(fibro) fibro@reductions$bh_tsne <- fibro@reductions$tsne fibro@reductions$tsne<- CreateDimReducObject(embeddings = embed_fibro, key = "FitSNE_", assay = "SCT", global = TRUE) p19 <- DimPlot(fibro, reduction = "tsne", pt.size = 1.5) + scale_color_manual(values = paletteer_d("ggsci::default_nejm")) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme_yehlab() + guides(color = guide_legend(nrow = 1, override.aes = list(size = 4))) p19
rm(fibro_pc, embed_fibro) gc(verbose = FALSE)
First we identify the endothelial and perivascular cells in clusters 3 & 1 using PLVAP and RGS5 expression.
p20 <- FeaturePlot(fibro, reduction = "tsne", features = "PLVAP", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "PLVAP") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p21 <- FeaturePlot(fibro, reduction = "tsne", features = "RGS5", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "RGS5") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) (p20 | p21) / p19
Next we'll use VAM
to perform GSEA on the iCAF, myCAF, and apCAF gene sets.
icaf_genes <- subtypeGeneList[[9]][subtypeGeneList[[9]]$iCAF, ]$geneSymbol icaf_genes <- ifelse(icaf_genes == "IL8", "CXCL8", icaf_genes) mycaf_genes <- subtypeGeneList[[9]][subtypeGeneList[[9]]$myCAF, ]$geneSymbol apcaf_genes_mouse <- c("Slpi", "Ptgis", "Nkain4", "Hspb1", "Cav1", "Arhgdib", "Lgals7", "Spint2", "Dmkn", "Saa3", "Clu", "Slc9a3r1", "Krt8", "Cxadr", "Ezr", "H2-Ab1", "Cd74", "Fth1") apcaf_genes_human <- ConvertGeneOrthologs(gene.vec = apcaf_genes_mouse) pan_caf_genes <- c("COL1A1", "FAP", "PDPN", "DCN", "VIM")
gene_sets <- list(icaf_genes, mycaf_genes, apcaf_genes_human, pan_caf_genes) names(gene_sets) <- c("iCAF", "myCAF", "apCAF", "Pan-CAF") for (i in seq(gene_sets)) { gene_sets[[i]] <- gene_sets[[i]][gene_sets[[i]] %in% rownames(fibro)] } fibro <- vamForSeurat(fibro, gene.set.collection = gene_sets, gamma = TRUE) DefaultAssay(fibro) <- "VAMcdf"
Let's plot the results. Clusters 0, 2, and 4 are composed of iCAF, myCAF, and apCAF cells, respectively.
p22 <- FeaturePlot(fibro, reduction = "tsne", features = "iCAF", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "iCAF") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p23 <- FeaturePlot(fibro, reduction = "tsne", features = "myCAF", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "myCAF") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p24 <- FeaturePlot(fibro, reduction = "tsne", features = "apCAF", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "apCAF") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) (p22 | p23 | p24) / p19
Here's the final cell labels for all of our stromal cells:
caf_colors <- c("darkslateblue", "coral", "darkgreen") fibro_colors <- c("firebrick", "darkgreen", "goldenrod", "coral", "darkslateblue") fibro$label <- case_when(fibro$seurat_clusters == 0 ~ "myCAF", fibro$seurat_clusters == 1 ~ "Perivascular", fibro$seurat_clusters == 2 ~ "iCAF", fibro$seurat_clusters == 3 ~ "Endothelial", fibro$seurat_clusters == 4 ~ "apCAF") Idents(fibro) <- "label" p25a <- DimPlot(fibro, reduction = "tsne", pt.size = 1.5) + scale_color_manual(values = fibro_colors) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme_yehlab() + theme(plot.title = element_blank()) + guides(color = guide_legend(nrow = 1, override.aes = list(size = 4))) p25a
We can generate marker genes for those final labels as follows:
DefaultAssay(fibro) <- "SCT" fibro_markers_specific <- FindSpecificMarkers(fibro, ident.use = "label", de.method = "wilcox", log2fc.cutoff = .25, perc.cutoff = 0.9) %>% filter(p_val_adj < 0.05) fibro_markers_specific_top5 <- fibro_markers_specific %>% arrange(desc(avg_log2FC)) %>% group_by(cluster) %>% slice_head(n = 5) p25b <- DotPlot(fibro, features = unique(fibro_markers_specific_top5$gene), dot.scale = 15) + scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) + labs(color = "Expression", size = "% Expressed", y = NULL) + theme(axis.text.x = element_text(angle = 90, size = 16, vjust = 0.5), legend.position = "right", legend.justification = "center", panel.border = element_rect(fill = NA, size = 1, color = "black"), axis.line = element_blank(), legend.title = element_text(size = 18), axis.title.x = element_blank(), axis.title.y = element_text(size = 20), axis.text.y = element_text(size = 18)) + guides(color = guide_colorbar(title.position = "top", barheight = unit(3, units = "cm"), title.hjust = 0.5), size = guide_legend(title.position = "top", title.hjust = 0.5)) p25b
Here we'll generate violin plots for each of the CAF VAM
scores.
caf <- subset(fibro, subset = label %in% c("iCAF", "myCAF", "apCAF")) caf_markers_specific <- FindSpecificMarkers(caf, ident.use = "label", de.method = "wilcox", log2fc.cutoff = 0.25, perc.cutoff = 0.9) p26a <- data.frame(t(caf@assays$VAMcdf@data)) %>% bind_cols((caf@meta.data %>% dplyr::select(label))) %>% ggplot(aes(x = label, y = apCAF, fill = label)) + geom_violin(scale = "width", draw_quantiles = 0.5, size = 1.2) + geom_signif(comparisons = list(c("apCAF", "iCAF"), c("apCAF", "myCAF")), size = 1.2, textsize = 8, margin_top = .075, test = wilcox.test, color = "black", map_signif_level = TRUE, step_increase = .15) + scale_fill_manual(values = caf_colors) + scale_y_continuous(breaks = seq(0, 1, 0.25), limits = c(0, 1.29)) + labs(y = "Elyada apCAF", x = NULL, fill = NULL) + theme_minimal() + theme(panel.grid = element_blank(), panel.border = element_rect(fill = NA, size = 1), legend.position = "none", axis.title.y = element_text(size = 24), axis.text = element_text(size = 22), axis.text.x = element_text(size = 22, angle = 45, vjust = 1, hjust = 1), axis.ticks = element_line(), plot.subtitle = element_text(face = "italic", size = 10)) p26b <- data.frame(t(caf@assays$VAMcdf@data)) %>% bind_cols((caf@meta.data %>% dplyr::select(label))) %>% ggplot(aes(x = label, y = iCAF, fill = label)) + geom_violin(scale = "width", draw_quantiles = 0.5, size = 1.2) + geom_signif(comparisons = list(c("iCAF", "apCAF"), c("iCAF", "myCAF")), size = 1.2, textsize = 8, margin_top = .075, test = wilcox.test, color = "black", map_signif_level = TRUE, step_increase = .15) + scale_fill_manual(values = caf_colors) + scale_y_continuous(breaks = seq(0, 1, 0.25), limits = c(0, 1.29)) + labs(y = "Elyada iCAF", x = NULL, fill = NULL) + theme_minimal() + theme(panel.grid = element_blank(), panel.border = element_rect(fill = NA, size = 1), legend.position = "none", axis.title.y = element_text(size = 24), axis.text = element_text(size = 22), axis.text.x = element_text(size = 22, angle = 45, vjust = 1, hjust = 1), axis.ticks = element_line(), plot.subtitle = element_text(face = "italic", size = 10)) p26c <- data.frame(t(caf@assays$VAMcdf@data)) %>% bind_cols((caf@meta.data %>% dplyr::select(label))) %>% ggplot(aes(x = label, y = myCAF, fill = label)) + geom_violin(scale = "width", draw_quantiles = 0.5, size = 1.2) + geom_signif(comparisons = list(c("myCAF", "iCAF"), c("myCAF", "apCAF")), size = 1.2, textsize = 8, margin_top = .075, test = wilcox.test, color = "black", map_signif_level = TRUE, step_increase = .15) + scale_fill_manual(values = caf_colors) + scale_y_continuous(breaks = seq(0, 1, 0.25), limits = c(0, 1.29)) + labs(y = "Elyada myCAF", x = NULL, fill = NULL) + theme_minimal() + theme(panel.grid = element_blank(), panel.border = element_rect(fill = NA, size = 1), legend.position = "none", axis.title.y = element_text(size = 24), axis.text = element_text(size = 22), axis.text.x = element_text(size = 22, angle = 45, vjust = 1, hjust = 1), axis.ticks = element_line(), plot.subtitle = element_text(face = "italic", size = 10)) p26d <- data.frame(t(caf@assays$VAMcdf@data)) %>% bind_cols((caf@meta.data %>% dplyr::select(label))) %>% ggplot(aes(x = label, y = `Pan.CAF`, fill = label)) + geom_violin(scale = "width", draw_quantiles = 0.5, size = 1.2) + scale_fill_manual(values = caf_colors) + labs(y = "Elyada Pan-CAF", x = NULL, fill = NULL) + theme_minimal() + theme(panel.grid = element_blank(), panel.border = element_rect(fill = NA, size = 1), legend.position = "none", axis.title.y = element_text(size = 24), axis.text = element_text(size = 22), axis.text.x = element_text(size = 22, angle = 45, vjust = 1, hjust = 1), axis.ticks = element_line(), plot.subtitle = element_text(face = "italic", size = 10))
The VAM
score distributions differ significantly between our labeled clusters.
p26e <- (p26a | p26b) / (p26c | p26d) p26e
We use KRT8 expression to show the ductal cells residing in clusters 5 and 9. We note that some regions of clusters 5 and 9 have no KRT8 expression, which likely means that they are composed of different cell types.
p27 <- FeaturePlot(pdac, reduction = "tsne", features = "KRT8", pt.size = 0.75) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p27 / p4
These are the cells we'll be reclustering.
ductal_cells <- rownames(pdac@meta.data[pdac@meta.data$seurat_clusters %in% c(5, 9), ]) p28 <- DimPlot(pdac, reduction = "tsne", cells.highlight = ductal_cells, cols.highlight = "navy", pt.size = 0.75) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme_yehlab() + NoLegend() p28 / p3
We run SCISSORS
to recluster the cells.
ductal <- ReclusterCells(pdac, which.clust = c(5, 9), merge.clusters = TRUE, n.HVG = 4000, n.PC = 20, k.vals = c(20, 30, 40, 50), resolution.vals = c(.2, .3, .4), nn.metric = "euclidean", redo.embedding = TRUE, random.seed = 629) duct_pc <- Embeddings(ductal, reduction = "pca")
Again, we'll run Fit-SNE on the reclustered cells.
# import data duct_pc = r.duct_pc # run Fit-SNE affin_duct = PerplexityBasedNN(duct_pc, perplexity=40, random_state=629) init = initialization.pca(duct_pc, random_state=629) tsne_duct = TSNEEmbedding(init, affin_duct, negative_gradient_method='fft') embed_d1 = tsne_duct.optimize(n_iter=350, exaggeration=15, momentum=0.9) embed_d2 = embed_d1.optimize(n_iter=750, exaggeration=1, momentum=0.4)
We pull the results into R, making sure to save the Barnes-Hut t-SNE results in another reduction slot.
embed_duct <- as.matrix(py$embed_d2) rownames(embed_duct) <- colnames(ductal) ductal@reductions$bh_tsne <- ductal@reductions$tsne ductal@reductions$tsne<- CreateDimReducObject(embeddings = embed_duct, key = "FitSNE_", assay = "SCT", global = TRUE) p29 <- DimPlot(ductal, reduction = "tsne", pt.size = 1.5) + scale_color_manual(values = paletteer_d("ggsci::default_nejm")) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme_yehlab() + guides(color = guide_legend(nrow = 1, override.aes = list(size = 4))) p29
We use ANPEP & FABP1 expression to identify the lipid processing ductal cells in cluster 5.
p30 <- FeaturePlot(ductal, reduction = "tsne", features = "ANPEP", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p31 <- FeaturePlot(ductal, reduction = "tsne", features = "FABP1", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) (p30 | p31) / p29
Expression of SOD3 and CFTR reveals the secretory cells in cluster 2.
p32 <- FeaturePlot(ductal, reduction = "tsne", features = "SOD3", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p33 <- FeaturePlot(ductal, reduction = "tsne", features = "CFTR", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) (p32 | p33) / p29
We show that cluster 3 is composed of acinar cells using its high CTRB2 expression.
p34 <- FeaturePlot(ductal, reduction = "tsne", features = "CTRB2", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p34 / p29
We use TFF1 and TFF2 expression to annotate the classical 1 epithelial cells in clusters 0 and 1.
p35 <- FeaturePlot(ductal, reduction = "tsne", features = "TFF1", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p36 <- FeaturePlot(ductal, reduction = "tsne", features = "TFF2", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) (p35 | p36) / p29
The Classical 2 marker CRISP3 is exclusively expressed in cluster 6. We also see that the canonical classical PDAC marker GATA6 is lightly expressed across the Classical PDAC clusters (0, 1, 6).
p37 <- FeaturePlot(ductal, reduction = "tsne", features = "CRISP3", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p38 <- FeaturePlot(ductal, reduction = "tsne", features = "GATA6", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) (p37 | p38) / p29
The basal compartment weights from DECODER
are highest in cluster 4, which we will denote as being composed of basal-like PDAC.
p39 <- FeaturePlot(ductal, reduction = "tsne", features = "basal_like", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "Basal") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p40 <- FeaturePlot(ductal, reduction = "tsne", features = "classical", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "Classical") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p41 <- FeaturePlot(ductal, reduction = "tsne", features = "bc_ratio", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "Basal:Classical") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p42 <- FeaturePlot(ductal, reduction = "tsne", features = "malig", pt.size = 1.5) + scale_color_manual(values = wes_palette("Zissou1", n = 5)[c(5, 2)]) + labs(title = "Malignant") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) (p39 | p40 | p41 | p42) / p29
We'll confirm the Basal-like and Classical PDAC cluster identities by using VAM
to generate scores for the basal and classical gene sets from Moffitt et al (2015):
pdac_gene_sets <- list(classical_genes, basal_genes) names(pdac_gene_sets) <- c("Classical PDAC", "Basal-like PDAC") for (i in seq(pdac_gene_sets)) { pdac_gene_sets[[i]] <- pdac_gene_sets[[i]][pdac_gene_sets[[i]] %in% rownames(ductal)] } ductal <- vamForSeurat(ductal, gene.set.collection = pdac_gene_sets, gamma = TRUE) DefaultAssay(ductal) <- "VAMcdf"
The Classical PDAC scores are highest in clusters 0 & 1 and very low in cluster 4, while the Basal-like scores are highest in cluster 4. There is some mixing, reflecting the intra-tumor heterogeneity present in PDAC.
p43 <- FeaturePlot(ductal, reduction = "tsne", features = "Classical PDAC", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "Classical PDAC") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) p44 <- FeaturePlot(ductal, reduction = "tsne", features = "Basal-like PDAC", pt.size = 1.5) + scale_color_gradientn(colors = wesanderson::wes_palette("Zissou1")) + labs(title = "Basal-like PDAC") + theme_yehlab() + NoLegend() + theme(axis.title = element_blank()) (p43 | p44) / p29
We assign celltype labels to our ductal cells.
pdac_colors <- c("orange", "blue", "skyblue") ductal_colors <- c("blue", "forestgreen", "deeppink3", "skyblue", "orange", "peru") DefaultAssay(ductal) <- "SCT" ductal@meta.data %<>% mutate(label = case_when(seurat_clusters == 0 ~ "Classical 1", seurat_clusters == 1 ~ "Classical 1", seurat_clusters == 2 ~ "Secretory", seurat_clusters == 3 ~ "Acinar", seurat_clusters == 4 ~ "Basal-like", seurat_clusters == 5 ~ "Lipid Proc.", seurat_clusters == 6 ~ "Classical 2")) Idents(ductal) <- "label" p45a <- DimPlot(ductal, reduction = "tsne",pt.size = 1.5) + scale_color_manual(values = ductal_colors) + labs(x = "Fit-SNE 1", y = "Fit-SNE 2") + theme_yehlab() + theme(plot.title = element_blank()) + guides(color = guide_legend(nrow = 1, override.aes = list(size = 4))) p45a
We can generate marker genes for those final labels as follows:
ductal_markers_specific <- FindSpecificMarkers(ductal, ident.use = "label", de.method = "wilcox", log2fc.cutoff = .25, perc.cutoff = 0.9) %>% filter(p_val_adj < 0.05) ductal_markers_specific_top5 <- ductal_markers_specific %>% arrange(desc(avg_log2FC)) %>% group_by(cluster) %>% slice_head(n = 5) p45b <- DotPlot(ductal, features = unique(ductal_markers_specific_top5$gene), dot.scale = 15) + scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) + labs(color = "Expression", size = "% Expressed", y = NULL) + theme(axis.text.x = element_text(angle = 90, size = 16, vjust = 0.5), legend.position = "right", legend.justification = "center", panel.border = element_rect(fill = NA, size = 1, color = "black"), axis.line = element_blank(), legend.title = element_text(size = 18), axis.title.x = element_blank(), axis.title.y = element_text(size = 20), axis.text.y = element_text(size = 18)) + guides(color = guide_colorbar(title.position = "top", barheight = unit(3, units = "cm"), title.hjust = 0.5), size = guide_legend(title.position = "top", title.hjust = 0.5)) p45b
Lastly, we'll also provide some visualizations of canonical markers for basal vs. classical PDAC.
pdac_ductal <- subset(ductal, subset = label %in% c("Classical 1", "Classical 2", "Basal-like")) pdac_ductal@meta.data %<>% mutate(label_broad = case_when(label == "Basal-like" ~ label, TRUE ~ "Classical")) p46a <- t(data.frame(pdac_ductal@assays$SCT@data)) %>% as.data.frame() %>% mutate(label = pdac_ductal$label) %>% relocate(label, GATA6) %>% dplyr::select(label, GATA6) %>% ggplot(aes(x = label, fill = label, y = GATA6)) + geom_violin(scale = "width", draw_quantiles = 0.5, size = 1) + geom_signif(test = wilcox.test, map_signif_level = TRUE, step_increase = .15, size = 1.2, textsize = 8, comparisons = list(c("Classical 1", "Basal-like"), c("Classical 2", "Basal-like"))) + scale_fill_manual(values = pdac_colors) + scale_y_continuous(limits = c(0, 1.77), breaks = c(0, 0.5, 1, 1.5)) + labs(y = "GATA6", title = NULL, x = NULL) + theme(panel.grid = element_blank(), legend.position = "none", panel.background = element_blank(), panel.border = element_rect(fill = NA, size = 1), axis.title.x = element_text(size = 24), axis.title.y = element_text(size = 22), axis.text.y = element_text(size = 20), axis.text.x = element_text(size = 20, angle = 45, vjust = 1, hjust = 1)) p46b <- ggplot(pdac_ductal@meta.data, aes(x = label, fill = label, y = basal_like)) + geom_violin(scale = "width", draw_quantiles = 0.5, size = 1) + geom_signif(test = wilcox.test, map_signif_level = TRUE, step_increase = .15, size = 1.2, textsize = 8, comparisons = list(c("Classical 1", "Basal-like"), c("Classical 2", "Basal-like"))) + scale_fill_manual(values = pdac_colors) + scale_y_continuous(limits = c(0, 1.3), breaks = c(0, 0.25, 0.5, 0.75, 1)) + labs(y = "DECODER basal-like", title = NULL, x = NULL) + theme(panel.grid = element_blank(), legend.position = "none", panel.background = element_blank(), panel.border = element_rect(fill = NA, size = 1), axis.title.x = element_text(size = 24), axis.title.y = element_text(size = 22), axis.text.y = element_text(size = 20), axis.text.x = element_text(size = 20, angle = 45, vjust = 1, hjust = 1)) p46c <- ggplot(pdac_ductal@meta.data, aes(x = label, fill = label, y = classical)) + geom_violin(scale = "width", draw_quantiles = 0.5, size = 1) + geom_signif(test = wilcox.test, map_signif_level = TRUE, step_increase = .15, size = 1.2, textsize = 8, comparisons = list(c("Classical 1", "Basal-like"), c("Classical 2", "Basal-like"))) + scale_fill_manual(values = pdac_colors) + scale_y_continuous(limits = c(0, 1.25), breaks = c(0, 0.25, 0.5, 0.75, 1)) + labs(y = "DECODER classical", title = NULL, x = NULL) + theme(panel.grid = element_blank(), legend.position = "none", panel.background = element_blank(), panel.border = element_rect(fill = NA, size = 1), axis.title.x = element_text(size = 24), axis.title.y = element_text(size = 22), axis.text.y = element_text(size = 20), axis.text.x = element_text(size = 20, angle = 45, vjust = 1, hjust = 1)) p46d <- ggplot(pdac_ductal@meta.data, aes(x = label, fill = label, y = bc_ratio)) + geom_violin(scale = "width", draw_quantiles = 0.5, size = 1) + geom_signif(test = wilcox.test, map_signif_level = TRUE, step_increase = .15, size = 1.2, textsize = 8, comparisons = list(c("Classical 1", "Basal-like"), c("Classical 2", "Basal-like"))) + scale_fill_manual(values = pdac_colors) + scale_y_continuous(limits = c(0, 6.35)) + labs(y = "DECODER bcRatio", title = NULL, x = NULL) + theme(panel.grid = element_blank(), legend.position = "none", panel.background = element_blank(), panel.border = element_rect(fill = NA, size = 1), axis.title.x = element_text(size = 24), axis.title.y = element_text(size = 22), axis.text.y = element_text(size = 20), axis.text.x = element_text(size = 20, angle = 45, vjust = 1, hjust = 1)) p46e <- t(data.frame(pdac_ductal@assays$VAMcdf@data)) %>% as.data.frame() %>% mutate(label = pdac_ductal@meta.data$label) %>% ggplot(aes(x = label, fill = label, y = `Classical PDAC`)) + geom_violin(scale = "width", draw_quantiles = 0.5, size = 1) + geom_signif(test = wilcox.test, map_signif_level = TRUE, step_increase = .15, size = 1.2, textsize = 8, comparisons = list(c("Classical 1", "Basal-like"), c("Classical 2", "Basal-like"))) + scale_fill_manual(values = pdac_colors) + scale_y_continuous(limits = c(0, 1.3), breaks = c(0, 0.25, 0.5, 0.75, 1)) + labs(y = "Moffitt classical", title = NULL, x = NULL) + theme(panel.grid = element_blank(), legend.position = "none", panel.background = element_blank(), panel.border = element_rect(fill = NA, size = 1), axis.title.x = element_text(size = 24), axis.title.y = element_text(size = 22), axis.text.y = element_text(size = 20), axis.text.x = element_text(size = 20, angle = 45, vjust = 1, hjust = 1)) p46f <- t(data.frame(pdac_ductal@assays$VAMcdf@data)) %>% as.data.frame() %>% mutate(label = pdac_ductal@meta.data$label) %>% ggplot(aes(x = label, fill = label, y = `Basal-like PDAC`)) + geom_violin(scale = "width", draw_quantiles = 0.5, size = 1) + geom_signif(test = wilcox.test, map_signif_level = TRUE, step_increase = .15, size = 1.2, textsize = 8, comparisons = list(c("Classical 1", "Basal-like"), c("Classical 2", "Basal-like"))) + scale_fill_manual(values = pdac_colors) + scale_y_continuous(limits = c(0, 1.3), breaks = c(0, 0.25, 0.5, 0.75, 1)) + labs(y = "Moffitt basal-like", title = NULL, x = NULL) + theme(panel.grid = element_blank(), legend.position = "none", panel.background = element_blank(), panel.border = element_rect(fill = NA, size = 1), axis.title.x = element_text(size = 24), axis.title.y = element_text(size = 22), axis.text.y = element_text(size = 20), axis.text.x = element_text(size = 20, angle = 45, vjust = 1, hjust = 1))
The markers / gene sets match up with our cluster labels.
p47 <- (p46f | p46e | p46b | p46d | p46c | p46a) p47
rm(duct_pc, embed_duct) gc(verbose = FALSE)
We'll add final celltype labels to all cluster in the full Seurat
object.
basal_cells <- rownames(pdac_ductal@meta.data)[pdac_ductal@meta.data$label == "Basal-like"] classical1_cells <- rownames(pdac_ductal@meta.data)[pdac_ductal@meta.data$label == "Classical 1"] classical2_cells <- rownames(pdac_ductal@meta.data)[pdac_ductal@meta.data$label == "Classical 2"] icaf_cells <- rownames(caf@meta.data)[caf@meta.data$label == "iCAF"] mycaf_cells <- rownames(caf@meta.data)[caf@meta.data$label == "myCAF"] apcaf_cells <- rownames(caf@meta.data)[caf@meta.data$label == "apCAF"] pdac@meta.data %<>% mutate(label_broad = case_when(seurat_clusters == 0 ~ "Macrophage", seurat_clusters == 1 ~ "NK", seurat_clusters == 2 ~ "Monocyte", seurat_clusters == 3 ~ "T", seurat_clusters == 4 ~ "B", seurat_clusters == 5 ~ "Ductal", seurat_clusters == 6 ~ "DC", seurat_clusters == 7 ~ "T", seurat_clusters == 8 ~ "Fibroblast", seurat_clusters == 9 ~ "Acinar", seurat_clusters == 10 ~ "Normal Stroma", seurat_clusters == 11 ~ "Plasma", seurat_clusters == 12 ~ "pDC") , label_fine = case_when(rownames(.) %in% basal_cells ~ "Basal-like PDAC", rownames(.) %in% classical1_cells ~ "Classical PDAC 1", rownames(.) %in% classical2_cells ~ "Classical PDAC 2", rownames(.) %in% icaf_cells ~ "iCAF", rownames(.) %in% mycaf_cells ~ "myCAF", rownames(.) %in% apcaf_cells ~ "apCAF", TRUE ~ label_broad)) Idents(pdac) <- "label_fine"
Let's generate a plot of the CAF-specific marker genes. First, we'll exclude genes that are highly expressed in any other celltypes. Second, we'll want to exclude any genes highly expressed in macrophages because apCAFs in particular ("ap" = "antigen-presenting") have an immune-like transcriptomic profile. We should have removed those genes in the previous step, but just to make sure we'll pull some macrophage RNA-seq samples and grab their most highly expressed genes.
DefaultAssay(caf) <- "SCT" caf_markers_final <- FindAllMarkers(caf, logfc.threshold = 0.25, test.use = "wilcox", only.pos = TRUE, verbose = FALSE, random.seed = 312) %>% filter(p_val_adj < 0.05) # determine highly expressed genes in other celltypes gene_means_by_clust <- t(pdac@assays$SCT@data) %>% as.data.frame() %>% mutate(label_fine = pdac$label_fine) %>% group_by(label_fine) %>% summarise(across(where(is.numeric), mean)) # find list of genes w/ mean expression above 90th percentile of expression in each celltype high_exp_genes_non_caf <- c() loop_celltypes <- unname(gene_means_by_clust$label_fine[!gene_means_by_clust$label_fine %in% c("iCAF", "myCAF", "apCAF")]) for (i in loop_celltypes) { top_exp_genes <- gene_means_by_clust %>% filter(label_fine == i) %>% dplyr::select(-label_fine) %>% t() %>% as.data.frame() %>% filter(V1 > quantile(V1, .9)) %>% mutate(gene = rownames(.)) %>% pull(gene) high_exp_genes_non_caf <- c(high_exp_genes_non_caf, top_exp_genes) } high_exp_genes_non_caf <- unique(high_exp_genes_non_caf) # get highly expressed macrophage genes bed <- celldex::BlueprintEncodeData() bed <- bed[, bed$label.main == "Macrophages"] top_macro_genes <- bed@assays@data@listData[["logcounts"]] %>% as.data.frame() %>% t() %>% as.data.frame() %>% summarise(across(everything(), mean)) %>% t() %>% as.data.frame() %>% filter(V1 > quantile(V1, .9)) %>% mutate(gene = rownames(.)) %>% pull(gene) %>% unique() caf_markers_final %<>% filter(!(gene %in% high_exp_genes_non_caf), !(gene %in% top_macro_genes)) caf_markers_final %>% arrange(desc(avg_log2FC)) %>% group_by(cluster) %>% slice_head(n = 25) -> top25_caf_markers
The plot:
DefaultAssay(caf) <- "SCT" top5_caf_markers_final <- caf_markers_final %>% group_by(cluster) %>% arrange(desc(avg_log2FC)) %>% slice_head(n = 5) p48 <- DotPlot(caf, features = unique(top5_caf_markers_final$gene), dot.scale = 15) + scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) + labs(color = "Expression", size = "% Expressed", y = "Louvain Cluster") + theme(axis.text.x = element_text(angle = 90, size = 16, vjust = 0.5), legend.position = "right", legend.justification = "center", panel.border = element_rect(fill = NA, size = 1, color = "black"), axis.line = element_blank(), legend.title = element_text(size = 18), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.y = element_text(size = 18)) + guides(color = guide_colorbar(title.position = "top", barheight = unit(3, units = "cm"), title.hjust = 0.5), size = guide_legend(title.position = "top", title.hjust = 0.5)) p48
We'll do the same for the PDAC cells:
Idents(pdac_ductal) <- "label" pdac_ductal_markers_final <- FindAllMarkers(pdac_ductal, logfc.threshold = 0.25, test.use = "wilcox", only.pos = TRUE, verbose = FALSE, random.seed = 312) %>% filter(p_val_adj < 0.05) loop_celltypes <- unname(gene_means_by_clust$label_fine[!gene_means_by_clust$label_fine %in% c("Classical PDAC 1", "Basal-like PDAC", "Classical PDAC 2")]) high_exp_genes_non_pdac1 <- c() for (i in loop_celltypes) { top_exp_genes <- gene_means_by_clust %>% filter(label_fine == i) %>% dplyr::select(-label_fine) %>% t() %>% as.data.frame() %>% filter(V1 > quantile(V1, .9)) %>% mutate(gene = rownames(.)) %>% pull(gene) high_exp_genes_non_pdac1 <- c(high_exp_genes_non_pdac1, top_exp_genes) } high_exp_genes_non_pdac1 <- unique(high_exp_genes_non_pdac1) pdac_ductal_markers_final %<>% filter(!(gene %in% high_exp_genes_non_pdac1))
The plot:
top5_pdac_ductal_markers_final <- pdac_ductal_markers_final %>% group_by(cluster) %>% arrange(desc(avg_log2FC)) %>% slice_head(n = 5) p49 <- DotPlot(pdac_ductal, features = unique(top5_pdac_ductal_markers_final$gene), dot.scale = 15) + scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) + labs(color = "Expression", size = "% Expressed", y = "Louvain Cluster") + theme(axis.text.x = element_text(angle = 90, size = 16, vjust = 0.5), legend.position = "right", legend.justification = "center", panel.border = element_rect(fill = NA, size = 1, color = "black"), axis.line = element_blank(), legend.title = element_text(size = 18), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.y = element_text(size = 18)) + guides(color = guide_colorbar(title.position = "top", barheight = unit(3, units = "cm"), title.hjust = 0.5), size = guide_legend(title.position = "top", title.hjust = 0.5)) p49
We'll repeat the ductal marker gene identification for simple basal vs. classical:
Idents(pdac_ductal) <- "label_broad" pdac_ductal_broad_markers_final <- FindAllMarkers(pdac_ductal, logfc.threshold = 0.25, test.use = "wilcox", only.pos = TRUE, verbose = FALSE, random.seed = 312) %>% filter(p_val_adj < 0.05) loop_celltypes <- unname(gene_means_by_clust$label_fine[!gene_means_by_clust$label_fine %in% c("Classical PDAC 1", "Basal-like PDAC", "Classical PDAC 2")]) high_exp_genes_non_pdac2 <- c() for (i in loop_celltypes) { top_exp_genes <- gene_means_by_clust %>% filter(label_fine == i) %>% dplyr::select(-label_fine) %>% t() %>% as.data.frame() %>% filter(V1 > quantile(V1, .9)) %>% mutate(gene = rownames(.)) %>% pull(gene) high_exp_genes_non_pdac2 <- c(high_exp_genes_non_pdac2, top_exp_genes) } high_exp_genes_non_pdac2 <- unique(high_exp_genes_non_pdac2) pdac_ductal_broad_markers_final %<>% filter(!(gene %in% high_exp_genes_non_pdac2)) pdac_ductal_broad_markers_final %>% arrange(desc(avg_log2FC)) %>% group_by(cluster) %>% slice_head(n = 25) -> top25_pdac_broad_markers
Lastly, we'll create combination dotplots for the CAF and normal stroma cells using the marker genes we derived solely when comparing CAFs against one another.
caf_markers_final_top5 <- caf_markers_final %>% arrange(desc(avg_log2FC)) %>% with_groups(cluster, slice_head, n = 5) fibro_markers_specific_top5 %>% filter(cluster == "Endothelial") %>% bind_rows((caf_markers_final_top5 %>% filter(cluster == "myCAF")), (fibro_markers_specific_top5 %>% filter(cluster == "Perivascular")), (caf_markers_final_top5 %>% filter(cluster == "iCAF")), (caf_markers_final_top5 %>% filter(cluster == "apCAF"))) %>% mutate(source = case_when(source = is.na(source) ~ "Stroma", TRUE ~ source), log2FC_cutoff = case_when(is.na(log2FC_cutoff) ~ 0.25, TRUE ~ log2FC_cutoff)) -> fibro_markers_combo_top5 p50 <- DotPlot(fibro, features = unique(fibro_markers_combo_top5$gene), dot.scale = 15) + scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) + labs(color = "Expression", size = "% Expressed", y = NULL) + theme(axis.text.x = element_text(angle = 90, size = 16, vjust = 0.5), legend.position = "right", legend.justification = "center", panel.border = element_rect(fill = NA, size = 1, color = "black"), axis.line = element_blank(), legend.title = element_text(size = 18), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.y = element_text(size = 18)) + guides(color = guide_colorbar(title.position = "top", barheight = unit(3, units = "cm"), title.hjust = 0.5), size = guide_legend(title.position = "top", title.hjust = 0.5)) p50
And we'll do the same for the PDAC subtypes.
pdac_ductal_markers_final_top5 <- pdac_ductal_markers_final %>% arrange(desc(avg_log2FC)) %>% with_groups(cluster, slice_head, n = 5) pdac_ductal_markers_final_top5 %>% filter(cluster == "Classical 1") %>% bind_rows((ductal_markers_specific_top5 %>% filter(cluster == "Acinar")), (ductal_markers_specific_top5 %>% filter(cluster == "Secretory")), (pdac_ductal_markers_final_top5 %>% filter(cluster == "Classical 2")), (pdac_ductal_markers_final_top5 %>% filter(cluster == "Basal-like")), (ductal_markers_specific_top5 %>% filter(cluster == "Lipid Proc."))) %>% mutate(source = case_when(source = is.na(source) ~ "Ductal", TRUE ~ source), log2FC_cutoff = case_when(is.na(log2FC_cutoff) ~ 0.25, TRUE ~ log2FC_cutoff)) -> ductal_markers_combo_top5 p51 <- DotPlot(ductal, features = unique(ductal_markers_combo_top5$gene), dot.scale = 15) + scale_color_gradientn(colors = paletteer_d("wesanderson::Zissou1")) + labs(color = "Expression", size = "% Expressed", y = NULL) + theme(axis.text.x = element_text(angle = 90, size = 16, vjust = 0.5), legend.position = "right", legend.justification = "center", panel.border = element_rect(fill = NA, size = 1, color = "black"), axis.line = element_blank(), legend.title = element_text(size = 18), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.y = element_text(size = 18)) + guides(color = guide_colorbar(title.position = "top", barheight = unit(3, units = "cm"), title.hjust = 0.5), size = guide_legend(title.position = "top", title.hjust = 0.5)) p51
First the Seurat
objects.
saveRDS(pdac, file = "~/Desktop/Data/Tuveson_Seurat_Objs/Elyada.Rds") saveRDS(fibro, file = "~/Desktop/Data/Tuveson_Seurat_Objs/Elyada_fibro.Rds") saveRDS(caf, file = "~/Desktop/Data/Tuveson_Seurat_Objs/Elyada_caf.Rds") saveRDS(ductal, file = "~/Desktop/Data/Tuveson_Seurat_Objs/Elyada_ductal_acinar.Rds") saveRDS(pdac_ductal, file = "~/Desktop/Data/Tuveson_Seurat_Objs/Elyada_PDAC.Rds")
pdac <- readRDS(file = "~/Desktop/Data/Tuveson_Seurat_Objs/Elyada.Rds") fibro <- readRDS(file = "~/Desktop/Data/Tuveson_Seurat_Objs/Elyada_fibro.Rds") caf <- readRDS(file = "~/Desktop/Data/Tuveson_Seurat_Objs/Elyada_caf.Rds") ductal <- readRDS(file = "~/Desktop/Data/Tuveson_Seurat_Objs/Elyada_ductal_acinar.Rds") pdac_ductal <- readRDS(file = "~/Desktop/Data/Tuveson_Seurat_Objs/Elyada_PDAC.Rds")
And now the gene sets for CAF and PDAC cells.
caf_markers_final %<>% filter(avg_log2FC >= 0.5) %>% mutate(log2FC_cutoff = 0.5, source = "CAF") pdac_ductal_markers_final %<>% filter(avg_log2FC >= 1) %>% mutate(log2FC_cutoff = 1, source = "PDAC") openxlsx::write.xlsx(caf_markers_final, file = "~/Desktop/Data/Elyada_CAF_Specific_Marker_Genes.xlsx") openxlsx::write.xlsx(top25_caf_markers, file = "~/Desktop/Data/Elyada_CAF_Specific_Marker_Genes_Top25.xlsx") openxlsx::write.xlsx(pdac_ductal_markers_final, file = "~/Desktop/Data/Elyada_PDAC_Specific_Marker_Genes.xlsx") openxlsx::write.xlsx(top25_pdac_broad_markers, file = "~/Desktop/Data/Elyada_PDAC_Specific_Marker_Genes_Top25.xlsx") pdac_markers %>% mutate(log2FC_cutoff = 0.25) %>% openxlsx::write.xlsx(file = "~/Desktop/Data/Elyada_Cluster_Marker_Genes.xlsx") pdac_markers_specific %>% mutate(log2FC_cutoff = 01) %>% openxlsx::write.xlsx(file = "~/Desktop/Data/Elyada_Cluster_Marker_Genes_Specific.xlsx")
First we'll define a convenience function to save the figures to PDFs.
SaveFigure <- function(my.plot = NULL, name = NULL, height = 8, width = 8) { if (is.null(plot) | is.null(name)) stop("You forgot some arguments.") # save figure as is - w/ axis labels, titles, etc. dir <- "~/Desktop/R/SCISSORS/vignettes/figures_supp/Elyada" ggsave(my.plot, filename = paste0(name, ".pdf"), device = "pdf", units = "in", path = dir, height = height, width = width) # save "blank" figure w/ no labels, legends, etc. dir <- "~/Desktop/R/SCISSORS/vignettes/figures_pub/Elyada" plot_blank <- my.plot + theme(axis.title = element_blank(), panel.border = element_blank(), plot.title = element_blank(), plot.subtitle = element_blank(), plot.caption = element_blank(), legend.position = "none") ggsave(plot_blank, filename = paste0(name, ".pdf"), device = "pdf", units = "in", path = dir, height = height, width = width) }
And now we save.
SaveFigure(my.plot = p0, name = "Seurat_Clusters_tSNE") SaveFigure(my.plot = p1, name = "Seurat_Clusters_Sil_Score_Violin") SaveFigure(my.plot = p2a, name = "Seurat_Clusters_DotPlot", height = 8, width = 18) SaveFigure(my.plot = p2b, name = "Seurat_Clusters_DotPlot_Specific", height = 8, width = 18) SaveFigure(my.plot = p3, name = "Seurat_Clusters_FitSNE") SaveFigure(my.plot = p4, name = "SingleR_scRNAseq_Annos_FitSNE") SaveFigure(my.plot = p5, name = "SingleR_Bulk_Annos_FitSNE") SaveFigure(my.plot = p6, name = "CONICSmat_Annos_FitSNE") SaveFigure(my.plot = p7, name = "DECODER_Basal_PDAC_FitSNE") SaveFigure(my.plot = p8, name = "DECODER_Classical_PDAC_FitSNE") SaveFigure(my.plot = p9, name = "DECODER_Exocrine_FitSNE") SaveFigure(my.plot = p10, name = "DECODER_Endocrine_FitSNE") SaveFigure(my.plot = p11, name = "DECODER_Immune_FitSNE") SaveFigure(my.plot = p12, name = "DECODER_Norm_Stroma_FitSNE") SaveFigure(my.plot = p13, name = "DECODER_Act_Stroma_FitSNE") SaveFigure(my.plot = p14, name = "All_Cells_Stroma_COL1A1_FitSNE") SaveFigure(my.plot = p15, name = "All_Cells_Stroma_COL3A1_FitSNE") SaveFigure(my.plot = p16, name = "All_Cells_Stroma_LUM_FitSNE") SaveFigure(my.plot = p17, name = "All_Cells_Stroma_DCN_FitSNE") SaveFigure(my.plot = p18, name = "All_Cells_Highlight_Stroma_FitSNE") SaveFigure(my.plot = p19, name = "SCISSORS_Clusters_Stroma_FitSNE") SaveFigure(my.plot = p20, name = "SCISSORS_Clusters_Stroma_Endothelial_PLVAP") SaveFigure(my.plot = p21, name = "SCISSORS_Clusters_Stroma_Perivascular_RGS5") SaveFigure(my.plot = p22, name = "SCISSORS_Clusters_Stroma_VAM_iCAF_FitSNE") SaveFigure(my.plot = p23, name = "SCISSORS_Clusters_Stroma_VAM_myCAF_FitSNE") SaveFigure(my.plot = p24, name = "SCISSORS_Clusters_Stroma_VAM_apCAF_FitSNE") SaveFigure(my.plot = p25a, name = "SCISSORS_Clusters_Stroma_Labels_FitSNE") SaveFigure(my.plot = p25b, name = "SCISSORS_Clusters_Stroma_Dotplot_Specific", height = 5, width = 10) SaveFigure(my.plot = p26a, name = "SCISSORS_Clusters_CAF_apCAF_Violin") SaveFigure(my.plot = p26b, name = "SCISSORS_Clusters_CAF_iCAF_Violin") SaveFigure(my.plot = p26c, name = "SCISSORS_Clusters_CAF_myCAF_Violin") SaveFigure(my.plot = p26d, name = "SCISSORS_Clusters_CAF_panCAF_Violin") SaveFigure(my.plot = p26e, name = "SCISSORS_Clusters_CAF_Violin_Combined") SaveFigure(my.plot = p27, name = "All_Cells_Ductal_KRT8_FitSNE") SaveFigure(my.plot = p28, name = "All_Cells_Highlight_Ductal_FitSNE") SaveFigure(my.plot = p29, name = "SCISSORS_Clusters_Ductal_FitSNE") SaveFigure(my.plot = p30, name = "SCISSORS_Clusters_Ductal_Lipid_Proc_ANPEP") SaveFigure(my.plot = p31, name = "SCISSORS_Clusters_Ductal_Lipid_Proc_FABP1") SaveFigure(my.plot = p32, name = "SCISSORS_Clusters_Ductal_Secretory_SOD3_FitSNE") SaveFigure(my.plot = p33, name = "SCISSORS_Clusters_Ductal_Secretory_CFTR_FitSNE") SaveFigure(my.plot = p34, name = "SCISSORS_Clusters_Ductal_Acinar_CTRB2_FitSNE") SaveFigure(my.plot = p35, name = "SCISSORS_Clusters_Ductal_Classical1_TFF1_FitSNE") SaveFigure(my.plot = p36, name = "SCISSORS_Clusters_Ductal_Classical1_TFF2_FitSNE") SaveFigure(my.plot = p37, name = "SCISSORS_Clusters_Ductal_Classical2_CRISP3_FitSNE") SaveFigure(my.plot = p38, name = "SCISSORS_Clusters_Ductal_Classical2_GATA6_FitSNE") SaveFigure(my.plot = p39, name = "SCISSORS_Clusters_Ductal_DECODER_Basal_PDAC_FitSNE") SaveFigure(my.plot = p40, name = "SCISSORS_Clusters_Ductal_DECODER_Classical_PDAC_FitSNE") SaveFigure(my.plot = p41, name = "SCISSORS_Clusters_Ductal_DECODER_bcRatio_PDAC_FitSNE") SaveFigure(my.plot = p42, name = "SCISSORS_Clusters_Ductal_CONICSmat_Annos_FitSNE") SaveFigure(my.plot = p43, name = "SCISSORS_Clusters_Ductal_VAM_Moffitt_Classical25_FitSNE") SaveFigure(my.plot = p44, name = "SCISSORS_Clusters_Ductal_VAM_Moffitt_Basal25_FitSNE") SaveFigure(my.plot = p45a, name = "SCISSORS_Clusters_Ductal_Labels_FitSNE") SaveFigure(my.plot = p45b, name = "SCISSORS_Clusters_Ductal_Dotplot_Specific", height = 6, width = 12) SaveFigure(my.plot = p46a, name = "SCISSORS_Clusters_Ductal_GATA6_Violin") SaveFigure(my.plot = p46b, name = "SCISSORS_Clusters_Ductal_DECODER_Basal_PDAC_Violin") SaveFigure(my.plot = p46c, name = "SCISSORS_Clusters_Ductal_DECODER_Classical_PDAC_Violin") SaveFigure(my.plot = p46d, name = "SCISSORS_Clusters_Ductal_DECODER_bcRatio_Violin") SaveFigure(my.plot = p46e, name = "SCISSORS_Clusters_Ductal_VAM_Moffitt_Classical_25_Violin") SaveFigure(my.plot = p46f, name = "SCISSORS_Clusters_Ductal_VAM_Moffitt_Basal_25_Violin") SaveFigure(my.plot = p47, name = "SCISSORS_Clusters_Ductal_Violin_Combined", height = 5, width = 16) SaveFigure(my.plot = p48, name = "SCISSORS_Clusters_CAF_Dotplot_Specific", height = 6, width = 12) SaveFigure(my.plot = p49, name = "SCISSORS_Clusters_Ductal_Dotplot_Specific", height = 6, width = 12) SaveFigure(my.plot = p50, name = "SCISSORS_Clusters_Fibro_Dotplot_Specific_Combo", height = 5, width = 10) SaveFigure(my.plot = p51, name = "SCISSORS_Clusters_Ductal_Dotplot_Specific_Combo", height = 5, width = 10)
Lastly, we want specific gene sets for CAFs and PDAC cells that are larger than the restricted sets we generated earlier. We'll run the same process, but with relaxed parameters so that we get larger marker sets.
# caf_markers_final_2 <- FindAllMarkers(caf, # logfc.threshold = 0.25, # test.use = "wilcox", # only.pos = TRUE, # verbose = FALSE, # random.seed = 312) %>% # filter(p_val_adj < 0.05) # high_exp_genes_non_caf2 <- c() # loop_celltypes <- unname(gene_means_by_clust$label_fine[!gene_means_by_clust$label_fine %in% c("iCAF", "myCAF", "apCAF")]) # for (i in loop_celltypes) { # top_exp_genes <- gene_means_by_clust %>% # filter(label_fine == i) %>% # dplyr::select(-label_fine) %>% # t() %>% # as.data.frame() %>% # filter(V1 > quantile(V1, .9)) %>% # mutate(gene = rownames(.)) %>% # pull(gene) # high_exp_genes_non_caf2 <- c(high_exp_genes_non_caf2, top_exp_genes) # } # high_exp_genes_non_caf2 <- unique(high_exp_genes_non_caf2) # caf_markers_final_large <- caf_markers_final_2 %>% # filter(!(gene %in% high_exp_genes_non_caf2), !(gene %in% top_macro_genes)) # caf_markers_final_large %>% group_by(cluster) %>% slice_head(n = 25) caf_markers_final_2 <- FindAllMarkers(caf, logfc.threshold = 0, test.use = "wilcox", only.pos = TRUE, verbose = FALSE, random.seed = 312) %>% filter(p_val_adj < 0.05) high_exp_genes_non_caf2 <- c() loop_celltypes <- unname(gene_means_by_clust$label_fine[!gene_means_by_clust$label_fine %in% c("iCAF", "myCAF", "apCAF")]) for (i in loop_celltypes) { top_exp_genes <- gene_means_by_clust %>% filter(label_fine == i) %>% dplyr::select(-label_fine) %>% t() %>% as.data.frame() %>% filter(V1 > quantile(V1, .9)) %>% mutate(gene = rownames(.)) %>% pull(gene) high_exp_genes_non_caf2 <- c(high_exp_genes_non_caf2, top_exp_genes) } high_exp_genes_non_caf2 <- unique(high_exp_genes_non_caf2) caf_markers_final_large <- caf_markers_final_2 %>% filter(!(gene %in% high_exp_genes_non_caf2), !(gene %in% top_macro_genes)) caf_markers_final_large %>% mutate(log2FC_cutoff = 0) %>% openxlsx::write.xlsx(file = "~/Desktop/Data/SCISSORS_CAF_p005.xlsx")
pdac_ductal_broad_markers_final <- FindAllMarkers(pdac_ductal, logfc.threshold = 0.25, test.use = "wilcox", only.pos = TRUE, verbose = FALSE, random.seed = 312) %>% filter(p_val_adj < 0.05) loop_celltypes <- unname(gene_means_by_clust$label_fine[!gene_means_by_clust$label_fine %in% c("Classical PDAC 1", "Basal-like PDAC", "Classical PDAC 2")]) high_exp_genes_non_pdac3 <- c() for (i in loop_celltypes) { top_exp_genes <- gene_means_by_clust %>% filter(label_fine == i) %>% dplyr::select(-label_fine) %>% t() %>% as.data.frame() %>% filter(V1 > quantile(V1, .9)) %>% #filter(V1 > quantile(V1, .95)) %>% mutate(gene = rownames(.)) %>% pull(gene) high_exp_genes_non_pdac3 <- c(high_exp_genes_non_pdac3, top_exp_genes) } high_exp_genes_non_pdac3 <- unique(high_exp_genes_non_pdac3) pdac_ductal_broad_markers_final_large <- pdac_ductal_broad_markers_final %>% filter(!(gene %in% high_exp_genes_non_pdac3))
caf_cells <- rownames(fibro@meta.data[!fibro@meta.data$label %in% c("Perivascular", "Endothelial"), ]) peri_cells <- rownames(fibro@meta.data[fibro@meta.data$label == "Perivascular", ]) pdac@meta.data <- pdac@meta.data %>% mutate(label_finest = case_when(rownames(.) %in% caf_cells ~ "CAF", rownames(.) %in% peri_cells ~ "Perivascular", TRUE ~ label_fine)) gene_means_by_clust <- t(pdac@assays$SCT@data) %>% as.data.frame() %>% mutate(label_finest = pdac$label_finest) %>% group_by(label_finest) %>% summarise(across(where(is.numeric), mean)) peri_markers <- FindAllMarkers(fibro, logfc.threshold = 0.25, test.use = "wilcox", only.pos = TRUE, verbose = FALSE, random.seed = 312) %>% filter(p_val_adj < 0.05, cluster == "Perivascular") loop_celltypes <- unname(gene_means_by_clust$label_finest[!gene_means_by_clust$label_finest == "Perivascular"]) high_exp_genes_non_peri <- c() for (i in loop_celltypes) { top_exp_genes <- gene_means_by_clust %>% filter(label_finest == i) %>% dplyr::select(-label_finest) %>% t() %>% as.data.frame() %>% filter(V1 > quantile(V1, .9)) %>% mutate(gene = rownames(.)) %>% pull(gene) high_exp_genes_non_peri <- c(high_exp_genes_non_peri, top_exp_genes) } high_exp_genes_non_peri <- unique(high_exp_genes_non_peri) peri_markers <- peri_markers %>% filter(!(gene %in% high_exp_genes_non_peri))
endo_cells <- rownames(fibro@meta.data[fibro@meta.data$label == "Endothelial", ]) pdac@meta.data <- pdac@meta.data %>% mutate(label_finest = case_when(rownames(.) %in% endo_cells ~ "Endothelial", TRUE ~ label_fine)) gene_means_by_clust <- t(pdac@assays$SCT@data) %>% as.data.frame() %>% mutate(label_finest = pdac$label_finest) %>% group_by(label_finest) %>% summarise(across(where(is.numeric), mean)) endo_markers <- FindAllMarkers(fibro, logfc.threshold = 0.25, test.use = "wilcox", only.pos = TRUE, verbose = FALSE, random.seed = 312) %>% filter(p_val_adj < 0.05, cluster == "Endothelial") loop_celltypes <- unname(gene_means_by_clust$label_finest[!gene_means_by_clust$label_finest == "Endothelial"]) high_exp_genes_non_endo <- c() for (i in loop_celltypes) { top_exp_genes <- gene_means_by_clust %>% filter(label_finest == i) %>% dplyr::select(-label_finest) %>% t() %>% as.data.frame() %>% filter(V1 > quantile(V1, .9)) %>% mutate(gene = rownames(.)) %>% pull(gene) high_exp_genes_non_peri <- c(high_exp_genes_non_endo, top_exp_genes) } high_exp_genes_non_endo <- unique(high_exp_genes_non_endo) endo_markers <- endo_markers %>% filter(!(gene %in% high_exp_genes_non_endo))
We'll do the same for the CAF cells in order to identify pan-CAF markers.
fibro@meta.data <- fibro@meta.data %>% mutate(label_fine = case_when(rownames(.) %in% caf_cells ~ "CAF", TRUE ~ label)) Idents(fibro) <- "label_fine" DefaultAssay(fibro) <- "SCT" pan_caf_markers <- FindAllMarkers(fibro, logfc.threshold = 0.25, test.use = "wilcox", only.pos = TRUE, verbose = FALSE, random.seed = 312) %>% filter(p_val_adj < 0.05, cluster == "CAF") loop_celltypes <- unname(gene_means_by_clust$label_finest[!gene_means_by_clust$label_finest == "CAF"]) high_exp_genes_non_caf <- c() for (i in loop_celltypes) { top_exp_genes <- gene_means_by_clust %>% filter(label_finest == i) %>% dplyr::select(-label_finest) %>% t() %>% as.data.frame() %>% filter(V1 > quantile(V1, .9)) %>% mutate(gene = rownames(.)) %>% pull(gene) high_exp_genes_non_caf <- c(high_exp_genes_non_caf, top_exp_genes) } high_exp_genes_non_caf <- unique(high_exp_genes_non_caf) pan_caf_markers <- pan_caf_markers %>% filter(!(gene %in% high_exp_genes_non_caf), !(gene %in% top_macro_genes))
Lastly we save the expanded gene sets.
caf_markers_final_large %>% mutate(log2FC_cutoff = 0.25) %>% openxlsx::write.xlsx(file = "~/Desktop/Data/Elyada_CAF_Specific_Marker_Genes_Large.xlsx") pdac_ductal_broad_markers_final_large %>% mutate(log2FC_cutoff = 0.25) %>% openxlsx::write.xlsx(file = "~/Desktop/Data/Elyada_PDAC_Specific_Marker_Genes_Large.xlsx") peri_markers %>% mutate(log2FC_cutoff = 0.25) %>% openxlsx::write.xlsx(file = "~/Desktop/Data/Elyada_Perivascular_Specific_Marker_Genes_Large.xlsx") endo_markers %>% mutate(log2FC_cutoff = 0.25) %>% openxlsx::write.xlsx(file = "~/Desktop/Data/Elyada_Endothelial_Specific_Marker_Genes_Large.xlsx") pan_caf_markers %>% mutate(log2FC_cutoff = 0.25) %>% openxlsx::write.xlsx(file = "~/Desktop/Data/Elyada_PanCAF_Specific_Marker_Genes_Large.xlsx")
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.