Started on r format(Sys.time(), "%Y-%m-%d %H:%M:%S")
library(SummarizedExperiment) library(ezRun) library(cowplot) library(tidyverse) library(Seurat) library(kableExtra) # input for this report: sceURLs, param # sceURLs <- c("sample13days"="https://fgcz-sushi.uzh.ch/projects/p2326/SCReport_3days_2019-03-06--16-24-38/sample1_3days_v-run_SCReport/00index.html", # "sample22wks"="https://fgcz-sushi.uzh.ch/projects/p2326/SCReport_2wks_2019-03-06--23-38-34/sample2_2wks_v_run_SCReport/00index.html", # "sample3sedentary"="https://fgcz-sushi.uzh.ch/projects/p2326/SCReport_sedentary_2019-03-06--23-40-36/sample3_sedentary_SCReport/00index.html") sceList <- lapply(file.path("/srv/gstore/projects", sub("https://fgcz-(gstore|sushi).uzh.ch/projects", "", dirname(sceURLs)), "sce.rds"), readRDS) names(sceList) <- names(sceURLs) # to test # param <- metadata(sceList[[1]])$param # param$x.low.cutoff <- 0.0125 # param$x.high.cutoff <- 3 # param$y.cutoff <- 0.5 # param$resolution <- 0.6 # param$resolutionCCA <- 0.6 # param$pcs <- 20 # param$cc <- 20 # param$batchCorrection <- "None" pvalue_allMarkers <- 0.05 pvalue_all2allMarkers <- 0.01 nrSamples <- length(sceList) if(ezIsSpecified(param$chosenClusters)){ for(eachSample in names(param$chosenClusters)){ chosenCells <- names(metadata(sceList[[eachSample]])$scData@ident)[metadata(sceList[[eachSample]])$scData@ident %in% param$chosenClusters[[eachSample]]] sceList[[eachSample]] <- sceList[[eachSample]][, chosenCells] metadata(sceList[[eachSample]])$scData <- SubsetData(metadata(sceList[[eachSample]])$scData, ident.use=param$chosenClusters[[eachSample]]) } }
jsFile = system.file("extdata/enrichr.js", package="ezRun", mustWork=TRUE) invisible(file.copy(from=jsFile, to=basename(jsFile), overwrite=TRUE)) cat(paste0("<SCRIPT language=\"JavaScript\" SRC=\"", basename(jsFile), "\"></SCRIPT>"))
sce_cbind <- Reduce(SingleCellExperiment::cbind, sceList) scData_Merge <- Reduce(MergeSeurat, lapply(sceList, function(x){metadata(x)$scData})) scData_Merge@project.name <- param$name # newParams <- c("x.low.cutoff", "x.high.cutoff", "y.cutoff", "pcs", "resolution") # metadata(sce_cbind)$param[newParams] <- param[newParams] # sce_cbind <- seuratPreProcess(sce_cbind) # scData.None <- metadata(sce_cbind)$scData scData.None <- seuratClustering(scData_Merge, param)
p1 <- TSNEPlot(object=scData.None, group.by="Plate", do.return=TRUE) p1 p2 <- TSNEPlot(object=scData.None, do.return=TRUE, do.label=TRUE) p2 toTable <- cellTable(scData.None) ## TODO: add fisher test? kable(toTable, row.names=FALSE, format="html", caption="Number of cells in each cluster") %>% kable_styling(bootstrap_options = "striped", full_width = F, position = "float_right")
p2$data$Plate <- scData.None@meta.data[rownames(p2$data), "Plate"] p2 + facet_wrap(~Plate, ncol=2)
genes.use <- lapply(sceList, function(x){head(rownames(metadata(x)$scData@hvg.info), 1000)}) genes.use <- unique(unlist(genes.use)) for(i in 1:length(sceList)){ genes.use <- intersect(genes.use, rownames(metadata(sceList[[i]])$scData@scale.data)) } if(nrSamples > 2){ scData.CCA <- RunMultiCCA(lapply(sceList, function(x){metadata(x)$scData}), genes.use=genes.use, num.cc=30) }else{ scData.CCA <- RunCCA(metadata(sceList[[1]])$scData, metadata(sceList[[2]])$scData, genes.use=genes.use, num.cc=30, scale.factor=getSeuratScalingFactor(param$scProtocol)) }
rm(sceList) gc()
Visualize results of CCA plot CC1 versus CC2 and look at a violin plot.
p1 <- DimPlot(object=scData.CCA, reduction.use="cca", group.by="Plate", pt.size=0.5, do.return=TRUE) p1 p2 <- VlnPlot(object=scData.CCA, features.plot="CC1", group.by="Plate", do.return=TRUE) p2
MetageneBicorPlot examines a measure of correlation strength for each CC and find that this statistic generally saturates after a reasonable number of CCs.
p3 <- MetageneBicorPlot(scData.CCA, grouping.var="Plate", dims.eval=1:30, display.progress=FALSE)
scData.CCA <- AlignSubspace(scData.CCA, reduction.type="cca", grouping.var="Plate", dims.align=1:param$cc, do.par=TRUE, num.cores=param$cores)
We can visualize the aligned CCA and perform an integrated analysis.
p1 <- VlnPlot(object=scData.CCA, features.plot = "ACC1", group.by="Plate", do.return=TRUE) p1 p2 <- VlnPlot(object=scData.CCA, features.plot="ACC2", group.by="Plate", do.return=TRUE) p2
scData.CCA <- RunTSNE(object=scData.CCA, reduction.use="cca.aligned", dims.use=1:param$cc, tsne.method="Rtsne", perplexity=ifelse(length(scData.CCA@ident) > 200, 30, 10), num_threads=param$cores) scData.CCA <- FindClusters(object=scData.CCA, reduction.type="cca.aligned", dims.use=1:param$cc, save.SNN=TRUE, force.recalc=FALSE, print.output=FALSE, resolution=param$resolutionCCA)
Visualization on t-SNE plot
p1 <- TSNEPlot(object=scData.CCA, group.by="Plate", do.return=TRUE) p1 p2 <- TSNEPlot(object=scData.CCA, do.return=TRUE, do.label=TRUE) p2 toTable <- cellTable(scData.CCA) ## TODO: add fisher test? kable(toTable, row.names=FALSE, format="html", caption="Number of cells in each cluster") %>% kable_styling(bootstrap_options = "striped", full_width = F, position = "float_right")
p2$data$Plate <- scData.CCA@meta.data[rownames(p2$data), "Plate"] p2 + facet_wrap(~Plate, ncol=2)
if(param$batchCorrection == "None"){ scData <- scData.None }else if(param$batchCorrection == "CCA"){ scData <- scData.CCA } # This is to avoid error in SplitDotPlotGG # It uses _ to separate the ident scData@meta.data$Plate <- gsub("_", ".", scData@meta.data$Plate) # scData@meta.data$Plate <- factor(scData@meta.data$Plate, # levels=unique(scData@meta.data$Plate))
rm(scData.CCA, scData.None) gc()
FeaturePlot(object = scData, features.plot = "nGene", reduction.use = "tsne", no.legend = FALSE) FeaturePlot(object = scData, features.plot = "nUMI", reduction.use = "tsne", no.legend = FALSE) FeaturePlot(object = scData, features.plot = "perc_mito", reduction.use = "tsne", no.legend = FALSE) if(!is.null(scData@meta.data$CellCycle) && !any(is.na(scData@meta.data$CellCycle))){ TSNEPlot(object=scData, group.by="CellCycle", plot.title="Cell cycle phase") }
markers <- FindAllMarkers(object=scData, only.pos=TRUE, return.thresh = pvalue_allMarkers) posMarkersFn <- "pos_markers.tsv" write_tsv(as_tibble(markers), posMarkersFn) ## Significant markers cm <- markers[ ,c("gene","cluster","avg_logFC","p_val_adj")] rownames(cm) <- NULL
List of all positive cluster markers can be accessed here.
if(doEnrichr(param)){ genesPerCluster <- split(markers$gene, markers$cluster) jsCall = paste0('enrich({list: "', sapply(genesPerCluster, paste, collapse="\\n"), '", popup: true});') enrichrCalls <- paste0("<a href='javascript:void(0)' onClick='", jsCall, "'>Analyse at Enrichr website</a>") enrichrTable <- tibble(Cluster=names(genesPerCluster), "# of markers"=lengths(genesPerCluster), "Enrichr link"=enrichrCalls) kable(enrichrTable, format="html", escape=FALSE, caption=paste0("GeneSet enrichment: genes with pvalue ", pvalue_allMarkers)) %>% kable_styling("striped", full_width = F, position = "left") }
caption ="Expression differences of cluster marker genes" ezInteractiveTableRmd(cm, digits=4, title=caption)
if(doEnrichr(param) && param$all2allMarkers){ cat("#### All clusters against all clusters comparison") cat("\n") }
if(param$all2allMarkers){ clusterCombs <- combn(levels(scData@ident), m=2) all2allMarkers <- mcmapply(FindMarkers, as.integer(clusterCombs[1, ]), as.integer(clusterCombs[2, ]), MoreArgs = list(object=scData, only.pos=FALSE), mc.preschedule=FALSE, mc.cores=min(4L, param$cores), SIMPLIFY=FALSE) all2allMarkers <- lapply(all2allMarkers, function(x){ x[x$p_val <= pvalue_all2allMarkers, ] }) names(all2allMarkers) <- apply(clusterCombs, 2, paste, collapse="vs") }
if(doEnrichr(param) && param$all2allMarkers){ for(comparison in names(all2allMarkers)){ write_tsv(as_tibble(all2allMarkers[[comparison]], rownames="Gene"), file=paste0(comparison, ".tsv")) } genesAllPerCluster <- lapply(all2allMarkers, rownames) genesUpPerCluster <- lapply(all2allMarkers, function(x){rownames(x)[x$avg_logFC > 0]}) genesDownPerCluster <- lapply(all2allMarkers, function(x){rownames(x)[x$avg_logFC < 0]}) jsCall_all = paste0('enrich({list: "', sapply(genesAllPerCluster, paste, collapse="\\n"), '", popup: true});') jsCall_up = paste0('enrich({list: "', sapply(genesUpPerCluster, paste, collapse="\\n"), '", popup: true});') jsCall_down = paste0('enrich({list: "', sapply(genesDownPerCluster, paste, collapse="\\n"), '", popup: true});') enrichrCalls_all <- paste0("<a href='javascript:void(0)' onClick='", jsCall_all, "'>Analyse at Enrichr website</a>") enrichrCalls_up <- paste0("<a href='javascript:void(0)' onClick='", jsCall_up, "'>Analyse at Enrichr website</a>") enrichrCalls_down <- paste0("<a href='javascript:void(0)' onClick='", jsCall_down, "'>Analyse at Enrichr website</a>") enrichrTable <- tibble(Comparison=names(all2allMarkers), "# of differentially expressed genes"=lengths(genesAllPerCluster), "Enrichr link: all significant genes"=enrichrCalls_all, "Enrichr link: up-regulated genes"=enrichrCalls_up, "Enrichr link: down-regulated genes"=enrichrCalls_down, "List of differentially expressed genes"=text_spec(paste0(names(all2allMarkers), ".tsv"), link=paste0(names(all2allMarkers), ".tsv"))) kable(enrichrTable, format="html", escape=FALSE, caption=paste0("GeneSet enrichment: genes with pvalue ", pvalue_all2allMarkers)) %>% kable_styling("striped", full_width = F, position = "left") }
some_markers <- head(dplyr::arrange(cm, p_val_adj) %>% select(gene) %>% pull(), 100) for(i in 1:length(some_markers)){ plot(VlnPlot(object = scData, some_markers[i], do.return=TRUE)) FeaturePlot(object = scData, features.plot = some_markers[i], cols.use = c("gray", "red"), reduction.use = "tsne", no.legend = FALSE) }
Max r param$markersToShow
marker genes are shown for each cluster.
top10 <- markers %>% group_by(cluster) %>% top_n(param$markersToShow, avg_logFC) DoHeatmap(object = scData, genes.use = top10$gene, slim.col.label = TRUE, remove.key=TRUE)
markers <- list() for(eachCluster in levels(scData@ident)){ markersEach <- try(FindConservedMarkers(scData, ident.1=eachCluster, grouping.var="Plate", print.bar=FALSE, only.pos=TRUE), silent=TRUE) ## to skip some groups with few cells if(class(markersEach) != "try-error" && nrow(markersEach) > 0){ markers[[eachCluster]] <- as_tibble(markersEach, rownames="gene") } } ## some of the cluster have no significant conserved markers markers <- markers[sapply(markers, nrow) != 0L] markers <- bind_rows(markers, .id="cluster") posConservedMarkersFn <- "pos_conserved_markers.tsv" write_tsv(markers, file=posConservedMarkersFn)
List of all positive conserved cell type markers can be accessed here.
if(doEnrichr(param)){ genesPerCluster <- split(markers$gene, markers$cluster) genesPerCluster <- genesPerCluster[gtools::mixedorder(names(genesPerCluster))] jsCall = paste0('enrich({list: "', sapply(genesPerCluster, paste, collapse="\\n"), '", popup: true});') enrichrCalls <- paste0("<a href='javascript:void(0)' onClick='", jsCall, "'>Analyse at Enrichr website</a>") enrichrTable <- tibble(Cluster=names(genesPerCluster), "# of markers"=lengths(genesPerCluster), "Enrichr link"=enrichrCalls) kable(enrichrTable, format="html", escape=FALSE, caption=paste0("GeneSet enrichment analysis: conserved marker genes")) %>% kable_styling("striped", full_width = F, position = "left") }
caption ="Conserved cell type markers" ezInteractiveTableRmd(markers, title=caption)
Top 30 conserved cell type markers are shown.
markers.to.plot <- head(dplyr::arrange(markers, minimump_p_val) %>% dplyr::select(gene) %>% pull(), 30) for(i in 1:length(markers.to.plot)){ plot(VlnPlot(object = scData, markers.to.plot[i], do.return=TRUE)) FeaturePlot(object = scData, features.plot = markers.to.plot[i], cols.use = c("gray", "red"), reduction.use = "tsne", no.legend = FALSE) }
sdp <- SplitDotPlotGG(scData, genes.plot = unique(markers.to.plot), # It gives error when there are duplicated markers cols.use = RColorBrewer::brewer.pal(nrSamples, "Set1"), x.lab.rot = T, plot.legend = T, dot.scale = 8, do.return = T, grouping.var = "Plate")
scData@meta.data$cluster.plate <- paste0(scData@ident, "_", scData@meta.data$Plate) scData <- StashIdent(scData, save.name = "cluster") scData <- SetAllIdent(scData, id = "cluster.plate") uniquePlates <- as.character(unique(scData@meta.data$Plate)) diffGenesFns <- c() uniquePlatesComb <- combn(uniquePlates, m=2) for(i in 1:ncol(uniquePlatesComb)){ diffGenes <- list() for(eachCluster in gtools::mixedsort(unique(scData@meta.data$cluster))){ markersEach <- try(FindMarkers(scData, ident.1=paste0(eachCluster, "_", uniquePlatesComb[1,i]), ident.2=paste0(eachCluster, "_", uniquePlatesComb[2,i]), print.bar=FALSE), silent=TRUE) ## to skip some groups with few cells if(class(markersEach) != "try-error"){ diffGenes[[eachCluster]] <- as_tibble(markersEach, rownames="gene") cat("\n") cat("#### Top 10 differential expressed genes from cluster", eachCluster, ", comparison ", uniquePlatesComb[,i]) cat("\n") FeatureHeatmap(scData, features.plot=head(rownames(markersEach), 10), group.by="Plate", pt.size=0.25, key.position="top", max.exp=3) cat("\n") } } diffGenes <- bind_rows(diffGenes, .id="cluster") diffGenesFn <- paste0("diffExpGenesPerCluster_", paste(uniquePlatesComb[,i], collapse="Vs"), ".tsv") write_tsv(diffGenes, file=diffGenesFn) diffGenesFns <- c(diffGenesFns, diffGenesFn) }
caption ="Differential expressed genes per cluster" ezInteractiveTableRmd(diffGenes, title=caption)
cat("List of differential expressed genes per cluster can be accessed at", paste(paste0("[", diffGenesFns, "](",diffGenesFns, ")"), collapse=", ")) cat("The comparison is ", paste(uniquePlates, collapse=" vs "))
cat("No differential expression computed")
metadata(sce_cbind)$scData <- scData sceFn <- "sce.rds" saveRDS(sce_cbind, file=sceFn)
tr_cnts <- expm1(scData@data) geneMeans <- rowsum(t(as.matrix(tr_cnts)), group=scData@ident) geneMeans <- sweep(geneMeans, 1, STATS=table(scData@ident)[rownames(geneMeans)], FUN="/") geneMeans <- log1p(t(geneMeans)) colnames(geneMeans) <- paste("cluster", colnames(geneMeans), sep="_") geneMeanPerClusterFn <- "gene_means_per_cluster.txt" ezWrite.table(geneMeans, geneMeanPerClusterFn) geneMeans <- Matrix::rowMeans(tr_cnts) geneMeans <- log1p(geneMeans) geneMeansFn <- "gene_means.txt" ezWrite.table(geneMeans, geneMeansFn) tSNE_data <- as_tibble(scData@dr$tsne@cell.embeddings, rownames="cells") tSNE_data <- dplyr::rename(tSNE_data, X=`tSNE_1`, Y=`tSNE_2`) tSNE_data$cluster <- scData@ident tSNEFn <- "tSNE_data.tsv" write_tsv(tSNE_data, file=tSNEFn)
The SingleCellExperiment object with final Seurat object after merging is here.
param[c("x.low.cutoff", "x.high.cutoff", "y.cutoff", "pcs", "resolution", "batchCorrection", "cc", "resolutionCCA", "chosenClusters")]
PrintFindClustersParams(object = scData)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.