Started on r format(Sys.time(), "%Y-%m-%d %H:%M:%S")
# input for this report: sce library(ezRun) library(Seurat) library(kableExtra) library(fastICA) library(Matrix) library(RColorBrewer) library(SummarizedExperiment) library(Rmagic) ## This has to be loaded after setting correct python library(tidyverse) ## ---------------------------------------------- ## debug # title: "`r metadata(sce)$param$name`" # sce <- readRDS("/scratch/gtan/dev/SCReports-p2284/AVM_17_08_2018_SCReport/sce.rds") # sce <- readRDS("/srv/gstore/projects/p2838/SCReport_34239_2019-02-19--21-11-44//NCC_Trpv5_Tomato_sc_A01_SCReport/sce.rds") # sce <- readRDS("/scratch/gtan/debug/quickDebug/sce.rds") ## end of debug debug <- FALSE scData <- metadata(sce)$scData param <- metadata(sce)$param output <- metadata(sce)$output pvalue_allMarkers <- 0.05 pvalue_all2allMarkers <- 0.01
jsFile = system.file("extdata/enrichr.js", package="ezRun", mustWork=TRUE) invisible(file.copy(from=jsFile, to=basename(jsFile), overwrite=TRUE)) cat(paste0("<SCRIPT language=\"JavaScript\" SRC=\"", basename(jsFile), "\"></SCRIPT>"))
markers <- FindAllMarkers(object=scData, only.pos=TRUE, return.thresh = pvalue_allMarkers) posMarkersFn <- "pos_markers.tsv" write_tsv(as_tibble(markers), file=posMarkersFn) ## Significant markers if (nrow(markers) >0){ cm <- markers[ ,c("gene","cluster","avg_logFC","p_val_adj")] rownames(cm) <- NULL } else { cm = markers }
if(param$all2allMarkers){ clusterCombs <- combn(levels(scData@ident), m=2) all2allMarkers <- mcmapply(FindMarkers, as.integer(clusterCombs[1, ]), as.integer(clusterCombs[2, ]), MoreArgs = list(object=scData, only.pos=FALSE), mc.preschedule=FALSE, mc.cores=min(4L, param$cores), SIMPLIFY=FALSE) all2allMarkers <- lapply(all2allMarkers, function(x){ x[x$p_val <= pvalue_all2allMarkers, ] }) names(all2allMarkers) <- apply(clusterCombs, 2, paste, collapse="vs") }
coordinatesData <- scData@dr$tsne@cell.embeddings if(!is.null(scData@dr$umap)){ ## TODO: remove the condition if umap is always computed coordinatesData <- cbind(coordinatesData, scData@dr$umap@cell.embeddings) } tSNE_data <- as_tibble(coordinatesData, rownames="cells") tSNE_data$cluster <- scData@ident
if(param$runPseudoTime){ cd <- as.matrix(scData@data) set.seed(10) a <- fastICA(cd, 2, alg.typ = "deflation", fun = "logcosh", alpha = 1, method = "C", row.norm = FALSE, maxit = 200, tol = 0.0001, verbose = FALSE) tSNE_data$pseudotime <- a$A[1,] } tSNEFn <- "coordinates_data.tsv" write_tsv(tSNE_data, file=tSNEFn)
This analysis was done with the R package Seurat. The distance metric which drives the clustering analysis is based on Principal Component Analysis. In the plot below each dot is a cell.
We eliminated the genes that are expressed in < r param$minCellsPerGene
cells. We eliminated the cells that have < r param$minGenesPerCell
genes detected, or > r param$maxGenesPerCell
genes. We have r sum(summary(scData@ident))
cells left in total.
# Coloured by plate/run if available if(length(unique(scData@meta.data$Plate)) >= 2){ TSNEPlot(object=scData, group.by="Plate") } # Coloured by cluster TSNEPlot(object=scData, do.label=TRUE) kable(tibble(Cluster=names(summary(scData@ident)), "# of cells"=summary(scData@ident)), row.names=FALSE, format="html", caption="Number of cells in each cluster") %>% kable_styling(bootstrap_options = "striped", full_width = F, position = "float_right")
if(ezIsSpecified(param$pcGenes)){ cat("Selected genes for clustering:\n") metadata(sce)$param[["pc.genes"]] cat("\n") }
tr_cnts <- expm1(scData@data) geneMeans <- rowsum(t(as.matrix(tr_cnts)), group=scData@ident) geneMeans <- sweep(geneMeans, 1, STATS=table(scData@ident)[rownames(geneMeans)], FUN="/") geneMeans <- log1p(t(geneMeans)) colnames(geneMeans) <- paste("cluster", colnames(geneMeans), sep="_") geneMeanPerClusterFn <- "gene_means_per_cluster.txt" ezWrite.table(geneMeans, geneMeanPerClusterFn) geneMeans <- Matrix::rowMeans(tr_cnts) geneMeans <- log1p(geneMeans) geneMeansFn <- "gene_means.txt" ezWrite.table(geneMeans, geneMeansFn)
DimPlot(scData, reduction.use="umap", do.label=TRUE)
FeaturePlot(object = scData, features.plot = "nGene", reduction.use = "tsne", no.legend = FALSE) FeaturePlot(object = scData, features.plot = "nUMI", reduction.use = "tsne", no.legend = FALSE) FeaturePlot(object = scData, features.plot = "perc_mito", reduction.use = "tsne", no.legend = FALSE) if(!is.null(scData@meta.data$CellCycle) && !any(is.na(scData@meta.data$CellCycle))){ TSNEPlot(object=scData, group.by="CellCycle", plot.title="Cell cycle phase") }
List of all positive cluster markers can be accessed here.
if(doEnrichr(param)){ genesPerCluster <- split(markers$gene, markers$cluster) jsCall = paste0('enrich({list: "', sapply(genesPerCluster, paste, collapse="\\n"), '", popup: true});') enrichrCalls <- paste0("<a href='javascript:void(0)' onClick='", jsCall, "'>Analyse at Enrichr website</a>") enrichrTable <- tibble(Cluster=names(genesPerCluster), "# of markers"=lengths(genesPerCluster), "Enrichr link"=enrichrCalls) kable(enrichrTable, format="html", escape=FALSE, caption=paste0("GeneSet enrichment: genes with pvalue ", pvalue_allMarkers)) %>% kable_styling("striped", full_width = F, position = "left") }
caption ="Expression differences of cluster marker genes" ezInteractiveTableRmd(cm, digits=4, title=caption)
if(doEnrichr(param) && param$all2allMarkers){ cat("#### All clusters against all clusters comparison") cat("\n") }
if(doEnrichr(param) && param$all2allMarkers){ for(comparison in names(all2allMarkers)){ if(nrow(all2allMarkers[[comparison]]) > 0){ write_tsv(as_tibble(all2allMarkers[[comparison]], rownames="Gene"), file=paste0(comparison, ".tsv")) }else{ write_tsv(as_tibble(all2allMarkers[[comparison]]), file=paste0(comparison, ".tsv")) } } genesAllPerCluster <- lapply(all2allMarkers, rownames) genesUpPerCluster <- lapply(all2allMarkers, function(x){rownames(x)[x$avg_logFC > 0]}) genesDownPerCluster <- lapply(all2allMarkers, function(x){rownames(x)[x$avg_logFC < 0]}) jsCall_all = paste0('enrich({list: "', sapply(genesAllPerCluster, paste, collapse="\\n"), '", popup: true});') jsCall_up = paste0('enrich({list: "', sapply(genesUpPerCluster, paste, collapse="\\n"), '", popup: true});') jsCall_down = paste0('enrich({list: "', sapply(genesDownPerCluster, paste, collapse="\\n"), '", popup: true});') enrichrCalls_all <- paste0("<a href='javascript:void(0)' onClick='", jsCall_all, "'>Analyse at Enrichr website</a>") enrichrCalls_up <- paste0("<a href='javascript:void(0)' onClick='", jsCall_up, "'>Analyse at Enrichr website</a>") enrichrCalls_down <- paste0("<a href='javascript:void(0)' onClick='", jsCall_down, "'>Analyse at Enrichr website</a>") enrichrTable <- tibble(Comparison=names(all2allMarkers), "# of differentially expressed genes"=lengths(genesAllPerCluster), "Enrichr link: all significant genes"=enrichrCalls_all, "Enrichr link: up-regulated genes"=enrichrCalls_up, "Enrichr link: down-regulated genes"=enrichrCalls_down, "List of differentially expressed genes"=text_spec(paste0(names(all2allMarkers), ".tsv"), link=paste0(names(all2allMarkers), ".tsv"))) kable(enrichrTable, format="html", escape=FALSE, caption=paste0("GeneSet enrichment: genes with pvalue ", pvalue_all2allMarkers)) %>% kable_styling("striped", full_width = F, position = "left") }
some_markers <- head(arrange(cm, p_val_adj) %>% select(gene) %>% pull(), 100) for(i in 1:length(some_markers)){ plot(VlnPlot(object = scData, some_markers[i], do.return=TRUE)) FeaturePlot(object = scData, features.plot = some_markers[i], cols.use = c("gray", "red"), reduction.use = "tsne", no.legend = FALSE) }
Max r param$markersToShow
marker genes are shown for each cluster.
top10 <- markers %>% group_by(cluster) %>% top_n(param$markersToShow, avg_logFC) DoHeatmap(object = scData, genes.use = top10$gene, slim.col.label = TRUE, remove.key=TRUE)
Each dot is a cell. On the right-hand plot, the color corresponds to the gene expression levels.
if(length(param$markersToCheck) >0){ all_m = list() for(i in 1:length(param$markersToCheck)){ cat("\n") cat(paste("####", names(param$markersToCheck)[i])) cat("\n") check_markers <- param$markersToCheck[[i]] check_markers <- rownames(scData@data)[toupper(rownames(scData@data)) %in% toupper(check_markers)] all_m[[i]] <- check_markers if(length(check_markers) > 0){ for(j in 1:length(check_markers)){ plot(VlnPlot(object = scData, check_markers[j], do.return=TRUE)) FeaturePlot(object = scData, features.plot = check_markers[j], cols.use = c("gray", "red"), reduction.use = "tsne", no.legend = FALSE) } } cat("\n") } }
Slingshot is a cluster based ineage reconstruction and pseudotime inference approach. Here, we use reduced-dimensional space of tSNE derived above and cell clustering produced by gaussian mixture modeling, instead of cell clustering done by Seurat. More details is available at here.
Slingshot is disabled for now due to an issue of rgl
package.
sceSlingshot <- Convert(from = scData, to = "sce") sceSlingshot <- slingshot::slingshot(sceSlingshot, clusterLabels = 'ident', reducedDim = 'TSNE') colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100) nrPseudoT <- grep("slingPseudotime_\\d+", colnames(colData(sceSlingshot)), value=TRUE) for(i in nrPseudoT){ plot(reducedDim(sceSlingshot, type="TSNE"), col=colors[cut(sceSlingshot[[i]], breaks=100)], pch=16, asp = 1, main=i) lines(SlingshotDataSet(sceSlingshot), lwd=2) } ## Trajectory cell.colorsPalette <- set_names(gg_color_hue(length(levels(sceSlingshot$ident))), levels(sceSlingshot$ident)) plot(reducedDim(sceSlingshot, type="TSNE"), col = cell.colorsPalette[sceSlingshot$ident], pch=16, asp = 1, main="Trajectory reconstruction") lines(SlingshotDataSet(sceSlingshot), lwd=2, type = 'lineages') legend("topleft", legend=names(cell.colorsPalette), pch=19, col=cell.colorsPalette) ## smooth curves crv1 <- getCurves(SlingshotDataSet(sceSlingshot)) plot(reducedDim(sceSlingshot, type="TSNE"), col = cell.colorsPalette[sceSlingshot$ident], asp = 1, pch = 16, main="Smooth curve") lines(crv1, lwd = 3) legend("topleft", legend=names(cell.colorsPalette), pch=19, col=cell.colorsPalette)
p <- ggplot(tSNE_data, aes(tSNE_1, tSNE_2)) + geom_point(shape=21, size=5, colour = "black", aes(fill=pseudotime), alpha=0.7) + scale_fill_gradient(low="yellow", high="blue") print(p)
scData@scale.data <- NULL saveRDS(list(scData=scData, tSNE_data=tSNE_data), file=basename(output$getColumn("Live Report"))) sce_iSEE = as.SingleCellExperiment(scData) saveRDS(sce_iSEE, "sce_iSEE.rds")
iSEE explorer{target="_blank"}
Markov Affinity-based Graph Imputation of Cells (MAGIC) is an algorithm for denoising high-dimensional data most commonly applied to single-cell RNA sequencing data. MAGIC learns the manifold data, using the resultant graph to smooth the features and restore the structure of the data.
See more details at https://github.com/KrishnaswamyLab/MAGIC.
magic_data <- magic(Matrix::t(scData@data), genes="all_genes", t="auto", seed=10) magicDataFn <- "magic_data.tsv" write_tsv(as_tibble(t(as.data.frame(magic_data)), rownames="gene_name"), file=magicDataFn) magicDataFn <- R.utils::gzip(magicDataFn)
Smoothed data is available at r magicDataFn
.
After loading r magicDataFn
into R,
to visualise the gene-gene relationships after MAGIC,
use the code below to show genes of interest,
for example, "Pvalb", "Slc12a3" and "Aqp2" here.
ggplot(magic_data) + geom_point(aes(Pvalb, Slc12a3, color=Aqp2)) + scale_color_viridis(option="B")
organism = NULL if(startsWith(param$refBuild, "Homo_sapiens/Ensembl")){ organism <- "Human" } else if(startsWith(param$refBuild, "Mus_musculus/Ensembl")){ organism <- "Mouse" }
sink("sessionInfo.txt") sessionInfo() sink()
unloadNamespace("SingleR") #be sure SingleR is unloaded #detach(package:SingleR) unloadNamespace("Seurat") #unload the Seurat v2 #detach(package:Seurat) library("Seurat", lib="/home/daymegr/myRpackages") #and load the version 3 scData_v3 = UpdateSeuratObject(scData) library(SingleR) #only load SingleR after loading the newest Seurat version singler = CreateSinglerObject(as.matrix(GetAssayData(scData_v3, slot = "counts")), annot = NULL, project.name = "", min.genes = 0, technology = "10X", species = organism, citation = "", normalize.gene.length = F, variable.genes = "de", fine.tune = T, do.signatures = F, clusters=scData_v3@active.ident, do.main.types = T, reduce.file.size=T,temp.dir = NULL, numCores = param$cores) singler$seurat = scData_v3 singler$meta.data$orig.ident = scData_v3@meta.data$Batch singler$meta.data$xy = scData_v3@reductions$tsne@cell.embeddings # the tSNE coordinates singler$meta.data$clusters = Idents(scData_v3) #convert the file to use it in the interactive browser singler.browser = convertSingleR2Browser(singler) saveRDS(singler.browser, 'singler.browser.rds')
cat("### Cell populations") cat("\n") cat("\n") cat("In order to identify cell populations we used SingleR [ Aran, Looney, Liu et al. Reference-based analysis of lung single-cell sequencing reveals a transitional profibrotic macrophage. Nature Immunology (2019)], a computational method for unbiased cell type recognition of scRNA-seq. SingleR leverages reference transcriptomic datasets of pure cell types to infer the cell of origin of each of the single cells independently. \n The SingleR browser allows to visualize the cell populations identified in different and easier ways.") cat("\n") cat("\n") cat(paste0("[SingleR browser](http://fgcz-shiny.uzh.ch/fgcz_SingleRBrowser_app/?data=" , output$getColumn("Report"), "/singler.browser.rds)"))
param[c("minCellsPerGene", "minGenesPerCell", "maxGenesPerCell", "minReadsPerCell", "maxMitoFraction", "pcs", "pcGenes", "x.low.cutoff", "x.high.cutoff", "y.cutoff", "vars.to.regress", "resolution", "markersToCheck")]
Seurat2::PrintFindClustersParams(object = scData)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.