Started on r format(Sys.time(), "%Y-%m-%d %H:%M:%S")

# input for this report: sce
library(ezRun)
library(Seurat)
library(kableExtra)
library(fastICA)
library(Matrix)
library(RColorBrewer)
library(SummarizedExperiment)
library(Rmagic) ## This has to be loaded after setting correct python
library(tidyverse)

## ----------------------------------------------
## debug
# title:  "`r metadata(sce)$param$name`"
# sce <- readRDS("/scratch/gtan/dev/SCReports-p2284/AVM_17_08_2018_SCReport/sce.rds")
# sce <- readRDS("/srv/gstore/projects/p2838/SCReport_34239_2019-02-19--21-11-44//NCC_Trpv5_Tomato_sc_A01_SCReport/sce.rds")
# sce <- readRDS("/scratch/gtan/debug/quickDebug/sce.rds")
## end of debug
debug <- FALSE

scData <- metadata(sce)$scData
param <- metadata(sce)$param
output <- metadata(sce)$output

pvalue_allMarkers <- 0.05
pvalue_all2allMarkers <- 0.01
jsFile = system.file("extdata/enrichr.js", package="ezRun", mustWork=TRUE)
invisible(file.copy(from=jsFile, to=basename(jsFile), overwrite=TRUE))
cat(paste0("<SCRIPT language=\"JavaScript\" SRC=\"", basename(jsFile), "\"></SCRIPT>"))
markers <- FindAllMarkers(object=scData, only.pos=TRUE,
                          return.thresh = pvalue_allMarkers)
posMarkersFn <- "pos_markers.tsv"
write_tsv(as_tibble(markers), file=posMarkersFn)

## Significant markers
if (nrow(markers) >0){
  cm <- markers[ ,c("gene","cluster","avg_logFC","p_val_adj")]
  rownames(cm) <- NULL
} else {
  cm = markers
}
if(param$all2allMarkers){
  clusterCombs <- combn(levels(scData@ident), m=2)
  all2allMarkers <- mcmapply(FindMarkers, as.integer(clusterCombs[1, ]), 
                             as.integer(clusterCombs[2, ]),
                             MoreArgs = list(object=scData,
                                             only.pos=FALSE),
                             mc.preschedule=FALSE,
                             mc.cores=min(4L, param$cores),
                             SIMPLIFY=FALSE)
  all2allMarkers <- lapply(all2allMarkers, function(x){
    x[x$p_val <= pvalue_all2allMarkers, ]
  })
  names(all2allMarkers) <- apply(clusterCombs, 2, paste, collapse="vs")
}
coordinatesData <- scData@dr$tsne@cell.embeddings
if(!is.null(scData@dr$umap)){
  ## TODO: remove the condition if umap is always computed
  coordinatesData <- cbind(coordinatesData, scData@dr$umap@cell.embeddings)
}
tSNE_data <- as_tibble(coordinatesData, rownames="cells")
tSNE_data$cluster <- scData@ident
if(param$runPseudoTime){
  cd <- as.matrix(scData@data)
  set.seed(10)
  a <- fastICA(cd, 2, alg.typ = "deflation", fun = "logcosh",
               alpha = 1, method = "C", row.norm = FALSE, maxit = 200,
               tol = 0.0001, verbose = FALSE)
  tSNE_data$pseudotime <- a$A[1,]
}
tSNEFn <- "coordinates_data.tsv"
write_tsv(tSNE_data, file=tSNEFn)

Clustering Results {.tabset}

Cell clustering tSNE

This analysis was done with the R package Seurat. The distance metric which drives the clustering analysis is based on Principal Component Analysis. In the plot below each dot is a cell. We eliminated the genes that are expressed in < r param$minCellsPerGene cells. We eliminated the cells that have < r param$minGenesPerCell genes detected, or > r param$maxGenesPerCell genes. We have r sum(summary(scData@ident)) cells left in total.

# Coloured by plate/run if available
if(length(unique(scData@meta.data$Plate)) >= 2){
  TSNEPlot(object=scData, group.by="Plate")
}
# Coloured by cluster
TSNEPlot(object=scData, do.label=TRUE)

kable(tibble(Cluster=names(summary(scData@ident)),
             "# of cells"=summary(scData@ident)),
      row.names=FALSE, format="html",
      caption="Number of cells in each cluster") %>%
  kable_styling(bootstrap_options = "striped", full_width = F,
                position = "float_right")
if(ezIsSpecified(param$pcGenes)){
  cat("Selected genes for clustering:\n")
  metadata(sce)$param[["pc.genes"]]
  cat("\n")
}
tr_cnts <- expm1(scData@data)
geneMeans <- rowsum(t(as.matrix(tr_cnts)), group=scData@ident)
geneMeans <- sweep(geneMeans, 1,
                   STATS=table(scData@ident)[rownames(geneMeans)], FUN="/")
geneMeans <- log1p(t(geneMeans))
colnames(geneMeans) <- paste("cluster", colnames(geneMeans), sep="_")
geneMeanPerClusterFn <- "gene_means_per_cluster.txt"
ezWrite.table(geneMeans, geneMeanPerClusterFn)

geneMeans <- Matrix::rowMeans(tr_cnts)
geneMeans <- log1p(geneMeans)
geneMeansFn <- "gene_means.txt"
ezWrite.table(geneMeans, geneMeansFn)

r geneMeanPerClusterFn

r geneMeansFn

r tSNEFn

UMAP plot (beta)

DimPlot(scData, reduction.use="umap", do.label=TRUE)

Control of the cell clustering

FeaturePlot(object = scData, features.plot = "nGene", 
            reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(object = scData, features.plot = "nUMI",
            reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(object = scData, features.plot = "perc_mito",
            reduction.use = "tsne", no.legend = FALSE)
if(!is.null(scData@meta.data$CellCycle) && 
   !any(is.na(scData@meta.data$CellCycle))){
  TSNEPlot(object=scData, group.by="CellCycle", plot.title="Cell cycle phase")
}

Top cluster markers

List of all positive cluster markers can be accessed here.

if(doEnrichr(param)){
  genesPerCluster <- split(markers$gene, markers$cluster)
  jsCall = paste0('enrich({list: "', sapply(genesPerCluster, paste, collapse="\\n"), '", popup: true});')
  enrichrCalls <- paste0("<a href='javascript:void(0)' onClick='", jsCall, 
                         "'>Analyse at Enrichr website</a>")
  enrichrTable <- tibble(Cluster=names(genesPerCluster),
                         "# of markers"=lengths(genesPerCluster),
                         "Enrichr link"=enrichrCalls)
  kable(enrichrTable, format="html", escape=FALSE,
        caption=paste0("GeneSet enrichment: genes with pvalue ", pvalue_allMarkers)) %>%
    kable_styling("striped", full_width = F, position = "left")
}
caption ="Expression differences of cluster marker genes"
ezInteractiveTableRmd(cm, digits=4, title=caption)
if(doEnrichr(param) && param$all2allMarkers){
  cat("#### All clusters against all clusters comparison")
  cat("\n")
}
if(doEnrichr(param) && param$all2allMarkers){
  for(comparison in names(all2allMarkers)){
    if(nrow(all2allMarkers[[comparison]]) > 0){
      write_tsv(as_tibble(all2allMarkers[[comparison]], rownames="Gene"),
                file=paste0(comparison, ".tsv"))
    }else{
      write_tsv(as_tibble(all2allMarkers[[comparison]]),
                file=paste0(comparison, ".tsv"))
    }
  }
  genesAllPerCluster <- lapply(all2allMarkers, rownames)
  genesUpPerCluster <- lapply(all2allMarkers, function(x){rownames(x)[x$avg_logFC > 0]})
  genesDownPerCluster <- lapply(all2allMarkers, function(x){rownames(x)[x$avg_logFC < 0]})

  jsCall_all = paste0('enrich({list: "', sapply(genesAllPerCluster, paste, collapse="\\n"), '", popup: true});')
  jsCall_up = paste0('enrich({list: "', sapply(genesUpPerCluster, paste, collapse="\\n"), '", popup: true});')
  jsCall_down = paste0('enrich({list: "', sapply(genesDownPerCluster, paste, collapse="\\n"), '", popup: true});')

  enrichrCalls_all <- paste0("<a href='javascript:void(0)' onClick='", jsCall_all,
                         "'>Analyse at Enrichr website</a>")
  enrichrCalls_up <- paste0("<a href='javascript:void(0)' onClick='", jsCall_up,
                         "'>Analyse at Enrichr website</a>")
  enrichrCalls_down <- paste0("<a href='javascript:void(0)' onClick='", jsCall_down,
                         "'>Analyse at Enrichr website</a>")
  enrichrTable <- tibble(Comparison=names(all2allMarkers),
                         "# of differentially expressed genes"=lengths(genesAllPerCluster),
                         "Enrichr link: all significant genes"=enrichrCalls_all,
                         "Enrichr link: up-regulated genes"=enrichrCalls_up,
                         "Enrichr link: down-regulated genes"=enrichrCalls_down,
                         "List of differentially expressed genes"=text_spec(paste0(names(all2allMarkers), ".tsv"), link=paste0(names(all2allMarkers), ".tsv")))
  kable(enrichrTable, format="html", escape=FALSE,
        caption=paste0("GeneSet enrichment: genes with pvalue ", pvalue_all2allMarkers)) %>%
    kable_styling("striped", full_width = F, position = "left")
}

Visualizations of top cluster markers

some_markers <- head(arrange(cm, p_val_adj) %>% select(gene) %>% pull(), 100)
for(i in 1:length(some_markers)){
  plot(VlnPlot(object = scData, some_markers[i], do.return=TRUE))
  FeaturePlot(object = scData, features.plot = some_markers[i],
              cols.use = c("gray", "red"),
              reduction.use = "tsne", no.legend = FALSE)
}

Heatmap of cluster marker genes

Max r param$markersToShow marker genes are shown for each cluster.

top10 <- markers %>% group_by(cluster) %>% top_n(param$markersToShow, avg_logFC)
DoHeatmap(object = scData, genes.use = top10$gene, slim.col.label = TRUE,
          remove.key=TRUE)

Expression of selected genes

Each dot is a cell. On the right-hand plot, the color corresponds to the gene expression levels.

if(length(param$markersToCheck) >0){
  all_m = list()
  for(i in 1:length(param$markersToCheck)){
    cat("\n")
    cat(paste("####", names(param$markersToCheck)[i]))
    cat("\n")
    check_markers <- param$markersToCheck[[i]]
    check_markers <- rownames(scData@data)[toupper(rownames(scData@data)) %in%
                                             toupper(check_markers)]
    all_m[[i]] <- check_markers
    if(length(check_markers) > 0){
      for(j in 1:length(check_markers)){
        plot(VlnPlot(object = scData, check_markers[j], do.return=TRUE))
        FeaturePlot(object = scData, features.plot = check_markers[j],
                    cols.use = c("gray", "red"),
                    reduction.use = "tsne", no.legend = FALSE)
      }
    }
    cat("\n")
  }
}

Trajectory reconstruction and pseudotime inference

Slingshot (Beta)

Slingshot is a cluster based ineage reconstruction and pseudotime inference approach. Here, we use reduced-dimensional space of tSNE derived above and cell clustering produced by gaussian mixture modeling, instead of cell clustering done by Seurat. More details is available at here.

Slingshot is disabled for now due to an issue of rgl package.

sceSlingshot <- Convert(from = scData, to = "sce")

sceSlingshot <- slingshot::slingshot(sceSlingshot, clusterLabels = 'ident',
                                     reducedDim = 'TSNE')
colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100)
nrPseudoT <- grep("slingPseudotime_\\d+", colnames(colData(sceSlingshot)),
                  value=TRUE)
for(i in nrPseudoT){
  plot(reducedDim(sceSlingshot, type="TSNE"),
       col=colors[cut(sceSlingshot[[i]], breaks=100)],
       pch=16, asp = 1, main=i)
  lines(SlingshotDataSet(sceSlingshot), lwd=2)
}
## Trajectory
cell.colorsPalette <- set_names(gg_color_hue(length(levels(sceSlingshot$ident))),
                                levels(sceSlingshot$ident))
plot(reducedDim(sceSlingshot, type="TSNE"), 
     col = cell.colorsPalette[sceSlingshot$ident], 
     pch=16, asp = 1, main="Trajectory reconstruction")
lines(SlingshotDataSet(sceSlingshot), lwd=2, type = 'lineages')
legend("topleft", legend=names(cell.colorsPalette),
       pch=19, col=cell.colorsPalette)

##  smooth curves
crv1 <- getCurves(SlingshotDataSet(sceSlingshot))
plot(reducedDim(sceSlingshot, type="TSNE"),
     col = cell.colorsPalette[sceSlingshot$ident], asp = 1, pch = 16,
     main="Smooth curve")
lines(crv1, lwd = 3)
legend("topleft", legend=names(cell.colorsPalette),
       pch=19, col=cell.colorsPalette)

fastICA

p <- ggplot(tSNE_data, aes(tSNE_1, tSNE_2)) + 
  geom_point(shape=21, size=5, colour = "black",
             aes(fill=pseudotime), alpha=0.7) +
  scale_fill_gradient(low="yellow", high="blue")
print(p)

Interactive explorer

scData@scale.data <- NULL
saveRDS(list(scData=scData, tSNE_data=tSNE_data),
        file=basename(output$getColumn("Live Report")))
sce_iSEE = as.SingleCellExperiment(scData)
saveRDS(sce_iSEE, "sce_iSEE.rds")

Shiny explorer


iSEE explorer{target="_blank"}

MAGIC

Markov Affinity-based Graph Imputation of Cells (MAGIC) is an algorithm for denoising high-dimensional data most commonly applied to single-cell RNA sequencing data. MAGIC learns the manifold data, using the resultant graph to smooth the features and restore the structure of the data.

See more details at https://github.com/KrishnaswamyLab/MAGIC.

magic_data <- magic(Matrix::t(scData@data), genes="all_genes",
                    t="auto", seed=10)
magicDataFn <- "magic_data.tsv"
write_tsv(as_tibble(t(as.data.frame(magic_data)), rownames="gene_name"),
          file=magicDataFn)
magicDataFn <- R.utils::gzip(magicDataFn)

Smoothed data is available at r magicDataFn.

After loading r magicDataFn into R, to visualise the gene-gene relationships after MAGIC, use the code below to show genes of interest, for example, "Pvalb", "Slc12a3" and "Aqp2" here.

ggplot(magic_data) +
  geom_point(aes(Pvalb, Slc12a3, color=Aqp2)) +
  scale_color_viridis(option="B")
organism = NULL
if(startsWith(param$refBuild, "Homo_sapiens/Ensembl")){
    organism <- "Human"
} else if(startsWith(param$refBuild, "Mus_musculus/Ensembl")){
  organism <- "Mouse"
}
sink("sessionInfo.txt")
sessionInfo()
sink()
unloadNamespace("SingleR") #be sure SingleR is unloaded
#detach(package:SingleR)
unloadNamespace("Seurat") #unload the Seurat v2
#detach(package:Seurat)
library("Seurat", lib="/home/daymegr/myRpackages") #and load the version 3
scData_v3 = UpdateSeuratObject(scData)

library(SingleR) #only load SingleR after loading the newest Seurat version
singler = CreateSinglerObject(as.matrix(GetAssayData(scData_v3, slot = "counts")), annot = NULL, project.name = "", min.genes = 0, technology = "10X", species = organism, citation = "", normalize.gene.length = F, variable.genes = "de", fine.tune = T, do.signatures = F, clusters=scData_v3@active.ident, do.main.types = T, reduce.file.size=T,temp.dir = NULL, numCores = param$cores)

singler$seurat = scData_v3
singler$meta.data$orig.ident = scData_v3@meta.data$Batch
singler$meta.data$xy = scData_v3@reductions$tsne@cell.embeddings # the tSNE coordinates
singler$meta.data$clusters = Idents(scData_v3)

#convert the file to use it in the interactive browser
singler.browser = convertSingleR2Browser(singler)
saveRDS(singler.browser, 'singler.browser.rds')
cat("### Cell populations")
cat("\n")
cat("\n")
cat("In order to identify cell populations we used SingleR [ Aran, Looney, Liu et al. Reference-based analysis of lung single-cell sequencing reveals a transitional profibrotic macrophage. Nature Immunology (2019)], a computational method for unbiased cell type recognition of scRNA-seq. SingleR leverages reference transcriptomic datasets of pure cell types to infer the cell of origin of each of the single cells independently. \n The SingleR browser allows to visualize the cell populations identified in different and easier ways.")
cat("\n")
cat("\n")

cat(paste0("[SingleR browser](http://fgcz-shiny.uzh.ch/fgcz_SingleRBrowser_app/?data=" , output$getColumn("Report"), "/singler.browser.rds)"))

Parameters

param[c("minCellsPerGene", "minGenesPerCell", "maxGenesPerCell", "minReadsPerCell", 
        "maxMitoFraction", "pcs",
        "pcGenes", "x.low.cutoff", "x.high.cutoff", "y.cutoff", "vars.to.regress",
        "resolution", "markersToCheck")]
Seurat2::PrintFindClustersParams(object = scData)

SessionInfo

sessionInfo()


uzh/ezRun documentation built on May 4, 2024, 3:23 p.m.