Started on r format(Sys.time(), "%Y-%m-%d %H:%M:%S")

library(SummarizedExperiment)
library(ezRun)
library(cowplot)
library(tidyverse)
library(Seurat)
library(kableExtra)
# input for this report: sceURLs, param
# sceURLs <- c("sample13days"="https://fgcz-sushi.uzh.ch/projects/p2326/SCReport_3days_2019-03-06--16-24-38/sample1_3days_v-run_SCReport/00index.html",
#              "sample22wks"="https://fgcz-sushi.uzh.ch/projects/p2326/SCReport_2wks_2019-03-06--23-38-34/sample2_2wks_v_run_SCReport/00index.html",
#              "sample3sedentary"="https://fgcz-sushi.uzh.ch/projects/p2326/SCReport_sedentary_2019-03-06--23-40-36/sample3_sedentary_SCReport/00index.html")

sceList <- lapply(file.path("/srv/gstore/projects",
                          sub("https://fgcz-(gstore|sushi).uzh.ch/projects", "",
                              dirname(sceURLs)), "sce.rds"),
                  readRDS)
names(sceList) <- names(sceURLs)

# to test
# param <- metadata(sceList[[1]])$param
# param$x.low.cutoff <- 0.0125
# param$x.high.cutoff <- 3
# param$y.cutoff <- 0.5
# param$resolution <- 0.6
# param$resolutionCCA <- 0.6
# param$pcs <- 20
# param$cc <- 20
# param$batchCorrection <- "None"

pvalue_allMarkers <- 0.05
pvalue_all2allMarkers <- 0.01
nrSamples <- length(sceList)

if(ezIsSpecified(param$chosenClusters)){
  for(eachSample in names(param$chosenClusters)){
    chosenCells <- names(metadata(sceList[[eachSample]])$scData@ident)[metadata(sceList[[eachSample]])$scData@ident %in% param$chosenClusters[[eachSample]]]
    sceList[[eachSample]] <- sceList[[eachSample]][, chosenCells]
    metadata(sceList[[eachSample]])$scData <-
      SubsetData(metadata(sceList[[eachSample]])$scData,
                 ident.use=param$chosenClusters[[eachSample]])
  }
}
jsFile = system.file("extdata/enrichr.js", package="ezRun", mustWork=TRUE)
invisible(file.copy(from=jsFile, to=basename(jsFile), overwrite=TRUE))
cat(paste0("<SCRIPT language=\"JavaScript\" SRC=\"", basename(jsFile), "\"></SCRIPT>"))

Clustering results of merged samples {.tabset}

Cell clustering without correction

sce_cbind <- Reduce(SingleCellExperiment::cbind, sceList)
scData_Merge <- Reduce(MergeSeurat, lapply(sceList, function(x){metadata(x)$scData}))
scData_Merge@project.name <- param$name
# newParams <- c("x.low.cutoff", "x.high.cutoff", "y.cutoff", "pcs", "resolution")
# metadata(sce_cbind)$param[newParams] <- param[newParams]
# sce_cbind <- seuratPreProcess(sce_cbind)
# scData.None <- metadata(sce_cbind)$scData
scData.None <- seuratClustering(scData_Merge, param)
p1 <- TSNEPlot(object=scData.None, group.by="Plate", do.return=TRUE)
p1
p2 <- TSNEPlot(object=scData.None, do.return=TRUE, do.label=TRUE)
p2
toTable <- cellTable(scData.None)
## TODO: add fisher test?
kable(toTable,
      row.names=FALSE, format="html",
      caption="Number of cells in each cluster") %>%
  kable_styling(bootstrap_options = "striped", full_width = F,
                position = "float_right")
p2$data$Plate <- scData.None@meta.data[rownames(p2$data), "Plate"]
p2 + facet_wrap(~Plate, ncol=2)

Cell clustering with CCA correction

genes.use <- lapply(sceList, function(x){head(rownames(metadata(x)$scData@hvg.info), 1000)})
genes.use <- unique(unlist(genes.use))
for(i in 1:length(sceList)){
  genes.use <- intersect(genes.use,
                         rownames(metadata(sceList[[i]])$scData@scale.data))
}
if(nrSamples > 2){
  scData.CCA <- RunMultiCCA(lapply(sceList, function(x){metadata(x)$scData}),
                            genes.use=genes.use, num.cc=30)
}else{
  scData.CCA <- RunCCA(metadata(sceList[[1]])$scData, 
                       metadata(sceList[[2]])$scData,
                       genes.use=genes.use, num.cc=30,
                       scale.factor=getSeuratScalingFactor(param$scProtocol))
}
rm(sceList)
gc()

Visualize results of CCA plot CC1 versus CC2 and look at a violin plot.

p1 <- DimPlot(object=scData.CCA, reduction.use="cca", group.by="Plate",
              pt.size=0.5, do.return=TRUE)
p1
p2 <- VlnPlot(object=scData.CCA, features.plot="CC1", group.by="Plate",
              do.return=TRUE)
p2

MetageneBicorPlot examines a measure of correlation strength for each CC and find that this statistic generally saturates after a reasonable number of CCs.

p3 <- MetageneBicorPlot(scData.CCA, grouping.var="Plate", dims.eval=1:30,
                        display.progress=FALSE)
scData.CCA <- AlignSubspace(scData.CCA, reduction.type="cca",
                            grouping.var="Plate", dims.align=1:param$cc,
                            do.par=TRUE, num.cores=param$cores)

We can visualize the aligned CCA and perform an integrated analysis.

p1 <- VlnPlot(object=scData.CCA, features.plot = "ACC1",
              group.by="Plate", do.return=TRUE)
p1
p2 <- VlnPlot(object=scData.CCA, features.plot="ACC2", group.by="Plate", 
              do.return=TRUE)
p2
scData.CCA <- RunTSNE(object=scData.CCA, reduction.use="cca.aligned",
                      dims.use=1:param$cc, tsne.method="Rtsne",
                      perplexity=ifelse(length(scData.CCA@ident) > 200, 30, 10),
                      num_threads=param$cores)
scData.CCA <- FindClusters(object=scData.CCA, reduction.type="cca.aligned",
                           dims.use=1:param$cc, save.SNN=TRUE,
                           force.recalc=FALSE, print.output=FALSE,
                           resolution=param$resolutionCCA)

Visualization on t-SNE plot

p1 <- TSNEPlot(object=scData.CCA, group.by="Plate", do.return=TRUE)
p1
p2 <- TSNEPlot(object=scData.CCA, do.return=TRUE, do.label=TRUE)
p2

toTable <- cellTable(scData.CCA)
## TODO: add fisher test?
kable(toTable,
      row.names=FALSE, format="html",
      caption="Number of cells in each cluster") %>%
  kable_styling(bootstrap_options = "striped", full_width = F,
                position = "float_right")
p2$data$Plate <- scData.CCA@meta.data[rownames(p2$data), "Plate"]
p2 + facet_wrap(~Plate, ncol=2)
if(param$batchCorrection == "None"){
  scData <- scData.None
}else if(param$batchCorrection == "CCA"){
  scData <- scData.CCA
}
# This is to avoid error in SplitDotPlotGG
# It uses _ to separate the ident
scData@meta.data$Plate <- gsub("_", ".", scData@meta.data$Plate)
# scData@meta.data$Plate <- factor(scData@meta.data$Plate,
#                                  levels=unique(scData@meta.data$Plate))
rm(scData.CCA, scData.None)
gc()

Control of the cell clustering

FeaturePlot(object = scData, features.plot = "nGene", 
            reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(object = scData, features.plot = "nUMI", 
            reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(object = scData, features.plot = "perc_mito",
            reduction.use = "tsne", no.legend = FALSE)
if(!is.null(scData@meta.data$CellCycle) && 
   !any(is.na(scData@meta.data$CellCycle))){
  TSNEPlot(object=scData, group.by="CellCycle", plot.title="Cell cycle phase")
}

Top cluster markers

markers <- FindAllMarkers(object=scData, only.pos=TRUE,
                          return.thresh = pvalue_allMarkers)
posMarkersFn <- "pos_markers.tsv"
write_tsv(as_tibble(markers), posMarkersFn)

## Significant markers
cm <- markers[ ,c("gene","cluster","avg_logFC","p_val_adj")]
rownames(cm) <- NULL

List of all positive cluster markers can be accessed here.

if(doEnrichr(param)){
  genesPerCluster <- split(markers$gene, markers$cluster)
  jsCall = paste0('enrich({list: "', sapply(genesPerCluster, paste, collapse="\\n"), '", popup: true});')
  enrichrCalls <- paste0("<a href='javascript:void(0)' onClick='", jsCall, 
                         "'>Analyse at Enrichr website</a>")
  enrichrTable <- tibble(Cluster=names(genesPerCluster),
                         "# of markers"=lengths(genesPerCluster),
                         "Enrichr link"=enrichrCalls)
  kable(enrichrTable, format="html", escape=FALSE,
        caption=paste0("GeneSet enrichment: genes with pvalue ", pvalue_allMarkers)) %>%
    kable_styling("striped", full_width = F, position = "left")
}
caption ="Expression differences of cluster marker genes"
ezInteractiveTableRmd(cm, digits=4, title=caption)
if(doEnrichr(param) && param$all2allMarkers){
  cat("#### All clusters against all clusters comparison")
  cat("\n")
}
if(param$all2allMarkers){
  clusterCombs <- combn(levels(scData@ident), m=2)
  all2allMarkers <- mcmapply(FindMarkers, as.integer(clusterCombs[1, ]), 
                             as.integer(clusterCombs[2, ]),
                             MoreArgs = list(object=scData,
                                             only.pos=FALSE),
                             mc.preschedule=FALSE,
                             mc.cores=min(4L, param$cores),
                             SIMPLIFY=FALSE)
  all2allMarkers <- lapply(all2allMarkers, function(x){
    x[x$p_val <= pvalue_all2allMarkers, ]
  })
  names(all2allMarkers) <- apply(clusterCombs, 2, paste, collapse="vs")
}
if(doEnrichr(param) && param$all2allMarkers){
  for(comparison in names(all2allMarkers)){
    write_tsv(as_tibble(all2allMarkers[[comparison]], rownames="Gene"),
              file=paste0(comparison, ".tsv"))
  }
  genesAllPerCluster <- lapply(all2allMarkers, rownames)
  genesUpPerCluster <- lapply(all2allMarkers, function(x){rownames(x)[x$avg_logFC > 0]})
  genesDownPerCluster <- lapply(all2allMarkers, function(x){rownames(x)[x$avg_logFC < 0]})

  jsCall_all = paste0('enrich({list: "', sapply(genesAllPerCluster, paste, collapse="\\n"), '", popup: true});')
  jsCall_up = paste0('enrich({list: "', sapply(genesUpPerCluster, paste, collapse="\\n"), '", popup: true});')
  jsCall_down = paste0('enrich({list: "', sapply(genesDownPerCluster, paste, collapse="\\n"), '", popup: true});')

  enrichrCalls_all <- paste0("<a href='javascript:void(0)' onClick='", jsCall_all,
                         "'>Analyse at Enrichr website</a>")
  enrichrCalls_up <- paste0("<a href='javascript:void(0)' onClick='", jsCall_up,
                         "'>Analyse at Enrichr website</a>")
  enrichrCalls_down <- paste0("<a href='javascript:void(0)' onClick='", jsCall_down,
                         "'>Analyse at Enrichr website</a>")
  enrichrTable <- tibble(Comparison=names(all2allMarkers),
                         "# of differentially expressed genes"=lengths(genesAllPerCluster),
                         "Enrichr link: all significant genes"=enrichrCalls_all,
                         "Enrichr link: up-regulated genes"=enrichrCalls_up,
                         "Enrichr link: down-regulated genes"=enrichrCalls_down,
                         "List of differentially expressed genes"=text_spec(paste0(names(all2allMarkers), ".tsv"), link=paste0(names(all2allMarkers), ".tsv")))
  kable(enrichrTable, format="html", escape=FALSE,
        caption=paste0("GeneSet enrichment: genes with pvalue ", pvalue_all2allMarkers)) %>%
    kable_styling("striped", full_width = F, position = "left")
}

Visualizations of top cluster markers

some_markers <- head(dplyr::arrange(cm, p_val_adj) %>% select(gene) %>% pull(), 
                     100)
for(i in 1:length(some_markers)){
  plot(VlnPlot(object = scData, some_markers[i], do.return=TRUE))
  FeaturePlot(object = scData, features.plot = some_markers[i],
              cols.use = c("gray", "red"),
              reduction.use = "tsne", no.legend = FALSE)
}

Heatmap of cluster marker genes

Max r param$markersToShow marker genes are shown for each cluster.

top10 <- markers %>% group_by(cluster) %>% top_n(param$markersToShow, avg_logFC)
DoHeatmap(object = scData, genes.use = top10$gene, slim.col.label = TRUE,
          remove.key=TRUE)

Conserved cell type markers

markers <- list()
for(eachCluster in levels(scData@ident)){
   markersEach <- try(FindConservedMarkers(scData, ident.1=eachCluster, 
                                       grouping.var="Plate",
                                       print.bar=FALSE, only.pos=TRUE),
                      silent=TRUE)
   ## to skip some groups with few cells
   if(class(markersEach) != "try-error" && nrow(markersEach) > 0){
     markers[[eachCluster]] <- as_tibble(markersEach, rownames="gene")
   }
}
## some of the cluster have no significant conserved markers
markers <- markers[sapply(markers, nrow) != 0L] 
markers <- bind_rows(markers, .id="cluster")

posConservedMarkersFn <- "pos_conserved_markers.tsv"
write_tsv(markers, file=posConservedMarkersFn)

List of all positive conserved cell type markers can be accessed here.

if(doEnrichr(param)){
  genesPerCluster <- split(markers$gene, markers$cluster)
  genesPerCluster <- genesPerCluster[gtools::mixedorder(names(genesPerCluster))]
  jsCall = paste0('enrich({list: "', sapply(genesPerCluster, paste, collapse="\\n"), '", popup: true});')
  enrichrCalls <- paste0("<a href='javascript:void(0)' onClick='", jsCall, 
                         "'>Analyse at Enrichr website</a>")
  enrichrTable <- tibble(Cluster=names(genesPerCluster),
                         "# of markers"=lengths(genesPerCluster),
                         "Enrichr link"=enrichrCalls)
  kable(enrichrTable, format="html", escape=FALSE,
        caption=paste0("GeneSet enrichment analysis: conserved marker genes")) %>%
    kable_styling("striped", full_width = F, position = "left")
}
caption ="Conserved cell type markers"
ezInteractiveTableRmd(markers, title=caption)

Visualisation of conserved markers

Top 30 conserved cell type markers are shown.

markers.to.plot <- head(dplyr::arrange(markers, minimump_p_val) %>% 
                          dplyr::select(gene) %>% pull(), 30)
for(i in 1:length(markers.to.plot)){
  plot(VlnPlot(object = scData, markers.to.plot[i], do.return=TRUE))
  FeaturePlot(object = scData, features.plot = markers.to.plot[i],
              cols.use = c("gray", "red"),
              reduction.use = "tsne", no.legend = FALSE)
}
sdp <- SplitDotPlotGG(scData, genes.plot = unique(markers.to.plot),
                      # It gives error when there are duplicated markers
                      cols.use = RColorBrewer::brewer.pal(nrSamples, "Set1"),
                      x.lab.rot = T, plot.legend = T, 
                      dot.scale = 8, do.return = T,
                      grouping.var = "Plate")

Differential expressed genes

  scData@meta.data$cluster.plate <- paste0(scData@ident, "_",
                                           scData@meta.data$Plate)
  scData <- StashIdent(scData, save.name = "cluster")
  scData <- SetAllIdent(scData, id = "cluster.plate")
  uniquePlates <- as.character(unique(scData@meta.data$Plate))

  diffGenesFns <- c()
  uniquePlatesComb <- combn(uniquePlates, m=2)
  for(i in 1:ncol(uniquePlatesComb)){
    diffGenes <- list()
    for(eachCluster in gtools::mixedsort(unique(scData@meta.data$cluster))){
      markersEach <- try(FindMarkers(scData, ident.1=paste0(eachCluster, "_",
                                                            uniquePlatesComb[1,i]),
                                     ident.2=paste0(eachCluster, "_", 
                                                    uniquePlatesComb[2,i]),
                                     print.bar=FALSE), silent=TRUE)
      ## to skip some groups with few cells
      if(class(markersEach) != "try-error"){
        diffGenes[[eachCluster]] <- as_tibble(markersEach, rownames="gene")

        cat("\n")
        cat("#### Top 10 differential expressed genes from cluster", 
            eachCluster, ", comparison ", uniquePlatesComb[,i])
        cat("\n")

        FeatureHeatmap(scData, features.plot=head(rownames(markersEach), 10),
                       group.by="Plate", pt.size=0.25, key.position="top",
                       max.exp=3)
        cat("\n")
      }
    }
    diffGenes <- bind_rows(diffGenes, .id="cluster")
    diffGenesFn <- paste0("diffExpGenesPerCluster_",
                          paste(uniquePlatesComb[,i], collapse="Vs"), ".tsv")
    write_tsv(diffGenes, file=diffGenesFn)
    diffGenesFns <- c(diffGenesFns, diffGenesFn)
  }
caption ="Differential expressed genes per cluster"
ezInteractiveTableRmd(diffGenes, title=caption)
cat("List of differential expressed genes per cluster can be accessed at",
    paste(paste0("[", diffGenesFns, "](",diffGenesFns, ")"), collapse=", "))
cat("The comparison is ", paste(uniquePlates, collapse=" vs "))
cat("No differential expression computed")

Data availability

metadata(sce_cbind)$scData <- scData

sceFn <- "sce.rds"
saveRDS(sce_cbind, file=sceFn)
tr_cnts <- expm1(scData@data)
geneMeans <- rowsum(t(as.matrix(tr_cnts)), group=scData@ident)
geneMeans <- sweep(geneMeans, 1,
                   STATS=table(scData@ident)[rownames(geneMeans)], FUN="/")
geneMeans <- log1p(t(geneMeans))
colnames(geneMeans) <- paste("cluster", colnames(geneMeans), sep="_")
geneMeanPerClusterFn <- "gene_means_per_cluster.txt"
ezWrite.table(geneMeans, geneMeanPerClusterFn)

geneMeans <- Matrix::rowMeans(tr_cnts)
geneMeans <- log1p(geneMeans)
geneMeansFn <- "gene_means.txt"
ezWrite.table(geneMeans, geneMeansFn)

tSNE_data <- as_tibble(scData@dr$tsne@cell.embeddings,
                       rownames="cells")
tSNE_data <- dplyr::rename(tSNE_data, X=`tSNE_1`, Y=`tSNE_2`)
tSNE_data$cluster <- scData@ident
tSNEFn <- "tSNE_data.tsv"
write_tsv(tSNE_data, file=tSNEFn)

r geneMeanPerClusterFn

r geneMeansFn

r tSNEFn

The SingleCellExperiment object with final Seurat object after merging is here.

Parameters

param[c("x.low.cutoff", "x.high.cutoff", "y.cutoff", "pcs", "resolution",
        "batchCorrection", "cc", "resolutionCCA",
        "chosenClusters")]
PrintFindClustersParams(object = scData)

SessionInfo

sessionInfo()


uzh/ezRun documentation built on May 4, 2024, 3:23 p.m.