In uzh/ezRun: An R meta-package for the analysis of Next Generation Sequencing Data

Started on r format(Sys.time(), "%Y-%m-%d %H:%M:%S")

library(SummarizedExperiment)
library(ezRun)
library(cowplot)
library(tidyverse)
library(Seurat)
library(kableExtra)
# input for this report: sceURLs, param
# sceURLs <- c("sample13days"="https://fgcz-sushi.uzh.ch/projects/p2326/SCReport_3days_2019-03-06--16-24-38/sample1_3days_v-run_SCReport/00index.html",
#              "sample22wks"="https://fgcz-sushi.uzh.ch/projects/p2326/SCReport_2wks_2019-03-06--23-38-34/sample2_2wks_v_run_SCReport/00index.html",
#              "sample3sedentary"="https://fgcz-sushi.uzh.ch/projects/p2326/SCReport_sedentary_2019-03-06--23-40-36/sample3_sedentary_SCReport/00index.html")

sceList <- lapply(file.path("/srv/gstore/projects",
                          sub("https://fgcz-(gstore|sushi).uzh.ch/projects", "",
                              dirname(sceURLs)), "sce.rds"),
                  readRDS)
names(sceList) <- names(sceURLs)

# to test
# param <- metadata(sceList[[1]])$param
# param$x.low.cutoff <- 0.0125
# param$x.high.cutoff <- 3
# param$y.cutoff <- 0.5
# param$resolution <- 0.6
# param$resolutionCCA <- 0.6
# param$pcs <- 20
# param$cc <- 20
# param$batchCorrection <- "None"

pvalue_allMarkers <- 0.05
pvalue_all2allMarkers <- 0.01
nrSamples <- length(sceList)

if(ezIsSpecified(param$chosenClusters)){
  for(eachSample in names(param$chosenClusters)){
    chosenCells <- names(metadata(sceList[[eachSample]])$scData@ident)[metadata(sceList[[eachSample]])$scData@ident %in% param$chosenClusters[[eachSample]]]
    sceList[[eachSample]] <- sceList[[eachSample]][, chosenCells]
    metadata(sceList[[eachSample]])$scData <-
      SubsetData(metadata(sceList[[eachSample]])$scData,
                 ident.use=param$chosenClusters[[eachSample]])
  }
}

jsFile = system.file("extdata/enrichr.js", package="ezRun", mustWork=TRUE)
invisible(file.copy(from=jsFile, to=basename(jsFile), overwrite=TRUE))
cat(paste0("<SCRIPT language=\"JavaScript\" SRC=\"", basename(jsFile), "\"></SCRIPT>"))

Clustering results of merged samples {.tabset}

Cell clustering without correction

sce_cbind <- Reduce(SingleCellExperiment::cbind, sceList)
scData_Merge <- Reduce(MergeSeurat, lapply(sceList, function(x){metadata(x)$scData}))
scData_Merge@project.name <- param$name
# newParams <- c("x.low.cutoff", "x.high.cutoff", "y.cutoff", "pcs", "resolution")
# metadata(sce_cbind)$param[newParams] <- param[newParams]
# sce_cbind <- seuratPreProcess(sce_cbind)
# scData.None <- metadata(sce_cbind)$scData
scData.None <- seuratClustering(scData_Merge, param)

p1 <- TSNEPlot(object=scData.None, group.by="Plate", do.return=TRUE)
p1
p2 <- TSNEPlot(object=scData.None, do.return=TRUE, do.label=TRUE)
p2
toTable <- cellTable(scData.None)
## TODO: add fisher test?
kable(toTable,
      row.names=FALSE, format="html",
      caption="Number of cells in each cluster") %>%
  kable_styling(bootstrap_options = "striped", full_width = F,
                position = "float_right")

p2$data$Plate <- scData.None@meta.data[rownames(p2$data), "Plate"]
p2 + facet_wrap(~Plate, ncol=2)

Cell clustering with CCA correction

genes.use <- lapply(sceList, function(x){head(rownames(metadata(x)$scData@hvg.info), 1000)})
genes.use <- unique(unlist(genes.use))
for(i in 1:length(sceList)){
  genes.use <- intersect(genes.use,
                         rownames(metadata(sceList[[i]])$scData@scale.data))
}
if(nrSamples > 2){
  scData.CCA <- RunMultiCCA(lapply(sceList, function(x){metadata(x)$scData}),
                            genes.use=genes.use, num.cc=30)
}else{
  scData.CCA <- RunCCA(metadata(sceList[[1]])$scData, 
                       metadata(sceList[[2]])$scData,
                       genes.use=genes.use, num.cc=30,
                       scale.factor=getSeuratScalingFactor(param$scProtocol))
}

rm(sceList)
gc()

Visualize results of CCA plot CC1 versus CC2 and look at a violin plot.

p1 <- DimPlot(object=scData.CCA, reduction.use="cca", group.by="Plate",
              pt.size=0.5, do.return=TRUE)
p1
p2 <- VlnPlot(object=scData.CCA, features.plot="CC1", group.by="Plate",
              do.return=TRUE)
p2

MetageneBicorPlot examines a measure of correlation strength for each CC and find that this statistic generally saturates after a reasonable number of CCs.

p3 <- MetageneBicorPlot(scData.CCA, grouping.var="Plate", dims.eval=1:30,
                        display.progress=FALSE)

scData.CCA <- AlignSubspace(scData.CCA, reduction.type="cca",
                            grouping.var="Plate", dims.align=1:param$cc,
                            do.par=TRUE, num.cores=param$cores)

We can visualize the aligned CCA and perform an integrated analysis.

p1 <- VlnPlot(object=scData.CCA, features.plot = "ACC1",
              group.by="Plate", do.return=TRUE)
p1
p2 <- VlnPlot(object=scData.CCA, features.plot="ACC2", group.by="Plate", 
              do.return=TRUE)
p2

scData.CCA <- RunTSNE(object=scData.CCA, reduction.use="cca.aligned",
                      dims.use=1:param$cc, tsne.method="Rtsne",
                      perplexity=ifelse(length(scData.CCA@ident) > 200, 30, 10),
                      num_threads=param$cores)
scData.CCA <- FindClusters(object=scData.CCA, reduction.type="cca.aligned",
                           dims.use=1:param$cc, save.SNN=TRUE,
                           force.recalc=FALSE, print.output=FALSE,
                           resolution=param$resolutionCCA)

Visualization on t-SNE plot

p1 <- TSNEPlot(object=scData.CCA, group.by="Plate", do.return=TRUE)
p1
p2 <- TSNEPlot(object=scData.CCA, do.return=TRUE, do.label=TRUE)
p2

toTable <- cellTable(scData.CCA)
## TODO: add fisher test?
kable(toTable,
      row.names=FALSE, format="html",
      caption="Number of cells in each cluster") %>%
  kable_styling(bootstrap_options = "striped", full_width = F,
                position = "float_right")

p2$data$Plate <- scData.CCA@meta.data[rownames(p2$data), "Plate"]
p2 + facet_wrap(~Plate, ncol=2)

if(param$batchCorrection == "None"){
  scData <- scData.None
}else if(param$batchCorrection == "CCA"){
  scData <- scData.CCA
}
# This is to avoid error in SplitDotPlotGG
# It uses _ to separate the ident
scData@meta.data$Plate <- gsub("_", ".", scData@meta.data$Plate)
# scData@meta.data$Plate <- factor(scData@meta.data$Plate,
#                                  levels=unique(scData@meta.data$Plate))

rm(scData.CCA, scData.None)
gc()

Control of the cell clustering

FeaturePlot(object = scData, features.plot = "nGene", 
            reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(object = scData, features.plot = "nUMI", 
            reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(object = scData, features.plot = "perc_mito",
            reduction.use = "tsne", no.legend = FALSE)
if(!is.null(scData@meta.data$CellCycle) && 
   !any(is.na(scData@meta.data$CellCycle))){
  TSNEPlot(object=scData, group.by="CellCycle", plot.title="Cell cycle phase")
}

Top cluster markers

markers <- FindAllMarkers(object=scData, only.pos=TRUE,
                          return.thresh = pvalue_allMarkers)
posMarkersFn <- "pos_markers.tsv"
write_tsv(as_tibble(markers), posMarkersFn)

## Significant markers
cm <- markers[ ,c("gene","cluster","avg_logFC","p_val_adj")]
rownames(cm) <- NULL

List of all positive cluster markers can be accessed here.

if(doEnrichr(param)){
  genesPerCluster <- split(markers$gene, markers$cluster)
  jsCall = paste0('enrich({list: "', sapply(genesPerCluster, paste, collapse="\\n"), '", popup: true});')
  enrichrCalls <- paste0("<a href='javascript:void(0)' onClick='", jsCall, 
                         "'>Analyse at Enrichr website</a>")
  enrichrTable <- tibble(Cluster=names(genesPerCluster),
                         "# of markers"=lengths(genesPerCluster),
                         "Enrichr link"=enrichrCalls)
  kable(enrichrTable, format="html", escape=FALSE,
        caption=paste0("GeneSet enrichment: genes with pvalue ", pvalue_allMarkers)) %>%
    kable_styling("striped", full_width = F, position = "left")
}

caption ="Expression differences of cluster marker genes"
ezInteractiveTableRmd(cm, digits=4, title=caption)

if(doEnrichr(param) && param$all2allMarkers){
  cat("#### All clusters against all clusters comparison")
  cat("\n")
}

if(param$all2allMarkers){
  clusterCombs <- combn(levels(scData@ident), m=2)
  all2allMarkers <- mcmapply(FindMarkers, as.integer(clusterCombs[1, ]), 
                             as.integer(clusterCombs[2, ]),
                             MoreArgs = list(object=scData,
                                             only.pos=FALSE),
                             mc.preschedule=FALSE,
                             mc.cores=min(4L, param$cores),
                             SIMPLIFY=FALSE)
  all2allMarkers <- lapply(all2allMarkers, function(x){
    x[x$p_val <= pvalue_all2allMarkers, ]
  })
  names(all2allMarkers) <- apply(clusterCombs, 2, paste, collapse="vs")
}

if(doEnrichr(param) && param$all2allMarkers){
  for(comparison in names(all2allMarkers)){
    write_tsv(as_tibble(all2allMarkers[[comparison]], rownames="Gene"),
              file=paste0(comparison, ".tsv"))
  }
  genesAllPerCluster <- lapply(all2allMarkers, rownames)
  genesUpPerCluster <- lapply(all2allMarkers, function(x){rownames(x)[x$avg_logFC > 0]})
  genesDownPerCluster <- lapply(all2allMarkers, function(x){rownames(x)[x$avg_logFC < 0]})

  jsCall_all = paste0('enrich({list: "', sapply(genesAllPerCluster, paste, collapse="\\n"), '", popup: true});')
  jsCall_up = paste0('enrich({list: "', sapply(genesUpPerCluster, paste, collapse="\\n"), '", popup: true});')
  jsCall_down = paste0('enrich({list: "', sapply(genesDownPerCluster, paste, collapse="\\n"), '", popup: true});')

  enrichrCalls_all <- paste0("<a href='javascript:void(0)' onClick='", jsCall_all,
                         "'>Analyse at Enrichr website</a>")
  enrichrCalls_up <- paste0("<a href='javascript:void(0)' onClick='", jsCall_up,
                         "'>Analyse at Enrichr website</a>")
  enrichrCalls_down <- paste0("<a href='javascript:void(0)' onClick='", jsCall_down,
                         "'>Analyse at Enrichr website</a>")
  enrichrTable <- tibble(Comparison=names(all2allMarkers),
                         "# of differentially expressed genes"=lengths(genesAllPerCluster),
                         "Enrichr link: all significant genes"=enrichrCalls_all,
                         "Enrichr link: up-regulated genes"=enrichrCalls_up,
                         "Enrichr link: down-regulated genes"=enrichrCalls_down,
                         "List of differentially expressed genes"=text_spec(paste0(names(all2allMarkers), ".tsv"), link=paste0(names(all2allMarkers), ".tsv")))
  kable(enrichrTable, format="html", escape=FALSE,
        caption=paste0("GeneSet enrichment: genes with pvalue ", pvalue_all2allMarkers)) %>%
    kable_styling("striped", full_width = F, position = "left")
}

Visualizations of top cluster markers

some_markers <- head(dplyr::arrange(cm, p_val_adj) %>% select(gene) %>% pull(), 
                     100)
for(i in 1:length(some_markers)){
  plot(VlnPlot(object = scData, some_markers[i], do.return=TRUE))
  FeaturePlot(object = scData, features.plot = some_markers[i],
              cols.use = c("gray", "red"),
              reduction.use = "tsne", no.legend = FALSE)
}

Heatmap of cluster marker genes

Max r param$markersToShow marker genes are shown for each cluster.

top10 <- markers %>% group_by(cluster) %>% top_n(param$markersToShow, avg_logFC)
DoHeatmap(object = scData, genes.use = top10$gene, slim.col.label = TRUE,
          remove.key=TRUE)

Conserved cell type markers

markers <- list()
for(eachCluster in levels(scData@ident)){
   markersEach <- try(FindConservedMarkers(scData, ident.1=eachCluster, 
                                       grouping.var="Plate",
                                       print.bar=FALSE, only.pos=TRUE),
                      silent=TRUE)
   ## to skip some groups with few cells
   if(class(markersEach) != "try-error" && nrow(markersEach) > 0){
     markers[[eachCluster]] <- as_tibble(markersEach, rownames="gene")
   }
}
## some of the cluster have no significant conserved markers
markers <- markers[sapply(markers, nrow) != 0L] 
markers <- bind_rows(markers, .id="cluster")

posConservedMarkersFn <- "pos_conserved_markers.tsv"
write_tsv(markers, file=posConservedMarkersFn)

List of all positive conserved cell type markers can be accessed here.

if(doEnrichr(param)){
  genesPerCluster <- split(markers$gene, markers$cluster)
  genesPerCluster <- genesPerCluster[gtools::mixedorder(names(genesPerCluster))]
  jsCall = paste0('enrich({list: "', sapply(genesPerCluster, paste, collapse="\\n"), '", popup: true});')
  enrichrCalls <- paste0("<a href='javascript:void(0)' onClick='", jsCall, 
                         "'>Analyse at Enrichr website</a>")
  enrichrTable <- tibble(Cluster=names(genesPerCluster),
                         "# of markers"=lengths(genesPerCluster),
                         "Enrichr link"=enrichrCalls)
  kable(enrichrTable, format="html", escape=FALSE,
        caption=paste0("GeneSet enrichment analysis: conserved marker genes")) %>%
    kable_styling("striped", full_width = F, position = "left")
}

caption ="Conserved cell type markers"
ezInteractiveTableRmd(markers, title=caption)

Visualisation of conserved markers

Top 30 conserved cell type markers are shown.

markers.to.plot <- head(dplyr::arrange(markers, minimump_p_val) %>% 
                          dplyr::select(gene) %>% pull(), 30)
for(i in 1:length(markers.to.plot)){
  plot(VlnPlot(object = scData, markers.to.plot[i], do.return=TRUE))
  FeaturePlot(object = scData, features.plot = markers.to.plot[i],
              cols.use = c("gray", "red"),
              reduction.use = "tsne", no.legend = FALSE)
}

sdp <- SplitDotPlotGG(scData, genes.plot = unique(markers.to.plot),
                      # It gives error when there are duplicated markers
                      cols.use = RColorBrewer::brewer.pal(nrSamples, "Set1"),
                      x.lab.rot = T, plot.legend = T, 
                      dot.scale = 8, do.return = T,
                      grouping.var = "Plate")

Differential expressed genes

  scData@meta.data$cluster.plate <- paste0(scData@ident, "_",
                                           scData@meta.data$Plate)
  scData <- StashIdent(scData, save.name = "cluster")
  scData <- SetAllIdent(scData, id = "cluster.plate")
  uniquePlates <- as.character(unique(scData@meta.data$Plate))

  diffGenesFns <- c()
  uniquePlatesComb <- combn(uniquePlates, m=2)
  for(i in 1:ncol(uniquePlatesComb)){
    diffGenes <- list()
    for(eachCluster in gtools::mixedsort(unique(scData@meta.data$cluster))){
      markersEach <- try(FindMarkers(scData, ident.1=paste0(eachCluster, "_",
                                                            uniquePlatesComb[1,i]),
                                     ident.2=paste0(eachCluster, "_", 
                                                    uniquePlatesComb[2,i]),
                                     print.bar=FALSE), silent=TRUE)
      ## to skip some groups with few cells
      if(class(markersEach) != "try-error"){
        diffGenes[[eachCluster]] <- as_tibble(markersEach, rownames="gene")

        cat("\n")
        cat("#### Top 10 differential expressed genes from cluster", 
            eachCluster, ", comparison ", uniquePlatesComb[,i])
        cat("\n")

        FeatureHeatmap(scData, features.plot=head(rownames(markersEach), 10),
                       group.by="Plate", pt.size=0.25, key.position="top",
                       max.exp=3)
        cat("\n")
      }
    }
    diffGenes <- bind_rows(diffGenes, .id="cluster")
    diffGenesFn <- paste0("diffExpGenesPerCluster_",
                          paste(uniquePlatesComb[,i], collapse="Vs"), ".tsv")
    write_tsv(diffGenes, file=diffGenesFn)
    diffGenesFns <- c(diffGenesFns, diffGenesFn)
  }

caption ="Differential expressed genes per cluster"
ezInteractiveTableRmd(diffGenes, title=caption)

cat("List of differential expressed genes per cluster can be accessed at",
    paste(paste0("[", diffGenesFns, "](",diffGenesFns, ")"), collapse=", "))
cat("The comparison is ", paste(uniquePlates, collapse=" vs "))

cat("No differential expression computed")

Data availability

metadata(sce_cbind)$scData <- scData

sceFn <- "sce.rds"
saveRDS(sce_cbind, file=sceFn)

tr_cnts <- expm1(scData@data)
geneMeans <- rowsum(t(as.matrix(tr_cnts)), group=scData@ident)
geneMeans <- sweep(geneMeans, 1,
                   STATS=table(scData@ident)[rownames(geneMeans)], FUN="/")
geneMeans <- log1p(t(geneMeans))
colnames(geneMeans) <- paste("cluster", colnames(geneMeans), sep="_")
geneMeanPerClusterFn <- "gene_means_per_cluster.txt"
ezWrite.table(geneMeans, geneMeanPerClusterFn)

geneMeans <- Matrix::rowMeans(tr_cnts)
geneMeans <- log1p(geneMeans)
geneMeansFn <- "gene_means.txt"
ezWrite.table(geneMeans, geneMeansFn)

tSNE_data <- as_tibble(scData@dr$tsne@cell.embeddings,
                       rownames="cells")
tSNE_data <- dplyr::rename(tSNE_data, X=`tSNE_1`, Y=`tSNE_2`)
tSNE_data$cluster <- scData@ident
tSNEFn <- "tSNE_data.tsv"
write_tsv(tSNE_data, file=tSNEFn)

r geneMeanPerClusterFn

r geneMeansFn

r tSNEFn

The SingleCellExperiment object with final Seurat object after merging is here.

Parameters

param[c("x.low.cutoff", "x.high.cutoff", "y.cutoff", "pcs", "resolution",
        "batchCorrection", "cc", "resolutionCCA",
        "chosenClusters")]

PrintFindClustersParams(object = scData)

SessionInfo

sessionInfo()

uzh/ezRun documentation built on June 14, 2025, 1:29 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

uzh/ezRun
An R meta-package for the analysis of Next Generation Sequencing Data

In uzh/ezRun: An R meta-package for the analysis of Next Generation Sequencing Data

Clustering results of merged samples {.tabset}

Cell clustering without correction

Cell clustering with CCA correction

Control of the cell clustering

Top cluster markers

Visualizations of top cluster markers

Heatmap of cluster marker genes

Conserved cell type markers

Visualisation of conserved markers

Differential expressed genes

Data availability

Parameters

SessionInfo

R Package Documentation

Browse R Packages

We want your feedback!

uzh/ezRun An R meta-package for the analysis of Next Generation Sequencing Data

In uzh/ezRun: An R meta-package for the analysis of Next Generation Sequencing Data

Clustering results of merged samples {.tabset}

Cell clustering without correction

Cell clustering with CCA correction

Control of the cell clustering

Top cluster markers

Visualizations of top cluster markers

Heatmap of cluster marker genes

Conserved cell type markers

Visualisation of conserved markers

Differential expressed genes

Data availability

Parameters

SessionInfo

R Package Documentation

Browse R Packages

We want your feedback!

uzh/ezRun
An R meta-package for the analysis of Next Generation Sequencing Data