single cell cluster ananse analysis

# if (!requireNamespace("remotes", quietly = TRUE)) {
#   install.packages("remotes")
# }
# Sys.unsetenv("GITHUB_PAT")
# remotes::install_github("mojaveazure/seurat-disk", upgrade = "never")
# remotes::install_github("JGASmits/AnanseSeurat", upgrade = "never")

library(Signac)
library(Seurat)
library(EnsDb.Hsapiens.v86)
library(BSgenome.Hsapiens.UCSC.hg38)
library(circlize)
library(stringr)
library(ComplexHeatmap)
library(circlize)

library(SeuratDisk)
library(AnanseSeurat)

set.seed(1234)
knitr::opts_knit$set(root.dir = normalizePath(
  "input your working dir containing the scANANSE folder"))
# load the RNA and ATAC data
counts <-
  Read10X_h5(
    './scANANSE/data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5')
fragpath <-
  "./scANANSE/data/pbmc_granulocyte_sorted_10k_atac_fragments.tsv.gz"
# get gene annotations for hg38
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevelsStyle(annotation) <- "UCSC"
# create a Seurat object containing the RNA adata
pbmc <- CreateSeuratObject(counts = counts$`Gene Expression`,
                           assay = "RNA")
# create ATAC assay and add it to the object
pbmc[["ATAC"]] <- CreateChromatinAssay(
  counts = counts$Peaks,
  sep = c(":", "-"),
  fragments = fragpath,
  annotation = annotation
)

DefaultAssay(pbmc) <- "ATAC"
pbmc <- NucleosomeSignal(pbmc)
pbmc <- TSSEnrichment(pbmc)

# filter out low quality cells
pbmc <- subset(
  x = pbmc,
  subset = nCount_ATAC < 100000 &
    nCount_RNA < 25000 &
    nCount_ATAC > 1000 &
    nCount_RNA > 1000 &
    nucleosome_signal < 2 &
    TSS.enrichment > 1
)

# call peaks using MACS2
peaks <- CallPeaks(pbmc)
# remove peaks on nonstandard chromosomes and in genomic blacklist regions
peaks <- keepStandardChromosomes(peaks, pruning.mode = "coarse")
peaks <-
  subsetByOverlaps(x = peaks, ranges = blacklist_hg38_unified, invert = TRUE)

# quantify counts in each peak
macs2_counts <- FeatureMatrix(
  fragments = Fragments(pbmc),
  features = peaks,
  cells = colnames(pbmc)
)

# create a new assay using the MACS2 peak set and add it to the Seurat object
pbmc[["peaks"]] <- CreateChromatinAssay(counts = macs2_counts,
                                        fragments = fragpath,
                                        annotation = annotation)

DefaultAssay(pbmc) <- "RNA"
pbmc <- SCTransform(pbmc)
pbmc <- RunPCA(pbmc)

DefaultAssay(pbmc) <- "peaks"
pbmc <- FindTopFeatures(pbmc, min.cutoff = 5)
pbmc <- RunTFIDF(pbmc)
pbmc <- RunSVD(pbmc)
# load PBMC reference
reference <-
  LoadH5Seurat("./scANANSE/data/pbmc_multimodal.h5seurat")
DefaultAssay(pbmc) <- "SCT"

# transfer cell type labels from reference to query
transfer_anchors <- FindTransferAnchors(
  reference = reference,
  query = pbmc,
  normalization.method = "SCT",
  reference.reduction = "spca",
  recompute.residuals = FALSE,
  dims = 1:50
)

predictions <- TransferData(
  anchorset = transfer_anchors,
  refdata = reference$celltype.l2,
  weight.reduction = pbmc[['pca']],
  dims = 1:50
)

pbmc <- AddMetaData(object = pbmc,
                    metadata = predictions)

# set the cell identities to the cell type predictions
Idents(pbmc) <- "predicted.id"

# set a reasonable order for cell types to be displayed when plotting
levels(pbmc) <-
  c(
    "CD4 Naive",
    "CD4 TCM",
    "CD4 CTL",
    "CD4 TEM",
    "CD4 Proliferating",
    "CD8 Naive",
    "dnT",
    "CD8 TEM",
    "CD8 TCM",
    "CD8 Proliferating",
    "MAIT",
    "NK",
    "NK_CD56bright",
    "NK Proliferating",
    "gdT",
    "Treg",
    "B naive",
    "B intermediate",
    "B memory",
    "Plasmablast",
    "CD14 Mono",
    "CD16 Mono",
    "cDC1",
    "cDC2",
    "pDC",
    "HSPC",
    "Eryth",
    "ASDC",
    "ILC",
    "Platelet"
  )
# build a joint neighbor graph using both assays
pbmc <- FindMultiModalNeighbors(
  object = pbmc,
  reduction.list = list("pca", "lsi"),
  dims.list = list(1:50, 2:40),
  modality.weight.name = "RNA.weight",
  verbose = TRUE
)

# build a joint UMAP visualization
pbmc <- RunUMAP(
  object = pbmc,
  nn.name = "weighted.nn",
  assay = "RNA",
  verbose = TRUE
)
DimPlot(pbmc,
        label = TRUE,
        repel = TRUE,
        reduction = "umap") + NoLegend()
Idents(pbmc) <- str_replace(Idents(pbmc), ' ', '-')
Idents(pbmc) <- str_replace(Idents(pbmc), '_', '-')
pbmc$predicted.id <- str_replace(pbmc$predicted.id, ' ', '-')
pbmc$predicted.id <- str_replace(pbmc$predicted.id, '_', '-')
rds_file <- './scANANSE/preprocessed_PBMC.Rds'
if (file.exists(rds_file)) {
  pbmc <- readRDS(rds_file)
} else{
  saveRDS(pbmc, file = rds_file)
}

pdf(
  './scANANSE/umap.pdf',
  width = 7,
  height = 3.5,
  paper = 'special'
)
DimPlot(pbmc,
        label = TRUE,
        repel = TRUE,
        reduction = "umap") + NoLegend()
dev.off()

table(pbmc@meta.data$predicted.id)
#Load pre-processed seurat object RDS file
rds_file <- './scANANSE/data/preprocessed_PBMC.Rds'
pbmc <- readRDS(rds_file)

export_CPM_scANANSE(
  pbmc,
  min_cells <- 25,
  output_dir = './scANANSE/analysis',
  cluster_id = 'predicted.id',
  RNA_count_assay = 'RNA'
)

export_ATAC_scANANSE(
  pbmc,
  min_cells <- 25,
  output_dir = './scANANSE/analysis',
  cluster_id = 'predicted.id',
  ATAC_peak_assay = 'peaks'
)

#specify additional contrasts:
contrasts <-  list('B-naive_B-memory',
                   'B-memory_B-naive',
                   'B-naive_CD16-Mono',
                   'CD16-Mono_B-naive')

config_scANANSE(
  pbmc,
  min_cells <- 25,
  output_dir = './scANANSE/analysis',
  cluster_id = 'predicted.id',
  genome = './scANANSE/data/hg38',
  additional_contrasts = contrasts
)

DEGS_scANANSE(
  pbmc,
  min_cells <- 25,
  output_dir = './scANANSE/analysis',
  cluster_id = 'predicted.id',
  additional_contrasts = contrasts
)

Run Anansnake before processing

Lets load the results from Anansnake

rds_file <- './scANANSE/data/preprocessed_PBMC.Rds'
pbmc <- readRDS(rds_file)
cluster_id <- 'predicted.id'

pbmc <- import_seurat_scANANSE(pbmc,
                               cluster_id = 'predicted.id',
                               anansnake_inf_dir = 
                                 "./scANANSE/analysis/influence/")

TF_influence <- per_cluster_df(pbmc,
                               cluster_id = 'predicted.id',
                               assay = 'influence')

get top 5 highest TFs

TF_influence$TF <- rownames(TF_influence)
TF_long <- reshape2::melt(TF_influence, id.vars = 'TF')
TF_influence$TF <- NULL
colnames(TF_long) <- c('TF', 'cluster', 'influence')
TF_long <- TF_long[order(TF_long$influence, decreasing = TRUE),]

#get the top n TFs per cluster
topTF <- Reduce(rbind,
                by(TF_long,
                   TF_long["cluster"],
                   head,
                   n = 5))# Top N highest TFs by cluster

top_TFs <- unique(topTF$TF)

TF_table <- topTF %>%
  dplyr::group_by(cluster) %>%
  dplyr::mutate('TopTFs' = paste0(TF, collapse = " "))

unique(TF_table[, c('cluster', 'TopTFs')])
col_fun <- colorRamp2(c(0, 1), c("white", "orange"))
mat <- TF_influence[rownames(TF_influence) %in% top_TFs, ]

pdf(
  './scANANSE/analysis/ANANSE_Heatmap.pdf',
  width = 16,
  height = 8,
  paper = 'special'
)
Heatmap(mat, col = col_fun)
dev.off()

Generate a Heatmap of the top TFs

set.seed(123)
DefaultAssay(object = pbmc) <- "influence"
mat <- TF_influence[rownames(TF_influence) %in% top_TFs, ]

col_fun <- colorRamp2(c(0, 1), c("white", "orange"))
costum_sample_order <- c(
  'CD14-Mono',
  'CD16-Mono',
  'cDC2',
  'pDC',
  'HSPC',
  'B-naive',
  'B-intermediate',
  'B-memory',
  'CD8-Naive',
  'CD4-Naive',
  'CD8-TCM',
  'CD4-TEM',
  'CD4-TCM',
  'Treg',
  'MAIT',
  'CD8-TEM',
  'gdT',
  'NK'
)

dend <- stats::as.dendrogram(stats::hclust(dist(t(mat))))

pdf(
  "./scANANSE/analysis/ANANSE_Heatmap.pdf" ,
  width = 16,
  height = 8,
  paper = 'special'
)
print(
  ComplexHeatmap::Heatmap(
    mat,
    row_dend_side = "right",
    show_column_dend = T,
    col = col_fun,
    cluster_columns = dend,
    row_km_repeats = 100
  )
)
dev.off()
pdf(
  './scANANSE/analysis/reference_umap.pdf',
  width = 15,
  height = 15,
  paper = 'special'
)
Idents(object = reference) <- "celltype.l2"
print(DimPlot(
  reference,
  label = T,
  repel = TRUE,
  reduction = "umap"
))
Idents(object = reference) <- "celltype.l1"
print(DimPlot(
  reference,
  label = T,
  repel = TRUE,
  reduction = "umap"
))
dev.off()

pdf(
  './scANANSE/analysis/ANANSE_umap.pdf',
  width = 10,
  height = 5,
  paper = 'special'
)
Idents(object = pbmc) <- "predicted.id"
print(DimPlot(
  pbmc,
  label = T,
  repel = TRUE,
  reduction = "umap"
) + NoLegend())
dev.off()
highlight_TF1 <- c('STAT4', 'LEF1', 'MEF2C')

pdf(
  './scANANSE/analysis/ANANSE_highlight.pdf',
  width = 10,
  height = 10,
  paper = 'special'
)
DefaultAssay(object = pbmc) <- "RNA"
plot_expression1 <-
  FeaturePlot(pbmc, features = highlight_TF1, ncol = 1)
DefaultAssay(object = pbmc) <- "influence"
plot_ANANSE1 <-
  FeaturePlot(
    pbmc,
    ncol = 1,
    features = highlight_TF1,
    cols = c("darkgrey", "#fc8d59")
  )
print(DimPlot(
  pbmc,
  label = T,
  repel = TRUE,
  reduction = "umap"
) + NoLegend())
print(plot_expression1 | plot_ANANSE1)
dev.off()

Lets vizualise the influence results of a specific contrast:

MemoryInfluence <-
  read.table('./scANANSE/analysis/influence/anansesnake_B-memory_B-naive.tsv',
             header = T)
NaiveInfluence <-
  read.table('./scANANSE/analysis/influence/anansesnake_B-naive_B-memory.tsv',
             header = T)

NaiveInfluence$factor_fc <- NaiveInfluence$factor_fc * -1
B_comparison <- rbind(NaiveInfluence, MemoryInfluence)

B_comparison_plot <-
  ggplot(B_comparison, aes(factor_fc, influence_score)) +
  geom_point(aes(size = direct_targets, colour = influence_score)) +
  xlim(-2, 2) +
  geom_text(aes(
    label = ifelse(factor_fc > 0.26 |
                     factor_fc < -0.5, as.character(factor), ""),
    hjust = 0.5,
    vjust = 2
  ))

pdf(
  './scANANSE/analysis/B_comparison_plot.pdf',
  width = 8,
  height = 3.5,
  paper = 'special'
)
print(B_comparison_plot)
dev.off()

import motif enrichment:

rds_file <- './scANANSE/data/preprocessed_PBMC.Rds'
pbmc <- readRDS(rds_file)

pbmc <- import_seurat_maelstrom(pbmc,
                                cluster_id = 'predicted.id',
                                maelstrom_dir = 
                                  './scANANSE/analysis/maelstrom/')

motif_scores <- per_cluster_df(pbmc,
                               assay = 'maelstrom',
                               cluster_id = 'predicted.id')
head(motif_scores)
pbmc <- Maelstrom_Motif2TF(
  pbmc,
  cluster_id = 'predicted.id',
  maelstrom_dir = './scANANSE/analysis/maelstrom',
  RNA_expression_assay = "SCT",
  output_dir = './scANANSE/analysis',
  expr_tresh = 100,
  cor_tresh = 0.3,
  combine_motifs = 'max_cor'
)

act_t <-
  per_cluster_df(pbmc, assay = 'TFcor', cluster_id = 'predicted.id')
negcor_TFs <-
  per_cluster_df(pbmc, assay = 'TFanticor', cluster_id = 'predicted.id')
top_pTFs <- head(pbmc@assays[["TFcor"]][[]], 15)
top_nTFs <- head(pbmc@assays[["TFanticor"]][[]], 15)

cluster_order <-
  c(
    'CD14-Mono',
    'CD16-Mono',
    "cDC2",
    "pDC",
    "HSPC",
    "B-naive",
    "B-intermediate",
    "B-memory",
    "CD4-Naive",
    "CD8-Naive",
    "CD8-TCM",
    "CD4-TEM",
    "Treg",
    "CD8-TEM" ,
    "MAIT",
    "gdT",
    "NK"
  )
col_fun <-
  circlize::colorRamp2(c(-5, 0, 5), c('#998ec3', 'white', '#f1a340'))
col_fun_cor <-
  circlize::colorRamp2(c(-1, 0, 1), c('#7b3294', '#f7f7f7', '#008837'))

pdf(
  './scANANSE/analysis/Maelstrom_correlations.pdf',
  width = 8,
  height = 5,
  paper = 'special'
)

for (regtype in c('TFcor', 'TFanticor')) {
  top_TFs <- head(pbmc@assays[[regtype]][[]], 15)
  mat <-
    per_cluster_df(pbmc, assay = regtype, cluster_id = 'predicted.id')
  mat <- mat[rownames(mat) %in% rownames(top_TFs), ]

  #get TF expression matrix
  exp_mat <-
    AverageExpression(
      pbmc,
      assay = 'SCT',
      slot = 'data',
      features = rownames(top_TFs),
      group.by = 'predicted.id'
    )[[1]]
  exp_mat <- exp_mat[, colnames(exp_mat)]
  exp_mat <-  t(scale(t(exp_mat)))
  #get correlation score
  row_ha <- rowAnnotation(correlation = top_TFs$cor,
                         col = list(correlation = col_fun_cor))
  print(
    Heatmap(exp_mat[, cluster_order], cluster_columns = F) + Heatmap(
      mat[, cluster_order],
      col = col_fun,
      cluster_columns = F,
      right_annotation = row_ha
    )
  )
}

dev.off()
TF_list <- c('PAX5', 'STAT6', 'ETS1', 'GATA3', 'MAX')
pdf(
  './scANANSE/analysis/Factor_Motif_TFanticor.pdf',
  width = 8,
  height = 8,
  paper = 'special'
)
Factor_Motif_Plot(
  pbmc,
  TF_list,
  assay_maelstrom = 'TFanticor',
  logo_dir = './scANANSE/analysis/maelstrom/logos/',
  col = c('darkred', 'white', 'grey')
)
dev.off()

TF_list <- c('MEF2C', 'TCF7', 'ETS1', 'GATA3', 'MAX')
pdf(
  './scANANSE/analysis/Factor_Motif_TFcor.pdf',
  width = 8,
  height = 8,
  paper = 'special'
)
Factor_Motif_Plot(
  pbmc,
  TF_list,
  assay_maelstrom = 'TFcor',
  logo_dir = './scANANSE/analysis/maelstrom/logos/',
  col = c('grey', 'white', 'darkgreen')
)
dev.off()


Try the AnanseSeurat package in your browser

Any scripts or data that you put into this service are public.

AnanseSeurat documentation built on Nov. 12, 2023, 1:08 a.m.