In CostaLab/sigurd: Single cell Genotyping Using RNA Data

knitr::opts_chunk$set(echo = TRUE)

Loading necessary packages.

suppressPackageStartupMessages(library(sigurd))
suppressPackageStartupMessages(library(SummarizedExperiment))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(ComplexHeatmap))
suppressPackageStartupMessages(library(circlize))
suppressPackageStartupMessages(library(grid))
suppressPackageStartupMessages(library(EnhancedVolcano))

Analysis of MPN data

Here, we illustrate how to analysis genotyping data from MPN samples. The JAK2V617F mutation was genotyped for 6 MPN samples and 3 healthy controls.

This setup makes this analysis different from the previous analyses. Previously, only one sample was analysed at a time. Now, we load and analyse the samples simultaneously.

Loading MPN data

Now, we load the JAK2V617F data and plot the mutated cells on the UMAP.

Since we are loading multiple samples, this code is more complicated than the code for the MAESTER SW sample.

# Loading the scRNA-seq data.
scrna <- load_object("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/Paper_Figures/ZenodoFiles/Zenodo_Seurat_Object.rds")

# Loading the genotyping data.
# genotyping  <- LoadingVarTrix_typewise(patient = "MPN", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/DesignMatrix_VarTrix.csv", min_reads = 0, vcf_path = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/CosmicSubset_JAK2V617F.vcf", type_use = "scRNAseq_Somatic", min_cells = 0, verbose = FALSE)
# amplicon <- LoadingVarTrix_typewise(patient = "MPN", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/DesignMatrix_Amplicon.csv", min_reads = 0, vcf_path = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/CosmicSubset_UCSC_JAK2V617F.vcf", type_use = "scRNAseq_Amplicon", min_cells = 0, verbose = FALSE)
# genotyping  <- AmpliconSupplementing(scRNAseq = genotyping, amplicon = amplicon, verbose = FALSE)

# Filtering the genotyping object to remove false positives.
# genotyping <- Filtering(genotyping, min_cells_per_variant = 0, fraction_threshold = 0.21, cells_include = colnames(scrna), min_variants_per_cell = 1, reject_value = "NoCall", verbose = FALSE)

# We load the provided genotyping object.
genotyping <- load_object("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/MPN_JAK2V617F_Genotyping.rds")

Combining Seurat and SIGURD

Now, we add the genotyping information to the Seurat object.

scrna <- SetVariantInfo(SE = genotyping, seurat_object = scrna)

Visualisation

We visualize the genotyping information on the UMAP.

# Plotting the mutated cells on the UMAP.
scrna$JAK2_p.V617F_c.1849G.T_consensus <- factor(scrna$JAK2_p.V617F_c.1849G.T_consensus, levels = c("Ref", "Alt"))
jak2_cells <- sum(scrna$JAK2_p.V617F_c.1849G.T_consensus == "Alt", na.rm = TRUE)
ref_cells <- sum(scrna$JAK2_p.V617F_c.1849G.T_consensus == "Ref", na.rm = TRUE)
all_cells <- ncol(scrna)
p <- DimPlot(scrna, group.by = "JAK2_p.V617F_c.1849G.T_consensus", reduction = "INTE_UMAP", order = TRUE, pt.size = 1, raster = FALSE, na.value = "grey95", cols = c(Alt = "#aa0051", Ref = "#79b938")) +
  ggtitle(paste0("Alt: ", jak2_cells, ", Ref: ", ref_cells, ", All: ", all_cells)) +
  xlab("UMAP 1") + ylab("UMAP 2") + scale_color_manual(breaks = c("Alt", "Ref"), values = c("#aa0051", "#79b938"), na.value = "grey95") +
  theme(legend.position = "top")
print(p)

We determine DEGs between the JAK2V617F and WT cells.

Now, we determine the DEGs between the JAK2V617F cells and the wild type cells for the Orthochromatic Erythroblasts.

scrna_orthoe <- subset(scrna, celltype == "Orthochromatic Erythroblasts" & condition != "HC")
degs <- FindMarkers(scrna_orthoe, group.by = "JAK2_p.V617F_c.1849G.T_consensus", ident.1 = "Alt", ident.2 = "Ref", logfc.threshold = 0)
degs <- data.frame(degs, gene = rownames(degs))
degs_up   <- nrow(subset(degs, avg_log2FC >   0.25 & p_val_adj < 0.05))
degs_down <- nrow(subset(degs, avg_log2FC <  -0.25 & p_val_adj < 0.05))
EnhancedVolcano(degs, x = "avg_log2FC", y = "p_val_adj", lab = degs$gene, FCcutoff = 0.25, pCutoff = 0.05, title = "OrthoE JAK2V617F Pos. vs. Neg.",subtitle = NULL)

Combining the JAK2V617F data with the MT data.

We combine the JAK2V617F SIGURD object with the MT object. Cells and variants in both objects are combined.

To save time, we are only processing a single sample. All other samples are processed equally. Since the genotyping object is a list, it is easily processed using parallel computing.

First, we load and filter the mitochondrial variant information.

# Loading the data. 
# genotyping_mt <- LoadingMAEGATK_typewise(patient = "MPN1", samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/DesignMatrix_MT.csv", min_cells = 0, cells_include = colnames(scrna), type_use = "scRNAseq_MT", verbose = FALSE)
# genotyping_mt <- Filtering(genotyping_mt, min_variants_per_cell = 1, min_cells_per_variant = 2, fraction_threshold = 0.05, cells_include = colnames(scrna), verbose = FALSE)
# Adding the JAK2V617F information.
# genotyping_MPN1 <- Filtering(genotyping, cells_include = grep("MPN1", colnames(genotyping), value = TRUE))
# genotyping_mt <- CombineSEobjects(genotyping_MPN1, genotyping_mt)

# We load the Zenodo File. This is equivalent to the way we do it above. 
genotyping_mt_all <- load_object("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/Paper_Figures/ZenodoFiles/scrna_genotyping.rds")
genotyping_mt <- genotyping_mt_all[["MPN1"]]
genotyping_mt <- Filtering(genotyping_mt, min_variants_per_cell = 1, min_cells_per_variant = 2, fraction_threshold = 0.05, cells_include = colnames(scrna), verbose = FALSE)

MPN VOIs and Diversity

Now, we select the mtVOIs. We then determine the clonal lineages and plot the results on a heatmap.

We subset the cells to only include the erythroid cells.

# Subset to erythroid cells.
erythroid_cells <- colnames(subset(scrna, celltype_merged == "ErythroidCells"))
genotyping_mt <- Filtering(genotyping_mt, cells_include = erythroid_cells)

# Selecting the mtVOIS.
mt_vois <- VariantSelection_Quantile(genotyping_mt, min_coverage = 1, verbose = FALSE)

# Determining the clonal lineages.
genotyping_mt <- ClonalDefinition(se = genotyping_mt, variants_ls = list(mt_vois), verbose = FALSE)

Visualisation of mtVOIs and Clonal Lineages

We now plot the variants of interest and the clonal lineages.

# Generating a heatmap.
colors_lineage <- c("C1" = "#563e00", "C2" = "#dec234", "C3" = "#e23856", "C4" = "#7f0073", "C5" = "#006639", "C6" = "#b1db8e", "C7" = "#00348f", "Negative" = "black")
ha <- columnAnnotation(Clones = colData(genotyping_mt)[,"Clones"],
                       col = list(Clones = colors_lineage),
                       annotation_name_gp = gpar(fontsize = 10, fontface = "plain"),
                       annotation_legend_param = list(Clones = list(nrow = 1, title = "")))
fraction <- as.matrix(assays(genotyping_mt)[["fraction"]])[mt_vois, ]
hm <- Heatmap(fraction,
  name = "VAF",
  row_title_gp = gpar(fontsize = 12),
  row_names_gp = gpar(fontsize = 10),
  row_names_max_width = unit(16, "cm"),
  column_title = "MPN1",
  column_title_gp = gpar(fontsize = 12),
  column_names_gp = gpar(fontsize = 10),
  col = colorRamp2(seq(0, round(max(fraction, na.rm = TRUE)), length.out = 9), c("#FCFCFC", "#FFEDB0", "#FFDF5F", "#FEC510", "#FA8E24", "#F14C2B", "#DA2828", "#BE2222", "#A31D1D")),
  show_column_dend = FALSE,
  show_row_names = TRUE,
  show_column_names = FALSE,
  cluster_columns = TRUE,
  cluster_rows = FALSE,
  heatmap_legend_param = list(border = "#000000", grid_height = unit(10, "mm"),
  direction = "horizontal"),
  bottom_annotation = ha,
  border = TRUE,
  use_raster = TRUE,
  show_heatmap_legend = TRUE)
draw(hm, annotation_legend_side = "bottom")

Calculating clonal diversity.

We calculate the clonal diversity for the sample MPN1 as an example.

clonal_diversity <- ClonalDiversity(genotyping_mt, grouping = "Clones", diversity_measure = "EffectiveSpecies", verbose = FALSE)
print(paste0("Clonal Diversity in Effective Number of Species for MPN1: ", clonal_diversity))

Repeating the MT analysis for all samples.

We now repeat the MT analysis for all samples. This will take considereable time.

We speed the analsyis up by using parallel computing. Since the Zenodo file was already loaded, we save time here.

# We get all patients present in the design matrix.
design_matrix <- read.table("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/DesignMatrix_MT.csv", header = TRUE, sep = ",")
patients <- design_matrix$patient
# genotyping_mt_all <- parallel::mclapply(patients, LoadingMAEGATK_typewise, samples_file = "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/sigurd/data/DesignMatrix_MT.csv", min_cells = 0, cells_include = colnames(scrna), type_use = "scRNAseq_MT", verbose = FALSE, mc.cores = length(patients))
# names(genotyping_mt_all) <- patients
# We already loaded the data.
genotyping_mt_all <- parallel::mclapply(genotyping_mt_all, Filtering, min_variants_per_cell = 1, min_cells_per_variant = 2, fraction_threshold = 0.05, cells_include = erythroid_cells, verbose = FALSE, mc.cores = length(patients))

Selecting variants of interest for all samples

We select variants of interest for all samples.

# Selecting the mtVOIS.
mt_vois_all <- parallel::mclapply(genotyping_mt_all, VariantSelection_Quantile, min_coverage = 1, verbose = FALSE, mc.cores = length(patients))

# Determining the clonal lineages. This takes very long. We use parallel computing to process all samples simultaneously.
genotyping_mt_all <- parallel::mclapply(1:length(mt_vois_all), function(x){
  sample_use <- names(mt_vois_all)[x]
  clones <- ClonalDefinition(se = genotyping_mt_all[[sample_use]], mt_vois_all[sample_use], verbose = FALSE)
  return(clones)
}, mc.cores = length(mt_vois_all))
names(genotyping_mt_all) <- patients
save_object(genotyping_mt_all, "/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/Paper_Figures/ZenodoFiles/genotyping_mt_all.rds.lz4", "lz4")

Calculating the diverstiy for all samples

# We load the previous results.
genotyping_mt_all <- load_object("/data/MPN/exp/scRNA/MPN_mutations/SIGURD_paper/Paper_Figures/ZenodoFiles/genotyping_mt_all.rds.lz4")
clonal_diversity_all <- unlist(lapply(genotyping_mt_all, ClonalDiversity, grouping = "Clones", diversity_measure = "EffectiveSpecies", verbose = FALSE))
clonal_diversity_all <- data.frame(Sample = names(genotyping_mt_all), EffectiveSpecies = clonal_diversity_all)
knitr::kable(clonal_diversity_all, format="html")

CostaLab/sigurd documentation built on Feb. 10, 2025, 11:08 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

CostaLab/sigurd
Single cell Genotyping Using RNA Data

In CostaLab/sigurd: Single cell Genotyping Using RNA Data

Loading necessary packages.

Analysis of MPN data

Loading MPN data

Combining Seurat and SIGURD

Visualisation

We determine DEGs between the JAK2V617F and WT cells.

Combining the JAK2V617F data with the MT data.

MPN VOIs and Diversity

Visualisation of mtVOIs and Clonal Lineages

Calculating clonal diversity.

Repeating the MT analysis for all samples.

Selecting variants of interest for all samples

Calculating the diverstiy for all samples

R Package Documentation

Browse R Packages

We want your feedback!

CostaLab/sigurd Single cell Genotyping Using RNA Data

In CostaLab/sigurd: Single cell Genotyping Using RNA Data

Loading necessary packages.

Analysis of MPN data

Loading MPN data

Combining Seurat and SIGURD

Visualisation

We determine DEGs between the JAK2V617F and WT cells.

Combining the JAK2V617F data with the MT data.

MPN VOIs and Diversity

Visualisation of mtVOIs and Clonal Lineages

Calculating clonal diversity.

Repeating the MT analysis for all samples.

Selecting variants of interest for all samples

Calculating the diverstiy for all samples

R Package Documentation

Browse R Packages

We want your feedback!

CostaLab/sigurd
Single cell Genotyping Using RNA Data