knitr::read_chunk("../../analysis/chunks.R")
library(ggplot2) library(dplyr) library(dropestr) library(dropEstAnalysis) library(Matrix) theme_set(theme_base) set.seed(42) kDropEstData <- '../../data/dropest/10x/frozen_bmmc_healthy_donor1/' kEstFolder <- paste0(kDropEstData, 'est_11_10_umi_quality/') k10xFolder <- paste0(kDropEstData, 'filtered_matrices_mex/hg19/') kAnnotationData <- '../../data/annotation/' kOutputPath <- '../../output/'
holder <- readRDS(paste0(kEstFolder, 'bmmc_no_umi.rds')) genes <- read.table(paste0(k10xFolder, 'genes.tsv')) %>% filter(V2 %in% names(which(table(V2) == 1))) gene_id_to_names <- setNames(genes$V2, genes$V1) holder$cm <- holder$cm[grep("^[^;]+$", rownames(holder$cm)),] holder$cm_raw <- holder$cm_raw[grep("^[^;]+$", rownames(holder$cm_raw)),]
umis_per_cell <- sort(Matrix::colSums(holder$cm_raw), decreasing=T) est_cell_num <- EstimateCellsNumber(umis_per_cell)
Quality scores:
scores <- ScorePipelineCells(holder, mit.chromosome.name='MT', predict.all=T, verbose=T)[names(umis_per_cell)] PlotCellScores(scores, cells.number=est_cell_num)
Pagoda run:
real_cbs <- names(scores)[1:est_cell_num$expected] real_cbs <- real_cbs[scores[real_cbs] > 0.5] real_cbs <- union(names(scores)[scores > 0.9], real_cbs) r_cm <- holder$cm_raw[, real_cbs] r_cm <- r_cm[intersect(rownames(r_cm), names(gene_id_to_names)), ] rownames(r_cm) <- gene_id_to_names[rownames(r_cm)] pgd <- GetPagoda(r_cm, n.cores=30, tsne.iter.num=5000) # clusters <- pgd$clusters$PCA$infomap # write.csv(clusters, paste0(kAnnotationData, 'bmmc1_clusters.csv')) # Pagoda uses stochastic clustering algorithm, so we saved clusters from one run clusters <- read.csv(paste0(kAnnotationData, 'bmmc1_clusters.csv'), row.names=1) clusters <- setNames(clusters$x, rownames(clusters)) log_mtx <- log10(1e-3 + as.matrix(pgd$counts[names(clusters), ]))
Initial clustering:
PlotPagodaEmbeding(pgd, clusters=clusters, show.ticks=F)
Description:
https://www.bdbiosciences.com/documents/Bcell_Brochure.pdf - B cells
https://www.bdbiosciences.com/documents/cd_marker_handbook.pdf - CD Markers
de_genes <- pgd$getDifferentialGenes(type='PCA', groups=clusters, upregulated.only=T) %>% lapply(rownames) major_cell_types <- lst( `T cells` = sapply(de_genes, function(genes) 'CD3D' %in% genes) %>% which() %>% names() %>% as.integer(), `B cells` = sapply(de_genes, function(genes) 'MS4A1' %in% genes) %>% which() %>% names() %>% as.integer() ) major_type_clusts <- major_cell_types %>% unlist() if (length(major_type_clusts) != length(unique(major_type_clusts))) stop("Something goes wrong")
heatmap_genes <- c( 'CD19', 'MME', 'MS4A1', 'CD3D', 'LYZ', 'CD14', 'GZMA', 'GZMB', 'GNLY', 'NKG7', 'FCER1A', 'CST3', 'CD34', 'PTPRC', 'ITGB1', 'ENG', 'EPCAM', 'APOE', 'GYPA', 'CD36', 'TFRC' ) heatmap_clusters <- clusters[!(clusters %in% unlist(major_cell_types))] heatmap_clusters <- heatmap_clusters[heatmap_clusters > 9] PlotExpressionHeatmap(log_mtx, heatmap_clusters, heatmap_genes)
type_ids <- c(major_cell_types, lst( `CD14+ Monocytes` = c(2), `NK cells` = c(4), `Dendritic cells` = c(14), `Monocyte progenitors` = c(11, 15), `Epithelial cells` = c(9), `Erythroblasts` = c(5, 6, 8) )) type_ids$`B cells` <- c(type_ids$`B cells`, 10, 12:13) type_ids$`T cells` <- c(type_ids$`T cells`, 16:17) markers_df <- data.frame( Type = c("B cells", "T cells", "CD14+ Monocytes", "NK cells", "Dendritic cells", "Monocyte progenitors", "Epithelial cells", "Erythroblasts"), Markers = c("CD19, MME (CD10), MS4A1 (CD20)", "CD3D", "LYZ, CD14", "GZMA, GZMB, GNLY, NKG7", "FCER1A, CST3", "LYZ, CD34", "EPCAM (CD326), CD226-, APOE (CD165)", "GYPA (CD235a), CD36, TFRC (CD71)") ) markers_df$Clusters <- sapply(type_ids, paste, collapse=", ")[as.character(markers_df$Type)] markers_df
clusters_annotated <- AnnotateClusters(clusters, type_ids) PlotClustering(pgd, clusters_annotated)
heatmap_genes <- c( 'MS4A1', 'CD40', 'IL4R', 'IL7R', 'CD34', 'CD38', 'MME', 'CD19', 'CD37', 'IGLL5') heatmap_clusters <- clusters[clusters %in% type_ids$`B cells`] # heatmap_clusters <- heatmap_clusters[heatmap_clusters > 9] PlotExpressionHeatmap(log_mtx, heatmap_clusters, heatmap_genes)
b_markers_df <- data.frame( Type = c("Immature B cells", "Pre-pro B cells", "Pre B cells", "Non-dividing Pre B cells"), Markers = c("MS4A1 (CD20), CD40, IL4R, IL7R-", "CD34, CD38, MME (CD10), CD24-, IL7R-", "CD34-, CD40-, IL7R+, IL4R-, CD19+", "CD34, IGLL5") ) type_ids <- c(type_ids, lst( `Immature B cells` = c(3), `Pre-pro B cells` = c(13), `Non-dividing Pre B cells` = c(10), `Pre B cells` = c(12) )) type_ids$`B cells` <- NULL b_markers_df$Clusters <- sapply(type_ids, paste, collapse=", ")[as.character(b_markers_df$Type)] b_markers_df
heatmap_genes <- c('CCR7', "CD3E", "CD8B", "SELL", "GNLY", "GZMA", "GZMB", "GZMH", "GZMK", "PRF1", "NKG7", "IL7R", "CD4") heatmap_clusters <- clusters[clusters %in% type_ids$`T cells`] PlotExpressionHeatmap(log_mtx, heatmap_clusters, heatmap_genes)
t_markers_df <- data.frame( Type = c("Cytotoxic T cells", "T cells"), Markers = c("NKG7, GZMA, GZMH, GZMK", "CD3E, CD8B, IL7R") ) type_ids <- c(type_ids, lst( `Cytotoxic T cells` = c(7) )) type_ids$`T cells` <- setdiff(type_ids$`T cells`, type_ids$`Cytotoxic T cells`) t_markers_df$Clusters <- sapply(type_ids, paste, collapse=", ")[as.character(t_markers_df$Type)] t_markers_df
clusters_annotated <- AnnotateClusters(clusters, type_ids) write.csv(data.frame(Barcode=names(clusters_annotated), Type=as.vector(clusters_annotated)), paste0(kAnnotationData, 'bmmc1_clusters_annotated.csv'))
all_markers_df <- bind_rows(list(markers_df, t_markers_df, b_markers_df)) write.csv(all_markers_df[c('Type', 'Markers')], paste0(kOutputPath, 'tables/annotation_bmmc1_markers.csv'), row.names=F) all_markers_df
plot_mtx <- apply(log_mtx, 2, function(vec) scales::rescale(rank(vec)))
raster_width <- 8 / 3 raster_height <- 8 / 4 raster_dpi <- 150 clusters_annotated <- AnnotateClusters(clusters, type_ids) plot_clusters_annotated <- clusters_annotated long_type_names <- c("CD14+ Monocytes", "Non-dividing Pre B cells", "Monocyte progenitors", "Epithelial cells", "Cytotoxic T cells", "Immature B cells", "Dendritic cells", "Pre-pro B cells") for (type in long_type_names) { plot_clusters_annotated[plot_clusters_annotated == type] <- sub(" ", "\n", type) } gg_annotated <- PlotClustering(pgd, plot_clusters_annotated, lineheight=0.9, size=0.3, raster=T, raster.width=raster_width, raster.height=raster_height, raster.dpi=raster_dpi) + scale_size_continuous(range=c(2, 3)) + theme(axis.title=element_blank(), plot.margin=margin())
plot_genes <- c('CD3D', 'NKG7', 'MS4A1', 'IGLL5', 'CD37', 'GYPA', 'EPCAM', 'FCER1A', 'CD14', 'LYZ', 'CD34') gene_plots <- lapply(plot_genes, PlotGeneFraction, pgd, plot_mtx, title.x=0.04, title.y=0.99, legend.position="none", size=0.3, raster=T, raster.width=raster_width, raster.height=raster_height, raster.dpi=raster_dpi) gene_plots <- c(list(gg_annotated), gene_plots)
gg_fig <- cowplot::plot_grid(plotlist=gene_plots, ncol=3) + theme(plot.margin=margin(1, 1, 1, 1))
gg_fig
ggsave(paste0(kOutputPath, 'figures/supp_annotation_bmmc1.pdf'), width=8, height=8)
# # Web app # library(pagoda2) # go_env <- p2.generate.human.go(pgd) # pgd$testPathwayOverdispersion(setenv = go_env, verbose = T, correlation.distance.threshold = 0.9, # recalculate.pca = F, min.pathway.size = 100, max.pathway.size = 1000) # # go_sets <- p2.generate.human.go.web(colnames(pgd$counts)) # de_sets <- get.de.geneset(pgd, groups = pgd$clusters$PCA$infomap, prefix = 'de_') # go_sets <- c(go_sets, de_sets) # # additional_metadata <- list() # additional_metadata$altCluster <- as.factor(clusters_annotated) %>% # p2.metadata.from.factor(displayname='Annotated', s=0.7, v=0.8, start=0, end=0.5) # # is_rescued <- rep(T, ncol(r_cm)) %>% setNames(colnames(r_cm)) # is_rescued[intersect(names(umis_per_cell)[1:2000], colnames(r_cm))] <- F # additional_metadata$isRescued <- p2.metadata.from.factor(as.factor(is_rescued), displayname='IsRescued') # # pgd_web_object <- make.p2.app(pgd, dendrogramCellGroups = pgd$clusters$PCA$infomap, # additionalMetadata = additional_metadata, # geneSets = go_sets, show.clusters = T) # # pgd_web_object$serializeToStaticFast(binary.filename = paste0(kEstFolder, 'bmmc1_pagoda_annotated.bin')) # saveRDS(pgd_web_object, paste0(kEstFolder, 'pagoda_annotation_web.rds')) # # # show.app(pgd_web_object, "bmmc")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.