In VPetukhov/dropEstAnalysis: Analysis to produce figures from dropEst paper

knitr::read_chunk("../../analysis/chunks.R")

library(cowplot)
library(ggplot2)
library(ggpubr)
library(ggrastr)
library(dplyr)
library(Matrix)
library(parallel)
library(reshape2)
library(Seurat)

library(dropestr)
library(dropEstAnalysis)

theme_set(theme_base)

set.seed(42)
kDataPath <- '../../data/dropest/allon_new/'

kOutputFolder <- '../../output/'
kCsvLink <- 'ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2230nnn/GSM2230762/suppl/GSM2230762%5Fmouse2%5Fumifm%5Fcounts%2Ecsv%2Egz'
kRescuedScoreThreshold <- 0.9
kFilteredScoreThreshold <- 0.1

LibrariesUnion <- function(libs, names.func=names, set.names.func=setNames, union.func=unlist) {
  return(lapply(1:length(libs), function(i) 
    set.names.func(libs[[i]], paste0(names.func(libs[[i]]), '-', i))) %>% union.func())
}

Load data

Pipeline data

d_folders <- c('SRR3879617/est_11_22/', 'SRR3879618/est_11_22/', 'SRR3879619/est_11_29/')
rds_paths <- paste0(kDataPath, d_folders, 'cell.counts.rds')
holders <- mclapply(rds_paths, readRDS, mc.cores=3)

cms <- lapply(holders, `[[`, 'cm')
common_genes <- Reduce(intersect, lapply(cms, rownames))
cms <- lapply(cms, function(x) x[common_genes,])

cm_union <- LibrariesUnion(cms, colnames, `colnames<-`, function(x) Reduce(cbind, x))
scores <- mclapply(holders, ScorePipelineCells, mit.chromosome.name='chrM', 
                   mc.cores=length(holders)) %>% LibrariesUnion()

mit_fracs <- lapply(holders, `[[`, 'reads_per_chr_per_cells') %>% lapply(`[[`, 'Exon') %>%
  lapply(GetChromosomeFraction, 'chrM') %>% LibrariesUnion()

umis_per_cb <- sort(Matrix::colSums(cm_union), decreasing=T)

Data from the paper

published_csv <- url(kCsvLink) %>% gzcon() %>% readLines() %>% paste0(collapse='\n') %>% 
  data.table::fread(data.table=F)

published_csv <- published_csv[, 1:3] %>%
  mutate(LibId = substr(V1, 11, 11) %>% as.integer(),
         Barcode = gsub("-", "", barcode) %>% paste0('-', LibId),
         Cluster = gsub("_", " ", assigned_cluster)) %>%
  dplyr::select(-V1, -barcode, -LibId, -assigned_cluster)

cluster_by_barcodes <- setNames(published_csv$Cluster, published_csv$Barcode)
dim(published_csv)

Pagoda analysis

r_cm <- cm_union[, order(Matrix::colSums(cm_union), decreasing=T)]

cell_mask <- setNames(rep(FALSE, ncol(r_cm)), colnames(r_cm))
cell_mask[names(scores)[scores > kRescuedScoreThreshold]] <- TRUE
cell_mask[intersect(names(cluster_by_barcodes), names(cell_mask))] <- TRUE
r_cm <- r_cm[, cell_mask]

r_cm <- r_cm[Matrix::rowSums(r_cm) > 20, ]

cluster_by_barcodes <- cluster_by_barcodes[names(cluster_by_barcodes) %>% 
                                             intersect(colnames(r_cm))]

r <- GetPagoda(r_cm, n.cores=10)

PlotPagodaEmbeding(r, colors=mit_fracs, show.legend=T, title='Mitochondrial fraction',
                   alpha=0.9, size=0.5, font.size=4.5) +
  scale_color_continuous(name='Fraction') +
  theme_pdf(legend.pos=c(1, 1))

Number of cells

umi_counts_union_full <- sort(colSums(cm_union), decreasing=T)
umi_counts_union <- umi_counts_union_full[1:5000]

gg1 <- PlotCellsNumberHist(umi_counts_union_full, breaks=80, estimate.cells.number=T) +
  scale_x_continuous(name='#UMI in cell', labels=round(10^(2:4)), breaks=2:4) + 
  theme_pdf()
gg2 <- PlotCellsNumberLine(umi_counts_union_full, breaks=80, estimate.cells.number=T) + 
  theme_pdf() + theme(legend.position="none")
gg3 <- PlotCellsNumberLogLog(umi_counts_union_full, estimate.cells.number=T) + 
  theme_pdf() + theme(legend.position="none")

gg_cell_num <- cowplot::plot_grid(gg1, gg2, gg3, ncol=1, nrow=3, labels="AUTO")

ggsave(paste0(kOutputFolder, 'figures/supp_cell_num_srr3.pdf'), gg_cell_num, 
       width=5, height=7)

gg_cell_num

Rescued cells

filtered_cbs <- intersect(names(cluster_by_barcodes), 
                          names(scores)[scores <= kFilteredScoreThreshold])
rescued_cbs <- setdiff(names(scores)[scores > kRescuedScoreThreshold], 
                       names(cluster_by_barcodes))
intersect_cbs <- intersect(names(r$clusters$PCA$infomap), names(cluster_by_barcodes))

notannotated_cells <- setdiff(colnames(r_cm), names(cluster_by_barcodes))
clusters_annotated_resc <- AnnotateClustersByGraph(r$graphs$PCA, cluster_by_barcodes, 
                                            notannotated_cells,
                                            max.iter=100, mc.cores=10)
rescued_clusters <- clusters_annotated_resc[rescued_cbs]

plot_clusters <- cluster_by_barcodes[names(scores)[scores > kFilteredScoreThreshold]]
plot_rescued_clusters <- rescued_clusters

for (type in c("immune other", "activated stellate", "quiescent stellate")) {
  plot_clusters[plot_clusters == type] <- gsub(" ", "\n", type)
  plot_rescued_clusters[plot_rescued_clusters == type] <- gsub(" ", "\n", type)
}

gg_tsne <- PlotFiltrationResults(r, clusters=plot_clusters, 
                                 rescued.clusters=plot_rescued_clusters,
                                 filtered.cbs=filtered_cbs,
                                 unchanged.alpha=1, rescued.alpha=1, lineheight=0.7,
                                 raster.width=7 / 1.75, raster.height=6 / 1.3) +
  theme_pdf(legend.pos=c(0, 1), show.ticks=F)

gg_tsne

Number of rescued cells per cluster

rescued_table <- TableOfRescuedCells(clusters_annotated_resc[c(intersect_cbs, rescued_cbs)], 
                                     rescued_cbs)
write.csv(rescued_table, paste0(kOutputFolder, "tables/rescued_cbc_srr3.csv"), row.names=F)
rescued_table

Seurat analysis

srt <- CreateSeuratObject(raw.data=r_cm, min.cells=10, min.genes=0, display.progress=F)
srt <- NormalizeData(object = srt, normalization.method = "LogNormalize", 
                     scale.factor = 10000, display.progress=F)
srt <- FindVariableGenes(object=srt, mean.function=ExpMean, dispersion.function=LogVMR,
                         x.low.cutoff = 0.0125, x.high.cutoff = 3, y.cutoff = 1, 
                         do.plot=F, display.progress=F)
srt <- ScaleData(object = srt, vars.to.regress = "nUMI", display.progress=F)

srt@ident <- as.factor(clusters_annotated_resc[colnames(srt@raw.data)])
names(srt@ident) <- colnames(srt@raw.data)
compared_clusters <- c('beta', 'delta', 'alpha', 'gamma')
cluster_markers <- mclapply(compared_clusters, function(i) 
  mclapply(setdiff(compared_clusters, i), FindClusterMarkers, i, srt, mc.cores=3), 
  mc.cores=4)
overexpressed_genes <- GetOverexpressedGenes(srt, compared_clusters, cluster_markers, 
                                             genes.from.cluster=50, 
                                             expression.threshold=0.3)
length(overexpressed_genes)

Heatmaps

clusts_after_filt <- cluster_by_barcodes[setdiff(names(cluster_by_barcodes), filtered_cbs)]
real_clusts <- c(clusts_after_filt, rescued_clusters)

selected_clusters <- compared_clusters

tested_clusts <- sort(real_clusts[real_clusts %in% selected_clusters])
separation <- c(setNames(rep('rescued', length(rescued_cbs)), rescued_cbs),
                setNames(rep('real', length(clusts_after_filt)), names(clusts_after_filt)))

umis_per_cb_subset <- log10(Matrix::colSums(r_cm[, names(tested_clusts)]))
tested_clusts <- tested_clusts[order(tested_clusts, -umis_per_cb_subset)]
de_genes <- intersect(overexpressed_genes, colnames(r$counts))

plot_df <- ExpressionMatrixToDataFrame(r$counts[names(tested_clusts), de_genes], 
                                       umis_per_cb_subset, tested_clusts, rescued_cbs)
# plot_df <- plot_df %>% filter(UmisPerCb < 3.5)
plot_dfs <- split(plot_df, plot_df$IsRescued) %>% setNames(c('real', 'rescued'))

ggs <- lapply(plot_dfs, HeatmapAnnotGG) %>%
  lapply(lapply, `+`, theme(plot.margin=margin()))

legend_guides <- list(HeatmapLegendGuide('Expression'),
                      HeatmapLegendGuide('Cell type', guide=guide_legend, nrow=4),
                      HeatmapLegendGuide('log10(#molecules)'))
gg_legends <- mapply(`+`, ggs$real, legend_guides, SIMPLIFY=F) %>%
  lapply(`+`, theme(legend.margin=margin(l=4, r=4, unit='pt'))) %>% lapply(get_legend)

ggs$real$heatmap <- ggs$real$heatmap + rremove('xlab') + ylab('Cells')
ggs$rescued$heatmap <- ggs$rescued$heatmap + labs(x = 'Genes', y = 'Cells')
ggs_annot <- lapply(ggs, function(gg) plot_grid(plotlist=lapply(gg, `+`, rremove('legend')), 
                                                nrow=1, rel_widths=c(1.5, 0.12, 0.12),
                                                align='h'))

Figure

gg_legends_plot <- plot_grid(plotlist=gg_legends[c(1, 3)], nrow=2, align='v') %>%
  plot_grid(gg_legends[[2]], ncol=2, rel_widths=c(2, 1))

gg_left <- plot_grid(ggs_annot$real, ggs_annot$rescued, nrow=2, labels='AUTO')
gg_right <- plot_grid(gg_tsne + theme(plot.margin=margin(l=0.1, unit='in')), gg_legends_plot,
                      nrow=2, rel_heights=c(1, 0.3), align='v', labels=c('C'))

gg_fig <- plot_grid(gg_left, gg_right, rel_widths=c(0.8, 1)) +
  theme(plot.margin=margin(1, 1, 1, 1))

gg_fig

ggsave(paste0(kOutputFolder, 'figures/supp_lq_srr3.pdf'), gg_fig, width=7, height=6)

Session information

VPetukhov/dropEstAnalysis documentation built on Dec. 28, 2019, 8:16 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

VPetukhov/dropEstAnalysis
Analysis to produce figures from dropEst paper

In VPetukhov/dropEstAnalysis: Analysis to produce figures from dropEst paper

Load data

Pipeline data

Data from the paper

Pagoda analysis

Number of cells

Rescued cells

Number of rescued cells per cluster

Seurat analysis

Heatmaps

Figure

Session information

R Package Documentation

Browse R Packages

We want your feedback!

VPetukhov/dropEstAnalysis Analysis to produce figures from dropEst paper

In VPetukhov/dropEstAnalysis: Analysis to produce figures from dropEst paper

Load data

Pipeline data

Data from the paper

Pagoda analysis

Number of cells

Rescued cells

Number of rescued cells per cluster

Seurat analysis

Heatmaps

Figure

Session information

R Package Documentation

Browse R Packages

We want your feedback!

VPetukhov/dropEstAnalysis
Analysis to produce figures from dropEst paper