suppressPackageStartupMessages({ library(MultiAssayExperiment) library(SingleCellExperiment) library(scater) library(scran) library(dplyr) library(ggplot2) })
b.cells <- read10XResults("../../data/data_raw/zheng/b_cells/filtered_matrices_mex/hg19") naive.cytotoxic <- read10XResults("../../data/data_raw/zheng/naive_cytotoxic/filtered_matrices_mex/hg19") cd14.monocytes <- read10XResults("../../data/data_raw/zheng/cd14_monocytes/filtered_matrices_mex/hg19") regulatory.t <- read10XResults("../../data/data_raw/zheng/regulatory_t/filtered_matrices_mex/hg19") cd4.t.helper <- read10XResults("../../data/data_raw/zheng/cd4_t_helper/filtered_matrices_mex/hg19") cd56.nk <- read10XResults("../../data/data_raw/zheng/cd56_nk/filtered_matrices_mex/hg19") memory.t <- read10XResults("../../data/data_raw/zheng/memory_t/filtered_matrices_mex/hg19") naive.t <- read10XResults("../../data/data_raw/zheng/naive_t/filtered_matrices_mex/hg19") colnames(b.cells) <- paste0("b.cells", seq_len(ncol(b.cells))) colnames(naive.cytotoxic) <- paste0("naive.cytotoxic", seq_len(ncol(naive.cytotoxic))) colnames(cd14.monocytes) <- paste0("cd14.monocytes", seq_len(ncol(cd14.monocytes))) colnames(regulatory.t) <- paste0("regulatory.t", seq_len(ncol(regulatory.t))) colnames(cd4.t.helper) <- paste0("cd4.t.helper", seq_len(ncol(cd4.t.helper))) colnames(cd56.nk) <- paste0("cd56.nk", seq_len(ncol(cd56.nk))) colnames(memory.t) <- paste0("memory.t", seq_len(ncol(memory.t))) colnames(naive.t) <- paste0("naive.t", seq_len(ncol(naive.t))) colData(b.cells)$phenoid <- "b.cells" colData(naive.cytotoxic)$phenoid <- "naive.cytotoxic" colData(cd14.monocytes)$phenoid <- "cd14.monocytes" colData(regulatory.t)$phenoid <- "regulatory.t" colData(cd4.t.helper)$phenoid <- "cd4.t.helper" colData(cd56.nk)$phenoid <- "cd56.nk" colData(memory.t)$phenoid <- "memory.t" colData(naive.t)$phenoid <- "naive.t" ## Subsample set.seed(1234) b.cells <- b.cells[, sample(colnames(b.cells), 500, replace = FALSE)] naive.cytotoxic <- naive.cytotoxic[, sample(colnames(naive.cytotoxic), 400, replace = FALSE)] cd14.monocytes <- cd14.monocytes[, sample(colnames(cd14.monocytes), 600, replace = FALSE)] regulatory.t <- regulatory.t[, sample(colnames(regulatory.t), 500, replace = FALSE)] cd4.t.helper <- cd4.t.helper[, sample(colnames(cd4.t.helper), 400, replace = FALSE)] cd56.nk <- cd56.nk[, sample(colnames(cd56.nk), 600, replace = FALSE)] memory.t <- memory.t[, sample(colnames(memory.t), 500, replace = FALSE)] naive.t <- naive.t[, sample(colnames(naive.t), 500, replace = FALSE)]
sce <- cbind(b.cells, naive.cytotoxic, cd14.monocytes, regulatory.t, cd4.t.helper, cd56.nk, memory.t, naive.t) counts(sce) <- as.matrix(counts(sce)) sce <- normalise(sce, exprs_values = "counts", return_log = TRUE, return_norm_as_exprs = TRUE) ## generates logcounts(sce)
Exclude features that are not expressed
keep_features <- rowSums(counts(sce) > 0) > 0 table(keep_features) sce <- sce[keep_features, ] dim(sce)
is.spike <- grepl("^ERCC", rowData(sce)$symbol) table(is.spike) summary(colSums(counts(sce[is.spike, ])))
sce <- calculateQCMetrics(sce)
We create a PCA plot based the quality metrics for each cell, e.g., the total number of reads, the total number of features and the proportion of spike-in reads.
sce <- scater::runPCA(sce, pca_data_input = "coldata") scater::plotPCA(sce, colour_by = "phenoid")
We remove cells with log-library sizes (or total features) that are more than 3 median absolute deviations (MADs) below the median log-library size (or total features).
colData(sce)$libsize.drop <- isOutlier(sce$total_counts, nmads = 3, type = "lower", log = TRUE) ggplot(as.data.frame(colData(sce)), aes(x = total_counts)) + geom_histogram(bins = 20, fill = "grey80") + xlab("Total count") + ylab("Number of cells") + geom_vline(xintercept = min(sce$total_counts[!sce$libsize.drop]), color = "red", linetype = "dashed") + theme_bw() colData(sce)$feature.drop <- isOutlier(sce$total_features, nmads = 3, type = "lower", log = TRUE) ggplot(as.data.frame(colData(sce)), aes(x = total_features)) + geom_histogram(bins = 20, fill = "grey80") + xlab("Number of detected features") + ylab("Number of cells") + geom_vline(xintercept = min(sce$total_features[!sce$feature.drop]), color = "red", linetype = "dashed") + theme_bw()
table(libsize = sce$libsize.drop, feature = sce$feature.drop) sce <- sce[, !(sce$libsize.drop | sce$feature.drop)] dim(sce)
plotQC(sce, type = "highest-expression", n = 50)
sce <- normalise(sce, exprs_values = "counts", return_log = TRUE, return_norm_as_exprs = TRUE) sce <- normalise(sce, exprs_values = "counts", return_log = FALSE, return_norm_as_exprs = FALSE)
expl_vars <- c("phenoid", "log10_total_counts", "log10_total_features", "pct_dropout", "pct_counts_top_200_features", "log10_counts_feature_controls", "pct_counts_feature_controls") plotQC(sce, type = "explanatory-variables", variables = expl_vars)
set.seed(1234) sce <- runTSNE(sce, exprs_values = "logcounts", perplexity = 10) plotTSNE(sce, colour_by = "phenoid") plotTSNE(sce, colour_by = "total_features", size_by = "total_counts")
sce <- sce[!grepl("^ERCC", rownames(sce)), ] dim(sce) table(sce$phenoid) saveRDS(sce, file = "../../data/sce_full/sce_full_Zhengmix8eq.rds")
date() sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.