DOCNAME = knitr::current_input() knitr::opts_chunk$set(autodep = TRUE, cache = FALSE, cache.path = paste0("cache/", DOCNAME, "/"), cache.comments = TRUE, echo = TRUE, error = FALSE, fig.align = "center", fig.path = paste0("../reports/figures/", DOCNAME, "/"), fig.width = 8, fig.height = 5, message = FALSE, warning = FALSE)
Here is what I am going to do...
library(rhdf5) library(tidyverse) library(Seurat) library(patchwork) random_seed <- 12345
cc <- readRDS('../genomes/mmu/mouse_cell_cycle_genes.rds') counts <- read.csv('data/counts.csv') metadata <- read.csv('data/metadata.csv') raw_ann <- CreateSeuratObject(counts, meta.data = metadata, min.cells = 3)
raw_ann[['percent.mito']] <- PercentageFeatureSet(raw_ann, pattern = "^Mt-") raw_ann[['percent.ercc']] <- PercentageFeatureSet(raw_ann, pattern = "^ERCC-") raw_ann[['percent.ribo']] <- PercentageFeatureSet(raw_ann, pattern = "^Rp[ls]")
p1 <- VlnPlot(raw_ann, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4) p2 <- FeatureScatter(raw_ann, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "Stage") CombinePlots(plots = list(p1,p2), ncol = 1)
print(paste0("Before filtering: ", dim(raw_ann)[2], " cells ", dim(raw_ann)[1], " genes"))
Remove ERCC if necessary
raw_ann <- raw_ann[rownames(raw_ann)[!grepl('ERCC-', rownames(raw_ann))], ]
Remove outlier cells using quantiles and cells with more than 5% of mitochondrial counts
feature_max <- quantile(raw_ann$nFeature_RNA, probs = 0.99) feature_min <- quantile(raw_ann$nFeature_RNA, probs = 0.01) count_max <- quantile(raw_ann$nCount_RNA, probs = 0.99) count_min <- quantile(raw_ann$nCount_RNA, probs = 0.01)
adata <- subset(raw_ann, subset = nFeature_RNA > feature_min & nFeature_RNA < feature_max & nCount_RNA > count_min & nCount_RNA < count_max & percent.ribo < 5) rm(raw_ann)
Find very low expressed genes
gene_counts <- data.frame(counts = rowSums(data.frame(GetAssayData(adata)))) ggplot(gene_counts) + geom_density(aes (x = log10(counts)))# + scale_x_continuous(limits = c(0,1000))
Remove quantile 0.01
min_gene_counts <- quantile(gene_counts$counts, probs = 0.01) adata <- subset(adata, features = rownames(gene_counts)[gene_counts$counts > min_gene_counts])
p1 <- VlnPlot(adata, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4) p2 <- FeatureScatter(adata, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "Stage") CombinePlots(plots = list(p1,p2), ncol = 1)
print(paste0("After filtering: ", dim(adata)[2], " cells ", dim(adata)[1], " genes"))
adata <- NormalizeData(adata) adata <- FindVariableFeatures(adata, selection.method = "vst", nfeatures = 2000)
adata <- ScaleData(adata, features = rownames(adata))
adata <- CellCycleScoring(adata, s.features = cc$s.genes, g2m.features = cc$g2m.genes, set.ident = TRUE)
adata <- RunPCA(adata, features = VariableFeatures(object = adata), seed.use = random_seed)
pcalod_1 <- VizDimLoadings(object = adata, dims = 1) + theme(axis.text.y = element_text(size = 8)) pcalod_2 <- VizDimLoadings(object = adata, dims = 2) + theme(axis.text.y = element_text(size = 8)) CombinePlots(plots = list(pcalod_1, pcalod_2), ncol = 2)
DimPlot(adata, reduction = "pca", group.by = c('Stage'))
adata <- FindNeighbors(adata, dims = 1:20) adata <- FindClusters(adata, random.seed = random_seed)
adata <- RunTSNE(adata, seed.use = random_seed) adata <- RunUMAP(adata, dims = 1:20, seed.use = random_seed)
p1 <- DimPlot(adata, reduction = "umap", group.by = 'Stage') p2 <- DimPlot(adata, reduction = "umap") p1 + p2
p3 <- DimPlot(adata, reduction = "tsne", group.by = 'Stage') p4 <- DimPlot(adata, reduction = "tsne") p3 + p4
Factor preparation for scViz
cell_metadata <- adata@meta.data sapply(cell_metadata,class)
cell_metadata <- cell_metadata %>% mutate_if(is.character,as.factor) adata@meta.data <- cell_metadata
saveRDS(adata, file = "../../../services/scRNAviz/data/XXXXX.rds") saveRDS(adata, file = "../data/processed/XXXXX.rds")
devtools::session_info()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.