DOCNAME = knitr::current_input()
knitr::opts_chunk$set(autodep        = TRUE,
                      cache          = FALSE,
                      cache.path     = paste0("cache/", DOCNAME, "/"),
                      cache.comments = TRUE,
                      echo           = TRUE,
                      error          = FALSE,
                      fig.align      = "center",
                      fig.path       = paste0("../reports/figures/", DOCNAME, "/"),
                      fig.width      = 8,
                      fig.height     = 5,
                      message        = FALSE,
                      warning        = FALSE)

Introduction

Here is what I am going to do...

Load libraries

library(rhdf5)
library(tidyverse)
library(Seurat)
library(patchwork)
random_seed <- 12345
cc <- readRDS('../genomes/mmu/mouse_cell_cycle_genes.rds')
counts <- read.csv('data/counts.csv')
metadata <- read.csv('data/metadata.csv')
raw_ann <- CreateSeuratObject(counts, meta.data = metadata, min.cells = 3)

QC

Before filtering

raw_ann[['percent.mito']] <- PercentageFeatureSet(raw_ann, pattern = "^Mt-")
raw_ann[['percent.ercc']] <- PercentageFeatureSet(raw_ann, pattern = "^ERCC-")
raw_ann[['percent.ribo']] <- PercentageFeatureSet(raw_ann, pattern = "^Rp[ls]")
p1 <- VlnPlot(raw_ann, 
        features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"),
        ncol = 4)
p2 <- FeatureScatter(raw_ann, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "Stage")

CombinePlots(plots = list(p1,p2), ncol = 1)
print(paste0("Before filtering: ", dim(raw_ann)[2], " cells ",  dim(raw_ann)[1], " genes"))

Filtering

Remove ERCC if necessary

raw_ann <- raw_ann[rownames(raw_ann)[!grepl('ERCC-', rownames(raw_ann))], ]

Remove outlier cells using quantiles and cells with more than 5% of mitochondrial counts

feature_max <- quantile(raw_ann$nFeature_RNA, probs = 0.99)
feature_min <- quantile(raw_ann$nFeature_RNA, probs = 0.01)
count_max <- quantile(raw_ann$nCount_RNA, probs = 0.99)
count_min <- quantile(raw_ann$nCount_RNA, probs = 0.01)
adata <- subset(raw_ann, subset = 
                    nFeature_RNA > feature_min & nFeature_RNA < feature_max & 
                    nCount_RNA > count_min & nCount_RNA < count_max & percent.ribo < 5)
rm(raw_ann)

Find very low expressed genes

gene_counts <- data.frame(counts = rowSums(data.frame(GetAssayData(adata))))

ggplot(gene_counts) + geom_density(aes (x = log10(counts)))# + scale_x_continuous(limits = c(0,1000))

Remove quantile 0.01

min_gene_counts <- quantile(gene_counts$counts, probs = 0.01)
adata <- subset(adata, features = rownames(gene_counts)[gene_counts$counts > min_gene_counts])
p1 <- VlnPlot(adata, 
        features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"),
        ncol = 4)
p2 <- FeatureScatter(adata, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "Stage")

CombinePlots(plots = list(p1,p2), ncol = 1)
print(paste0("After filtering: ", dim(adata)[2], " cells ",  dim(adata)[1], " genes"))

Normalization

adata <- NormalizeData(adata)
adata <- FindVariableFeatures(adata, selection.method = "vst", nfeatures = 2000)

Scale

adata <- ScaleData(adata, features = rownames(adata))

Cell Cycle

adata <- CellCycleScoring(adata, s.features = cc$s.genes, g2m.features = cc$g2m.genes, set.ident = TRUE)

PCA

adata <- RunPCA(adata, features = VariableFeatures(object = adata), seed.use = random_seed)
pcalod_1 <- VizDimLoadings(object = adata, dims = 1) + theme(axis.text.y = element_text(size = 8)) 
pcalod_2 <- VizDimLoadings(object = adata, dims = 2) + theme(axis.text.y = element_text(size = 8))

CombinePlots(plots = list(pcalod_1, pcalod_2), ncol = 2)
DimPlot(adata, reduction = "pca", group.by = c('Stage'))

Clustering

adata <- FindNeighbors(adata, dims = 1:20)
adata <- FindClusters(adata, random.seed = random_seed)

Visualization

adata <- RunTSNE(adata, seed.use = random_seed)
adata <- RunUMAP(adata, dims = 1:20, seed.use = random_seed)

UMAP

p1 <- DimPlot(adata, reduction = "umap", group.by = 'Stage')
p2 <- DimPlot(adata, reduction = "umap")
p1 + p2

TSNE

p3 <- DimPlot(adata, reduction = "tsne", group.by = 'Stage')
p4 <- DimPlot(adata, reduction = "tsne")
p3 + p4

scViz

Factor preparation for scViz

cell_metadata <- adata@meta.data
sapply(cell_metadata,class)
cell_metadata <- cell_metadata %>% mutate_if(is.character,as.factor)
adata@meta.data <- cell_metadata
saveRDS(adata, file = "../../../services/scRNAviz/data/XXXXX.rds")
saveRDS(adata, file = "../data/processed/XXXXX.rds")

Session info

devtools::session_info()


brickmanlab/commonR documentation built on Dec. 19, 2021, 11:44 a.m.