library(DropletUtils) library(Seurat) library(outliers) library(tidyverse) flag <- tolower(params$flag)
if (flag == "seurat"){ print("QC based on Seurat object") # Load the .RData, and store the name of the loaded object in tmp tmp = load(params$obj_se) # Get the object by its name obj = get(tmp) rm(tmp) # add percent of mito to the object mito.features <- grep(pattern = "^MT-", x = rownames(x = obj), value = TRUE) percent.mt <- Matrix::colSums(x = GetAssayData(object = obj, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = obj, slot = 'counts')) obj[['percent.mt']] <- percent.mt } if (flag == "sce"){ print("QC based on SingleCellExperiment object") # Load the .RData, and store the name of the loaded object in tmp tmp = load(params$obj_sce) # Get the object by its name obj = get(tmp) rm(tmp) }
print(obj)
vln_before <- VlnPlot(obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) sct1_before <- FeatureScatter(obj, feature1 = "nCount_RNA", feature2 = "percent.mt") sct2_before <- FeatureScatter(obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") vln_before CombinePlots(plots = list(sct1_before, sct2_before)) #rank of cells(barcodes) on total UMI count average rank of each cell/barcode corss gene/total UMI count bcrank_before <- barcodeRanks(obj) uniq = !duplicated(bcrank_before$rank) plot(bcrank_before$rank[uniq], bcrank_before$total[uniq], log="xy", main = "barcode ranking - before QC", xlab="Rank", ylab="Total UMI count", cex=0.5, cex.lab=1.2) abline(h=metadata(bcrank_before)$inflection, col="blue", lty=2) abline(h=metadata(bcrank_before)$knee, col="red", lty=2) legend("left", legend=c("Inflection", "Knee"), bty="n", col=c("blue", "red"), lty=2, cex=1.2)
find_ot <- function(vec, prob){ otvalue <- vec[scores(vec, type = "chisq", prob = prob) == TRUE] lo = max(otvalue[otvalue<median(vec)]) hi = min(otvalue[otvalue>median(vec)]) return(list(lo,hi)) } # cell level ot_mito <- as.numeric(find_ot(obj@meta.data$percent.mt, params$cut_mt_se))[2] ot_nfeature <- as.numeric(find_ot(obj@meta.data$nFeature_RNA, params$cut_nFeature_se)) ot_ncount <- as.numeric(find_ot(obj@meta.data$nCount_RNA, 0.9)) # note: ot_ncount is not used in the subset() obj <- subset(obj, subset = nFeature_RNA > ot_nfeature[1] & nFeature_RNA < ot_nfeature[2] & percent.mt < ot_mito) assay <- as.matrix(GetAssayData(object = obj, slot = "counts")) genesum <- log(rowSums(assay) + 1 ) genelist <- names(genesum)[genesum > 0] obj <- subset(obj, features = c(genelist, rownames(obj@assays$ADT))) # have to specify ADT name to avoid deletion due to duplicate names between ADT and RNA print(obj) # plots after preliminary QC vln_after <- VlnPlot(obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) sct1_after <- FeatureScatter(obj, feature1 = "nCount_RNA", feature2 = "percent.mt") sct2_after <- FeatureScatter(obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") vln_after CombinePlots(plots = list(sct1_after, sct2_after)) # rank of cells(barcodes) on total UMI count average rank of each cell/barcode corss gene/total UMI count bcrank_after <- barcodeRanks(obj) uniq = !duplicated(bcrank_after$rank) plot(bcrank_after$rank[uniq], bcrank_after$total[uniq], log="xy", main = "barcode ranking - after QC", xlab="Rank", ylab="Total UMI count", cex=0.5, cex.lab=1.2) abline(h=metadata(bcrank_after)$inflection, col="blue", lty=2) abline(h=metadata(bcrank_after)$knee, col="red", lty=2) legend("left", legend=c("Inflection", "Knee"), bty="n", col=c("blue", "red"), lty=2, cex=1.2)
# remove all objects except obj # rm(list = setdiff(ls(), "obj") )
obj <- NormalizeData(obj, normalization.method = "LogNormalize", scale.factor = 10000) obj <- FindVariableFeatures(obj, selection.method = "vst", nfeatures = 2000) all.genes <- rownames(x = obj) # have check these 4 samples, they don't need regress out cc genes obj <- ScaleData(object = obj, all.genes, vars.to.regress = c("nCount_RNA", "percent.mito")) obj <- RunPCA(obj, features = VariableFeatures(object = obj)) obj <- FindNeighbors(obj, dims = 1:10) obj <- FindClusters(obj, resolution = 0.8) head(Idents(obj), 5) obj <- RunUMAP(obj, dims = 1:10) LabelClusters(plot = DimPlot(obj, reduction = "umap", group.by = "RNA_snn_res.0.8"), id = "RNA_snn_res.0.8", size = 4)
obj <- NormalizeData(obj, assay = "ADT", normalization.method = "CLR") obj <- ScaleData(obj, assay = "ADT") cd4 <- as.matrix(obj@assays$ADT@scale.data) cd4 <- cd4[rownames(cd4) == "CD4", ] cd4 <- data.frame(cd4) print("cd4 quantiles") quantile(cd4$cd4, prob = seq(0,1,0.05)) ggplot(cd4, aes(x=cd4)) + geom_density() + ggtitle("ADT cd4 distribution")
save(obj, file = params$obj_1st_qc)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.