library(SingleCellExperiment) library(Seurat) library(scater) library(scran) library(uwot) library(umap) library(DropletUtils) library(outliers) library(tidyverse) library(ggplot2) library(knitr) library(kableExtra) knitr::opts_chunk$set(warning = FALSE, message = FALSE) ifelse(!dir.exists(paste(getwd(), "data/proc", sep = "/")), dir.create(paste(getwd(), "data/proc", sep = "/"), showWarnings = FALSE, recursive = TRUE), FALSE) out_path <- paste(getwd(), "data/proc", sep = "/")
# load raw data # go to data/raw folder if only data name for params$obj_in; otherwise, load the file with the entire path if (file.exists(paste(paste(getwd(), "data/proc", sep = "/"), params$obj_in, sep = "/"))) { tmp = load(paste(paste(getwd(), "data/proc", sep = "/"), params$obj_in, sep = "/")) } else if (file.exists(params$obj_in)) { tmp = load(params$obj_in) } else { warning("Raw object does not exist") } obj_in = get(tmp) rm(tmp) # load QCed data and pull out cell and feature names # go to data/proc folder if only data name for params$obj_in_cell_feature; otherwise, load the file with the entire path if (file.exists(paste(paste(getwd(), "data/proc", sep = "/"), params$obj_in_cell_feature, sep = "/"))) { tmp = load(paste(paste(getwd(), "data/proc", sep = "/"), params$obj_in_cell_feature, sep = "/")) } else if (file.exists(params$obj_in_cell_feature)) { tmp = load(params$obj_in_cell_feature) } else { warning("List ovbject does not exist") } obj_in_cell_feature = get(tmp) rm(tmp) # filter raw data with QCed data # # rename rownames/colnames, find index of surface protein rownames(obj_in) <- rowData(obj_in)[, "Symbol"] indx <- which(grepl("_TotalSeqC", rownames(obj_in),fixed = TRUE)) rownames(obj_in) <- gsub(pattern = "_TotalSeqC", replacement = "", rownames(obj_in)) colnames(obj_in) <- gsub(pattern = "-1", replacement = "", colData(obj_in)$Barcode) # seprate rna and adt data rna <- obj_in[-indx, obj_in_cell_feature$cell_keep] rna <- rna[which(rownames(rna) %in% obj_in_cell_feature$feature_keep), ] adt <- obj_in[indx, obj_in_cell_feature$cell_keep] obj <- obj_in rm(obj_in, obj_in_cell_feature, indx)
sizeFactors(rna) <- librarySizeFactors(rna) summary(sizeFactors(rna)) rna <- scater::normalize(rna)
fit <- trendVar(rna, use.spikes = FALSE) dec <- decomposeVar(rna, fit) dec <- dec[order(dec$bio, decreasing = TRUE), ] hvg_genes <- rownames(dec)[dec$bio > 0]
# This method saves the percent variance explained per component as an attribute rna <- scater::runPCA(rna, ncomponents = 20, feature_set = hvg_genes) ## access the attribute where percentVar is saved in reducedDim pct_var_explained <- attr(reducedDim(rna, 'PCA'), 'percentVar') plot(pct_var_explained) rna <- scater::runUMAP(rna, use_dimred = 'PCA', n_dimred = 20)
set.seed(1234) snng <- buildSNNGraph(rna, k = 5, d = 20, assay.type = "logcounts") snng_clusters <- igraph::cluster_walktrap(snng) table(snng_clusters$membership) colData(rna)$clusters <- as.factor(snng_clusters$membership) colData(rna)$clusters print(plotReducedDim(rna, use_dimred = "UMAP", colour_by = "clusters")) sample_cluster = colData(rna)[, c("Barcode","clusters")]
rna_mk <- findMarkers(rna, clusters = colData(rna)$clusters, #subset.row = hvg_genes[1:250], subset.row = hvg_genes, lfc = 1.5, direction = 'up', log.p = TRUE, BPPARAM = BiocParallel::MulticoreParam())
for (i in 1:length(rna_mk@listData)) { cat("##### ", "cluster", i, "\n\n") tmp <- as.data.frame(rna_mk@listData[[i]]) tmp <- tmp[tmp$Top < 10, ] tmp$gene <- rownames(tmp) tmp <- tmp[, colnames(tmp) %in% c("Top", "gene")] print(kable(tmp) %>% kable_styling(bootstrap_options = "striped", full_width = F)) cat("\n\n") }
quick_clusters_adt <- quickCluster(adt, method = "hclust", min.size = 300, assay.type = "counts", use.ranks = TRUE) adt <- computeSumFactors(adt, clusters = quick_clusters_adt) adt <- scater::normalize(adt) adt <- scater::runPCA(adt, ncomponents = 10) adt <- scater::runUMAP(adt, use_dimred = 'PCA', n_dimred = 10) set.seed(1234) snng_adt <- buildSNNGraph(adt, k = 5, d = 20, assay.type = "logcounts") snng_adt_clusters <- igraph::cluster_walktrap(snng_adt) colData(adt)$clusters_prt <- as.factor(snng_adt_clusters$membership) colData(adt)$clusters_rna <- as.factor(snng_clusters$membership)
for (i in 1:length(rownames(adt))) { cat("###### ", rownames(adt)[i], "\n\n") print(plotExpression(adt, features = rownames(adt)[i], x = 'clusters_rna', colour_by = "clusters_rna")) cat("\n\n") }
for (i in 1:length(rownames(adt))) { cat("###### ", rownames(adt)[i], "\n\n") print(plotExpression(adt, features = rownames(adt)[i], x = 'clusters_prt', colour_by = "clusters_prt")) cat("\n\n") }
adt_s <- as.Seurat(adt, counts = "counts", data = "logcounts", assay = "adt") adt_s <- ScaleData(adt_s, assay = "adt") DoHeatmap(adt_s, features = rownames(adt_s), assay = "adt", group.by = "clusters_rna") + theme(legend.position = "bottom") + ggtitle("ADT on RNA") # tmp <- as.matrix(logcounts(adt)) # adt_s <- SetAssayData( # object = adt_s, # slot = "scale.data", # new.data = tmp, # assay = "adt" # ) # DoHeatmap(adt_s, features = rownames(adt_s), assay = "adt", group.by = "clusters_rna", # disp.min = min(tmp), disp.max = max(tmp)) + theme(legend.position = "bottom") + ggtitle("ADT on RNA") # DoHeatmap(adt_s, features = rownames(adt_s), assay = "adt", group.by = "clusters_prt", # disp.min = min(tmp), disp.max = max(tmp)) + theme(legend.position = "bottom") + ggtitle("ADT on ADT") # rm(tmp)
DoHeatmap(adt_s, features = rownames(adt_s), assay = "adt", group.by = "clusters_prt") + theme(legend.position = "bottom") + ggtitle("ADT on ADT")
obj_to_save <- list(bioc_rna = rna, bioc_rna_mk = rna_mk, bioc_adt = adt) save(obj_to_save, file = paste(out_path, params$obj_out, sep = "/"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.