library(DropletUtils) library(Seurat) library(outliers) library(tidyverse) library(ggplot2) library(knitr) library(kableExtra) knitr::opts_chunk$set(warning = FALSE, message = FALSE) ifelse(!dir.exists(paste(getwd(), "data/proc", sep = "/")), dir.create(paste(getwd(), "data/proc", sep = "/"), showWarnings = FALSE, recursive = TRUE), FALSE) out_path <- paste(getwd(), "data/proc", sep = "/")
# load raw data # go to data/raw folder if only data name for params$obj_in; otherwise, load the file with the entire path if (file.exists(paste(paste(getwd(), "data/proc", sep = "/"), params$obj_in, sep = "/"))){ tmp = load(paste(paste(getwd(), "data/proc", sep = "/"), params$obj_in, sep = "/")) } else if (file.exists(params$obj_in)) { tmp = load(params$obj_in) } else { warning("Raw object does not exist") } obj_in = get(tmp) rm(tmp) # load QCed data and pull out cell and feature names # go to data/proc folder if only data name for params$obj_in_cell_feature; otherwise, load the file with the entire path if (file.exists(paste(paste(getwd(), "data/proc", sep = "/"), params$obj_in_cell_feature, sep = "/"))){ tmp = load(paste(paste(getwd(), "data/proc", sep = "/"), params$obj_in_cell_feature, sep = "/")) } else if (file.exists(params$obj_in_cell_feature)) { tmp = load(params$obj_in_cell_feature) } else { warning("List ovbject does not exist") } obj_in_cell_feature = get(tmp) rm(tmp) # filter raw data with QCed data obj_in <- subset(x = obj_in, cells = obj_in_cell_feature$cell_keep) obj_in <- subset(x = obj_in, features = c(obj_in_cell_feature$feature_keep, rownames(obj_in@assays$ADT))) # add ADT features to avoid accidental removal obj <- obj_in rm(obj_in, obj_in_cell_feature)
# standard log-normalization obj <- NormalizeData(obj)
obj <- FindVariableFeatures(obj, selection.method = "vst", nfeatures = 2000) # Identify the 10 most highly variable genes top10_hvg <- head(VariableFeatures(obj), 10) # plot variable features with labels plot1 <- VariableFeaturePlot(obj) + theme(legend.position="bottom") + theme(legend.title=element_blank()) plot2 <- LabelPoints(plot = plot1, points = top10_hvg, repel = TRUE) + theme(legend.position="bottom") + theme(legend.title=element_blank()) print(plot2)
# standard scaling (no regression), scale on 2000 variables features obj <- ScaleData(object = obj, vars.to.regress = c("nCount_RNA", "percent.mito"))
# Run PCA obj <- RunPCA(obj, verbose = FALSE) # Examine and visualize PCA results a few different ways print(obj[["pca"]], dims = 1:5, nfeatures = 5) print(VizDimLoadings(obj, dims = 1:2, reduction = "pca")) DimHeatmap(obj, dims = 1:9, cells = 500, balanced = TRUE) # elbowplot to show the percentage of variance explained by each PCA ElbowPlot(obj, ndims = 30)
obj <- FindNeighbors(obj, dims = 1:25) obj <- FindClusters(obj, resolution = 0.8, verbose = FALSE) obj <- RunTSNE(obj, dims = 1:15, method = "FIt-SNE") obj <- RunUMAP(obj, dims = 1:15)
# Find the markers that define each cluster, and use these to annotate the clusters, we use # max.cells.per.ident to speed up the process, and report only the positive ones rna_markers <- FindAllMarkers(obj, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, max.cells.per.ident = 100) print(DimPlot(obj, label = TRUE) + NoLegend()) # heatmap on top10 markers top10 <- rna_markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC) print(DoHeatmap(obj, features = top10$gene) + NoLegend() + ggtitle("top10 markers") + theme(axis.text=element_text(size = 4))) topn_gene <- rna_markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC) %>% mutate_if(is.numeric, round, 3) %>% select(gene, cluster, avg_logFC, pct.1, pct.2, p_val, p_val_adj)
for (i in 1:length(unique(rna_markers$cluster))) { j = i - 1 cat("###### ", "cluster", i, "\n\n") tmp <- topn_gene[topn_gene$cluster == j, ] print(kable(tmp, caption = "table of top10 markers (positive, avg_logFC 0.25)") %>% kable_styling(bootstrap_options = "striped", full_width = F)) }
obj <- NormalizeData(obj, assay = "ADT", normalization.method = "CLR") obj <- ScaleData(obj, assay = "ADT")
len <- length(rownames(obj@assays$ADT)) j <- 1 for (i in seq(from = 2, to = len, by = 2)){ print(RidgePlot(obj, features = rownames(obj@assays$ADT)[j:i], assay = "ADT", same.y.lims = TRUE, log = TRUE, ncol = 2)) j <- j + 2 } rm(len, j, i)
# Downsample the clusters to a maximum of 300 cells each (makes the heatmap easier to see for # small clusters) obj_small <- subset(obj, downsample = 300) # Find protein markers for all clusters, and draw a heatmap adt_markers <- FindAllMarkers(obj_small, assay = "ADT", only.pos = TRUE) DoHeatmap(obj_small, features = rownames(obj@assays$ADT), assay = "ADT", angle = 90) + NoLegend() + ggtitle("ADT on RNA, all") DoHeatmap(obj_small, features = unique(adt_markers$gene), assay = "ADT", angle = 90) + NoLegend() + ggtitle("ADT on RNA, DE markers")
DefaultAssay(obj) <- "ADT" obj <- RunPCA(obj, features = rownames(obj), reduction.name = "pca_adt", reduction.key = "pca_adt_", verbose = FALSE) # DimPlot(obj, reduction = "pca_adt") + ggtitle("ADT on ADT, all") adt.data <- GetAssayData(obj, slot = "data") adt.dist <- dist(t(adt.data)) # stash RNA cluster IDs for later obj[["rnaClusterID"]] <- Idents(obj) # rerun tSNE using our distance matrix defined only on ADT (protein) levels. obj[["tsne_adt"]] <- RunTSNE(adt.dist, assay = "ADT", reduction.key = "adtTSNE_") obj[["umap_adt"]] <- RunUMAP(adt.dist, assay = "ADT", reduction.key = "adtUMAP_") obj[["adt_snn"]] <- FindNeighbors(adt.dist)$snn obj <- FindClusters(obj, resolution = 0.2, graph.name = "adt_snn") tsne_rnaClusters <- DimPlot(obj, reduction = "tsne_adt", group.by = "rnaClusterID") + NoLegend() tsne_rnaClusters <- tsne_rnaClusters + ggtitle("ADT on RNA") + theme(plot.title = element_text(hjust = 0.5)) tsne_rnaClusters <- LabelClusters(plot = tsne_rnaClusters, id = "rnaClusterID", size = 4) tsne_adtClusters <- DimPlot(obj, reduction = "tsne_adt", pt.size = 0.5) + NoLegend() tsne_adtClusters <- tsne_adtClusters + ggtitle("ADT on ADT") + theme(plot.title = element_text(hjust = 0.5)) tsne_adtClusters <- LabelClusters(plot = tsne_adtClusters, id = "ident", size = 4) CombinePlots(plots = list(tsne_rnaClusters, tsne_adtClusters), ncol = 2)
obj_to_save <- list(seurat = obj, seurat_rna_mk = rna_markers, seurat_adt_mk = adt_markers) save(obj_to_save, file = paste(out_path, params$obj_out, sep = "/"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.