library(knitr) opts_chunk$set(fig.align = 'center', fig.width = 6, fig.height = 5, dev = 'png')
'FastSC3' is an extension of the 'SC3' package to very large datasets. While using some approximations, it allows a user to cluster the dataset without using the hybrid (SVM) strategy and to work on the total number of cells.
'FastSC3' depends on 'SC3' so that all the 'SC3' methods are accessible from 'FastSC3'.
FastSC3
InputThe input for FastSC3 is an object of class scater. Here we will create it from the dataset ('treutlein') that comes with the package:
Macosko
library(scater) library(FastSC3) library(pheatmap) # macosko <- readRDS("~/Dropbox/scRNASeq-public-datasets/drosophila-retina/intermediate/macosko.rds") # macosko <- macosko[!duplicated(unlist(lapply(strsplit(rownames(macosko), ":"), "[[", 3))), ] # rownames(macosko) <- unlist(lapply(strsplit(rownames(macosko), ":"), "[[", 3)) # cell_info <- data.frame(cell_id = as.numeric(colnames(macosko))) # cell_info$cell_type <- "rods" # cell_info$cell_type[cell_info$cell_id == 1] <- "horizontal" # cell_info$cell_type[cell_info$cell_id == 2] <- "ganglion" # cell_info$cell_type[cell_info$cell_id %in% 3:23] <- "amacrine" # cell_info$cell_type[cell_info$cell_id == 25] <- "cones" # cell_info$cell_type[cell_info$cell_id %in% 26:33] <- "bipolar" # cell_info$cell_type[cell_info$cell_id == 34] <- "muller" # cell_info$cell_type[cell_info$cell_id == 35] <- "astrocytes" # cell_info$cell_type[cell_info$cell_id == 36] <- "fibroblasts" # cell_info$cell_type[cell_info$cell_id == 37] <- "vascular_endothelium" # cell_info$cell_type[cell_info$cell_id == 38] <- "pericytes" # cell_info$cell_type[cell_info$cell_id == 39] <- "microglia" # cell_inds <- paste("Cell", 1:ncol(macosko), sep = "_") # rownames(cell_info) <- cell_inds # cell_exprs <- macosko # colnames(cell_exprs) <- cell_inds # pd <- new("AnnotatedDataFrame", data = cell_info) # sceset <- newSCESet(countData = cell_exprs, phenoData = pd) # sceset <- calculateQCMetrics(sceset) # sceset@featureData@data$feature_symbol <- featureNames(sceset) # saveRDS(sceset, file = "~/Dropbox/scRNASeq-public-datasets/drosophila-retina/macosko_sceset.rds") sceset <- readRDS("~/Dropbox/scRNASeq-public-datasets/drosophila-retina/macosko_sceset.rds") gc() ptm <- proc.time() # prepare for k prediction sceset <- sc3_prepare(sceset, ks = 38:40, svm_max = Inf) sceset <- fsc3_get_hyperplanes(sceset, n_genes = 100, suppress_plot = F) sceset <- fsc3_get_signatures(sceset) sceset <- fsc3_get_buckets(sceset, common_bits = 75) sceset <- fsc3_get_buckets_signatures(sceset) sceset <- fsc3_select_repr_cells(sceset) # run normal SC3 pipeline on the selected cells sceset <- sc3_calc_dists(sceset) sceset <- sc3_calc_transfs(sceset) sceset <- sc3_kmeans(sceset) sceset <- sc3_calc_consens(sceset) # infer cluster indexes from buckets sceset <- fsc3_get_clusters(sceset, k = k) # summarise results sceset <- sc3_summarise_results(sceset, k = k) mclust::adjustedRandIndex(sceset@sc3$results$clusters[,1], as.numeric(pData(sceset)[,1])) proc.time() - ptm gc()
Shekhar
library(scater) library(FastSC3) library(pheatmap) # # shekhar <- readRDS("~/Dropbox/scRNASeq-public-datasets/drosophila-retina/intermediate/shekhar.rds") # cell_info <- readRDS("~/Dropbox/scRNASeq-public-datasets/drosophila-retina/intermediate/shekhar_ann.rds") # cell_info$cell_type <- "unknown" # cell_info$cell_type[cell_info$louvain_clusts %in% c(1, 3:15)] <- "bipolar" # cell_info$cell_type[cell_info$louvain_clusts == 2] <- "muller" # cell_info$cell_type[cell_info$louvain_clusts == 16] <- "amacrine" # cell_info$cell_type[cell_info$louvain_clusts == 20] <- "rods" # cell_info$cell_type[cell_info$louvain_clusts == 22] <- "cones" # pd <- new("AnnotatedDataFrame", data = cell_info) # sceset <- newSCESet(countData = shekhar, phenoData = pd) # sceset <- calculateQCMetrics(sceset) # sceset@featureData@data$feature_symbol <- featureNames(sceset) # saveRDS(sceset, file = "~/Dropbox/scRNASeq-public-datasets/drosophila-retina/shekhar_sceset.rds") sceset <- readRDS("~/Dropbox/scRNASeq-public-datasets/drosophila-retina/shekhar_sceset.rds") gc() ptm <- proc.time() # prepare for k prediction sceset <- sc3_prepare(sceset, ks = 2:26, svm_max = Inf) sceset <- fsc3_get_features(sceset) sceset <- fsc3_get_signatures(sceset) sceset <- fsc3_get_buckets(sceset, common_bits = 61, runs = 10) sceset <- fsc3_get_buckets_signatures(sceset) mclust::adjustedRandIndex(sceset@sc3$buckets, as.numeric(pData(sceset)[,1])) sceset <- fsc3_select_repr_cells(sceset) # run normal SC3 pipeline on the selected cells sceset <- sc3_calc_dists(sceset) sceset <- sc3_calc_transfs(sceset) sceset <- sc3_kmeans(sceset) sceset <- sc3_calc_consens(sceset) # infer cluster indexes from buckets sceset <- fsc3_get_clusters(sceset, k = 10) # summarise results sceset <- sc3_summarise_results(sceset, k = 10) mclust::adjustedRandIndex(sceset@sc3$results$clusters[,1], as.numeric(pData(sceset)[,1])) proc.time() - ptm gc()
Pollen
library(scater) library(FastSC3) # pollen2 <- readRDS("~/Dropbox/scRNASeq-public-datasets/pollen2.rds") # cell_info <- data.frame(cell_id = colnames(pollen2)) # cell_inds <- paste("Cell", 1:ncol(pollen2), sep = "_") # rownames(cell_info) <- cell_inds # cell_exprs <- pollen2 # colnames(cell_exprs) <- cell_inds # pd <- new("AnnotatedDataFrame", data = cell_info) # sceset <- newSCESet(tpmData = cell_exprs, phenoData = pd) # is_exprs(sceset) <- exprs(sceset) > 0 # sceset <- calculateQCMetrics(sceset) # saveRDS(sceset, file = "~/Dropbox/scRNASeq-public-datasets/pollen2_sceset.rds") sceset <- readRDS("~/Dropbox/scRNASeq-public-datasets/pollen2_sceset.rds") # prepare for k prediction sceset <- sc3_prepare(sceset, ks = 8:14) # prepare for M3Drop + LSH sceset <- fsc3_get_hyperplanes(sceset, n_genes = 100, suppress_plot = F) sceset <- fsc3_get_signatures(sceset) sceset <- fsc3_get_buckets(sceset, common_bits = 75) mclust::adjustedRandIndex(sceset@sc3$buckets, as.numeric(pData(sceset)[,1])) sceset <- fsc3_get_buckets_signatures(sceset) sceset <- fsc3_select_repr_cells(sceset) # run normal SC3 pipeline on the selected cells sceset <- sc3_calc_dists(sceset) sceset <- sc3_calc_transfs(sceset) sceset <- sc3_kmeans(sceset) sceset <- sc3_calc_consens(sceset) # infer cluster indexes from buckets sceset <- fsc3_get_clusters(sceset, k = 11) # summarise results sceset <- sc3_summarise_results(sceset, k = 11) mclust::adjustedRandIndex(sceset@sc3$results$clusters[,1], as.numeric(pData(sceset)[,1])) plot_sankey(as.numeric(pData(sceset)[,1]), sceset@sc3$results$clusters[,1])
Zeisel
library(scater) library(FastSC3) # zeisel <- readRDS("~/Dropbox/scRNASeq-public-datasets/zeisel.rds") # cell_info <- data.frame(cell_id = colnames(zeisel)) # cell_inds <- paste("Cell", 1:ncol(zeisel), sep = "_") # rownames(cell_info) <- cell_inds # cell_exprs <- zeisel # colnames(cell_exprs) <- cell_inds # pd <- new("AnnotatedDataFrame", data = cell_info) # sceset <- newSCESet(countData = cell_exprs, phenoData = pd) # sceset <- calculateQCMetrics(sceset) # saveRDS(sceset, file = "~/Dropbox/scRNASeq-public-datasets/zeisel_sceset.rds") sceset <- readRDS("~/Dropbox/scRNASeq-public-datasets/zeisel_sceset.rds") # prepare for k prediction sceset <- sc3_prepare(sceset) # sceset <- sc3_estimate_k(sceset) k <- 5:11 sceset <- sc3_set_ks(sceset, ks = k) # prepare for M3Drop + LSH sceset <- fsc3_get_hyperplanes(sceset, n_genes = 10, suppress_plot = F) sceset <- fsc3_get_signatures(sceset) sceset <- fsc3_get_buckets(sceset, common_bits = 7) # mclust::adjustedRandIndex(sceset@sc3$buckets, as.numeric(pData(sceset)[,1])) sceset <- fsc3_get_buckets_signatures(sceset) sceset <- fsc3_select_repr_cells(sceset) # run normal SC3 pipeline on the selected cells sceset <- sc3_calc_dists(sceset) sceset <- sc3_calc_transfs(sceset) sceset <- sc3_kmeans(sceset) sceset <- sc3_calc_consens(sceset) # infer cluster indexes from buckets sceset <- fsc3_get_clusters(sceset, k = 8) # summarise results sceset <- sc3_summarise_results(sceset, k = 8) mclust::adjustedRandIndex(sceset@sc3$results$clusters[,1], as.numeric(pData(sceset)[,1])) plot_sankey(as.numeric(pData(sceset)[,1]), sceset@sc3$results$clusters[,1])
Klein
library(scater) library(FastSC3) # klein <- readRDS("~/Dropbox/scRNASeq-public-datasets/klein.rds") # cell_info <- data.frame(cell_id = colnames(klein)) # cell_inds <- paste("Cell", 1:ncol(klein), sep = "_") # rownames(cell_info) <- cell_inds # cell_exprs <- klein # colnames(cell_exprs) <- cell_inds # pd <- new("AnnotatedDataFrame", data = cell_info) # sceset <- newSCESet(countData = cell_exprs, phenoData = pd) # sceset <- calculateQCMetrics(sceset) # saveRDS(sceset, file = "~/Dropbox/scRNASeq-public-datasets/klein_sceset.rds") sceset <- readRDS("~/Dropbox/scRNASeq-public-datasets/klein_sceset.rds") # prepare for k prediction sceset <- sc3_prepare(sceset) # sceset <- sc3_estimate_k(sceset) k <- 3:6 sceset <- sc3_set_ks(sceset, ks = k) # prepare for M3Drop + LSH sceset <- fsc3_get_hyperplanes(sceset, n_genes = 100, suppress_plot = F) sceset <- fsc3_get_signatures(sceset) sceset <- fsc3_get_buckets(sceset, common_bits = 65, runs = 10) mclust::adjustedRandIndex(sceset@sc3$buckets, as.numeric(pData(sceset)[,1])) sceset <- fsc3_get_buckets_signatures(sceset) sceset <- fsc3_select_repr_cells(sceset) # run normal SC3 pipeline on the selected cells sceset <- sc3_calc_dists(sceset) sceset <- sc3_calc_transfs(sceset) sceset <- sc3_kmeans(sceset) sceset <- sc3_calc_consens(sceset) # infer cluster indexes from buckets sceset <- fsc3_get_clusters(sceset, k = 4) # summarise results sceset <- sc3_summarise_results(sceset, k = 4) mclust::adjustedRandIndex(sceset@sc3$results$clusters[,1], as.numeric(pData(sceset)[,1]))
Kolodziejczyk
library(scater) library(FastSC3) # kolodziejczyk <- readRDS("~/Dropbox/scRNASeq-public-datasets/kolodziejczyk.rds") # cell_info <- data.frame(cell_id = colnames(kolodziejczyk)) # cell_inds <- paste("Cell", 1:ncol(kolodziejczyk), sep = "_") # rownames(cell_info) <- cell_inds # cell_exprs <- kolodziejczyk # colnames(cell_exprs) <- cell_inds # pd <- new("AnnotatedDataFrame", data = cell_info) # sceset <- newSCESet(tpmData = cell_exprs, phenoData = pd) # is_exprs(sceset) <- exprs(sceset) > 0 # sceset <- calculateQCMetrics(sceset) # saveRDS(sceset, file = "~/Dropbox/scRNASeq-public-datasets/kolodziejczyk_sceset.rds") sceset <- readRDS("~/Dropbox/scRNASeq-public-datasets/kolodziejczyk_sceset.rds") # prepare for k prediction sceset <- sc3_prepare(sceset) # sceset <- sc3_estimate_k(sceset) k <- 3:5 sceset <- sc3_set_ks(sceset, ks = k) # prepare for M3Drop + LSH sceset <- fsc3_get_hyperplanes(sceset, n_genes = 100, suppress_plot = F) sceset <- fsc3_get_signatures(sceset) sceset <- fsc3_get_buckets(sceset, common_bits = 70) mclust::adjustedRandIndex(sceset@sc3$buckets, as.numeric(pData(sceset)[,1])) sceset <- fsc3_get_buckets_signatures(sceset) sceset <- fsc3_select_repr_cells(sceset) # run normal SC3 pipeline on the selected cells sceset <- sc3_calc_dists(sceset) sceset <- sc3_calc_transfs(sceset) sceset <- sc3_kmeans(sceset) sceset <- sc3_calc_consens(sceset) # infer cluster indexes from buckets sceset <- fsc3_get_clusters(sceset, k = 3) # summarise results sceset <- sc3_summarise_results(sceset, k = 3) mclust::adjustedRandIndex(sceset@sc3$results$clusters[,1], as.numeric(pData(sceset)[,1]))
FJLT
sceset <- readRDS("~/Dropbox/scRNASeq-public-datasets/pollen2_sceset.rds") # prepare for k prediction sceset <- sc3_prepare(sceset) # sceset <- sc3_estimate_k(sceset) k <- 11 sceset <- sc3_set_ks(sceset, ks = k) # prepare for FJLT + LSH sceset <- fsc3_fjlt(sceset) sceset <- fsc3_get_signatures_fjlt(sceset) sceset <- fsc3_get_buckets(sceset, common_bits = 80)
sc3_prepare
# Note that n.cores = 1 is required for compilation of this vignette. # Please remove this parameter when running on your computer: # sceset <- sc3_prepare(sceset) sceset <- sc3_prepare(sceset, n.cores = 1) sceset <- sc3_estimate_k(sceset) sceset <- sc3_set_ks(sceset, ks = 5)
sceset <- fsc3_fjlt(sceset) pheatmap::pheatmap(sceset@sc3$fjlt)
sceset <- sc3_calc_dists(sceset)
sceset <- fsc3_calc_transfs(sceset)
sceset <- fsc3_calc_eigenv(sceset)
sceset <- sc3_kmeans(sceset)
sceset <- sc3_calc_consens(sceset)
sceset <- sc3_summarise_results(sceset, k = 5)
shekhar <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-retina/shekhar_sceset.rds") shekhar <- fsc3_get_features(shekhar) macosko <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-retina/macosko_sceset.rds") macosko <- fsc3_get_features(macosko) shekh <- featureNames(shekhar)[fData(shekhar)$fsc3_features] mac <- featureNames(macosko)[fData(macosko)$fsc3_features] upset(fromList(list(Shekhar = shekh, Macosko = mac)), order.by = "freq", nsets = 2) # Assignments based on Shekhar common_features <- get_common_fsc3_features(list(shekhar)) macosko <- fsc3_set_features(macosko, common_features) # update common_features common_features <- featureNames(macosko)[fData(macosko)$fsc3_features] shekhar <- fsc3_set_features(shekhar, common_features) shekhar <- fsc3_get_signatures(shekhar) macosko <- fsc3_get_signatures(macosko) p_data <- pData(shekhar) p_data$fsc3_buckets <- p_data$cell_type pData(shekhar) <- new("AnnotatedDataFrame", data = p_data) shekhar <- fsc3_get_buckets_signatures(shekhar, threshold = 0.6) macosko <- fsc3_assign_signatures(macosko, shekhar, threshold = 0.6) if(is.factor(pData(macosko)$cell_type)) { labs_orig <- levels(pData(macosko)$cell_type)[pData(macosko)$cell_type] } else { labs_orig <- pData(macosko)$cell_type } labs_new <- pData(macosko)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) irr::kappa2(cbind(labs_orig, labs_new))$value mclust::adjustedRandIndex(labs_orig, labs_new) # Assignments based on Macosko macosko <- fsc3_get_features(macosko) common_features <- get_common_fsc3_features(list(macosko)) shekhar <- fsc3_set_features(shekhar, common_features) # update common_features common_features <- featureNames(shekhar)[fData(shekhar)$fsc3_features] macosko <- fsc3_set_features(macosko, common_features) shekhar <- fsc3_get_signatures(shekhar) macosko <- fsc3_get_signatures(macosko) p_data <- pData(macosko) p_data$fsc3_buckets <- p_data$cell_type pData(macosko) <- new("AnnotatedDataFrame", data = p_data) macosko <- fsc3_get_buckets_signatures(macosko, threshold = 0.6) shekhar <- fsc3_assign_signatures(shekhar, macosko, threshold = 0.6) if(is.factor(pData(shekhar)$cell_type)) { labs_orig <- levels(pData(shekhar)$cell_type)[pData(shekhar)$cell_type] } else { labs_orig <- pData(shekhar)$cell_type } labs_new <- pData(shekhar)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) irr::kappa2(cbind(labs_orig, labs_new))$value mclust::adjustedRandIndex(labs_orig, labs_new) common_features <- get_common_fsc3_features(list(shekhar, macosko)) shekhar <- fsc3_set_features(shekhar, common_features) macosko <- fsc3_set_features(macosko, common_features) shekhar <- fsc3_get_signatures(shekhar) macosko <- fsc3_get_signatures(macosko) # Assignments based on Shekhar p_data <- pData(shekhar) p_data$fsc3_buckets <- p_data$cell_type pData(shekhar) <- new("AnnotatedDataFrame", data = p_data) shekhar <- fsc3_get_buckets_signatures(shekhar, threshold = 0.8) macosko <- fsc3_assign_signatures(macosko, shekhar, threshold = 0.7) if(is.factor(pData(macosko)$cell_type)) { labs_orig <- levels(pData(macosko)$cell_type)[pData(macosko)$cell_type] } else { labs_orig <- pData(macosko)$cell_type } labs_new <- pData(macosko)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) # plot_sankey(labs_orig, labs_new, colors = c("#FF0000","#FFA500","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#008000","#57A5D6","#57A5D6","#0000FF","#0000FF","#0000FF","#0000FF","#0000FF","#0000FF","#0000FF","#0000FF","#800080","#800080","#800080","#800080","#800080","#800080")) irr::kappa2(cbind(labs_orig, labs_new)) mclust::adjustedRandIndex(labs_orig, labs_new) # Assignments based on Macosko p_data <- pData(macosko) p_data$fsc3_buckets <- p_data$cell_type pData(macosko) <- new("AnnotatedDataFrame", data = p_data) macosko <- fsc3_get_buckets_signatures(macosko, threshold = 0.8) shekhar <- fsc3_assign_signatures(shekhar, macosko, threshold = 0.7) if(is.factor(pData(shekhar)$cell_type)) { labs_orig <- levels(pData(shekhar)$cell_type)[pData(shekhar)$cell_type] } else { labs_orig <- pData(shekhar)$cell_type } labs_new <- pData(shekhar)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) # plot_sankey(labs_orig, labs_new, colors = c("#0000FF", "#800080", "#0000FF", "#0000FF", "#0000FF", "#0000FF", "#0000FF", "#0000FF", "#0000FF", "#0000FF", "#0000FF", "#0000FF", "#0000FF", "#0000FF", "#0000FF", "#008000", "#000000", "#000000", "#000000", "#57A5D6", "#000000", "#57A5D6", "#000000", "#000000", "#000000", "#000000")) irr::kappa2(cbind(labs_orig, labs_new)) mclust::adjustedRandIndex(labs_orig, labs_new)
# d <- readRDS("~/Dropbox/scRNASeq-public-datasets/deng.rds") # # 1 1-4 - zygote # # 2 5-12 - early2cell # # 3 13-24 - mid2cell # # 4 25-34 - late2cell # # 5 35-48 - 4cell # # 6 49-85 - 8cell # # 7 86-135 - 16cell # # 8 136-178 - earlyblast # # 9 179-238 - midblast # # 10 239-268 - lateblast # labs <- colnames(d) # labs[labs == "1" | labs == "2"] = "zygote" # labs[labs == "3" | labs == "4"] = "2cell" # labs[labs == "5"] = "4cell" # labs[labs == "6"] = "8cell" # labs[labs == "7"] = "16cell" # labs[labs == "8" | labs == "9" | labs == "10"] = "blast" # ann <- data.frame(stage = labs) # rownames(ann) <- paste("cell", 1:ncol(d), sep = "_") # pd <- new("AnnotatedDataFrame", data = ann) # d <- d[!duplicated(rownames(d)), ] # colnames(d) <- rownames(ann) # deng <- newSCESet(fpkmData = d, phenoData = pd) # is_exprs(deng) <- exprs(deng) > 0 # deng <- calculateQCMetrics(deng) # # use gene names as feature symbols # deng@featureData@data$feature_symbol <- featureNames(deng) # saveRDS(deng, "~/Dropbox/scRNASeq-public-datasets/mouse-embryo-devel/deng_rpkm_sceset.rds")
# d <- read.table("~/Dropbox/scRNASeq-public-datasets/mouse-embryo-devel/intermediate/GSE57249_fpkm_ZHONG.txt", header = T) # d <- d[!duplicated(d$ID),] # rownames(d) <- d$ID # d <- d[,2:ncol(d)] # # # options(stringsAsFactors = F) # # labs <- read.table("~/Dropbox/scRNASeq-public-datasets/mouse-embryo-devel/intermediate/GSE57249_labels_ZHONG.txt", stringsAsFactors = F) # labs <- as.character(labs) # # labs[grep("TE",labs)] = "blast" # labs[grep("ICM",labs)] = "blast" # # ann <- data.frame(stage = labs) # rownames(ann) <- colnames(d) # # colnames(d) <- rownames(ann) # pd <- new("AnnotatedDataFrame", data = ann) # biase <- newSCESet(fpkmData = as.matrix(d), phenoData = pd) # is_exprs(biase) <- exprs(biase) > 0 # biase <- calculateQCMetrics(biase) # # use gene names as feature symbols # biase@featureData@data$feature_symbol <- featureNames(biase) # saveRDS(biase, "~/Dropbox/scRNASeq-public-datasets/mouse-embryo-devel/biase_fpkm.rds")
# d <- read.table("~/Dropbox/scRNASeq-public-datasets/mouse-embryo-devel/intermediate/Goolam_et_al_2015_count_table.tsv", header = T) # # labs <- colnames(d) # labs[grep("4cell",labs)] = "4cell" # labs[grep("8cell",labs)] = "8cell" # labs[grep("16cell",labs)] = "16cell" # labs[grep("32cell",labs)] = "blast" # labs[grep("2cell",labs)] = "2cell" # # ann <- data.frame(stage = labs) # rownames(ann) <- colnames(d) # colnames(d) <- rownames(ann) # pd <- new("AnnotatedDataFrame", data = ann) # goolam <- newSCESet(countData = as.matrix(d), phenoData = pd) # goolam <- calculateQCMetrics(goolam, feature_controls = grep("ERCC-", rownames(d))) # # convert ensembl ids into gene names # goolam <- getBMFeatureAnnos( # goolam, filters="ensembl_gene_id", # biomart="ensembl", dataset="mmusculus_gene_ensembl") # saveRDS(goolam, "~/Dropbox/scRNASeq-public-datasets/mouse-embryo-devel/goolam_counts.rds")
# d <- read.table("~/Dropbox/scRNASeq-public-datasets/mouse-embryo-devel/intermediate/GSE53386_matrix_fpkms.tsv") # # labs <- colnames(d) # labs[grep("ocyte",labs)] = "zygote" # labs[grep("zygote",labs)] = "zygote" # labs[grep("a.AM",labs)] = "zygote" # labs[grep("2.cell",labs)] = "2cell" # labs[grep("4.cell",labs)] = "4cell" # labs[grep("8.cell",labs)] = "8cell" # labs[grep("morula",labs)] = "16cell" # labs[grep("ICM",labs)] = "blast" # labs[grep("TE",labs)] = "blast" # labs[grep("blast",labs)] = "blast" # # ann <- data.frame(stage = labs) # rownames(ann) <- colnames(d) # # colnames(d) <- rownames(ann) # pd <- new("AnnotatedDataFrame", data = ann) # fan <- newSCESet(fpkmData = as.matrix(d), phenoData = pd) # is_exprs(fan) <- exprs(fan) > 0 # fan <- calculateQCMetrics(fan) # # use gene names as feature symbols # fan@featureData@data$feature_symbol <- featureNames(fan) # saveRDS(fan, "~/Dropbox/scRNASeq-public-datasets/mouse-embryo-devel/fan_fpkm.rds")
deng_rpkm <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-embryo-devel/deng_rpkm_sceset.rds") deng_rpkm <- fsc3_get_features(deng_rpkm) biase_fpkm <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-embryo-devel/biase_fpkm.rds") biase_fpkm <- fsc3_get_features(biase_fpkm) goolam_counts <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-embryo-devel/goolam_counts.rds") goolam_counts <- fsc3_get_features(goolam_counts) fan_fpkm <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-embryo-devel/fan_fpkm.rds") fan_fpkm <- fsc3_get_features(fan_fpkm) deng <- featureNames(deng_rpkm)[fData(deng_rpkm)$fsc3_features] biase <- featureNames(biase_fpkm)[fData(biase_fpkm)$fsc3_features] goolam <- featureNames(goolam_counts)[fData(goolam_counts)$fsc3_features] fan <- featureNames(fan_fpkm)[fData(fan_fpkm)$fsc3_features] upset(fromList(list(Deng = deng, Biase = biase, Goolam = goolam, Fan = fan)), order.by = "freq", nsets = 4) common_features <- get_common_fsc3_features(list(deng_rpkm, goolam_counts, biase_fpkm, fan_fpkm)) deng_rpkm <- fsc3_set_features(deng_rpkm, common_features) biase_fpkm <- fsc3_set_features(biase_fpkm, common_features) goolam_counts <- fsc3_set_features(goolam_counts, common_features) fan_fpkm <- fsc3_set_features(fan_fpkm, common_features) deng_rpkm <- fsc3_get_signatures(deng_rpkm) biase_fpkm <- fsc3_get_signatures(biase_fpkm) goolam_counts <- fsc3_get_signatures(goolam_counts) fan_fpkm <- fsc3_get_signatures(fan_fpkm) # Assignments based on Deng p_data <- pData(deng_rpkm) p_data$fsc3_buckets <- p_data$stage pData(deng_rpkm) <- new("AnnotatedDataFrame", data = p_data) deng_rpkm <- fsc3_get_buckets_signatures(deng_rpkm, threshold = 0.9) goolam_counts <- fsc3_assign_signatures(goolam_counts, deng_rpkm, threshold = 0.7) if(is.factor(pData(goolam_counts)$stage)) { labs_orig <- levels(pData(goolam_counts)$stage)[pData(goolam_counts)$stage] } else { labs_orig <- pData(goolam_counts)$stage } labs_new <- pData(goolam_counts)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) irr::kappa2(cbind(labs_orig, labs_new)) mclust::adjustedRandIndex(labs_orig, labs_new) biase_fpkm <- fsc3_assign_signatures(biase_fpkm, deng_rpkm, bucket_threshold = 0.8, assign_threshold = 0.7) if(is.factor(pData(biase_fpkm)$stage)) { labs_orig <- levels(pData(biase_fpkm)$stage)[pData(biase_fpkm)$stage] } else { labs_orig <- pData(biase_fpkm)$stage } labs_new <- pData(biase_fpkm)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) irr::kappa2(cbind(labs_orig, labs_new)) mclust::adjustedRandIndex(labs_orig, labs_new) fan_fpkm <- fsc3_assign_signatures(fan_fpkm, deng_rpkm) if(is.factor(pData(fan_fpkm)$stage)) { labs_orig <- levels(pData(fan_fpkm)$stage)[pData(fan_fpkm)$stage] } else { labs_orig <- pData(fan_fpkm)$stage } labs_new <- pData(fan_fpkm)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) irr::kappa2(cbind(labs_orig, labs_new)) mclust::adjustedRandIndex(labs_orig, labs_new) # Compare expressions p <- fsc3_plot_sig_expression(deng_rpkm, fColumn = "stage") fsc3_plot_sig_expression(goolam_counts, fColumn = "stage", hc = p$tree_row) fsc3_plot_sig_expression(fan_fpkm, fColumn = "stage", hc = p$tree_row) fsc3_plot_sig_expression(biase_fpkm, fColumn = "stage", hc = p$tree_row) # Compare with Tallulah's genes ta_genes <- read.table("~/Dropbox/scRNASeq-public-datasets/mouse-embryo-devel/intermediate/FourDatasets_Stage_Expression_DB_M3Drop_Genes.txt") ta_genes <- rownames(ta_genes) table(fData(deng_rpkm)$feature_symbol[fData(deng_rpkm)$fsc3_features] %in% ta_genes) table(fData(goolam_counts)$feature_symbol[fData(goolam_counts)$fsc3_features] %in% ta_genes) table(fData(biase_fpkm)$feature_symbol[fData(biase_fpkm)$fsc3_features] %in% ta_genes) table(fData(fan_fpkm)$feature_symbol[fData(fan_fpkm)$fsc3_features] %in% ta_genes) deng_rpkm <- fsc3_get_features(deng_rpkm) common_features <- get_common_fsc3_features(list(deng_rpkm)) biase_fpkm <- fsc3_set_features(biase_fpkm, common_features) # update common_features common_features <- featureNames(biase_fpkm)[fData(biase_fpkm)$fsc3_features] deng_rpkm <- fsc3_set_features(deng_rpkm, common_features) deng_rpkm <- fsc3_get_signatures(deng_rpkm) biase_fpkm <- fsc3_get_signatures(biase_fpkm) p_data <- pData(deng_rpkm) p_data$fsc3_buckets <- p_data$stage pData(deng_rpkm) <- new("AnnotatedDataFrame", data = p_data) deng_rpkm <- fsc3_get_buckets_signatures(deng_rpkm, threshold = 0.6) biase_fpkm <- fsc3_assign_signatures(biase_fpkm, deng_rpkm, threshold = 0.6) if(is.factor(pData(biase_fpkm)$stage)) { labs_orig <- levels(pData(biase_fpkm)$stage)[pData(biase_fpkm)$stage] } else { labs_orig <- pData(biase_fpkm)$stage } labs_new <- pData(biase_fpkm)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) irr::kappa2(cbind(labs_orig, labs_new))$value mclust::adjustedRandIndex(labs_orig, labs_new) deng_rpkm <- fsc3_get_features(deng_rpkm) common_features <- get_common_fsc3_features(list(deng_rpkm)) fan_fpkm <- fsc3_set_features(fan_fpkm, common_features) # update common_features common_features <- featureNames(fan_fpkm)[fData(fan_fpkm)$fsc3_features] deng_rpkm <- fsc3_set_features(deng_rpkm, common_features) deng_rpkm <- fsc3_get_signatures(deng_rpkm) fan_fpkm <- fsc3_get_signatures(fan_fpkm) p_data <- pData(deng_rpkm) p_data$fsc3_buckets <- p_data$stage pData(deng_rpkm) <- new("AnnotatedDataFrame", data = p_data) deng_rpkm <- fsc3_get_buckets_signatures(deng_rpkm, threshold = 0.6) fan_fpkm <- fsc3_assign_signatures(fan_fpkm, deng_rpkm, threshold = 0.6) if(is.factor(pData(fan_fpkm)$stage)) { labs_orig <- levels(pData(fan_fpkm)$stage)[pData(fan_fpkm)$stage] } else { labs_orig <- pData(fan_fpkm)$stage } labs_new <- pData(fan_fpkm)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) irr::kappa2(cbind(labs_orig, labs_new))$value mclust::adjustedRandIndex(labs_orig, labs_new) deng_rpkm <- fsc3_get_features(deng_rpkm) common_features <- get_common_fsc3_features(list(deng_rpkm)) goolam_counts <- fsc3_set_features(goolam_counts, common_features) # update common_features common_features <- fData(goolam_counts)$feature_symbol[fData(goolam_counts)$fsc3_features] deng_rpkm <- fsc3_set_features(deng_rpkm, common_features) deng_rpkm <- fsc3_get_signatures(deng_rpkm) goolam_counts <- fsc3_get_signatures(goolam_counts) p_data <- pData(deng_rpkm) p_data$fsc3_buckets <- p_data$stage pData(deng_rpkm) <- new("AnnotatedDataFrame", data = p_data) deng_rpkm <- fsc3_get_buckets_signatures(deng_rpkm, threshold = 0.6) goolam_counts <- fsc3_assign_signatures(goolam_counts, deng_rpkm, threshold = 0.6) if(is.factor(pData(goolam_counts)$stage)) { labs_orig <- levels(pData(goolam_counts)$stage)[pData(goolam_counts)$stage] } else { labs_orig <- pData(goolam_counts)$stage } labs_new <- pData(goolam_counts)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) irr::kappa2(cbind(labs_orig, labs_new))$value mclust::adjustedRandIndex(labs_orig, labs_new)
d <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/intermediate/Nestorowa.rds") dataset <- d[[1]] ann <- d[[2]] rownames(ann) <- colnames(dataset) pd <- new("AnnotatedDataFrame", data = ann) nestorowa <- newSCESet(fpkmData = as.matrix(dataset), phenoData = pd) is_exprs(nestorowa) <- exprs(nestorowa) > 0 nestorowa <- calculateQCMetrics(nestorowa) # use gene names as feature symbols nestorowa@featureData@data$feature_symbol <- featureNames(nestorowa) saveRDS(nestorowa, "~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/nestorowa.rds")
d <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/intermediate/Olsson.rds") dataset <- d[[1]] ann <- d[[2]] rownames(ann) <- colnames(dataset) pd <- new("AnnotatedDataFrame", data = ann) olsson <- newSCESet(fpkmData = as.matrix(dataset), phenoData = pd) is_exprs(olsson) <- exprs(olsson) > 0 olsson <- calculateQCMetrics(olsson) # use gene names as feature symbols olsson@featureData@data$feature_symbol <- featureNames(olsson) saveRDS(olsson, "~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/olsson.rds")
d <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/intermediate/GSE59114_C57BL6.rds") dataset <- d[[1]] ann <- d[[2]] rownames(ann) <- colnames(dataset) pd <- new("AnnotatedDataFrame", data = ann) kowalczyk <- newSCESet(fpkmData = as.matrix(dataset), phenoData = pd) is_exprs(kowalczyk) <- exprs(kowalczyk) > 0 kowalczyk <- calculateQCMetrics(kowalczyk) # use gene names as feature symbols kowalczyk@featureData@data$feature_symbol <- featureNames(kowalczyk) saveRDS(kowalczyk, "~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/kowalczyk_bl6.rds")
d <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/intermediate/GSE59114_DBA.rds") dataset <- d[[1]] ann <- d[[2]] rownames(ann) <- colnames(dataset) pd <- new("AnnotatedDataFrame", data = ann) kowalczyk <- newSCESet(fpkmData = as.matrix(dataset), phenoData = pd) is_exprs(kowalczyk) <- exprs(kowalczyk) > 0 kowalczyk <- calculateQCMetrics(kowalczyk) # use gene names as feature symbols kowalczyk@featureData@data$feature_symbol <- featureNames(kowalczyk) saveRDS(kowalczyk, "~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/kowalczyk_dba.rds")
d <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/intermediate/Grover.rds") dataset <- d[[1]] ann <- d[[2]] rownames(ann) <- colnames(dataset) pd <- new("AnnotatedDataFrame", data = ann) grover <- newSCESet(fpkmData = as.matrix(dataset), phenoData = pd) is_exprs(grover) <- exprs(grover) > 0 grover <- calculateQCMetrics(grover) # use gene names as feature symbols grover@featureData@data$feature_symbol <- featureNames(grover) saveRDS(grover, "~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/grover.rds")
d <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/intermediate/Drissen.rds") dataset <- d[[1]] ann <- d[[2]] rownames(ann) <- colnames(dataset) pd <- new("AnnotatedDataFrame", data = ann) drissen <- newSCESet(fpkmData = as.matrix(dataset), phenoData = pd) is_exprs(drissen) <- exprs(drissen) > 0 drissen <- calculateQCMetrics(drissen) # use gene names as feature symbols drissen@featureData@data$feature_symbol <- featureNames(drissen) saveRDS(drissen, "~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/drissen.rds")
d <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/intermediate/GSE67120.rds") dataset <- d[[1]] ann <- d[[2]] rownames(ann) <- colnames(dataset) pd <- new("AnnotatedDataFrame", data = ann) zhou <- newSCESet(fpkmData = as.matrix(dataset), phenoData = pd) is_exprs(zhou) <- exprs(zhou) > 0 zhou <- calculateQCMetrics(zhou) # use gene names as feature symbols zhou@featureData@data$feature_symbol <- featureNames(zhou) saveRDS(zhou, "~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/zhou.rds")
d <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/intermediate/GSE60781.rds") dataset <- d[[1]] ann <- d[[2]] rownames(ann) <- colnames(dataset) pd <- new("AnnotatedDataFrame", data = ann) sceset <- newSCESet(fpkmData = as.matrix(dataset), phenoData = pd) is_exprs(sceset) <- exprs(sceset) > 0 sceset <- calculateQCMetrics(sceset) # use gene names as feature symbols sceset@featureData@data$feature_symbol <- featureNames(sceset) saveRDS(sceset, "~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/schlitzer.rds")
d <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/intermediate/GSE61533.rds") dataset <- d[[1]] ann <- d[[2]] rownames(ann) <- colnames(dataset) pd <- new("AnnotatedDataFrame", data = ann) sceset <- newSCESet(countData = as.matrix(dataset), phenoData = pd) sceset <- calculateQCMetrics(sceset) # use gene names as feature symbols sceset@featureData@data$feature_symbol <- featureNames(sceset) saveRDS(sceset, "~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/wilson.rds")
d <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/intermediate/GSE54574.rds") dataset <- d[[1]] ann <- d[[2]] rownames(ann) <- colnames(dataset) pd <- new("AnnotatedDataFrame", data = ann) sceset <- newSCESet(fpkmData = as.matrix(dataset), phenoData = pd) is_exprs(sceset) <- exprs(sceset) > 0 sceset <- calculateQCMetrics(sceset) # use gene names as feature symbols sceset@featureData@data$feature_symbol <- featureNames(sceset) saveRDS(sceset, "~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/pereira.rds")
d <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/intermediate/E-GEOD-76983.rds") dataset <- d[[1]] ann <- d[[2]] rownames(ann) <- colnames(dataset) pd <- new("AnnotatedDataFrame", data = ann) sceset <- newSCESet(fpkmData = as.matrix(dataset), phenoData = pd) is_exprs(sceset) <- exprs(sceset) > 0 sceset <- calculateQCMetrics(sceset) # use gene names as feature symbols sceset@featureData@data$feature_symbol <- featureNames(sceset) saveRDS(sceset, "~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/grun.rds")
d <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/intermediate/E-GEOD-72857.rds") dataset <- d[[1]] dataset <- dataset[!duplicated(rownames(dataset)), ] ann <- d[[2]] rownames(ann) <- colnames(dataset) pd <- new("AnnotatedDataFrame", data = ann) sceset <- newSCESet(countData = as.matrix(dataset), phenoData = pd) sceset <- calculateQCMetrics(sceset) # use gene names as feature symbols sceset@featureData@data$feature_symbol <- featureNames(sceset) saveRDS(sceset, "~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/paul.rds")
nestorowa <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/nestorowa.rds") nestorowa <- fsc3_get_features(nestorowa) olsson <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/olsson.rds") olsson <- fsc3_get_features(olsson) kowalczyk_bl6 <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/kowalczyk_bl6.rds") kowalczyk_bl6 <- fsc3_get_features(kowalczyk_bl6) kowalczyk_dba <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/kowalczyk_dba.rds") kowalczyk_dba <- fsc3_get_features(kowalczyk_dba) grover <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/grover.rds") grover <- fsc3_get_features(grover) drissen <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/drissen.rds") drissen <- fsc3_get_features(drissen) zhou <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/zhou.rds") zhou <- fsc3_get_features(zhou) schlitzer <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/schlitzer.rds") schlitzer <- fsc3_get_features(schlitzer) wilson <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/wilson.rds") wilson <- fsc3_get_features(wilson) pereira <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/pereira.rds") pereira <- fsc3_get_features(pereira) grun <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/grun.rds") grun <- fsc3_get_features(grun) paul <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-bone-marrow/paul.rds") paul <- fsc3_get_features(paul) nest <- featureNames(nestorowa)[fData(nestorowa)$fsc3_features] ols <- featureNames(olsson)[fData(olsson)$fsc3_features] kowbl6 <- featureNames(kowalczyk_bl6)[fData(kowalczyk_bl6)$fsc3_features] kowdba <- featureNames(kowalczyk_dba)[fData(kowalczyk_dba)$fsc3_features] grov <- featureNames(grover)[fData(grover)$fsc3_features] dris <- featureNames(drissen)[fData(drissen)$fsc3_features] zho <- featureNames(zhou)[fData(zhou)$fsc3_features] schlitz <- featureNames(schlitzer)[fData(schlitzer)$fsc3_features] wils <- featureNames(wilson)[fData(wilson)$fsc3_features] per <- featureNames(pereira)[fData(pereira)$fsc3_features] gru <- featureNames(grun)[fData(grun)$fsc3_features] pau <- featureNames(paul)[fData(paul)$fsc3_features] upset(fromList(list(Nestorowa = nest, Olsson = ols, Kowalczyk_BL6 = kowbl6, Kowalczyk_DBA = kowdba, Grover = grov, Drissen = dris, Zhou = zho, Schlitzer = schlitz, Wilson = wils, Pereira = per, Grun = gru, Paul = pau)), order.by = "freq", nsets = 12) nestorowa <- fsc3_get_features(nestorowa) common_features <- get_common_fsc3_features(list(nestorowa)) olsson <- fsc3_set_features(olsson, common_features) # update common_features common_features <- featureNames(olsson)[fData(olsson)$fsc3_features] nestorowa <- fsc3_set_features(nestorowa, common_features) nestorowa <- fsc3_get_signatures(nestorowa) olsson <- fsc3_get_signatures(olsson) p_data <- pData(nestorowa) p_data$fsc3_buckets <- p_data$Type pData(nestorowa) <- new("AnnotatedDataFrame", data = p_data) nestorowa <- fsc3_get_buckets_signatures(nestorowa, threshold = 0.6) olsson <- fsc3_assign_signatures(olsson, nestorowa, threshold = 0.6) if(is.factor(pData(olsson)$Type)) { labs_orig <- levels(pData(olsson)$Type)[pData(olsson)$Type] } else { labs_orig <- pData(olsson)$Type } labs_new <- pData(olsson)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) irr::kappa2(cbind(labs_orig, labs_new))$value mclust::adjustedRandIndex(labs_orig, labs_new) nestorowa <- fsc3_get_features(nestorowa) common_features <- get_common_fsc3_features(list(nestorowa)) paul <- fsc3_set_features(paul, common_features) # update common_features common_features <- featureNames(paul)[fData(paul)$fsc3_features] nestorowa <- fsc3_set_features(nestorowa, common_features) nestorowa <- fsc3_get_signatures(nestorowa) paul <- fsc3_get_signatures(paul) p_data <- pData(nestorowa) p_data$fsc3_buckets <- p_data$Type pData(nestorowa) <- new("AnnotatedDataFrame", data = p_data) nestorowa <- fsc3_get_buckets_signatures(nestorowa, threshold = 0.6) paul <- fsc3_assign_signatures(paul, nestorowa, threshold = 0.6) if(is.factor(pData(paul)$Type)) { labs_orig <- levels(pData(paul)$Type)[pData(paul)$Type] } else { labs_orig <- pData(paul)$Type } labs_new <- pData(paul)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) irr::kappa2(cbind(labs_orig, labs_new))$value mclust::adjustedRandIndex(labs_orig, labs_new) kowalczyk_bl6 <- fsc3_get_features(kowalczyk_bl6) common_features <- get_common_fsc3_features(list(kowalczyk_bl6)) kowalczyk_dba <- fsc3_set_features(kowalczyk_dba, common_features) # update common_features common_features <- featureNames(kowalczyk_dba)[fData(kowalczyk_dba)$fsc3_features] kowalczyk_bl6 <- fsc3_set_features(kowalczyk_bl6, common_features) kowalczyk_bl6 <- fsc3_get_signatures(kowalczyk_bl6) kowalczyk_dba <- fsc3_get_signatures(kowalczyk_dba) p_data <- pData(kowalczyk_bl6) p_data$fsc3_buckets <- p_data$Type pData(kowalczyk_bl6) <- new("AnnotatedDataFrame", data = p_data) kowalczyk_bl6 <- fsc3_get_buckets_signatures(kowalczyk_bl6, threshold = 0.6) kowalczyk_dba <- fsc3_assign_signatures(kowalczyk_dba, kowalczyk_bl6, threshold = 0.6) if(is.factor(pData(kowalczyk_dba)$Type)) { labs_orig <- levels(pData(kowalczyk_dba)$Type)[pData(kowalczyk_dba)$Type] } else { labs_orig <- pData(kowalczyk_dba)$Type } labs_new <- pData(kowalczyk_dba)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) irr::kappa2(cbind(labs_orig, labs_new))$value mclust::adjustedRandIndex(labs_orig, labs_new) kowalczyk_dba <- fsc3_get_features(kowalczyk_dba) common_features <- get_common_fsc3_features(list(kowalczyk_dba)) kowalczyk_bl6 <- fsc3_set_features(kowalczyk_bl6, common_features) # update common_features common_features <- featureNames(kowalczyk_bl6)[fData(kowalczyk_bl6)$fsc3_features] kowalczyk_dba <- fsc3_set_features(kowalczyk_dba, common_features) kowalczyk_dba <- fsc3_get_signatures(kowalczyk_dba) kowalczyk_bl6 <- fsc3_get_signatures(kowalczyk_bl6) p_data <- pData(kowalczyk_dba) p_data$fsc3_buckets <- p_data$Type pData(kowalczyk_dba) <- new("AnnotatedDataFrame", data = p_data) kowalczyk_dba <- fsc3_get_buckets_signatures(kowalczyk_dba, threshold = 0.6) kowalczyk_bl6 <- fsc3_assign_signatures(kowalczyk_bl6, kowalczyk_dba, threshold = 0.6) if(is.factor(pData(kowalczyk_bl6)$Type)) { labs_orig <- levels(pData(kowalczyk_bl6)$Type)[pData(kowalczyk_bl6)$Type] } else { labs_orig <- pData(kowalczyk_bl6)$Type } labs_new <- pData(kowalczyk_bl6)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) irr::kappa2(cbind(labs_orig, labs_new))$value mclust::adjustedRandIndex(labs_orig, labs_new)
# # download data from http://casestudies.brain-map.org/celltax # # and run this code # d <- read.csv("~/Downloads/data_download/genes_rpkm.csv") # rownames(d) <- d[,1] # d <- d[,2:ncol(d)] # # then do this: # ann_col <- read.csv("~/Downloads/data_download/cell_metadata.csv") # # cell_classification <- read.csv("~/Downloads/data_download/cell_classification.csv") # cluster_metadata <- read.csv("~/Downloads/data_download/cluster_metadata.csv") # cluster_metadata <- cluster_metadata[,1:(ncol(cluster_metadata) - 1)] # # ann_col <- cbind(ann_col, cell_classification[,2:3]) # colnames(ann_col)[length(ann_col)] <- "cluster_id" # # ann_col <- merge(ann_col, cluster_metadata, by = "cluster_id") # ann_col <- ann_col[order(ann_col$long_name), ] # rownames(ann_col) <- ann_col$long_name # ann_col <- ann_col[,c(1, 3:ncol(ann_col))] # # pd <- new("AnnotatedDataFrame", data = ann_col) # sceset <- newSCESet(fpkmData = as.matrix(d), phenoData = pd) # is_exprs(sceset) <- exprs(sceset) > 0 # sceset <- calculateQCMetrics(sceset) # # use gene names as feature symbols # sceset@featureData@data$feature_symbol <- featureNames(sceset) # saveRDS(sceset, "~/Dropbox/scRNASeq-public-datasets/mouse-brain/tasic.rds")
# # download data from http://support.10xgenomics.com/single-cell/datasets/neuron_9k # # and run this code # d <- as.matrix(Seurat::Read10X("~/Downloads/filtered_gene_bc_matrices/mm10")) # sceset <- newSCESet(countData = d) # sceset <- calculateQCMetrics(sceset) # # use gene names as feature symbols # sceset@featureData@data$feature_symbol <- featureNames(sceset) # saveRDS(sceset, "~/Dropbox/scRNASeq-public-datasets/mouse-brain/10x-genomics/neuron_9k.rds")
# # download Primary Visual Cortex (VISp) from here: http://celltypes.brain-map.org/rnaseq # # and run this code # d <- read.csv("~/Dropbox/scRNASeq-public-datasets/mouse-brain/intermediate/VIS_gene_expression_matrix_2016-10-27/fpkm_table.csv") # rownames(d) <- d[,1] # d <- d[,2:ncol(d)] # colnames(d) <- as.numeric(sapply(colnames(d), function(x)substring(x, 2))) # # then do this: # ann_col <- read.csv("~/Dropbox/scRNASeq-public-datasets/mouse-brain/intermediate/VIS_gene_expression_matrix_2016-10-27/columns-samples.csv") # rownames(ann_col) <- ann_col$rnaseq_profile_id # ann_col <- ann_col[,2:ncol(ann_col)] # # ann_row <- read.csv("~/Dropbox/scRNASeq-public-datasets/mouse-brain/intermediate/VIS_gene_expression_matrix_2016-10-27/rows-genes.csv") # rownames(ann_row) <- ann_row$gene_id # ann_row <- ann_row[,2:ncol(ann_row)] # # pd <- new("AnnotatedDataFrame", data = ann_col) # fd <- new("AnnotatedDataFrame", data = ann_row) # sceset <- newSCESet(fpkmData = as.matrix(d), phenoData = pd, featureData = fd) # is_exprs(sceset) <- exprs(sceset) > 0 # sceset <- calculateQCMetrics(sceset) # # use gene names as feature symbols # sceset@featureData@data$feature_symbol <- sceset@featureData@data$gene_symbol # saveRDS(sceset, "~/Dropbox/scRNASeq-public-datasets/mouse-brain/allen-atlas-vis.rds")
# # download anterior lateral motor cortical area (ALM) from here: http://celltypes.brain-map.org/rnaseq # # and run this code # d <- read.csv("~/Dropbox/scRNASeq-public-datasets/mouse-brain/intermediate/ALM_gene_expression_matrix_2016-10-27/fpkm_table.csv") # rownames(d) <- d[,1] # d <- d[,2:ncol(d)] # colnames(d) <- as.numeric(sapply(colnames(d), function(x)substring(x, 2))) # # then do this: # ann_col <- read.csv("~/Dropbox/scRNASeq-public-datasets/mouse-brain/intermediate/ALM_gene_expression_matrix_2016-10-27/columns-cells.csv") # rownames(ann_col) <- ann_col$rnaseq_profile_id # ann_col <- ann_col[,2:ncol(ann_col)] # # ann_row <- read.csv("~/Dropbox/scRNASeq-public-datasets/mouse-brain/intermediate/ALM_gene_expression_matrix_2016-10-27/rows-genes.csv") # rownames(ann_row) <- ann_row$gene_id # ann_row <- ann_row[,2:ncol(ann_row)] # # pd <- new("AnnotatedDataFrame", data = ann_col) # fd <- new("AnnotatedDataFrame", data = ann_row) # sceset <- newSCESet(fpkmData = as.matrix(d), phenoData = pd, featureData = fd) # is_exprs(sceset) <- exprs(sceset) > 0 # sceset <- calculateQCMetrics(sceset) # # use gene names as feature symbols # sceset@featureData@data$feature_symbol <- sceset@featureData@data$gene_symbol # saveRDS(sceset, "~/Dropbox/scRNASeq-public-datasets/mouse-brain/allen-atlas-alm.rds")
# # download dorsal lateral geniculate nucleus (dLGN) of the thalamus from here: http://celltypes.brain-map.org/rnaseq # # and run this code # d <- read.csv("~/Dropbox/scRNASeq-public-datasets/mouse-brain/intermediate/gene_expression_matrix_2016-03-03/fpkm_table.csv") # rownames(d) <- d[,1] # d <- d[,2:ncol(d)] # colnames(d) <- as.numeric(sapply(colnames(d), function(x)substring(x, 2))) # # then do this: # ann_col <- read.csv("~/Dropbox/scRNASeq-public-datasets/mouse-brain/intermediate/gene_expression_matrix_2016-03-03/columns-cells.csv") # rownames(ann_col) <- ann_col$rnaseq_profile_id # ann_col <- ann_col[,2:ncol(ann_col)] # # ann_row <- read.csv("~/Dropbox/scRNASeq-public-datasets/mouse-brain/intermediate/gene_expression_matrix_2016-03-03/rows-genes.csv") # rownames(ann_row) <- ann_row$gene_id # ann_row <- ann_row[,2:ncol(ann_row)] # # pd <- new("AnnotatedDataFrame", data = ann_col) # fd <- new("AnnotatedDataFrame", data = ann_row) # sceset <- newSCESet(fpkmData = as.matrix(d), phenoData = pd, featureData = fd) # is_exprs(sceset) <- exprs(sceset) > 0 # sceset <- calculateQCMetrics(sceset) # # use gene names as feature symbols # sceset@featureData@data$feature_symbol <- sceset@featureData@data$gene_symbol # saveRDS(sceset, "~/Dropbox/scRNASeq-public-datasets/mouse-brain/allen-atlas-dlgn.rds")
vis <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-brain/allen-atlas-vis.rds") vis <- fsc3_get_features(vis) alm <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-brain/allen-atlas-alm.rds") alm <- fsc3_get_features(alm) dlgn <- readRDS("~/Dropbox/scRNASeq-public-datasets/mouse-brain/allen-atlas-dlgn.rds") dlgn <- fsc3_get_features(dlgn) vi <- as.character(fData(vis)$feature_symbol[fData(vis)$fsc3_features]) al <- as.character(fData(alm)$feature_symbol[fData(alm)$fsc3_features]) dlg <- as.character(fData(dlgn)$feature_symbol[fData(dlgn)$fsc3_features]) upset(fromList(list(VIS = vi, ALM = al, dLGN = dlg)), order.by = "freq", nsets = 3) vis <- fsc3_get_features(vis) common_features <- get_common_fsc3_features(list(vis)) alm <- fsc3_set_features(alm, common_features) # update common_features common_features <- as.character(fData(alm)$feature_symbol)[fData(alm)$fsc3_features] vis <- fsc3_set_features(vis, common_features) vis <- fsc3_get_signatures(vis) alm <- fsc3_get_signatures(alm) p_data <- pData(vis) p_data$fsc3_buckets <- p_data$genotype_driver pData(vis) <- new("AnnotatedDataFrame", data = p_data) vis <- fsc3_get_buckets_signatures(vis, threshold = 0.6) alm <- fsc3_assign_signatures(alm, vis, threshold = 0.6) if(is.factor(pData(alm)$genotype_driver)) { labs_orig <- levels(pData(alm)$genotype_driver)[pData(alm)$genotype_driver] } else { labs_orig <- pData(alm)$genotype_driver } labs_new <- pData(alm)$fsc3_buckets_assigned plot_sankey(labs_orig, labs_new) irr::kappa2(cbind(labs_orig, labs_new))$value mclust::adjustedRandIndex(labs_orig, labs_new)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.