Nothing
#' \code{cellTypeAssignSCRNA} assigns CDSeq-identified cell types using single cell RNAseq data.
#' @param cdseq_gep CDSeq-estimated gene expression profile matrix with G rows (genes) and T columns (cell types).
#' @param cdseq_prop CDSeq-estimated sample-specific cell-type proportion, a matrix with T rows (cell type) and M (sample size).
#' @param cdseq_gep_sample_specific CDSeq-estimated sample-specific cell type gene expression, in the form of read counts. It is a 3 dimension array, i.e. gene by sample by cell type. The element cdseq_gep_sample_specific[i,j,k] represents the reads mapped to gene i from cell type k in sample j.
#' @param sc_gep a G (genes) by N (cell) matrix or dataframe that contains the gene expression profile for N single cells.
#' @param sc_annotation a dataframe contains two columns "cell_id" and "cell_type". cell_id needs to match with the cell_id in sc_gep but not required to have the same size. cell_type is the cell type annotation for the single cells.
#' @param nb_size size parameter for negative binomial distribution, check rnbinom for details.
#' @param nb_mu mu parameter for negative binomial distribution, check rnbinom for details.
#' @param seurat_count_threshold this parameter will be passed to Seurat subset function (subset = nCount_RNA > seurat_count_threshold) for filtering out single cells whose total counts is less this threshold.
#' @param seurat_scale_factor this parameter will be passed to scale.factor in Seurat function NormalizeData.
#' @param seurat_norm_method this parameter will be passed to normalization.method in Seurat function NormalizeData.
#' @param seurat_select_method this parameter will be passed to selection.method in Seurat function FindVariableFeatures
#' @param seurat_nfeatures this parameter will be passed to nfeatures in Seurat function FindVariableFeatures.
#' @param seurat_npcs this parameter will be passed to npcs in Seurat function RunPCA.
#' @param seurat_dims this parameter will be passed to dims in Seurat function FindNeighbors.
#' @param seurat_reduction this parameter will be passed to reduction in Seurat function FindNeighbors.
#' @param seurat_resolution this parameter will be passed to resolution in Seurat function FindClusters.
#' @param seurat_find_marker this parameter controls if run seurat FindMarker function, default is FALSE.
#' @param seurat_DE_test this parameter will be passed to test.use in Seurat function FindAllMarkers.
#' @param seurat_DE_logfc this parameter will be passed to logfc.threshold in Seurat function FindAllMarkers.
#' @param seurat_top_n_markers the number of top DE markers saved from Seurat output.
#' @param sc_pt_size point size of single cell data in umap and tsne plots
#' @param cdseq_pt_size point size of CDSeq-estimated cell types in umap and tsne plots
#' @param plot_umap set 1 to plot umap figure of scRNAseq and CDSeq-estimated cell types, 0 otherwise.
#' @param plot_tsne set 1 to plot tsne figure of scRNAseq and CDSeq-estimated cell types, 0 otherwise.
#' @param plot_per_sample currently disabled for debugging
#' @param fig_save 1 or 0. 1 means save figures to local and 0 means do not save figures to local.
#' @param fig_path the location where the heatmap figure is saved.
#' @param fig_name the name of umap and tsne figures. Umap figure will have the name of fig_name_umap_date and tsne figure will be named fig_name_tsne_date.
#' @param fig_format "pdf", "jpeg", or "png".
#' @param fig_dpi figure dpi
#' @param verbose if TRUE, some calculation information will be print.
#' @importFrom grDevices rainbow
#' @importFrom stats rmultinom rnbinom
#' @importFrom Seurat CreateSeuratObject NormalizeData FindVariableFeatures ScaleData FindNeighbors FindClusters FindAllMarkers RunUMAP RunTSNE RunPCA
#' @importFrom ggplot2 guide_legend guides aes scale_size_manual scale_shape_manual scale_fill_manual xlab ylab theme ggsave ggplot ggtitle geom_point element_text scale_colour_manual
#' @importFrom dplyr top_n group_by
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom Matrix colSums
#' @export
#' @return cellTypeAssignSCRNA returns a list containing following fields:
#' fig_path: same as the input fig_path
#'
#' fig_name: same as the input fig_name
#'
#' cdseq_synth_scRNA: synthetic scRNAseq data generated using CDSeq-estiamted GEPs
#'
#' cdseq_scRNA_umap: ggplot figure of the umap outcome
#'
#' cdseq_scRNA_tsne: ggplot figure of the tsne outcome
#'
#' cdseq_synth_scRNA_seurat: Seurat object containing the scRNAseq combined with CDSeq-estimated cell types. Cell id for CDSeq-estimated cell types start with "CDSeq".
#'
#' seurat_cluster_purity: for all cells in a Seurat cluster i, the ith value in seurat_cluster_purity is the proportion of the mostly repeated cell annotation from sc_annotation.
#' For example, after Seurat clustering, suppose there are 100 cells in cluster 1, out of these 100 cells, 90 cells' annotation in sc_annotation is cell type A, then
#' the fist value in seurat_cluster_purity is 0.9. This output can be used to assess the agreement between Seurat clustering and the given sc_annotation.
#'
#' seurat_unique_clusters: Unique Seurat cluster numbering. This can be used together with seurat_cluster_gold_label to match the Seurat clusters with given annotations.
#'
#' seurat_cluster_gold_label: The cell type annotations for each unique Seurat cluster based on sc_annotation.
#'
#' seurat_markers: DE genes for each Seurat cluster.
#'
#' seurat_top_markers: Top seurat_top_n_markers DE genes for each Seurat cluster.
#'
#' CDSeq_cell_type_assignment_df: cell type assignment for CDSeq-estimated cell types.
#'
#' cdseq_prop_merged: CDSeq-estimated cell type proportions with cell type annotations.
#'
#' cdseq_gep_sample_specific_merged: sample-specific cell-type read counts. It is a 3d array with dimensions: gene, sample, cell type.
#'
#' input_list: values for input parameters
#'
#' cdseq_sc_comb_umap_df: dataframe for umap plot
#'
#' cdseq_sc_comb_tsne_df: dataframe for tsne plot
cellTypeAssignSCRNA <- function(cdseq_gep = NULL,
cdseq_prop = NULL,
cdseq_gep_sample_specific = NULL,
sc_gep = NULL,
sc_annotation = NULL,
nb_size = NULL,
nb_mu = NULL,
seurat_count_threshold = 100,
seurat_scale_factor = 10000,
seurat_norm_method = "LogNormalize",
seurat_select_method = 'vst',
seurat_nfeatures = 100,
seurat_npcs = 50,
seurat_dims = 1:10,
seurat_reduction = 'pca',
seurat_resolution = 0.8,
seurat_find_marker = FALSE,
seurat_DE_test = "wilcox",
seurat_DE_logfc = 0.25,
seurat_top_n_markers = 10,
sc_pt_size = 1,
cdseq_pt_size = 3,
plot_umap = 1,
plot_tsne = 1,
plot_per_sample = 0,
fig_save = 0,
fig_path = getwd(),
fig_name = "cellTypeAssignSCRNA",
fig_format = "pdf",
fig_dpi = 300,
verbose=FALSE){
#################################################################
## check input ##
#################################################################
#cat("CDSeq version 1.0.7\n")
if(verbose){cat("checking input ... \n")}
# check dimension
nr_cdseq <- nrow(cdseq_gep)
nc_cdseq <- ncol(cdseq_gep)
nr_sc <- nrow(sc_gep)
nc_sc <- ncol(sc_gep)
# check gene names
gene_cdseq <- rownames(cdseq_gep)
gene_sc <- rownames(sc_gep)
if(is.null(gene_cdseq) || is.null(gene_sc)){stop("cdseq_gep and sc_gep should have gene names as row names.")}
if(sum(gene_cdseq == gene_sc) != nr_cdseq ){stop("cdseq_gep and sc_gep should have the same gene names and in the same order.")}
# check cell type proportions
if(!is.null(cdseq_prop)){
nr_prop <- nrow(cdseq_prop)
nc_prop <- ncol(cdseq_prop)
if(nr_cdseq!=nr_sc){stop("cdseq_gep and sc_gep should have the same number of rows (genes).")}
if(nr_prop!=nc_cdseq){stop("column number of cdseq_gep should equal to row number of cdseq_prop.")}
if(is.null(colnames(cdseq_prop))){stop("cdseq_prop column name is missing. Column names are sample ids.")}
if(is.null(rownames(cdseq_prop))){stop("cdseq_prop row name is missing. Row names are the cell type names.")}
if(sum(rownames(cdseq_prop) == colnames(cdseq_gep)) != nr_prop){stop("Row names of cdseq_prop and column names of cdseq_gep should be the same and in same order. They both denote cell types.")}
}
# check cell ids
sc_id <- colnames(sc_gep)
if(is.null(sc_id)){stop("Please provide cell id in sc_gep.")}
# check annotation
if(is.null(sc_annotation)){
warning("sc_annotation (single cell annotation) is not provided. Cluster labels and DE genes will be generated. It is recommanded to use annotated scRNAseq in this function.")
}else{
if(is.null(dim(sc_annotation))){stop("sc_annotation has to be a two dimensional data frame with column names: cell_id and cell_type")}
if(nrow(sc_annotation)!=nc_sc){stop("nrow(sc_annotation) should be equal to ncol(sc_gep).")}
if(sum(colnames(sc_annotation) == c("cell_id","cell_type"))!=2){stop("sc_annotation has to be a two dimensional data frame with column names: cell_id and cell_type")}
#cat("sum(sc_id == sc_annotation$cell_id) = ",sum(sc_id == sc_annotation$cell_id),"length(sc_id)=",length(sc_id),"\n")
if(sum(sc_id == sc_annotation$cell_id) != length(sc_id)){stop("cell id in sc_gep has to be the same as the cell id in sc_annotation.")}
}
# declare variable to avoid R CMD check notes for no visible binding for global variable
avg_logFC <- cluster <- nCount_RNA <- NULL
###################################################################
## Generate synthetic scRNAseq data from CDSeq estimated GEPs ##
## and combine with scRNAseq data ##
###################################################################
if(verbose){cat("generating synthetic scRNAseq data using CDSeq estimated GEPs ...\n")}
# generate synthetic scRNAseq
ncell <- 1
cdseq_synth_scRNA <- matrix(0,nrow = nr_cdseq, ncol = nc_cdseq*ncell)
# take the max read counts from scRNAseq as the mean
if(!is.null(sc_gep) && is.null(nb_mu) && is.null(nb_size)){nb_mu <- max(Matrix::colSums(sc_gep)); nb_size <- nb_mu^2}
for (i in 1:nc_cdseq) {
nreads <- rnbinom(n = ncell,size = nb_size, mu = nb_mu)
for (j in 1:ncell) {
cdseq_synth_scRNA[,(i-1)*ncell + j] <- rmultinom(1,nreads[j],cdseq_gep[,i])
}
}
#colnames(cdseq_synth_scRNA) <- paste("CDSeq_SynthCell",rep(1:nc_cdseq,each = ncell), rep(1:ncell), sep = ".")
colnames(cdseq_synth_scRNA) <- paste(rep(colnames(cdseq_gep),each = ncell), "CDSeq" ,rep(1:ncell), sep = ".")
rownames(cdseq_synth_scRNA) <- rownames(cdseq_gep)
cdseq_sc_comb <- cbind(sc_gep,cdseq_synth_scRNA)
##################################################################
## Run seurat ##
##################################################################
if(verbose){cat("Calling Seurat pipeline ... \n")}
cdseq_synth_scRNA_seurat <- Seurat::CreateSeuratObject(counts = cdseq_sc_comb, project = "cdseq_synth_scRNAseq")
# filter cells
cdseq_synth_scRNA_seurat <- subset(cdseq_synth_scRNA_seurat, subset = nCount_RNA > seurat_count_threshold )#nFeature_RNA > 20 & nFeature_RNA < 2500)
# normalize
cdseq_synth_scRNA_seurat <- Seurat::NormalizeData(cdseq_synth_scRNA_seurat, normalization.method = seurat_norm_method , scale.factor = seurat_scale_factor, verbose = FALSE)
# select genes
cdseq_synth_scRNA_seurat <- Seurat::FindVariableFeatures(cdseq_synth_scRNA_seurat, selection.method = seurat_select_method, nfeatures = seurat_nfeatures, verbose = FALSE)
cdseq_synth_scRNA_seurat <- Seurat::ScaleData(cdseq_synth_scRNA_seurat,verbose = FALSE)
cdseq_synth_scRNA_seurat <- Seurat::RunPCA(cdseq_synth_scRNA_seurat, npcs = seurat_npcs,verbose = FALSE) #features = VariableFeatures(object = cdseq_synth_scRNA_seurat),
cdseq_synth_scRNA_seurat <- Seurat::FindNeighbors(cdseq_synth_scRNA_seurat, dims = seurat_dims, reduction = seurat_reduction, verbose = FALSE)
cdseq_synth_scRNA_seurat <- Seurat::FindClusters(cdseq_synth_scRNA_seurat, resolution = seurat_resolution, verbose = FALSE)
##################################################################
## use seurat cluster and DE genes ##
## for CDSeq-estimatd cell type annotation ##
##################################################################
# find makers
if(seurat_find_marker){
cdseq_synth_scRNA_seurat_markers <- Seurat::FindAllMarkers(cdseq_synth_scRNA_seurat, min.pct = 0.25, logfc.threshold = seurat_DE_logfc, test.use = seurat_DE_test,verbose = FALSE)
cdseq_synth_scRNA_seurat_markers %>% dplyr::group_by(cluster) %>% dplyr::top_n(n = 2, wt = avg_logFC)
seurat_top_markers <- cdseq_synth_scRNA_seurat_markers %>% dplyr::group_by(cluster) %>% dplyr::top_n(n = seurat_top_n_markers, wt = avg_logFC)
}else{
cdseq_synth_scRNA_seurat_markers <- NULL
seurat_top_markers <- NULL
}
#seurat_top_markers_df <- as.data.frame(cbind(seurat_top_markers, singleCellAnnotation = rep("NA",nrow(seurat_top_markers))))
if(!is.null(sc_annotation)){
##################################################################
## use scRNAseq annotation to ##
## annotate CDSeq estimated cell types ##
##################################################################
if(seurat_find_marker){
cdseq_synth_scRNA_seurat_markers_df <- data.frame(cdseq_synth_scRNA_seurat_markers, singleCellAnnotation = rep("Unknown",nrow(cdseq_synth_scRNA_seurat_markers)),stringsAsFactors = FALSE)
}else{
cdseq_synth_scRNA_seurat_markers_df <- NULL
}
# grep single cell id in seurat output
scRNAseq_cell_gene_name <- cdseq_synth_scRNA_seurat@assays$RNA@counts@Dimnames[[1]]
scRNAseq_cell_id_seurat <- cdseq_synth_scRNA_seurat@assays$RNA@counts@Dimnames[[2]]
# construct gold standard cluster label using sc_annotation
scRNA_seurat_comm_cell <- intersection(list(scRNAseq_cell_id_seurat,sc_annotation$cell_id),order = 'stable')
scRNA_seurat_comm_cell_id <- scRNA_seurat_comm_cell$comm.value
scRNA_seurat_comm_idx <- scRNA_seurat_comm_cell$index[[2]]
seurat_scRNA_comm_idx <- scRNA_seurat_comm_cell$index[[1]]
scRNAseq_seurat_goldstandard_annotation <- sc_annotation$cell_type[scRNA_seurat_comm_idx]
scRNAseq_seurat_goldstandard_annotation_cell_id <- scRNAseq_cell_id_seurat[seurat_scRNA_comm_idx]
if(sum(scRNAseq_seurat_goldstandard_annotation_cell_id == sc_annotation$cell_id[scRNA_seurat_comm_idx]) != length(scRNAseq_seurat_goldstandard_annotation_cell_id)){
stop("Something is wrong when taking intersection between sc_annotation cell id and seurat cell id.")
}
# construct seurat cluster label
# NOTICE THAT [scRNAseq_cell_cluster_label_seurat] and [scRNAseq_seurat_goldstandard_annotation] should be in the same order in term of cell ids
scRNAseq_cell_cluster_label_seurat <- cdseq_synth_scRNA_seurat@meta.data$seurat_clusters[seurat_scRNA_comm_idx]
cdseq_cell_idx <- grep("CDSeq",scRNAseq_cell_id_seurat)
cdseq_cell_seurat_cluster_id <- cdseq_synth_scRNA_seurat@meta.data$seurat_clusters[cdseq_cell_idx]
cdseq_cell_id <- scRNAseq_cell_id_seurat[cdseq_cell_idx]
CDSeq_cell_type_assignment_df <- data.frame(cdseq_cell_id = cdseq_cell_id, seurat_cluster = cdseq_cell_seurat_cluster_id, cdseq_cell_type_assignment = rep("not_assigned",length(cdseq_cell_id)),stringsAsFactors = FALSE)
# compute the cluster score (there is a bug due to the large number of cells )
#seurat_cluster_scores <- cluster_scores(c = as.numeric(scRNAseq_cell_cluster_label_seurat) , k = as.numeric(scRNAseq_seurat_goldstandard_annotation) ,beta = 1)
# assign CDSeq-estimated cell types based on single cell annotation
seurat_unique_clusters <- sort(as.numeric(unique(scRNAseq_cell_cluster_label_seurat)))
seurat_cluster_purity <- rep(0, length(seurat_unique_clusters))
seurat_cluster_gold_label <- rep("a",length(seurat_unique_clusters))
for (i in 1:length(seurat_unique_clusters)) {
seurat_cluster_members <- which(as.numeric(scRNAseq_cell_cluster_label_seurat) == seurat_unique_clusters[i])
seurat_cluster_gold_standard_label <- as.character(scRNAseq_seurat_goldstandard_annotation[seurat_cluster_members])
seurat_cluster_assess <- max_rep(seurat_cluster_gold_standard_label)
seurat_cluster_purity[i] <- seurat_cluster_assess$max_element_proportion
seurat_cluster_gold_label[i] <- seurat_cluster_assess$max_element
# add sc_annotation to DE marker data frame
if(seurat_find_marker){
tmp_idx <- which(as.numeric(cdseq_synth_scRNA_seurat_markers_df$cluster) == seurat_unique_clusters[i])
cdseq_synth_scRNA_seurat_markers_df$singleCellAnnotation[tmp_idx] <- seurat_cluster_gold_label[i]
}
cdseq_tmp_idx <- which(as.numeric(CDSeq_cell_type_assignment_df$seurat_cluster) == seurat_unique_clusters[i])
CDSeq_cell_type_assignment_df$cdseq_cell_type_assignment[cdseq_tmp_idx] <- seurat_cluster_gold_label[i]
}
if(seurat_find_marker){
seurat_top_markers_df <- cdseq_synth_scRNA_seurat_markers_df %>% dplyr::group_by(cluster) %>% dplyr::top_n(n = seurat_top_n_markers, wt = avg_logFC)
}else{
seurat_top_markers_df <- NULL
}
if(!is.null(cdseq_prop)){
# current version assumes each CDSeq-estGEP only generate 1 synthetic single cell. May extend to more general case later
# the gsub function extract patterns between two dots.
#cdseq_est_cell_ids <- as.numeric(unlist(lapply(CDSeq_cell_type_assignment_df$cdseq_cell_id, function(x){gsub("^.+?\\.(.+?)\\..*$", "\\1",x)})))
# the sub function extract pattern before a dot
cdseq_est_cell_ids <- unlist(lapply(CDSeq_cell_type_assignment_df$cdseq_cell_id, function(x){sub("\\..*","",x)}))
if(sum(cdseq_est_cell_ids == rownames(cdseq_prop)) != nr_prop){stop("Please make sure the rownames(cdseq_prop) and colnames(cdseq_gep) are the same and in same order.")}
rownames(cdseq_prop) <- CDSeq_cell_type_assignment_df$cdseq_cell_type_assignment
cdseq_prop_merged <- rowsum(cdseq_prop, group = rownames(cdseq_prop))
}
if(!is.null(cdseq_gep_sample_specific)){
cdseq_gep_sample_specific_merged <- array(numeric(),c(dim(cdseq_gep_sample_specific)[1], dim(cdseq_gep_sample_specific)[2],length(unique(CDSeq_cell_type_assignment_df$cdseq_cell_type_assignment)) ))
if(is.null(dimnames(cdseq_gep_sample_specific))){
if(is.null(cdseq_prop)){stop("Need to provide cdseq_prop or provide dimnames for cdseq_gep_sample_specific.")}
message("dimnames(cdseq_gep_sample_specific) is NULL. cellTypeAssignSCRNA assumes its dimnames are same as: rownames(cdseq_gep), colnames(cdseq_prop), rownames(cdseq_prop)")
dimnames(cdseq_gep_sample_specific)[[1]] <- rownames(cdseq_gep)
dimnames(cdseq_gep_sample_specific)[[2]] <- colnames(cdseq_prop)
dimnames(cdseq_gep_sample_specific)[[3]] <- rownames(cdseq_prop)
for (i in 1:dim(cdseq_gep_sample_specific)[2]) {
cdseq_gep_sample_specific_merged[,i,] <- t(rowsum(t(cdseq_gep_sample_specific[,i,]), group = rownames(t(cdseq_gep_sample_specific[,i,]))))
}
dimnames(cdseq_gep_sample_specific_merged)[[1]] <- dimnames(cdseq_gep_sample_specific)[[1]]
dimnames(cdseq_gep_sample_specific_merged)[[2]] <- dimnames(cdseq_gep_sample_specific)[[2]]
# accorading to rowsum funtion, if reorder = TRUE (default), then the result will be in order of sort(unique(group))
dimnames(cdseq_gep_sample_specific_merged)[[3]] <- sort(unique(dimnames(cdseq_gep_sample_specific)[[3]]))
}else{
cdseq_est_cell_ids <- unlist(lapply(CDSeq_cell_type_assignment_df$cdseq_cell_id, function(x){sub("\\..*","",x)}))
if(sum(cdseq_est_cell_ids == dimnames(cdseq_gep_sample_specific)[[3]]) != nr_prop){stop("Please make sure the dimnames(cdseq_gep_sample_specific)[[3]] and colnames(cdseq_gep) are the same and in same order.")}
dimnames(cdseq_gep_sample_specific)[[3]] <- CDSeq_cell_type_assignment_df$cdseq_cell_type_assignment
for (i in 1:dim(cdseq_gep_sample_specific)[2]) {
cdseq_gep_sample_specific_merged[,i,] <- t(rowsum(t(cdseq_gep_sample_specific[,i,]), group = rownames(t(cdseq_gep_sample_specific[,i,]))))
}
dimnames(cdseq_gep_sample_specific_merged)[[1]] <- dimnames(cdseq_gep_sample_specific)[[1]]
dimnames(cdseq_gep_sample_specific_merged)[[2]] <- dimnames(cdseq_gep_sample_specific)[[2]]
# accorading to rowsum funtion, if reorder = TRUE (default), then the result will be in order of sort(unique(group))
dimnames(cdseq_gep_sample_specific_merged)[[3]] <- sort(unique(dimnames(cdseq_gep_sample_specific)[[3]]))
}
####################################################################
## use cell-type-specific-per-sample read counts for clustering ##
####################################################################
cdseq_gep_sample_specific_mat <- matrix(cdseq_gep_sample_specific,nrow = dim(cdseq_gep_sample_specific)[1],ncol = prod(dim(cdseq_gep_sample_specific)[2:3]))
rownames(cdseq_gep_sample_specific_mat) <- rownames(cdseq_gep)
colnames(cdseq_gep_sample_specific_mat) <- paste0("CDSeq_",
dimnames(cdseq_gep_sample_specific)[[2]],
"_",
rep(dimnames(cdseq_gep_sample_specific)[[3]],each=length(dimnames(cdseq_gep_sample_specific)[[2]])))
cdseq_gep_sample_specific_mat_sample_ids <- rep(dimnames(cdseq_gep_sample_specific)[[2]], each = length(dimnames(cdseq_gep_sample_specific)[[3]]) )
cdseq_persample_sc_comb <- cbind(sc_gep,cdseq_gep_sample_specific_mat)
if(verbose){cat("colnames(cdseq_persample_sc_comb)[ncol(sc_gep) + 1] = ", colnames(cdseq_persample_sc_comb)[ncol(sc_gep) + 1],"\n")}
##################################################################
## Run seurat ##
##################################################################
if(verbose){cat("Calling Seurat pipeline: per-sample-cell-type-specific ... \n")}
cdseq_synth_scRNA_seurat_persample <- Seurat::CreateSeuratObject(counts = cdseq_persample_sc_comb, project = "cdseq_synth_scRNAseq_per_sample")
# filter cells
cdseq_synth_scRNA_seurat_persample <- subset(cdseq_synth_scRNA_seurat_persample, subset = nCount_RNA > seurat_count_threshold,verbose = FALSE )#nFeature_RNA > 20 & nFeature_RNA < 2500)
# normalize
cdseq_synth_scRNA_seurat_persample <- Seurat::NormalizeData(cdseq_synth_scRNA_seurat_persample, normalization.method = seurat_norm_method , scale.factor = seurat_scale_factor,verbose = FALSE)
# select genes
cdseq_synth_scRNA_seurat_persample <- Seurat::FindVariableFeatures(cdseq_synth_scRNA_seurat_persample, selection.method = seurat_select_method, nfeatures = seurat_nfeatures,verbose = FALSE)
cdseq_synth_scRNA_seurat_persample <- Seurat::ScaleData(cdseq_synth_scRNA_seurat_persample,verbose = FALSE)
cdseq_synth_scRNA_seurat_persample <- Seurat::RunPCA(cdseq_synth_scRNA_seurat_persample, npcs = seurat_npcs,verbose = FALSE) #features = VariableFeatures(object = cdseq_synth_scRNA_seurat),
cdseq_synth_scRNA_seurat_persample <- Seurat::FindNeighbors(cdseq_synth_scRNA_seurat_persample, dims = seurat_dims, reduction = seurat_reduction,verbose = FALSE)
cdseq_synth_scRNA_seurat_persample <- Seurat::FindClusters(cdseq_synth_scRNA_seurat_persample, resolution = seurat_resolution,verbose = FALSE)
##################################################################
## CDSeq-estimated per-sample-cell-type-specific annotation ##
##################################################################
# grep single cell id in seurat output
scRNAseq_cell_gene_name <- cdseq_synth_scRNA_seurat_persample@assays$RNA@counts@Dimnames[[1]]
scRNAseq_cell_id_seurat <- cdseq_synth_scRNA_seurat_persample@assays$RNA@counts@Dimnames[[2]]
if(verbose){cat("length(scRNAseq_cell_id_seurat) = ", length(scRNAseq_cell_id_seurat),"\n" )}
# construct gold standard cluster label using sc_annotation
scRNA_seurat_comm_cell <- intersection(list(scRNAseq_cell_id_seurat,sc_annotation$cell_id),order = 'stable')
scRNA_seurat_comm_cell_id <- scRNA_seurat_comm_cell$comm.value
scRNA_seurat_comm_idx <- scRNA_seurat_comm_cell$index[[2]]
seurat_scRNA_comm_idx <- scRNA_seurat_comm_cell$index[[1]]
scRNAseq_seurat_goldstandard_annotation <- sc_annotation$cell_type[scRNA_seurat_comm_idx]
scRNAseq_seurat_goldstandard_annotation_cell_id <- scRNAseq_cell_id_seurat[seurat_scRNA_comm_idx]
if(sum(scRNAseq_seurat_goldstandard_annotation_cell_id == sc_annotation$cell_id[scRNA_seurat_comm_idx]) != length(scRNAseq_seurat_goldstandard_annotation_cell_id)){
stop("Something is wrong when taking intersection between sc_annotation cell id and seurat cell id.")
}
# construct seurat cluster label
# NOTICE THAT [scRNAseq_cell_cluster_label_seurat] and [scRNAseq_seurat_goldstandard_annotation] should be in the same order in term of cell ids
scRNAseq_cell_cluster_label_seurat_persample <- cdseq_synth_scRNA_seurat_persample@meta.data$seurat_clusters[seurat_scRNA_comm_idx]
cdseq_cell_idx <- grep("CDSeq",scRNAseq_cell_id_seurat)
cdseq_cell_seurat_cluster_id <- cdseq_synth_scRNA_seurat_persample@meta.data$seurat_clusters[cdseq_cell_idx]
cdseq_cell_id <- scRNAseq_cell_id_seurat[cdseq_cell_idx]
if(verbose){
cat("length(cdseq_cell_id) = ", length(cdseq_cell_id),
" length(cdseq_gep_sample_specific_mat_sample_ids) = ",length(cdseq_gep_sample_specific_mat_sample_ids),
" length(cdseq_cell_seurat_cluster_id) = ",length(cdseq_cell_seurat_cluster_id),"\n")
}
CDSeq_cell_type_assignment_df_persample <- data.frame(cdseq_cell_id = cdseq_cell_id,
#sample_id = cdseq_gep_sample_specific_mat_sample_ids,
seurat_cluster = cdseq_cell_seurat_cluster_id,
cdseq_cell_type_assignment = rep("not_assigned",length(cdseq_cell_id)),stringsAsFactors = FALSE)
# compute the cluster score (there is a bug due to the large number of cells )
#seurat_cluster_scores <- cluster_scores(c = as.numeric(scRNAseq_cell_cluster_label_seurat) , k = as.numeric(scRNAseq_seurat_goldstandard_annotation) ,beta = 1)
# assign CDSeq-estimated cell types based on single cell annotation
seurat_unique_clusters_persample <- sort(as.numeric(unique(scRNAseq_cell_cluster_label_seurat_persample)))
seurat_cluster_purity <- rep(0, length(seurat_unique_clusters_persample))
seurat_cluster_gold_label_persample <- rep("no_label",length(seurat_unique_clusters_persample))
for (i in 1:length(seurat_unique_clusters_persample)) {
seurat_cluster_members <- which(as.numeric(scRNAseq_cell_cluster_label_seurat_persample) == seurat_unique_clusters_persample[i])
seurat_cluster_gold_standard_label <- as.character(scRNAseq_seurat_goldstandard_annotation[seurat_cluster_members])
seurat_cluster_assess <- max_rep(seurat_cluster_gold_standard_label)
seurat_cluster_purity[i] <- seurat_cluster_assess$max_element_proportion
seurat_cluster_gold_label_persample[i] <- seurat_cluster_assess$max_element
# add sc_annotation to DE marker data frame
#tmp_idx <- which(as.numeric(cdseq_synth_scRNA_seurat_markers_df$cluster) == seurat_unique_clusters[i])
#cdseq_synth_scRNA_seurat_markers_df$singleCellAnnotation[tmp_idx] <- seurat_cluster_gold_label[i]
cdseq_tmp_idx <- which(as.numeric(CDSeq_cell_type_assignment_df_persample$seurat_cluster) == seurat_unique_clusters_persample[i])
CDSeq_cell_type_assignment_df_persample$cdseq_cell_type_assignment[cdseq_tmp_idx] <- seurat_cluster_gold_label_persample[i]
}
# annotation cdseq-estimate proportions
}
}
##################################################################
## UMAP ##
##################################################################
if(plot_umap){
if(verbose){cat("Running UMAP: plot synthetic CDSeq-scRNAseq ... \n")}
#################################################################
## plot synthetic CDSeq-scRNAseq ##
#################################################################
cdseq_synth_scRNA_seurat <- Seurat::RunUMAP(cdseq_synth_scRNA_seurat, dims = seurat_dims,verbose = FALSE)
# plot scRNAseq and cdseq estimates
if(is.null(sc_annotation)){
cluster_label <- paste0("cluster_",cdseq_synth_scRNA_seurat$seurat_clusters)#sub_grp#clusterlouvain$membership
}else{
tmp_label <- as.numeric(cdseq_synth_scRNA_seurat$seurat_clusters)
cluster_label <- rep("a",length(tmp_label))
for (i in 1:length(seurat_unique_clusters)) {
cluster_label[which(tmp_label==seurat_unique_clusters[i])] <- seurat_cluster_gold_label[i]
}
}
cluster_label <- factor(cluster_label, levels = unique(cluster_label))
cdseq_sc_comb_umap <- cdseq_synth_scRNA_seurat@reductions$umap@cell.embeddings
cell_sources <- rownames(cdseq_sc_comb_umap)
umap_tot <- 1:nrow(cdseq_sc_comb_umap)
CDSeq_idx <- grep("CDSeq",rownames(cdseq_sc_comb_umap))
scRNA_idx <- umap_tot[-CDSeq_idx]
cell_sources[CDSeq_idx] <- "CDSeq"
cell_sources[scRNA_idx] <- "scRNA"
cell_sources <- factor(cell_sources,levels = c('scRNA','CDSeq') )
#cell_sources <- factor(c(rep('scRNA',length(scRNA_idx)), rep('CDSeq',length(CDSeq_idx))), levels = c('scRNA','CDSeq'))
cdseq_sc_comb_umap_df <- data.frame(V1 = cdseq_sc_comb_umap[,1], V2 = cdseq_sc_comb_umap[,2], cell_sources= cell_sources , cluster_label = cluster_label)
point_stroke <- c(rep(0,length(scRNA_idx)), rep(2,length(CDSeq_idx)))
#cat("nrow(cdseq_sc_comb_umap_df) = ", nrow(cdseq_sc_comb_umap_df), "length(point_stroke) = ", length(point_stroke),"...\n")
cdseq_scRNA_umap<- ggplot(cdseq_sc_comb_umap_df,aes(x=.data$V1, y=.data$V2, colour=as.factor(cell_sources), size=as.factor(cell_sources), shape=as.factor(cell_sources), fill=as.factor(cluster_label), stroke=point_stroke)) +
ggtitle(paste0('CDSeq estimated cell types and scRNAseq')) +
xlab("UMAP 1") + ylab("UMAP 2") +
geom_point() + # this is the edge color
scale_colour_manual(name="cell source",values=c("gray","black"),na.value = NA,label=levels(cell_sources)) +
scale_fill_manual(name="clusters",values=c(rainbow(length(unique(cluster_label)))),na.value = NA,label=levels(cluster_label)) +
scale_size_manual(name="cell source", values = c(sc_pt_size,cdseq_pt_size), na.value = NA,label=levels(cell_sources)) +
scale_shape_manual(name="cell source", values = c(21,23), na.value = NA,label=levels(cell_sources)) +
theme(plot.title = element_text(size = 45, face = "bold"),
axis.title.x = element_text(color="black", size=35,face="bold"),
axis.title.y = element_text(color="black", size=35,face="bold"),
legend.title = element_text(color = "blue", face = "bold", size = 30),
legend.text = element_text(color = "blue", face = "bold", size = 30)) +
guides(color = guide_legend(override.aes = list(size=5)), fill = guide_legend(override.aes = list(shape=21,size=5)))
if(fig_save){
fig_tmp_name <- paste0(fig_path,fig_name,"_umap_",gsub("-|\\s|:","_",Sys.time()),".",fig_format)
if(verbose){cat("save umap at ",fig_tmp_name ,"...\n")}
ggsave(filename = fig_tmp_name,#paste0(fig_path,fig_name,"_umap.",fig_format),
plot = cdseq_scRNA_umap,
width = 25,
height = 20,
dpi = fig_dpi)
}
##################################################################
## plot CDSeq-estimated cell-type-specific gep per sample ##
##################################################################
if(plot_per_sample){
warning("plot_per_sample option is not working properly. I'm working on it.")
if(verbose){cat("Running UMAP: plot CDSeq-estimated cell-type-specific gep per sample ... \n")}
cdseq_synth_scRNA_seurat_persample <- Seurat::RunUMAP(cdseq_synth_scRNA_seurat_persample, dims = seurat_dims,verbose = FALSE)
# plot scRNAseq and cdseq estimates
if(is.null(sc_annotation)){
cluster_label <- paste0("cluster_",cdseq_synth_scRNA_seurat_persample$seurat_clusters)#sub_grp#clusterlouvain$membership
}else{
tmp_label <- as.numeric(cdseq_synth_scRNA_seurat_persample$seurat_clusters)
cluster_label <- rep("no_label",length(tmp_label))
for (i in 1:length(seurat_unique_clusters_persample)) {
cluster_label[which(tmp_label==seurat_unique_clusters_persample[i])] <- seurat_cluster_gold_label_persample[i]
}
}
cluster_label <- factor(cluster_label, levels = unique(cluster_label))
cdseq_sc_comb_umap <- cdseq_synth_scRNA_seurat_persample@reductions$umap@cell.embeddings
cell_sources <- rownames(cdseq_sc_comb_umap)
umap_tot <- 1:nrow(cdseq_sc_comb_umap)
CDSeq_idx <- grep("CDSeq",rownames(cdseq_sc_comb_umap))
scRNA_idx <- umap_tot[-CDSeq_idx]
if(verbose){
cat("length(CDSeq_idx) = ", length(CDSeq_idx),
" length(scRNA_idx) = ", length(scRNA_idx),
" length(cell_sources) = ",length(cell_sources),
" length(cluster_label) = ",length(cluster_label),
" nrow(cdseq_sc_comb_umap) = ",nrow(cdseq_sc_comb_umap),
" ncol(cdseq_sc_comb_umap) = ",ncol(cdseq_sc_comb_umap),"\n")
}
cell_sources[CDSeq_idx] <- "CDSeq"
cell_sources[scRNA_idx] <- "scRNA"
cell_sources <- factor(cell_sources,levels = c('scRNA','CDSeq') )
#cell_sources <- factor(c(rep('scRNA',length(scRNA_idx)), rep('CDSeq',length(CDSeq_idx))), levels = c('scRNA','CDSeq'))
cdseq_sc_comb_umap_per_sample_df <- data.frame(V1 = cdseq_sc_comb_umap[,1], V2 = cdseq_sc_comb_umap[,2], cell_sources= cell_sources , cluster_label = cluster_label)
point_stroke <- c(rep(0,length(scRNA_idx)), rep(2,length(CDSeq_idx)))
if(verbose){
cat("nrow(cdseq_sc_comb_umap_per_sample_df) = ", nrow(cdseq_sc_comb_umap_per_sample_df), "length(point_stroke) = ", length(point_stroke),"...\n")
}
cdseq_scRNA_umap_per_sample<- ggplot(cdseq_sc_comb_umap_per_sample_df,aes(x=.data$V1, y=.data$V2, colour=as.factor(cell_sources), size=as.factor(cell_sources), shape=as.factor(cell_sources), fill=as.factor(cluster_label), stroke=point_stroke)) +
ggtitle(paste0('CDSeq estimated per-sample-cell-type-specific GEP and scRNAseq')) +
xlab("UMAP 1") + ylab("UMAP 2") +
geom_point() + # this is the edge color
scale_colour_manual(name="cell source",values=c("gray","black"),na.value = NA,label=levels(cell_sources)) +
scale_fill_manual(name="clusters",values=c(rainbow(length(unique(cluster_label)))),na.value = NA,label=levels(cluster_label)) +
scale_size_manual(name="cell source", values = c(sc_pt_size,cdseq_pt_size), na.value = NA,label=levels(cell_sources)) +
scale_shape_manual(name="cell source", values = c(21,23), na.value = NA,label=levels(cell_sources)) +
theme(plot.title = element_text(size = 45, face = "bold"),
axis.title.x = element_text(color="black", size=35,face="bold"),
axis.title.y = element_text(color="black", size=35,face="bold"),
legend.title = element_text(color = "blue", face = "bold", size = 30),
legend.text = element_text(color = "blue", face = "bold", size = 30)) +
guides(color = guide_legend(override.aes = list(size=5)), fill = guide_legend(override.aes = list(shape=21,size=5)))
if(fig_save){
fig_tmp_name <- paste0(fig_path,fig_name,"_umap_per_sample_cts_",gsub("-|\\s|:","_",Sys.time()),".",fig_format)
if(verbose){cat("save umap at ",fig_tmp_name ,"...\n")}
ggsave(filename = fig_tmp_name,#paste0(fig_path,fig_name,"_umap.",fig_format),
plot = cdseq_scRNA_umap_per_sample,
width = 25,
height = 20,
dpi = fig_dpi)
}
}
}
##################################################################
## TSNE ##
##################################################################
if(plot_tsne){
if(verbose){cat("Running TSNE: synthetic CDSeq-estimated cell types ... \n")}
cdseq_synth_scRNA_seurat <- Seurat::RunTSNE(cdseq_synth_scRNA_seurat, dims = seurat_dims, check_duplicates = FALSE, verbose = FALSE)
# plot scRNAseq and cdseq estimates
if(is.null(sc_annotation)){
cluster_label <- paste0("cluster_",cdseq_synth_scRNA_seurat$seurat_clusters)#sub_grp#clusterlouvain$membership
}else{
tmp_label <- as.numeric(cdseq_synth_scRNA_seurat$seurat_clusters)
cluster_label <- rep("a",length(tmp_label))
for (i in 1:length(seurat_unique_clusters)) {
cluster_label[which(tmp_label==seurat_unique_clusters[i])] <- seurat_cluster_gold_label[i]
}
}
cluster_label <- factor(cluster_label, levels = unique(cluster_label))
cdseq_sc_comb_tsne <- cdseq_synth_scRNA_seurat@reductions$tsne@cell.embeddings
cell_sources <- rownames(cdseq_sc_comb_tsne)
tsne_tot <- 1:nrow(cdseq_sc_comb_tsne)
CDSeq_idx <- grep("CDSeq",rownames(cdseq_sc_comb_tsne))
scRNA_idx <- tsne_tot[-CDSeq_idx]
cell_sources[CDSeq_idx] <- "CDSeq"
cell_sources[scRNA_idx] <- "scRNA"
cell_sources <- factor(cell_sources,levels = c('scRNA','CDSeq') )
cdseq_sc_comb_tsne_df <- data.frame(V1 = cdseq_sc_comb_tsne[,1], V2 = cdseq_sc_comb_tsne[,2], cell_sources= cell_sources , cluster_label = cluster_label)
point_stroke <- c(rep(0,length(scRNA_idx)), rep(2,length(CDSeq_idx)))
cdseq_scRNA_tsne<- ggplot(cdseq_sc_comb_tsne_df,aes(x=.data$V1, y=.data$V2, colour=as.factor(cell_sources), size=as.factor(cell_sources), shape=as.factor(cell_sources), fill=as.factor(cluster_label), stroke=point_stroke)) +
ggtitle(paste0('CDSeq estimated cell types and scRNAseq')) +
xlab("TSNE 1") + ylab("TSNE 2") +
geom_point() + # this is the edge color
scale_colour_manual(name="cell source",values=c("gray","black"),na.value = NA,label=levels(cell_sources)) +
scale_fill_manual(name="clusters",values=c(rainbow(length(unique(cluster_label)))),na.value = NA,label=levels(cluster_label)) +
scale_size_manual(name="cell source", values = c(sc_pt_size,cdseq_pt_size), na.value = NA,label=levels(cell_sources)) +
scale_shape_manual(name="cell source", values = c(21,23), na.value = NA,label=levels(cell_sources)) +
theme(plot.title = element_text(size = 45, face = "bold"),
axis.title.x = element_text(color="black", size=35,face="bold"),
axis.title.y = element_text(color="black", size=35,face="bold"),
legend.title = element_text(color = "blue", face = "bold", size = 30),
legend.text = element_text(color = "blue", face = "bold", size = 30)) +
guides(color = guide_legend(override.aes = list(size=5)), fill = guide_legend(override.aes = list(shape=21,size=5)))
if(fig_save){
fig_tmp_name <- paste0(fig_path,fig_name,"_tsne_",gsub("-|\\s|:","_",Sys.time()),".",fig_format)
if(verbose){cat("save umap at ", fig_tmp_name ,"...\n")}
ggsave(filename = fig_tmp_name,#paste0(fig_path,fig_name,"_tsne.",fig_format),
plot = cdseq_scRNA_tsne,
width = 25,
height = 20,
dpi = fig_dpi)
}
##################################################################
## plot CDSeq-estimated cell-type-specific gep per sample ##
##################################################################
if(plot_per_sample){
warning("plot_per_sample option is not working properly. I'm working on it.")
if(verbose){cat("Running TSNE: per sample CDSeq-estimated cell types ... \n")}
cdseq_synth_scRNA_seurat_persample <- Seurat::RunTSNE(cdseq_synth_scRNA_seurat_persample, dims = seurat_dims, check_duplicates = FALSE,verbose = FALSE)
# plot scRNAseq and cdseq estimates
if(is.null(sc_annotation)){
cluster_label <- paste0("cluster_",cdseq_synth_scRNA_seurat_persample$seurat_clusters)#sub_grp#clusterlouvain$membership
}else{
tmp_label <- as.numeric(cdseq_synth_scRNA_seurat_persample$seurat_clusters)
cluster_label <- rep("a",length(tmp_label))
for (i in 1:length(seurat_unique_clusters_persample)) {
cluster_label[which(tmp_label==seurat_unique_clusters_persample[i])] <- seurat_cluster_gold_label_persample[i]
}
}
cluster_label <- factor(cluster_label, levels = unique(cluster_label))
cdseq_sc_comb_tsne <- cdseq_synth_scRNA_seurat_persample@reductions$tsne@cell.embeddings
cell_sources <- rownames(cdseq_sc_comb_tsne)
tsne_tot <- 1:nrow(cdseq_sc_comb_tsne)
CDSeq_idx <- grep("CDSeq",rownames(cdseq_sc_comb_tsne))
scRNA_idx <- tsne_tot[-CDSeq_idx]
cell_sources[CDSeq_idx] <- "CDSeq"
cell_sources[scRNA_idx] <- "scRNA"
cell_sources <- factor(cell_sources,levels = c('scRNA','CDSeq') )
cdseq_sc_comb_tsne_df_persample <- data.frame(V1 = cdseq_sc_comb_tsne[,1], V2 = cdseq_sc_comb_tsne[,2], cell_sources= cell_sources , cluster_label = cluster_label)
point_stroke <- c(rep(0,length(scRNA_idx)), rep(2,length(CDSeq_idx)))
cdseq_scRNA_tsne_persample <- ggplot(cdseq_sc_comb_tsne_df_persample,aes(x=.data$V1, y=.data$V2, colour=as.factor(cell_sources), size=as.factor(cell_sources), shape=as.factor(cell_sources), fill=as.factor(cluster_label), stroke=point_stroke)) +
ggtitle(paste0('CDSeq estimated cell types and scRNAseq')) +
xlab("TSNE 1") + ylab("TSNE 2") +
geom_point() + # this is the edge color
scale_colour_manual(name="cell source",values=c("gray","black"),na.value = NA,label=levels(cell_sources)) +
scale_fill_manual(name="clusters",values=c(rainbow(length(unique(cluster_label)))),na.value = NA,label=levels(cluster_label)) +
scale_size_manual(name="cell source", values = c(sc_pt_size,cdseq_pt_size), na.value = NA,label=levels(cell_sources)) +
scale_shape_manual(name="cell source", values = c(21,23), na.value = NA,label=levels(cell_sources)) +
theme(plot.title = element_text(size = 45, face = "bold"),
axis.title.x = element_text(color="black", size=35,face="bold"),
axis.title.y = element_text(color="black", size=35,face="bold"),
legend.title = element_text(color = "blue", face = "bold", size = 30),
legend.text = element_text(color = "blue", face = "bold", size = 30)) +
guides(color = guide_legend(override.aes = list(size=5)), fill = guide_legend(override.aes = list(shape=21,size=5)))
if(fig_save){
fig_tmp_name <- paste0(fig_path,fig_name,"_tsne_per_sample_cts_",gsub("-|\\s|:","_",Sys.time()),".",fig_format)
if(verbose){cat("save umap at ", fig_tmp_name ,"...\n")}
ggsave(filename = fig_tmp_name,#paste0(fig_path,fig_name,"_tsne.",fig_format),
plot = cdseq_scRNA_tsne_persample,
width = 25,
height = 20,
dpi = fig_dpi)
}
}
}
input_list <- list(cdseq_gep = cdseq_gep,
cdseq_prop = cdseq_prop,
cdseq_gep_sample_specific = cdseq_gep_sample_specific,
sc_gep = sc_gep,
sc_annotation = sc_annotation,
nb_size = nb_size,
nb_mu = nb_mu,
seurat_count_threshold = seurat_count_threshold,
seurat_scale_factor = seurat_scale_factor,
seurat_norm_method = seurat_norm_method,
seurat_select_method = seurat_select_method,
seurat_nfeatures = seurat_nfeatures,
seurat_npcs = seurat_npcs,
seurat_dims = seurat_dims,
seurat_reduction = seurat_reduction,
seurat_resolution = seurat_resolution,
seurat_DE_test = seurat_DE_test,
seurat_DE_logfc = seurat_DE_logfc,
seurat_top_n_markers = seurat_top_n_markers,
plot_umap = plot_umap,
plot_tsne = plot_tsne,
fig_path = fig_path,
fig_name = fig_name,
fig_format = fig_format,
fig_dpi = fig_dpi)
output <- list()
output$fig_path <- fig_path
output$fig_name <- fig_name
output$cdseq_synth_scRNA <- cdseq_synth_scRNA
if(plot_umap){output$cdseq_scRNA_umap <- cdseq_scRNA_umap}
if(plot_tsne){output$cdseq_scRNA_tsne <- cdseq_scRNA_tsne}
output$cdseq_synth_scRNA_seurat <- cdseq_synth_scRNA_seurat
if(!is.null(sc_annotation)){
output$seurat_cluster_purity <- seurat_cluster_purity
output$seurat_cluster_gold_label <- seurat_cluster_gold_label
output$seurat_unique_clusters <- seurat_unique_clusters
output$seurat_markers <- cdseq_synth_scRNA_seurat_markers_df
output$CDSeq_cell_type_assignment_df <- CDSeq_cell_type_assignment_df
}else{
output$seurat_markers <- cdseq_synth_scRNA_seurat_markers
}
if(!is.null(cdseq_prop)){output$cdseq_prop_merged <- cdseq_prop_merged}
if(!is.null(cdseq_gep_sample_specific)){output$cdseq_gep_sample_specific_merged <- cdseq_gep_sample_specific_merged}
output$seurat_top_markers <- seurat_top_markers_df
output$input_list <- input_list
if(plot_umap){output$cdseq_sc_comb_umap_df <- cdseq_sc_comb_umap_df}
if(plot_tsne){output$cdseq_sc_comb_tsne_df <- cdseq_sc_comb_tsne_df}
return(output)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.