# Methods for the SCsimSet class
# Constructor function =========================================================
#' Create a new SCsimSet object.
#'
#' Based on input parameters, it simulates a scRNA-seq dataset (stage by stage):
#' -> basal gene expression
#' -> cell-to-cell biais (~ batch effect + library size)
#'
#'
#' -> FC (DEG)
#' -> effective means (genes mean expression per cell type)
#' -> basal gene counts
#' -> dropout table
#' -> effective counts (with dropout noise)
#'
#' Sample info
#'@param nGenes Scalar of class \code{"numeric"}, providing the number of genes
#' in our simulated dataset.
#'@param nCells Scalar of class \code{"numeric"}, providing the number of cells
#' in our simulated dataset.
#'@param nPop Scalar of class \code{"numeric"}, providing the number of cell
#' population in our simulated dataset.
#'@param pPop Scalar of class \code{"numeric"}, providing the proportion of
#' each cell population in our dataset (percentage of cells per population).
#'@param seed Scalar of class \code{"numeric"}, providing the random number
#' generator (RNG) state for random number generation.
#'
#' Basal expression
#'@param baseDistr **optional** Character string (one of "gamma",
#' "negative binomial") indicating which distribution to use to generate gene
#' expression base means.
#'@param baseFstParam **optional** Scalar of class \code{"numeric"}, providing
#' the 1st distribution parameter. ("gamma" : shape, "negative binomial" : size)
#'@param baseSndParam **optional** Scalar of class \code{"numeric"}, providing
#' the 2nd distribution parameter. ("gamma" : rate, "negative binomial" : mean)
#'
#'@param nbBatch **optional** Scalar of class \code{"numeric"}, providing the
#' number of batch in the dataset (max. a batch every 100 cells).
#'@param cellsPerBatch **optional** Vector of class \code{"numeric"}, providing
#' the number of cells per batch.
#'@param batchEffect **optional** Vector of class \code{"numeric"}, providing
#' the mean shift in library size for each batch.
#'
#'
#' Cell-to-cell biais
#'@param cellDistr **optional** Character string (one of "gamma",
#' "negative binomial", "normal", or "uniform") indicating which distribution to
#' use.
#'@param cellFstParam Scalar of class \code{"numeric"}, providing the 1st
#' distribution parameter. ("gamma" : shape, "negative binomial" : size,
#' "normal" : mean, "uniform" : lower limit)
#'@param cellSndParam Scalar of class \code{"numeric"}, providing the 2nd
#' distribution parameter. ("gamma" : rate, "negative binomial" : mean,
#' "normal" : standard deviation, "uniform" : higher limit).
#'
#' @return a new SCsimSet object containing:
#' \describe{
#' \item{\code{sampleInfo}:}{Named list which contains sample information:
#' total number of genes (nGenes), total number of cells (nCells), total
#' number of cell population (nCells) and relative size / proportion of cell
#' populations (pPop).}
#' \item{\code{baseMeans}:}{Object of class \code{"numeric"}, which contains
#' a vector with genes mean basal expression.}
#' \item{\code{cellBiais}:}{Object of class \code{"DistrSet"}, which contains
#' distribution parameters of cell-to-cell biais (capture efficiency, batch,
#' library size...)}
#' \item{\code{FcDeg}:}{Object of class \code{"FCSet"}}, which contains DEG
#' information and FC estimates.
#' \item{\code{countDispersion}:}{Scalar of class \code{"numeric"}, providing
#' NB distribution dispersion parameter.}
#' \item{\code{dropoutPct}:}{Scalar of class \code{"numeric"}, providing the
#' sample mean dropout percentage.}
#' \item{\code{dropout}:}{Data Frame which indicates dropout position in
#' the count matrix.}
#' \item{\code{effectiveMeans}:}{Data Frame of the dataset effective means
#' (base means + cell biais + FC).}
#' \item{\code{baseCounts}:}{Data Frame of the dataset base counts (effective
#' means + ND estimates).}
#' \item{\code{effectiveCounts}:}{Data Frame of the dataset effective counts
#' (baseCounts + dropout).}
#'}
#'
#' @details
#' Parameters set up
#'
#' Basal expression parameters example range of value
#' Gamma distribution: shape = [1.7 : 3], rate = [0.5 : 1.5]
#' Negative Binomial distribution: size = [2 : 3], mu = [2 : 4] # low expression
#'
#' Batch effect : number of batch, maximum 1 batch per 100 cells
#'
#'
#'
#' Based on :
#' - the number of genes (nGenes)
#' - the number of cells (nCells)
#' - the number of cell populations (nPop)
#' - the proportion of each cell population (pPop)
#'
#' the construction of the SCsimSet object will generate :
#' -> a descriptive table of the dataset: sampleInfo (list);
#'
#'
#' Based on :
#' - basal expression distribution parameters (baseDistr, baseFstParam,
#' baseSndParam)
#' - cell-to-cell biais distribution parameters (cellDistr, cellFstParam,
#' cellSndParam)
#' - the number of genes (nGenes)
#' - the number of cells (nCells)
#' the construction of the SCsimSet object will generate:
#' -> baseMeans (DistrSet object)
#' -> cellBiais (DistrSet object)
#'
#'
#' Based on :
#' - DEG parameters
#' the construction of the SCsimSet object will generate a FCSet object and
#' effectiveMeans (dataframe)
#'
#'
#' Based on the final dispersion parameter, the construction of the SCsimSet
#' object will generate baseCounts (dataframe)
#'
#'
#' Based on the dropout percentage parameter, the construction of the SCsimSet
#' object will generate:
#' -> dropout (dataframe)
#' -> effective counts
#'
#' @export
#' @examples
#'
newSCsimSet <- function(nGenes, nCells, nPop, pPop, seed = 75,
distribution = "gamma", fstParam = 2, sndParam = 0.75,
nbBatch, cellsPerBatch, batchEffect,
pDEG, pDE, pDM, pDP, pDC, pUp, pDown,
cellMixedDM, mixDM,
cellMixedDP, mixDP, popMixDP,
trajectory, doublet = 2,
distrUpFc = "medium", distrDownFc = "medium",
dropoutPct) {
# Check =====================================================================
## Checking the validity of numerical variable
for (dataname in c("nGenes", "nCells", "nPop", "pPop", "seed")) {
eData <- get(dataname)
if (length(eData) == 0) {
stop(paste0("argument ", dataname, " must be of length > 0"))
} else if (!class(eData) %in% c("numeric", "integer")) {
stop(paste0("argument ", dataname, " must be numeric"))
} else if (any(eData <= 0)){
stop(paste0("argument ", dataname, "must be > 0"))
}
}
## Checking the validity of number of element (n) variable
for (dataname in c("nGenes", "nCells", "nPop", "seed")) {
eData <- get(dataname)
if (eData %% 1 != 0) {
stop(paste0("argument ", dataname, " must be a whole number (integer)"))
}
}
## Check validity of nGenes
if (nGenes < 1000) {
stop("Simulated dataset must contain at least 1000 genes")
}
## Check validity of nCells
if (nCells < 100) {
stop("Simulated dataset must contain at least 100 cells")
}
## Check validity of nPop
if (nPop == 1) {
message("! There is only one cell population in the simulated dataset.")
pPop = 1
} else {
## Check validity of pPop
if (length(pPop) != nPop) {
if (length(pPop) == 1) {
message(paste0("! All cell population will have the same size: ",
round(100/nPop,2), " %"))
pPop = rep(round(100/nPop,2), nPop - 1)
pPop = c(pPop, 100-sum(pPop))
}
} else if (sum(pPop) != 100) {
stop("sum of popProp must be equal to 1")
}
}
# Simulation ================================================================
# Sample Info
sampleInfo <- list(nGenes = nGenes,
nCells = nCells,
nPop = nPop,
pPop = pPop,
baseMean = c(distribution, fstParam, sndParam))
# Basal gene expression
message("-> Compute basal gene expression")
baseMeans <- computeBaseMean(distribution, fstParam, sndParam,
size = nGenes, seed)
# Batch Effect
if (!is.null(nbBatch) & nbBatch > 1){
message("-> Compute batch effect")
batch <- computeBatch(nbBatch, cellsPerBatch, batchEffect, nCells, seed)
}
# Library Size
message("-> Compute cell biais (library size variation)")
libSize <- computeLibrarySize(nPop, pPop, nCells, seed)
# Differentially expressed genes
message("-> Compute DEG")
DEG_table <- describeDEG(nGenes, nCells, nPop, pPop,
pDEG, pDE, pDP, pDM, pDC,
pUp, pDown)
cell_table <- describeCells(nCells, nPop, pPop, seed,
cellMixedDP, cellMixedDM,
popMixDP, mixDP, mixDM,
trajectory, doublet, DEG_table)
FC_table <- computeFC(DEG_table, seed, trajectory,
popMixDP, distrUpFc, distrDownFc)
# Dataset Construction ======================================================
# Effective Means
message("-> Compute genes effective mean expression")
effectiveMeans <- replicate(nCells, baseMeans)
# Apply Batch effect
if (!is.null(nbBatch) & nbBatch > 1){
for (b in 1:nbBatch){
effectiveMeans[ , unlist(batch[[1]][b])] <-
t(t(effectiveMeans[ , unlist(batch[[1]][b])]) *
as.vector(unlist(batch[[2]][b])))
}
}
# Apply Library Size
effectiveMeans <- t(t(effectiveMeans) * as.vector(unlist(libSize)))
colnames(effectiveMeans) <- colnames(cell_table)
# Apply DEG
for (g in 1:nrow(effectiveMeans)) {
for (p in 1:nPop) {
if (DEG_table[g, "type"] == "DC"){
tmpCellIndex <- colnames(cell_table[1, cell_table[1, ] == p])
} else {
tmpCellIndex <- colnames(cell_table[g + 1, cell_table[g + 1, ] == p])
}
effectiveMeans[g, tmpCellIndex] <-
(effectiveMeans[g, tmpCellIndex] * (2^FC_table[g, p]))
}
}
rownames(effectiveMeans) <- rownames(FC_table)
# Apply doublet
doubletIndex <- colnames(cell_table[1, !cell_table[1, ] %in% 1:nPop] )
for (i in doubletIndex) {
effectiveMeans[ ,i] <- rowMeans(effectiveMeans[,
unlist(strsplit(cell_table[1, i], "_"))])
}
# Basal counts
message("-> Compute genes basal counts ")
set.seed(seed)
baseCounts <- matrix(rnbinom(nGenes*nCells, size = 50,
mu = effectiveMeans), ncol = nCells)
colnames(baseCounts) <- colnames(effectiveMeans)
rownames(baseCounts) <- rownames(effectiveMeans)
# Dropout ===================================================================
message("-> Add dropout events")
dropout <- matrix(1, ncol = ncol(baseCounts), nrow = nrow(baseCounts))
colnames(dropout) <- colnames(baseCounts)
rownames(dropout) <- rownames(baseCounts)
effectiveCounts <- baseCounts
isExpr_mat = baseCounts == 0
pctDropout_genes <- colSums(isExpr_mat) / nrow(isExpr_mat)
pctDropout_cells <- rowSums(isExpr_mat) / ncol(isExpr_mat)
set.seed(seed)
if(median(pctDropout_genes) < (dropoutPct/100)) {
# rank cells by total counts / library size
sumCounts <- colSums(baseCounts)
sumCounts <- sort(sumCounts, decreasing = T)
# Assign dropout percentage
cellsDropout <- sort(rnorm(length(sumCounts),
mean = dropoutPct/100, sd = 0.05))
names(cellsDropout) <- names(sumCounts)
# Estimate the number of genes to dropout
nbGenesDropout <- ceiling(cellsDropout * nGenes)
# measure genes mean expression
genesExpression <- rowMeans(baseCounts)
names(genesExpression) <- rownames(baseCounts)
genesExpression <- sort(genesExpression)
# Assign dropout probability
# dropoutProba <- (1/(pi * (1 + x^2))) * (1/max(1/(pi * (1 + x^2))))
# dropoutProba <- 1 * exp(-1 * log2(genesExpression+1))
lambda <- 0.4
dropoutProba <- abs(((lambda * exp(-lambda * genesExpression)) /
(max(lambda * exp(-lambda * genesExpression)) + 0.025)))
for (i in 1:length(dropoutProba)) {
rdm <- runif(1, min = 0, max = 1)
if (rdm < 0.2) {
dropoutProba[[i]] <- dropoutProba[[i]] + runif(1, min = 0, max = 0.2)
} else if (rdm > 0.8) {
dropoutProba[[i]] <- dropoutProba[[i]] - runif(1, min = 0, max = 0.1)
}
if (dropoutProba[[i]] >= 1) {
dropoutProba[[i]] <- 1 - (dropoutProba[[i]] - 1)
} else if (dropoutProba[[i]] < 0) {
dropoutProba[[i]] <- abs(dropoutProba[[i]])
}
}
# plot(log2(genesExpression+1), dropoutProba)
names(dropoutProba) <- names(genesExpression)
pb <- txtProgressBar(min = 0, max = nCells, style = 3)
for (i in colnames(baseCounts)) {
nbZero <- sum(baseCounts[,i] == 0)
rdmGenes <- sample(names(dropoutProba), length(dropoutProba))
g <- 1
while(nbZero < nbGenesDropout[[i]]) {
rdm <- runif(1, min = 0, max = 1)
if (g > length(rdmGenes)) {
g <- 1
rdmGenes <- sample(rdmGenes, length(rdmGenes))
}
rdmGene <- rdmGenes[g]
if (rdm < dropoutProba[[rdmGene]]) {
dropout[rdmGene, i] <- 0
effectiveCounts[rdmGene, i] <- 0
nbZero <- sum(effectiveCounts[, i] == 0)
rdmGenes <- rdmGenes[rdmGenes != rdmGene]
}
g <- g + 1
}
setTxtProgressBar(pb, grep(i, colnames(baseCounts))[1])
}
close(pb)
}
isExpr_mat = effectiveCounts == 0
pctDropout_genes <- colSums(isExpr_mat) / nrow(isExpr_mat)
pctDropout_cells <- rowSums(isExpr_mat) / ncol(isExpr_mat)
dropoutPct <- median(pctDropout_genes)
# Object construction =======================================================
message("\n -- Single Cell dataset simulation completed --")
# Library size dataframe
cellPopId <- c()
for (i in 1:nPop){
cellPopId <- c(cellPopId, rep(i, length(libSize[[i]])))
}
lib_df <- data.frame(population = as.integer(cellPopId),
librarySize = unlist(libSize))
# Batch dataFrame
if (!is.null(nbBatch) & nbBatch > 1){
batchPop <- c()
for (i in 1:nbBatch){
batchPop <- c(batchPop, rep(i, length(unlist(batch[[1]][i]))))
}
batch_table <- as.data.frame(cbind(batchCell = unlist(batch[[1]]),
batchId = batchPop,
batchValue = unlist(batch[[2]])))
batch_table <- batch_table[with(batch_table, order(batchCell)), ]
batch_table <- cbind(population = as.integer(cellPopId),
batch_table)
cell_table <- rbind(batch = batch_table$batchId, cell_table)
} else {
batch_table <- data.frame()
}
# DEG dataframes
DEG_df <- cbind(DEG_table[,2:4], FC_table)
rownames(DEG_df) <- DEG_table$geneLabel
##### Generate new SCsimSet object #####
scsimset <- new("SCsimSet",
sampleInfo = sampleInfo,
batch_table = batch_table,
lib_df = lib_df,
DEG_df = DEG_df,
cell_table = cell_table,
dropout = dropout,
baseMeans = baseMeans,
effectiveMeans = effectiveMeans,
baseCounts = baseCounts,
effectiveCounts = effectiveCounts)
scsimset
}
# Graphic functions ===========================================================
setMethod("plotDatasetInfo", "SCsimSet",
function(object) {
tmpTibble <- tibble::tibble(pPop = object@sampleInfo$pPop,
nPop = as.factor(1:object@sampleInfo$nPop))
gene <- paste0(" ", object@sampleInfo$nGenes, " Genes ")
cell <- paste0(" ", object@sampleInfo$nCells, " Cells ")
ggplot2::ggplot(tmpTibble, ggplot2::aes(x="", y=pPop)) +
ggplot2::geom_bar(ggplot2::aes(fill = nPop), stat = "identity") +
ggplot2::scale_y_continuous(labels = function(x){ paste0(x, "%") }) +
ggplot2::coord_flip() +
ggplot2::ggtitle(bquote(" " %<-% .(cell) %->% " ")) +
ggplot2::xlab(bquote(" " %<-% .(gene) %->% " ")) +
ggplot2::labs(y = "") +
ggplot2::scale_fill_discrete(name="Cell \nPopulation") +
# ggplot2::theme_bw() +
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5, size = 13),
axis.text.y = ggplot2::element_blank(),
axis.title.y = ggplot2::element_text(size = 12),
axis.ticks.y = ggplot2::element_blank(),
axis.title.x = ggplot2::element_text( size = 13),
axis.text.x = ggplot2::element_text(size = 12),
legend.background = ggplot2::element_rect(fill = "transparent",
colour = "grey20"),
legend.position = "bottom",
plot.margin = grid::unit(c(1.5,1.5,1.5,1.5), "lines"),
plot.background = ggplot2::element_rect(fill = "transparent"),
panel.background = ggplot2::element_rect(fill = "transparent"),
panel.border = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank())
}
)
setMethod("plotBaseMean", "SCsimSet",
function(object) {
tmpTibble <- tibble::tibble(simulated = object@baseMeans)
bin <- (max(tmpTibble$simulated) - min(tmpTibble$simulated))/100
ggplot2::ggplot(tmpTibble, ggplot2::aes(x=simulated)) +
ggplot2::geom_histogram(ggplot2::aes(y = ..density..),
colour = "deepskyblue3", fill = "deepskyblue1",
alpha=1/3, binwidth = bin) +
ggplot2::geom_line(ggplot2::aes(x=simulated), stat = 'density',
colour = "brown4", size = 0.7) +
ggplot2::annotate("text", y= Inf, x = Inf, hjust = 1, vjust = 1.5,
label = paste0("Distribution = ",
object@sampleInfo$baseMean[1]),
colour="grey20", size = 4) +
ggplot2::xlab("Gene expression") +
ggplot2::ylab("Density") +
ggplot2::theme(axis.text = ggplot2::element_text(size=11),
axis.title = ggplot2::element_text(colour="grey20", size = 12),
axis.line = ggplot2::element_line(colour = "grey30"),
legend.background = ggplot2::element_rect(fill = "transparent",
colour = "grey20"),
plot.background = ggplot2::element_rect(fill = "transparent"),
panel.background = ggplot2::element_rect(fill = "transparent"),
panel.border = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank())
})
setMethod("plotBatchInDataset", "SCsimSet",
function(object) {
batch_prop <- round(((as.vector(table(object@batch_table[,
c("population","batchId")])))/object@sampleInfo$nCells)*100)
tmpTibble <- tibble::tibble(
pPop = rep(rep(object@sampleInfo$pPop,
length(unique(object@batch_table$batchId))),batch_prop),
nPop = rep(rep(as.factor(1:object@sampleInfo$nPop),
length(unique(object@batch_table$batchId))),batch_prop),
batchID = rep(as.factor(sort(rep(unique(object@batch_table$batchId),
object@sampleInfo$nPop))),batch_prop))
ggplot2::ggplot(tmpTibble, ggplot2::aes(x=nPop, color = nPop,
fill = batchID)) +
ggplot2::geom_bar(stat = "count", size=0.8) +
ggplot2::scale_fill_brewer(palette="Greys", name="Batch")+
ggplot2::scale_colour_discrete(name = "Cell \npopulation") +
ggplot2::scale_y_continuous(labels = function(x){
paste0(x, "%") }) +
ggplot2::xlab("Cell Population") +
ggplot2::labs(y = "Cell population proportion") +
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5, size = 13),
axis.title.y = ggplot2::element_text(size = 12),
axis.ticks.y = ggplot2::element_blank(),
axis.title.x = ggplot2::element_text(colour = "grey30", size = 12),
axis.text.x = ggplot2::element_text(size = 12),
axis.text.y = ggplot2::element_text(size = 12),
legend.background = ggplot2::element_rect(fill = "transparent",
colour = "grey20"),
plot.margin = grid::unit(c(1.5,1.5,1.5,1.5), "lines"),
plot.background = ggplot2::element_rect(fill = "transparent"),
panel.background = ggplot2::element_rect(fill = "transparent"),
panel.border = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank())
})
setMethod("plotBatch", "SCsimSet",
function(object) {
tmpTibble <- tibble::as.tibble(object@batch_table[,
c("batchId", "batchValue")])
tmpTibble$batchId <- as.factor(tmpTibble$batchId)
ggplot2::ggplot(tmpTibble, ggplot2::aes(x = batchValue, fill = batchId)) +
ggplot2::geom_density(alpha = 0.5) +
ggplot2::scale_fill_brewer(palette="Greys", name="Batch") +
ggplot2::xlab("Batch Effect") +
ggplot2::ylab("Density") +
ggplot2::theme(
axis.line = ggplot2::element_line(colour = "grey30"),
axis.ticks.y = ggplot2::element_blank(),
axis.title = ggplot2::element_text(colour = "grey30", size = 13),
axis.text = ggplot2::element_text(size = 12),
legend.background = ggplot2::element_rect(fill = "transparent",
colour = "grey20"),
plot.margin = grid::unit(c(1.5,1.5,1.5,1.5), "lines"),
plot.background = ggplot2::element_rect(fill = "transparent"),
panel.background = ggplot2::element_rect(fill = "transparent"),
panel.border = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank())
})
setMethod("plotCellBiais", "SCsimSet",
function(object) {
tmpTibble <- tibble::as.tibble(object@lib_df)
tmpTibble$population <- as.factor(tmpTibble$population)
ggplot2::ggplot(tmpTibble, ggplot2::aes(x = librarySize,
fill = population)) +
ggplot2::geom_density(alpha = 0.5) +
ggplot2::scale_fill_discrete(name="Population") +
ggplot2::xlab("Library Effect") +
ggplot2::ylab("Density") +
ggplot2::theme(
axis.line = ggplot2::element_line(colour = "grey30"),
axis.ticks.y = ggplot2::element_blank(),
axis.title = ggplot2::element_text(colour = "grey30", size = 13),
axis.text = ggplot2::element_text(size = 12),
legend.background = ggplot2::element_rect(fill = "transparent",
colour = "grey20"),
plot.margin = grid::unit(c(1.5,1.5,1.5,1.5), "lines"),
plot.background = ggplot2::element_rect(fill = "transparent"),
panel.background = ggplot2::element_rect(fill = "transparent"),
panel.border = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank())
})
setMethod("plotLibrarySize", "SCsimSet",
function(object) {
tmpTibble <- tibble::tibble(librarySize = colSums(object@effectiveCounts),
population = as.factor(object@lib_df$population))
ggplot2::ggplot(tmpTibble, ggplot2::aes(x = librarySize,
fill = population)) +
ggplot2::geom_density(alpha = 0.5) +
ggplot2::scale_fill_discrete(name="Population") +
ggplot2::xlab("Library Size") +
ggplot2::ylab("Density") +
ggplot2::theme(
axis.line = ggplot2::element_line(colour = "grey30"),
axis.ticks.y = ggplot2::element_blank(),
axis.title = ggplot2::element_text(colour = "grey30", size = 13),
axis.text = ggplot2::element_text(size = 12),
legend.background = ggplot2::element_rect(fill = "transparent",
colour = "grey20"),
plot.margin = grid::unit(c(1.5,1.5,1.5,1.5), "lines"),
plot.background = ggplot2::element_rect(fill = "transparent"),
panel.background = ggplot2::element_rect(fill = "transparent"),
panel.border = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank())
})
setMethod("plotDEG", "SCsimSet",
function(object) {
tmp_pct <- as.data.frame((
table(object@DEG_df$population)/nrow(object@DEG_df))*100)
tmpTibble <- tibble::tibble(Percentage = tmp_pct[, 2],
Population = paste0("Pop ", (tmp_pct[, 1])),
Type = c("noDEG", rep("DEG", nrow(tmp_pct) - 1)))
tmp_color <- c("#d98c8c", "lightblue", "lightblue",
get_color_hexa(nrow(tmp_pct)-1))
names(tmp_color) <- c("DEG", "noDEG", NA,
paste0("Pop ", 1:(nrow(tmp_pct)-1)))
tmpTibble$Population[tmpTibble$Population == "Pop 0"] <- NA
ggplot2::ggplot(tmpTibble, ggplot2::aes(x = "", y = Percentage,
fill = Type)) +
ggplot2::geom_bar(stat = "identity", color = "grey20") +
ggplot2::scale_fill_manual(name = "Gene type",values = tmp_color) +
ggplot2::geom_label(ggplot2::aes(label=Population,
fill=factor(Population,
levels = sort(tmpTibble$Population, decreasing = T))),
na.rm = T, show.legend = F,
position = ggplot2::position_stack(vjust = 0.5)) +
ggplot2::theme(
axis.text.x = ggplot2::element_blank(),
axis.title.x = ggplot2::element_blank(),
axis.ticks.x = ggplot2::element_blank(),
axis.title.y = ggplot2::element_text(colour = "grey30", size = 12),
axis.text.y = ggplot2::element_text(size = 11),
legend.background = ggplot2::element_rect(fill = "transparent",
colour = "grey20"),
plot.margin = grid::unit(c(1,1,1,1), "lines"),
plot.background = ggplot2::element_rect(fill = "transparent"),
panel.background = ggplot2::element_rect(fill = "transparent"),
panel.border = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank())
})
setMethod("plotDEGTypes", "SCsimSet",
function(object) {
# Create data frame for plot
tmpTibble <- tibble::tibble(Gene = c("DE", "DM", "DP", "DC"))
col_n <- c("Gene", paste0("Pop ",
1:(length(unique(object@DEG_df$population))-1)))
for (i in 1:(length(unique(object@DEG_df$population))-1)) {
tmp_pct <- as.data.frame((
table(object@DEG_df$type[object@DEG_df$population == i])/
nrow(object@DEG_df[object@DEG_df$population == i, ]))*100)
rownames(tmp_pct) <- tmp_pct$Var1
tmpTibble <- cbind(tmpTibble, tmp_pct[unlist(tmpTibble[,1]),2])
}
colnames(tmpTibble) <- col_n
tmpTibble <- tidyr::gather(tmpTibble, Population, Percentage,
col_n[2:length(col_n)])
tmpTibble$Gene <- factor(tmpTibble$Gene,
levels = c("DE", "DM", "DP", "DC"))
# create color scale
tmp_color <- c("#42668a", "#6199d1", "#669999", "#3d8f66",
get_color_hexa(length(unique(tmpTibble$Population))))
names(tmp_color) <- c("DE", "DM", "DP", "DC",
paste0("Pop ", 1:length(unique(tmpTibble$Population))))
# plot
ggplot2::ggplot(tmpTibble, ggplot2::aes(x = Population, y = Percentage,
fill=Gene)) +
ggplot2::geom_bar(stat = "identity", color = "grey30") +
ggplot2::scale_fill_manual(values=tmp_color, name = "Gene type") +
ggplot2::geom_label(ggplot2::aes(label=Population, fill = Population),
na.rm = T, show.legend = F,
position = ggplot2::position_fill(vjust = 0.5)) +
ggplot2::theme(
axis.text.x = ggplot2::element_blank(),
axis.title.x = ggplot2::element_blank(),
axis.ticks.x = ggplot2::element_blank(),
axis.title.y = ggplot2::element_text(colour = "grey50", size = 12),
axis.text.y = ggplot2::element_text(size = 11),
legend.background = ggplot2::element_rect(fill = "transparent",
colour = "grey20"),
plot.margin = grid::unit(c(1,1,1,1), "lines"),
plot.background = ggplot2::element_rect(fill = "transparent"),
panel.background = ggplot2::element_rect(fill = "transparent"),
panel.border = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank())
})
setMethod("plotCellsContent", "SCsimSet",
function(object) {
tmpDF <- object@cell_table
if (ncol(object@batch_table) == 0) {
tmpDF <- tmpDF[2:nrow(tmpDF),]
} else {
tmpDF <- tmpDF[3:nrow(tmpDF),]
}
# Change DC value - Common DEG
tmpDF[tmpDF == -1 ] <- object@sampleInfo$nPop + 1
# Change doublet value
tmpDF[, !object@cell_table["cellPop",] %in% 1:object@sampleInfo$nPop] <- -1
# Gene Label & color
label <- rownames(tmpDF)
for (i in 1:length(rownames(tmpDF))) {
label[i] <- paste(unlist(strsplit(rownames(tmpDF)[i], "_"))[2:3],
collapse = "_")
}
nDEGtype <- length(unique(label))
colorLabel <- RColorBrewer::brewer.pal(n=nDEGtype, name="Paired")
names(colorLabel) <- unique(label)
# Cell label & color
if (ncol(object@batch_table) == 0) {
labelPop <- as.vector(object@cell_table[1,])
} else {
labelPop <- as.vector(object@cell_table[2,])
}
labelPop[!labelPop %in% 1:object@sampleInfo$nPop] <- "Doublet"
labelPop[labelPop %in% 1:object@sampleInfo$nPop] <- paste0("Pop ",
labelPop[labelPop %in% 1:object@sampleInfo$nPop])
if (object@sampleInfo$nPop <= 2) {
colorLabelPop <- RColorBrewer::brewer.pal(n=3,
name="YlOrRd")[1:(object@sampleInfo$nPop + 1)]
} else {
colorLabelPop <- RColorBrewer::brewer.pal(n=(object@sampleInfo$nPop + 1),
name="YlOrRd")
}
col_common <- colorLabelPop[length(colorLabelPop)]
colorLabelPop <- c("dodgerblue4",
colorLabelPop[1:(length(colorLabelPop)-1)])
names(colorLabelPop) <- c("Doublet", paste0("Pop ",
1:object@sampleInfo$nPop))
annot_col <- data.frame(Pop = unlist(labelPop))
rownames(annot_col) <- colnames(tmpDF)
annot_row <- data.frame(DEG = label)
rownames(annot_row) <- rownames(tmpDF)
annot_color <- list(DEG = colorLabel,
Pop = colorLabelPop)
colorht <- c("dodgerblue4", "lightblue",
colorLabelPop[2:length(colorLabelPop)], col_common)
pheatmap::pheatmap(tmpDF, color = colorht, border_color = NA,
show_rownames = F, show_colnames = F,
cluster_rows = F, cluster_cols = F,
annotation_colors = annot_color,
annotation_row = annot_row, annotation_col = annot_col,
annotation_names_row = F, annotation_names_col = F)
})
setMethod("plotDEGDensity", "SCsimSet",
function(object) {
degType <- as.character(unique(scSim@DEG_df$type))
degType <- degType[degType != "NDE"]
tmpTibble <- tibble::tibble()
for (type in degType) {
fc <- scSim@DEG_df[
(scSim@DEG_df$type == type) &
(scSim@DEG_df$regulation == "Up"),]
if (nrow(fc) > 1) {
topExpr <- sort(rowMeans(scSim@effectiveCounts[rownames(fc),]),
decreasing = T)
fc <- fc[names(topExpr[1:5]),]
fc$max <- rowMeans(fc[4:ncol(fc)])
fc <- fc[with(fc, order(max, decreasing = T)),]
fc <- fc[rownames(fc[1,]),]
}
if (nrow(tmpTibble) == 0) {
tmpTibble <- tibble::tibble(
Count = log2(scSim@effectiveCounts[rownames(fc),] + 1),
Population = as.character(scSim@lib_df$population),
Type = rep(type, ncol(scSim@effectiveCounts)))
} else {
t <- tibble::tibble(
Count = log2(scSim@effectiveCounts[rownames(fc),] + 1),
Population = as.character(scSim@lib_df$population),
Type = rep(type, ncol(scSim@effectiveCounts)))
tmpTibble <- rbind(tmpTibble, t)
}
}
# NDE case
fc <- scSim@DEG_df[
(scSim@DEG_df$type == "NDE"),]
fc <- sort(rowMeans(scSim@effectiveCounts[rownames(fc),]),
decreasing = T)
if (nrow(tmpTibble) == 0) {
tmpTibble <- tibble::tibble(
Count = log2(scSim@effectiveCounts[names(fc[1]),] + 1),
Population = as.character(scSim@lib_df$population),
Type = rep("NDE", ncol(scSim@effectiveCounts)))
} else {
t <- tibble::tibble(
Count = log2(scSim@effectiveCounts[names(fc[1]),] + 1),
Population = as.character(scSim@lib_df$population),
Type = rep("NDE", ncol(scSim@effectiveCounts)))
tmpTibble <- rbind(tmpTibble, t)
}
ggplot2::ggplot(tmpTibble,
ggplot2::aes(x = Count, fill = Population)) +
ggplot2::geom_density(alpha = 0.5) +
ggplot2::ylim(0,1) +
ggplot2::facet_grid(~Type)
})
setMethod("plotDropoutQC", "SCsimSet",
function(object) {
isExpr_mat = object@baseCounts == 0
pctDropout_cells <- rowSums(isExpr_mat) / ncol(isExpr_mat) * 100
tmpTibble <- tibble::tibble(
Cells = pctDropout_cells,
State = factor(rep(paste0("Before ", round(mean(pctDropout_cells),1),
"%"), length(pctDropout_cells))))
isExpr_mat = object@effectiveCounts == 0
pctDropout_cells <- rowSums(isExpr_mat) / ncol(isExpr_mat) * 100
tmpTibble <- rbind(tmpTibble, tibble::tibble(
Cells = pctDropout_cells,
State = factor(rep(paste0("After ", round(mean(pctDropout_cells),1),
"%"), length(pctDropout_cells)))))
p1 = ggplot2::ggplot(tmpTibble, ggplot2::aes(x=Cells)) +
ggplot2::geom_density(colour = "deepskyblue3", fill = "deepskyblue1",
alpha=1/3) +
ggplot2::xlim(0,100) +
ggplot2::theme(
axis.ticks = ggplot2::element_line(colour="grey", size=0.5),
plot.background = ggplot2::element_rect(fill = "transparent"),
panel.background = ggplot2::element_rect(fill = "transparent"),
panel.border = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank()) +
ggplot2::ggtitle("Dropout per cells") +
ggplot2::labs(x="Dropout percentage") +
ggplot2::facet_grid(~State)
# Genes dropout
isExpr_mat = object@baseCounts == 0
pctDropout_genes <- colSums(isExpr_mat) / nrow(isExpr_mat) * 100
tmpTibble <- tibble::tibble(
Genes = pctDropout_genes,
State = factor(rep(paste0("Before ", round(mean(pctDropout_genes),1),
"%"), length(pctDropout_genes))))
isExpr_mat = object@effectiveCounts == 0
pctDropout_genes <- colSums(isExpr_mat) / nrow(isExpr_mat) * 100
tmpTibble <- rbind(tmpTibble, tibble::tibble(
Genes = pctDropout_genes,
State = factor(rep(paste0("After ", round(mean(pctDropout_genes),1),
"%"), length(pctDropout_genes)))))
p2 = ggplot2::ggplot(tmpTibble, ggplot2::aes(x=Genes)) +
ggplot2::geom_density(colour = "deepskyblue3", fill = "deepskyblue1",
alpha=1/3) +
ggplot2::theme_grey() + ggplot2::xlim(0,100) +
ggplot2::labs(x="Dropout percentage") +
ggplot2::theme(
axis.ticks=ggplot2::element_line(colour="grey", size=0.5),
plot.background = ggplot2::element_rect(fill = "transparent"),
panel.background = ggplot2::element_rect(fill = "transparent"),
panel.border = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank()) +
ggplot2::ggtitle("Dropout per genes") +
ggplot2::facet_grid(~State)
scater::multiplot(p1,p2)
})
setMethod("plotDropoutProba", "SCsimSet",
function(object) {
isExpr_mat = object@baseCounts == 0
pctDropout_cells <- rowSums(isExpr_mat) / ncol(isExpr_mat) * 100
tmpTibble <- tibble::tibble(
Cells = pctDropout_cells,
State = factor(rep(paste0("Before ", round(mean(pctDropout_cells),1),
"%"), length(pctDropout_cells))),
Expression = log2(rowMeans(object@baseCounts)+1))
isExpr_mat = object@effectiveCounts == 0
pctDropout_cells <- rowSums(isExpr_mat) / ncol(isExpr_mat) * 100
tmpTibble <- rbind(tmpTibble, tibble::tibble(
Cells = pctDropout_cells,
State = factor(rep(paste0("After ", round(mean(pctDropout_cells),1),
"%"), length(pctDropout_cells))),
Expression = log2(rowMeans(object@effectiveCounts)+1)))
p1 = ggplot2::ggplot(tmpTibble, ggplot2::aes(x=Expression, y=Cells)) +
ggplot2::geom_point(colour = "deepskyblue3", fill = "deepskyblue1",
alpha=1/3) +
ggplot2::theme_grey() +
ggplot2::labs(y="Dropout %", x="Mean gene expression") +
ggplot2::theme(
axis.ticks=ggplot2::element_line(colour="grey", size=0.5),
text = ggplot2::element_text(size=15),
plot.background = ggplot2::element_rect(fill = "transparent"),
panel.background = ggplot2::element_rect(fill = "transparent"),
panel.border = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank()) +
ggplot2::ggtitle(paste0("% Dropout per cells \nMean: ",
round(median(pctDropout_cells),2))) +
ggplot2::facet_grid(~State)
# Genes dropout
isExpr_mat = object@baseCounts == 0
pctDropout_genes <- colSums(isExpr_mat) / nrow(isExpr_mat) * 100
tmpTibble <- tibble::tibble(
Genes = pctDropout_genes,
State = factor(rep(paste0("Before ", round(mean(pctDropout_genes),1),
"%"), length(pctDropout_genes))),
Expression = log2(colSums(object@baseCounts)+1))
isExpr_mat = object@effectiveCounts == 0
pctDropout_genes <- colSums(isExpr_mat) / nrow(isExpr_mat) * 100
tmpTibble <- rbind(tmpTibble, tibble::tibble(
Genes = pctDropout_genes,
State = factor(rep(paste0("After ", round(mean(pctDropout_genes),1),
"%"), length(pctDropout_genes))),
Expression = log2(colSums(object@effectiveCounts)+1)))
p2 = ggplot2::ggplot(tmpTibble, ggplot2::aes(x=Expression, y=Genes)) +
ggplot2::geom_point(colour = "deepskyblue3", fill = "deepskyblue1",
alpha=1/3) +
ggplot2::theme_grey() +
ggplot2::labs(y="Dropout %", x="Total counts") +
ggplot2::theme(
axis.ticks=ggplot2::element_line(colour="grey", size=0.5),
text = ggplot2::element_text(size=15),
plot.background = ggplot2::element_rect(fill = "transparent"),
panel.background = ggplot2::element_rect(fill = "transparent"),
panel.border = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank()) +
ggplot2::ggtitle(paste0("% Dropout per genes \nMean: ",
round(median(pctDropout_genes),2))) +
ggplot2::facet_grid(~State)
scater::multiplot(p1,p2)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.