###################################################
### Setting accessor functions
###################################################
#' @title getTSCANResults accessor function
#' @description SCTK allows user to access all TSCAN related results with
#' \code{"getTSCANResults"}. See details.
#' @param x Input \linkS4class{SingleCellExperiment} object.
#' @param analysisName Algorithm name implemented, should be one of
#' \code{"Pseudotime"}, \code{"DEG"}, or \code{"ClusterDEAnalysis"}.
#' @param pathName Sub folder name within the \code{analysisName}. See details.
#' @param value Value to be stored within the \code{pathName} or
#' \code{analysisName}
#' @details
#' When \code{analysisName = "Pseudotime"}, returns the list result from
#' \code{\link{runTSCAN}}, including the MST structure.
#'
#' When \code{analysisName = "DEG"}, returns the list result from
#' \code{\link{runTSCANDEG}}, including \code{DataFrame}s containing genes that
#' increase/decrease along each the pseudotime paths. \code{pathName} indicates
#' the path index, the available options of which can be listed by
#' \code{listTSCANTerminalNodes}.
#'
#' When \code{analysisName = "ClusterDEAnalysis"}, returns the list result from
#' \code{\link{runTSCANClusterDEAnalysis}}. Here \code{pathName} needs to match
#' with the \code{useCluster} argument when running the algorithm.
#' @export
#' @rdname getTSCANResults
#' @return Get or set TSCAN results
#' @examples
#' data("mouseBrainSubsetSCE", package = "singleCellTK")
#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
#' useReducedDim = "PCA_logcounts")
#' results <- getTSCANResults(mouseBrainSubsetSCE, "Pseudotime")
setGeneric("getTSCANResults", signature = "x",
function(x, analysisName = NULL, pathName = NULL) {
standardGeneric("getTSCANResults")
}
)
#' @export
#' @rdname getTSCANResults
#'
setMethod("getTSCANResults", signature(x = "SingleCellExperiment"),
function(x, analysisName = NULL, pathName = NULL){
result.names <- listTSCANResults(x)
if(!analysisName %in% result.names) {
stop("The analysis was not found for TSCAN results")
}
if (analysisName == "Pseudotime")
results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN$Pseudotime
else {
if (is.null(pathName)) {
results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]]
} else {
pathName <- as.character(pathName)
results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]][[pathName]]
}
}
return(results)
})
#' @export
#' @rdname getTSCANResults
#'
setGeneric("getTSCANResults<-",
function(x, analysisName, pathName = NULL, value)
standardGeneric("getTSCANResults<-")
)
#' @export
#' @rdname getTSCANResults
setReplaceMethod("getTSCANResults", signature(x = "SingleCellExperiment"),
function(x, analysisName, pathName = NULL, value) {
if (analysisName == "Pseudotime")
S4Vectors::metadata(x)$sctk$Traj$TSCAN$Pseudotime <- value
else {
if (is.null(pathName))
stop("Have to specify `pathName` for TSCAN DE analyses")
pathName <- as.character(pathName)
S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]][[pathName]] <- value
}
return(x)
})
#' @export
#' @rdname getTSCANResults
setGeneric("listTSCANResults", signature = "x",
function(x) {standardGeneric("listTSCANResults")}
)
#' @export
#' @rdname getTSCANResults
setMethod("listTSCANResults", "SingleCellExperiment", function(x){
all.results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN
if (is.null(all.results) ||
!"Pseudotime" %in% names(all.results)) {
stop("No TSCAN result found. Please run `runTSCAN` first.")
}
return(names(all.results))
})
#' @export
#' @rdname getTSCANResults
setGeneric("listTSCANTerminalNodes", signature = "x",
function(x) {
standardGeneric("listTSCANTerminalNodes")
}
)
#' @export
#' @rdname getTSCANResults
setMethod("listTSCANTerminalNodes", signature(x = "SingleCellExperiment"),
function(x){
if (is.null(S4Vectors::metadata(x)$sctk$Traj$TSCAN$Pseudotime))
stop("No TSCAN result found. Please run `runTSCAN()` first.")
results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN$Pseudotime$pathClusters
return(names(results))
})
###################################################
### STEP 1:: creating cluster and MST
###################################################
#' @title Run TSCAN to obtain pseudotime values for cells
#' @description Wrapper for obtaining a pseudotime ordering of the cells by
#' projecting them onto the minimum spanning tree (MST)
#' @param inSCE Input \linkS4class{SingleCellExperiment} object.
#' @param useReducedDim Character. A low-dimension representation in
#' \code{reducedDims}, will be used for both clustering if \code{cluster} not
#' specified and MST construction. Default \code{"PCA"}.
#' @param cluster Grouping for each cell in \code{inSCE}. A vector with equal
#' length to the number of the cells in \code{inSCE}, or a single character for
#' retriving \code{colData} variable. Default \code{NULL}, will run
#' \code{runScranSNN} to obtain.
#' @param starter Character. Specifies the starting node from which to compute
#' the pseudotime. Default \code{NULL}, will select an arbitrary node.
#' @param seed An integer. Random seed for clustering if \code{cluster} is not
#' specified. Default \code{12345}.
#' @return The input \code{inSCE} object with pseudotime ordering of the cells
#' along the paths and the cluster label stored in \code{colData}, and other
#' unstructured information in \code{metadata}.
#' @export
#' @author Nida Pervaiz
#' @examples
#' data("mouseBrainSubsetSCE", package = "singleCellTK")
#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
#' useReducedDim = "PCA_logcounts")
runTSCAN <- function(inSCE,
useReducedDim = "PCA",
cluster = NULL,
starter = NULL,
seed = 12345) {
if (is.null(cluster)) {
# DON'T RETURN TO `inSCE`
# It really overwrites existing cluster labels
sce <- runScranSNN(inSCE, useReducedDim = useReducedDim, clusterName = "scranSNN_cluster", k = 8, weightType = "jaccard", seed = seed)
cluster <- colData(sce)$scranSNN_cluster
rm(sce)
} else {
cluster <- .manageCellVar(inSCE, var = cluster)
}
if (length(unique(cluster)) == 1) {
stop("Only one cluster found. Unable to calculate.")
}
message(date(), " ... Running TSCAN to estimate pseudotime")
inSCE <- scran::computeSumFactors(inSCE, clusters = cluster)
by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = cluster)
centroids <- SingleCellExperiment::reducedDim(by.cluster, useReducedDim)
mst <- TSCAN::createClusterMST(centroids, clusters = NULL)
# Map each cell to the closest edge on the MST, reporting also the distance to
# the corresponding vertices.
map.tscan <- TSCAN::mapCellsToEdges(inSCE , mst = mst,
use.dimred = useReducedDim,
clusters = cluster)
# Compute a pseudotime for each cell lying on each path through the MST from a
# given starting node.
tscan.pseudo <- TSCAN::orderCells(map.tscan, mst, start = starter)
common.pseudo <- TrajectoryUtils::averagePseudotime(tscan.pseudo)
colData(inSCE)$TSCAN_clusters <- cluster
colData(inSCE)$TSCAN_pseudotime <- common.pseudo
pathClusters <- list()
pathList <- data.frame()
for(i in seq(ncol(tscan.pseudo))){
pathIndex = as.character(colnames(tscan.pseudo)[i])
colDataVarName <- paste0("Path_",pathIndex,"_pseudotime")
inSCE[[colDataVarName]] <- TrajectoryUtils::pathStat(tscan.pseudo)[,pathIndex]
nonNA.idx <- !is.na(colData(inSCE)[[colDataVarName]])
pathClusters[[pathIndex]] <- unique(colData(inSCE)$TSCAN_clusters[nonNA.idx])
pathList[i,1] <- pathIndex
pathList[i,2] <- toString(pathClusters[[pathIndex]])
message(date(), " ... Clusters involved in path index ", pathIndex,
" are: ", paste(pathClusters[[i]], collapse = ", "))
}
maxWidth <- max(stringr::str_length(pathList[, 1]))
choiceList <- paste0(stringr::str_pad(pathList[, 1], width=maxWidth,
side="right"),
"|", pathList[, 2])
branchClusters <- c()
edgeList <- mst
for (i in seq_along(unique(colData(inSCE)$TSCAN_clusters))){
degree <- sum(edgeList[i] != 0)
if (degree > 1) branchClusters <- c(branchClusters, i)
}
## Save these results in a list and then make S4 accessor that passes the
## entire list
result <- list(pseudo = tscan.pseudo, mst = mst,
maptscan = map.tscan,
pathClusters = pathClusters,
branchClusters = branchClusters,
pathIndexList = choiceList)
getTSCANResults(inSCE, analysisName = "Pseudotime") <- result
message(date(), " ... Number of estimated paths is ", ncol(tscan.pseudo))
return (inSCE)
}
###################################################
### plot pseudotime values
###################################################
#' @title Plot MST pseudotime values on cell 2D embedding
#' @description A wrapper function which visualizes outputs from the
#' \code{\link{runTSCAN}} function. Plots the pseudotime ordering of the cells
#' and project them onto the MST.
#' @param inSCE Input \linkS4class{SingleCellExperiment} object.
#' @param useReducedDim Saved dimension reduction name in \code{inSCE} object.
#' Required.
#' @return A \code{.ggplot} object with the pseudotime ordering of the cells
#' colored on a cell 2D embedding, and the MST path drawn on it.
#' @export
#' @author Nida Pervaiz
#' @examples
#' data("mouseBrainSubsetSCE", package = "singleCellTK")
#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
#' useReducedDim = "PCA_logcounts")
#' plotTSCANResults(inSCE = mouseBrainSubsetSCE,
#' useReducedDim = "TSNE_logcounts")
plotTSCANResults <- function(inSCE, useReducedDim = "UMAP") {
results <- getTSCANResults(inSCE, analysisName = "Pseudotime")
clusters <- colData(inSCE)$TSCAN_clusters
by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = clusters)
line.data <- TSCAN::reportEdges(by.cluster, mst = results$mst,
clusters = NULL, use.dimred = useReducedDim)
scater::plotReducedDim(inSCE, dimred = useReducedDim,
colour_by = I(colData(inSCE)$TSCAN_pseudotime),
text_by = "TSCAN_clusters", text_colour = "black") +
suppressMessages(ggplot2::scale_colour_viridis_c(name = "Pseudotime")) +
ggplot2::geom_path(data = line.data,
ggplot2::aes_string(x = names(line.data)[2],
y = names(line.data)[3],
group = 'edge'))
}
###################################################
### STEP 2:: identify expressive genes
###################################################
#' @title Test gene expression changes along a TSCAN trajectory path
#' @description Wrapper for identifying genes with significant changes with
#' respect to one of the TSCAN pseudotime paths
#' @param inSCE Input \linkS4class{SingleCellExperiment} object.
#' @param pathIndex Path index for which the pseudotime values should be used.
#' This corresponds to the terminal node of specific path from the root
#' node to the terminal node. Run \code{listTSCANTerminalNodes(inSCE)} for
#' available options.
#' @param useAssay Character. The name of the assay to use for testing the
#' expression change. Should be log-normalized. Default \code{"logcounts"}
#' @param discardCluster Cluster(s) which are not of use or masks other
#' interesting effects can be discarded. Default \code{NULL}.
#' @return The input \code{inSCE} with results updated in \code{metadata}.
#' @export
#' @author Nida Pervaiz
#' @examples
#' data("mouseBrainSubsetSCE", package = "singleCellTK")
#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
#' useReducedDim = "PCA_logcounts")
#' terminalNodes <- listTSCANTerminalNodes(mouseBrainSubsetSCE)
#' mouseBrainSubsetSCE <- runTSCANDEG(inSCE = mouseBrainSubsetSCE,
#' pathIndex = terminalNodes[1])
runTSCANDEG <- function(inSCE,
pathIndex,
useAssay = "logcounts",
discardCluster = NULL) {
nx <- inSCE
if (!is.null(discardCluster)) {
if (any(!discardCluster %in% unique(colData(nx)$TSCAN_clusters))) {
stop("Not all `discardCluster` exist in TSCAN clusters")
}
nx <- nx[, !(colData(nx)$TSCAN_clusters %in% discardCluster)]
}
pseudo <- TSCAN::testPseudotime(nx,
pseudotime = nx[[paste0("Path_", pathIndex,
"_pseudotime")]],
assay.type = useAssay)
pseudo <- pseudo[order(pseudo$FDR),]
up.left <- pseudo[pseudo$logFC < 0,]
up.right <- pseudo[pseudo$logFC > 0,]
## Save these results in a list and then make S4 accessor that passes the
## entire list
pathresults <- list(discardClusters = discardCluster, upLeft = up.left,
upRight = up.right, useAssay = useAssay)
getTSCANResults(inSCE, analysisName = "DEG",
pathName = as.character(pathIndex)) <- pathresults
return (inSCE)
}
###################################################
### plot heatmap of top genes
###################################################
#' @title Plot heatmap of genes with expression change along TSCAN pseudotime
#' @description A wrapper function which visualizes outputs from the
#' \code{\link{runTSCANDEG}} function. Plots the top genes that change in
#' expression with increasing pseudotime along the path in the MST.
#' \code{\link{runTSCANDEG}} has to be run in advance with using the same
#' \code{pathIndex} of interest.
#' @param inSCE Input \linkS4class{SingleCellExperiment} object.
#' @param pathIndex Path index for which the pseudotime values should be used.
#' Should have being used in \code{\link{runTSCANDEG}}.
#' @param direction Should we show features with expression increasing or
#' decreeasing along the increase in TSCAN pseudotime? Choices are
#' \code{"both"}, \code{"increasing"} or \code{"decreasing"}.
#' @param topN An integer. Only to plot this number of top genes along the path
#' in the MST, in terms of FDR value. Use \code{NULL} to cancel the top N
#' subscription. Default \code{30}.
#' @param log2fcThreshold Only output DEGs with the absolute values of log2FC
#' larger than this value. Default \code{NULL}.
#' @param useAssay A single character to specify a feature expression matrix in
#' \code{assays} slot. The expression of top features from here will be
#' visualized. Default \code{NULL} use the one used for
#' \code{\link{runTSCANDEG}}.
#' @param featureDisplay Whether to display feature ID and what ID type to
#' display. Users can set default ID type by \code{\link{setSCTKDisplayRow}}.
#' \code{NULL} will display when number of features to display is less than 60.
#' \code{FALSE} for no display. Variable name in \code{rowData} to indicate ID
#' type. \code{"rownames"} or \code{TRUE} for using \code{rownames(inSCE)}.
#' @return A ComplexHeatmap in \code{.ggplot} class
#' @export
#' @author Nida Pervaiz
#' @importFrom S4Vectors metadata
#' @examples
#' data("mouseBrainSubsetSCE", package = "singleCellTK")
#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
#' useReducedDim = "PCA_logcounts")
#' terminalNodes <- listTSCANTerminalNodes(mouseBrainSubsetSCE)
#' mouseBrainSubsetSCE <- runTSCANDEG(inSCE = mouseBrainSubsetSCE,
#' pathIndex = terminalNodes[1])
#' plotTSCANPseudotimeHeatmap(mouseBrainSubsetSCE,
#' pathIndex = terminalNodes[1])
plotTSCANPseudotimeHeatmap <- function(inSCE,
pathIndex,
direction = c("both", "increasing",
"decreasing"),
topN = 50,
log2fcThreshold = NULL,
useAssay = NULL,
featureDisplay = metadata(inSCE)$featureDisplay){
results <- getTSCANResults(inSCE, analysisName = "DEG", pathName = pathIndex)
direction <- match.arg(direction)
# Organizing cells
cell.idx <- rep(TRUE, ncol(inSCE))
if(!is.null(results$discardClusters)){
cell.idx <- cell.idx &
(!colData(inSCE)$TSCAN_clusters %in% results$discardClusters)
}
colPathPseudo <- paste0("Path_", pathIndex, "_pseudotime")
cell.idx <- cell.idx & !is.na(colData(inSCE)[[colPathPseudo]])
cell.order <- order(colData(inSCE)[[colPathPseudo]])
cell.order <- cell.order[cell.order %in% which(cell.idx)]
# Organizing genes
if (direction == "both") {
degR <- results$upRight
degL <- results$upLeft
if (!is.null(log2fcThreshold)) {
degR <- degR[abs(degR$logFC) > log2fcThreshold,]
degL <- degL[abs(degL$logFC) > log2fcThreshold,]
}
genesR <- rownames(degR)[seq(min(topN, nrow(degR)))]
genesL <- rownames(degL)[seq(min(topN, nrow(degL)))]
genesR <- genesR[!is.na(genesR)]
genesL <- genesL[!is.na(genesL)]
genes <- c(genesL, genesR)
if (length(genes) < 1) stop("No feature passed the filter.")
direction.df <- data.frame(
Direction = factor(c(rep("decreasing", length(genesL)),
rep("increasing", length(genesR))),
levels = c("increasing", "decreasing")),
row.names = genes)
} else {
if (direction == "increasing") deg <- results$upRight
if (direction == "decreasing") deg <- results$upLeft
if (!is.null(log2fcThreshold)) {
deg <- deg[abs(deg$logFC) > log2fcThreshold,]
}
topN <- min(topN, nrow(deg))
if (topN < 1) stop("No feature passed the filter.")
genes <- rownames(deg)[seq(topN)]
direction.df <- data.frame(Direction = rep(direction, length(genes)),
row.names = genes)
}
rowLabel <- featureDisplay
if (is.null(featureDisplay)) {
if (length(genes) <= 60) rowLabel <- TRUE else rowLabel <- FALSE
} else if (featureDisplay == "rownames") {
rowLabel <- TRUE
}
cellAnnotationColor = list(
.viridisPseudoTimeColor(colData(inSCE)[[colPathPseudo]])
)
names(cellAnnotationColor) <- colPathPseudo
if (is.null(useAssay)) useAssay <- results$useAssay
plotSCEHeatmap(inSCE = inSCE,
useAssay = useAssay,
cellIndex = cell.order,
featureIndex = genes,
colDend = FALSE,
rowDend = FALSE,
colDataName = c("TSCAN_clusters", colPathPseudo),
rowLabel = rowLabel,
featureAnnotations = direction.df,
rowSplitBy = "Direction",
cellAnnotationColor = cellAnnotationColor
)
}
#' Basing on pseudotime is presented within range from 0 to 100
#' Use the viridis color scale to match with `plotTSCANResult` embedding color
#' @param x numeric vector of pseudotime
#' @return function object returned by circlize::colorRamp2
#' @noRd
.viridisPseudoTimeColor <- function(x) {
a <- max(x, na.rm = TRUE)
b <- min(x, na.rm = TRUE)
chunk <- (a - b) / 4
circlize::colorRamp2(seq(0,4) * chunk + b,
c("#440154", "#3b528b", "#21918c", "#5ec962", "#fde725"))
}
###################################################
### plot expressive genes
###################################################
#' @title Plot expression changes of top features along a TSCAN pseudotime path
#' @description A wrapper function which visualizes outputs from the
#' \code{\link{runTSCANDEG}} function. Plots the genes that increase or decrease
#' in expression with increasing pseudotime along the path in the MST.
#' \code{\link{runTSCANDEG}} has to be run in advance with using the same
#' \code{pathIndex} of interest.
#' @param inSCE Input \linkS4class{SingleCellExperiment} object.
#' @param pathIndex Path index for which the pseudotime values should be used.
#' Should have being used in \code{\link{runTSCANDEG}}.
#' @param direction Should we show features with expression increasing or
#' decreeasing along the increase in TSCAN pseudotime? Choices are
#' \code{"increasing"} or \code{"decreasing"}.
#' @param topN An integer. Only to plot this number of top genes that are
#' increasing/decreasing in expression with increasing pseudotime along
#' the path in the MST. Default 10
#' @param useAssay A single character to specify a feature expression matrix in
#' \code{assays} slot. The expression of top features from here will be
#' visualized. Default \code{NULL} use the one used for
#' \code{\link{runTSCANDEG}}.
#' @param featureDisplay Specify the feature ID type to display. Users can set
#' default value with \code{\link{setSCTKDisplayRow}}. \code{NULL} or
#' \code{"rownames"} specifies the rownames of \code{inSCE}. Other character
#' values indicates \code{rowData} variable.
#' @return A \code{.ggplot} object with the facets of the top genes. Expression
#' on y-axis, pseudotime on x-axis.
#' @export
#' @author Nida Pervaiz
#' @importFrom utils head
#' @importFrom S4Vectors metadata
#' @importFrom SummarizedExperiment rowData
#' @examples
#' data("mouseBrainSubsetSCE", package = "singleCellTK")
#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
#' useReducedDim = "PCA_logcounts")
#' terminalNodes <- listTSCANTerminalNodes(mouseBrainSubsetSCE)
#' mouseBrainSubsetSCE <- runTSCANDEG(inSCE = mouseBrainSubsetSCE,
#' pathIndex = terminalNodes[1])
#' plotTSCANPseudotimeGenes(mouseBrainSubsetSCE,
#' pathIndex = terminalNodes[1],
#' useAssay = "logcounts")
plotTSCANPseudotimeGenes <- function(inSCE,
pathIndex,
direction = c("increasing", "decreasing"),
topN = 10,
useAssay = NULL,
featureDisplay = metadata(inSCE)$featureDisplay){
results <- getTSCANResults(inSCE, analysisName = "DEG", pathName = pathIndex)
direction = match.arg(direction)
if (!is.null(featureDisplay) &&
featureDisplay == "rownames")
featureDisplay <- NULL
if(!is.null(results$discardClusters)){
inSCE <- inSCE[,!colData(inSCE)$TSCAN_clusters %in% results$discardClusters]
}
if (direction == "decreasing") features <- rownames(results$upLeft)
if (direction == "increasing") features <- rownames(results$upRight)
features <- head(features, topN)
if (!is.null(featureDisplay)) {
features.idx <- featureIndex(features, inSCE)
features <- rowData(inSCE)[[featureDisplay]][features.idx]
}
if (is.null(useAssay)) useAssay <- results$useAssay
scater::plotExpression(inSCE,
features = features,
exprs_values = useAssay,
swap_rownames = featureDisplay,
x = paste0("Path_",pathIndex,"_pseudotime"),
colour_by = "TSCAN_clusters")
}
###################################################
### STEP3:: identify DE genes in branch cluster
###################################################
#' @title Find DE genes between all TSCAN paths rooted from given cluster
#' @description This function finds all paths that root from a given cluster
#' \code{useCluster}, and performs tests to identify significant features for
#' each path, and are not significant and/or changing in the opposite direction
#' in the other paths. Using a branching cluster (i.e. a node with degree > 2)
#' may highlight features which are responsible for the branching event. MST has
#' to be pre-calculated with \code{\link{runTSCAN}}.
#' @param inSCE Input \linkS4class{SingleCellExperiment} object.
#' @param useCluster The cluster to be regarded as the root, has to existing in
#' \code{colData(inSCE)$TSCAN_clusters}.
#' @param useAssay Character. The name of the assay to use. This assay should
#' contain log normalized counts. Default \code{"logcounts"}.
#' @param fdrThreshold Only out put DEGs with FDR value smaller than this value.
#' Default \code{0.05}.
#' @return The input \code{inSCE} with results updated in \code{metadata}.
#' @export
#' @author Nida Pervaiz
#' @examples
#' data("mouseBrainSubsetSCE", package = "singleCellTK")
#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
#' useReducedDim = "PCA_logcounts")
#' mouseBrainSubsetSCE <- runTSCANClusterDEAnalysis(inSCE = mouseBrainSubsetSCE,
#' useCluster = 1)
runTSCANClusterDEAnalysis <- function(inSCE,
useCluster,
useAssay = "logcounts",
fdrThreshold = 0.05) {
results <- getTSCANResults(inSCE, analysisName = "Pseudotime")
message(date(), " ... Finding DEG between TSCAN branches")
tscan.pseudo <- TSCAN::orderCells(results$maptscan, mst = results$mst,
start = useCluster)
nPaths <- colnames(tscan.pseudo)
pathClusters <- list()
pathList <- data.frame()
x <- inSCE[,colData(inSCE)$TSCAN_clusters == useCluster]
store <- list()
genes <- list()
for(i in seq(nPaths)){
# Get Pseudotime of a path
pathIndex = as.character(nPaths[i])
branchPseudotime <- TrajectoryUtils::pathStat(tscan.pseudo)[,pathIndex]
colDataVarName <- paste0("TSCAN_Cluster", useCluster,
"_Path_", pathIndex, "_pseudotime")
colData(inSCE)[[colDataVarName]] <- branchPseudotime
nonNA.idx <- !is.na(branchPseudotime)
pathClusters[[pathIndex]] <- unique(colData(inSCE)$TSCAN_clusters[nonNA.idx])
pathList[i,1] <- pathIndex
pathList[i,2] <- toString(pathClusters[[pathIndex]])
message(date(), " ... Clusters involved in path index ", pathIndex,
" are: ", paste(pathClusters[[i]], collapse = ", "))
# Test along the path
pseudo.df <- TSCAN::testPseudotime(
x, df = 1,
pseudotime = branchPseudotime[colData(inSCE)$TSCAN_clusters == useCluster],
assay.type = useAssay)
pseudo.df <- pseudo.df[order(pseudo.df$p.value),]
store[[pathIndex]] <- pseudo.df
}
# Filter against all other paths
genes <- list()
for(i in seq(nPaths)){
pseudo.df <- store[[nPaths[i]]]
paths <- setdiff(seq(nPaths), i)
idx <- pseudo.df$FDR < fdrThreshold
for(j in seq_along(paths)){
pseudo.otherPath <- store[[paths[j]]]
idx <- idx & (pseudo.otherPath$p.value >= 0.05 |
sign(pseudo.otherPath$logFC) != sign(pseudo.df$logFC))
}
pseudo.df <- pseudo.df[which(idx), ]
genes[[nPaths[i]]] <- pseudo.df[order(pseudo.df$FDR),]
}
maxWidth <- max(stringr::str_length(pathList[, 1]))
choiceList <- paste0(stringr::str_pad(pathList[, 1], width=maxWidth,
side="right"),
"|", pathList[, 2])
expGenes <- list(DEgenes = genes, useAssay = useAssay,
terminalNodes = tscan.pseudo,
pathIndexList = choiceList)
getTSCANResults(inSCE, analysisName = "ClusterDEAnalysis",
pathName = useCluster) <- expGenes
return(inSCE)
}
###################################################
### plot branch cluster
###################################################
#' @title Plot TSCAN pseudotime rooted from given cluster
#' @description This function finds all paths that root from a given cluster
#' \code{useCluster}. For each path, this function plots the recomputed
#' pseudotime starting from the root on a scatter plot which contains cells only
#' in this cluster. MST has to be pre-calculated with \code{\link{runTSCAN}}.
#' @param inSCE Input \linkS4class{SingleCellExperiment} object.
#' @param useCluster The cluster to be regarded as the root, has to existing in
#' \code{colData(inSCE)$TSCAN_clusters}.
#' @param useReducedDim Saved dimension reduction name in the
#' SingleCellExperiment object. Required.
#' @param combinePlot Must be either \code{"all"} or \code{"none"}. \code{"all"}
#' will combine plots of pseudotime along each path into a single \code{.ggplot}
#' object, while \code{"none"} will output a list of plots. Default
#' \code{"all"}.
#' @return
#' \item{combinePlot = "all"}{A \code{.ggplot} object}
#' \item{combinePlot = "none"}{A list of \code{.ggplot}}
#' @export
#' @author Nida Pervaiz
#' @importFrom utils head
#' @examples
#' data("mouseBrainSubsetSCE", package = "singleCellTK")
#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
#' useReducedDim = "PCA_logcounts")
#' plotTSCANClusterPseudo(mouseBrainSubsetSCE, useCluster = 1,
#' useReducedDim = "TSNE_logcounts")
plotTSCANClusterPseudo <- function(inSCE, useCluster, useReducedDim = "UMAP",
combinePlot = c("all", "none")){
# Get the plotting info of edges
results <- getTSCANResults(inSCE, analysisName = "Pseudotime")
combinePlot <- match.arg(combinePlot)
clusters <- colData(inSCE)$TSCAN_clusters
by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = clusters)
line.data <- TSCAN::reportEdges(by.cluster, mst = results$mst,
clusters = NULL, use.dimred = useReducedDim)
line.data.sub <- .getClustersLineData(line.data, useCluster)
# Get Branch pseudotime
tscan.pseudo <- TSCAN::orderCells(results$maptscan, mst = results$mst,
start = useCluster)
nPaths <- colnames(tscan.pseudo)
x <- inSCE[,colData(inSCE)$TSCAN_clusters == useCluster]
plotList <- list()
for (i in seq(nPaths)) {
branchPseudotime <- TrajectoryUtils::pathStat(tscan.pseudo)[,i]
branchPseudotime <- branchPseudotime[colData(inSCE)$TSCAN_clusters == useCluster]
x$pseudotime <- branchPseudotime
plotList[[i]] <- scater::plotReducedDim(x, dimred = useReducedDim,
colour_by = "pseudotime",
text_by = "TSCAN_clusters",
text_colour = "black") +
ggplot2::geom_line(data = line.data.sub,
ggplot2::aes_string(x = colnames(line.data.sub)[2],
y = colnames(line.data.sub)[3],
group = "edge"))
}
if (combinePlot == "all") {
plotList <- cowplot::plot_grid(plotlist = plotList)
}
return(plotList)
}
###################################################
### plot gene of interest in branch cluster
###################################################
#' @title Plot features identified by \code{\link{runTSCANClusterDEAnalysis}} on
#' cell 2D embedding with MST overlaid
#' @description A wrapper function which plot the top features expression
#' identified by \code{\link{runTSCANClusterDEAnalysis}} on the 2D embedding of
#' the cells cluster used in the analysis. The related MST edges are overlaid.
#' @param inSCE Input \linkS4class{SingleCellExperiment} object.
#' @param useCluster Choose a cluster used for identifying DEG with
#' \code{\link{runTSCANClusterDEAnalysis}}. Required.
#' @param pathIndex Specifies one of the branching paths from \code{useCluster}
#' and plot the top DEGs on this path. Ususally presented by the terminal
#' cluster of a path. By default \code{NULL} plot top DEGs of all paths.
#' @param useReducedDim A single character for the matrix of 2D embedding.
#' Should exist in \code{reducedDims} slot. Default \code{"UMAP"}.
#' @param topN Integer. Use top N genes identified. Default \code{9}.
#' @param useAssay A single character for the feature expression matrix. Should
#' exist in \code{assayNames(inSCE)}. Default \code{NULL} for using the one used
#' in \code{\link{runTSCANClusterDEAnalysis}}.
#' @param featureDisplay Specify the feature ID type to display. Users can set
#' default value with \code{\link{setSCTKDisplayRow}}. \code{NULL} or
#' \code{"rownames"} specifies the rownames of \code{inSCE}. Other character
#' values indicates \code{rowData} variable.
#' @param combinePlot Must be either \code{"all"} or \code{"none"}. \code{"all"}
#' will combine plots of each feature into a single \code{.ggplot} object,
#' while \code{"none"} will output a list of plots. Default \code{"all"}.
#' @return A \code{.ggplot} object of cell scatter plot, colored by the
#' expression of a gene identified by \code{\link{runTSCANClusterDEAnalysis}},
#' with the layer of trajectory.
#' @export
#' @author Yichen Wang
#' @importFrom S4Vectors metadata
#' @examples
#' data("mouseBrainSubsetSCE", package = "singleCellTK")
#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
#' useReducedDim = "PCA_logcounts")
#' mouseBrainSubsetSCE <- runTSCANClusterDEAnalysis(inSCE = mouseBrainSubsetSCE,
#' useCluster = 1)
#' plotTSCANClusterDEG(mouseBrainSubsetSCE, useCluster = 1,
#' useReducedDim = "TSNE_logcounts")
plotTSCANClusterDEG <- function(
inSCE,
useCluster,
pathIndex = NULL,
useReducedDim = "UMAP",
topN = 9,
useAssay = NULL,
featureDisplay = metadata(inSCE)$featureDisplay,
combinePlot = c("all", "none")
) {
MSTResults <- getTSCANResults(inSCE, analysisName = "Pseudotime")
if (length(useCluster) != 1) stop("Can only specify one cluster")
DEResults <- getTSCANResults(inSCE, analysisName = "ClusterDEAnalysis",
pathName = useCluster)
if (is.null(DEResults)) {
stop("Branch DE result of specified cluster not found. Run ",
"`runTSCANClusterDEAnalysis()` with the same `useCluster` first.")
}
combinePlot <- match.arg(combinePlot)
if (is.null(pathIndex)) {
deg <- do.call(rbind, DEResults$DEgenes)
deg <- deg[order(deg$FDR),]
} else {
pathIndex <- as.character(pathIndex)
deg <- DEResults$DEgenes[[pathIndex]]
}
features <- rownames(deg)[seq(min(topN, nrow(deg), na.rm = TRUE))]
if (is.null(useAssay)) useAssay <- DEResults$useAssay
plotTSCANDimReduceFeatures(inSCE = inSCE, features = features,
useReducedDim = useReducedDim,
useAssay = useAssay, useCluster = useCluster,
featureDisplay = featureDisplay,
combinePlot = combinePlot)
}
#' @title Plot feature expression on cell 2D embedding with MST overlaid
#' @description A wrapper function which plots all cells or cells in chosen
#' cluster. Each point is a cell colored by the expression of a feature of
#' interest, the relevant edges of the MST are overlaid on top.
#' @param inSCE Input \linkS4class{SingleCellExperiment} object.
#' @param features Choose the feature of interest to explore the expression
#' level on the trajectory. Required.
#' @param useReducedDim A single character for the matrix of 2D embedding.
#' Should exist in \code{reducedDims} slot. Default \code{"UMAP"}.
#' @param useAssay A single character for the feature expression matrix. Should
#' exist in \code{assayNames(inSCE)}. Default \code{"logcounts"}.
#' @param by Where should \code{features} be found? \code{NULL},
#' \code{"rownames"} for \code{rownames(inSCE)}, otherwise will be regarded as
#' \code{rowData} variable.
#' @param useCluster Choose specific clusters where gene expression needs to be
#' visualized. By default \code{NULL}, all clusters are chosen.
#' @param featureDisplay Specify the feature ID type to display. Users can set
#' default value with \code{\link{setSCTKDisplayRow}}. \code{NULL} or
#' \code{"rownames"} specifies the rownames of \code{inSCE}. Other character
#' values indicates \code{rowData} variable.
#' @param combinePlot Must be either \code{"all"} or \code{"none"}. \code{"all"}
#' will combine plots of each feature into a single \code{.ggplot} object,
#' while \code{"none"} will output a list of plots. Default \code{"all"}.
#' @return A \code{.ggplot} object of cell scatter plot, colored by the
#' expression of a gene of interest, with the layer of trajectory.
#' @export
#' @author Yichen Wang
#' @importFrom S4Vectors metadata
#' @examples
#' data("mouseBrainSubsetSCE", package = "singleCellTK")
#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
#' useReducedDim = "PCA_logcounts")
#' plotTSCANDimReduceFeatures(inSCE = mouseBrainSubsetSCE,
#' features = "Tshz1",
#' useReducedDim = "TSNE_logcounts")
plotTSCANDimReduceFeatures <- function(
inSCE,
features,
useReducedDim = "UMAP",
useAssay = "logcounts",
by = "rownames",
useCluster = NULL,
featureDisplay = metadata(inSCE)$featureDisplay,
combinePlot = c("all", "none"))
{
results <- getTSCANResults(inSCE, analysisName = "Pseudotime")
combinePlot <- match.arg(combinePlot)
clusters <- colData(inSCE)$TSCAN_clusters
by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = clusters)
line.data <- TSCAN::reportEdges(by.cluster, mst = results$mst,
clusters = NULL, use.dimred = useReducedDim)
if (!is.null(useCluster)) {
line.data <- .getClustersLineData(line.data, useCluster)
inSCE <- inSCE[, clusters %in% useCluster]
}
if (by == "rownames") by <- NULL
plotList <- list()
features <- stats::na.omit(features)
for (f in features) {
# Should not enter the loop if features is length zero after NA omit
g <- plotSCEDimReduceFeatures(inSCE, feature = f,
useAssay = useAssay,
featureLocation = by,dim1 = 1, dim2 = 2,
featureDisplay = featureDisplay,
reducedDimName = useReducedDim,
title = f) +
ggplot2::geom_line(data = line.data,
ggplot2::aes_string(x = colnames(line.data)[2],
y = colnames(line.data)[3],
group = "edge"),
inherit.aes = FALSE)
# Text labeling code credit: scater
reddim <- as.data.frame(SingleCellExperiment::reducedDim(inSCE,
useReducedDim))
text_out <- scater::retrieveCellInfo(inSCE, "TSCAN_clusters",
search = "colData")
by_text_x <- vapply(split(reddim[,1], text_out$val),
stats::median, FUN.VALUE = 0)
by_text_y <- vapply(split(reddim[,2], text_out$val),
stats::median, FUN.VALUE = 0)
g <- g + ggrepel::geom_text_repel(
data = data.frame(x = by_text_x,
y = by_text_y,
label = names(by_text_x)),
mapping = ggplot2::aes_string(x = "x",
y = "y",
label = "label"),
inherit.aes = FALSE, na.rm = TRUE,
)
plotList[[f]] <- g
}
if (combinePlot == "all") {
if (length(plotList) > 0)
plotList <- cowplot::plot_grid(plotlist = plotList)
}
return(plotList)
}
#' Extract edges that connects to the clusters of interest
#' @param line.data data.frame of three columns. First column should be named
#' "edge", the other two are coordinates of one of the vertices that this
#' edge connects.
#' @param useCluster Vector of clusters, that are going to be the vertices for
#' edge finding.
#' @return data.frame, subset rows of line.data
#' @noRd
.getClustersLineData <- function(line.data, useCluster) {
idx <- vapply(useCluster, function(x){
grepl(paste0("^", x, "--"), line.data$edge) |
grepl(paste0("--", x, "$"), line.data$edge)
}, logical(nrow(line.data)))
# `idx` returned was c("matrix", "array") class. Each column is a logical
# vector for edge selection of each cluster.
# Next, do "or" for each row.
idx <- rowSums(idx) > 0
line.data[idx,]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.