Nothing
#' Analyze biological interpretations of metagene
#' @description Identify the biological pathways (gene sets from Reactome) that
#' each metagene (factor) might belongs to.
#' @param object A \linkS4class{liger} object with valid factorization result.
#' @param genesets Character vector of the Reactome gene sets names to be
#' tested. Default \code{NULL} uses all the gene sets from the Reactome.
#' @param useW Logical, whether to use the shared factor loadings (\eqn{W}).
#' Default \code{TRUE}.
#' @param useV A character vector of the names, a numeric or logical
#' vector of the index of the datasets where the \eqn{V} matrices will be
#' included for analysis. Default \code{NULL} uses all datasets.
#' @param customGenesets A named list of character vectors of entrez gene ids.
#' Default \code{NULL} uses all the gene symbols from the input matrix.
#' @param gene_sets,mat_w,mat_v,custom_gene_sets \bold{Deprecated}. See Usage
#' section for replacement.
#' @return A list of matrices with GSEA analysis for each factor
#' @export
#' @examples
#' \donttest{
#' if (requireNamespace("org.Hs.eg.db", quietly = TRUE) &&
#' requireNamespace("reactome.db", quietly = TRUE) &&
#' requireNamespace("fgsea", quietly = TRUE) &&
#' requireNamespace("AnnotationDbi", quietly = TRUE)) {
#' runGSEA(pbmcPlot)
#' }
#' }
runGSEA <- function(
object,
genesets = NULL,
useW = TRUE,
useV = NULL,
customGenesets = NULL,
# Deprecated coding style
gene_sets = genesets,
mat_w = useW,
mat_v = useV,
custom_gene_sets = customGenesets
) {
if (!requireNamespace("org.Hs.eg.db", quietly = TRUE)) # nocov start
cli::cli_abort(
"Package {.pkg org.Hs.eg.db} is needed for this function to work.
Please install it by command:
{.code BiocManager::install('org.Hs.eg.db')}")
if (!requireNamespace("reactome.db", quietly = TRUE))
cli::cli_abort(
"Package {.pkg reactome.db} is needed for this function to work.
Please install it by command:
{.code BiocManager::install('reactome.db')}")
if (!requireNamespace("fgsea", quietly = TRUE))
cli::cli_abort(
"Package {.pkg fgsea} is needed for this function to work.
Please install it by command:
{.code BiocManager::install('fgsea')}")
if (!requireNamespace("AnnotationDbi", quietly = TRUE))
cli::cli_abort(
"Package {.pkg AnnotationDbi} is needed for this function to work.
Please install it by command:
{.code BiocManager::install('AnnotationDbi')}") # nocov end
.deprecateArgs(list(gene_sets = "genesets",
mat_w = "useW",
mat_v = "useV",
custom_gene_sets = "customGenesets"))
useV <- .checkUseDatasets(object, useDatasets = useV)
.checkValidFactorResult(object, useV)
# list of V matrices: gene x k
Vs <- getMatrix(object, "V", dataset = useV, returnList = TRUE)
# Get gene ranks in each factor
geneLoading <- Reduce("+", Vs)
if (isTRUE(useW)) geneLoading <- geneLoading + getMatrix(object, "W")
geneRanks <- t(apply(geneLoading, MARGIN = 2, FUN = rank))
colnames(geneRanks) <- toupper(colnames(geneRanks))
# Rename gene with entrez ID
geneID <- as.character(
AnnotationDbi::mapIds(
org.Hs.eg.db::org.Hs.eg.db,
colnames(geneRanks),
"ENTREZID",
"SYMBOL"
)
)
geneRanks <- geneRanks[, !is.na(geneID)]
geneID <- geneID[!is.na(geneID)]
colnames(geneRanks) <- geneID
# Prepare `pathways` which should be a list, where each
# element is a character vector of entrez IDs.
if (is.null(customGenesets)) {
pathways <- fgsea::reactomePathways(colnames(geneRanks))
if (!is.null(genesets)) {
pathways <- pathways[intersect(genesets, names(pathways))]
}
} else {
if (inherits((customGenesets)[1], "tbl_df")) {
pathways <- split(customGenesets,
x = customGenesets$entrez_gene,
f = customGenesets$gs_name)
pathways <- lapply(pathways, as.character)
} else {
pathways <- customGenesets
}
}
gsea <- apply(geneRanks, MARGIN = 1, function(x) {
fgsea::fgsea(
pathways,
x,
minSize = 15,
maxSize = 500,
nperm = 10000
)
})
gsea <- lapply(gsea, function(x) {
as.data.frame(x[order(x$padj), ])
})
names(gsea) <- paste0("Factor_", seq_along(gsea))
return(gsea)
}
#' Run Gene Ontology enrichment analysis on differentially expressed genes.
#' @description
#' This function forms genesets basing on the differential expression result,
#' and calls gene ontology (GO) analysis method provided by gprofiler2.
#' @param result Data frame of unfiltered output from \code{\link{runMarkerDEG}}
#' or \code{\link{runPairwiseDEG}}.
#' @param group Selection of one group available from \code{result$group}.
#' Default \code{NULL} uses all groups involved in DE \code{result} table.
#' @param useBg Logical, whether to set all genes involved in DE analysis
#' (before threshold filtering) as a domain background of GO analysis. Default
#' \code{TRUE}.
#' @param orderBy Name of DE statistics metric to order the gene list for each
#' group. Choose from \code{"logFC"} (default), \code{"pval"} or \code{"padj"}.
#' Or set \code{NULL} to turn off ranked mode.
#' @param logFCThresh The log2FC threshold above which the genes will be used.
#' Default \code{1}.
#' @param padjThresh The adjusted p-value threshold less than which the genes
#' will be used. Default \code{0.05}.
#' @param splitReg Whether to have queries of both up-regulated and
#' down-regulated genes for each group. Default \code{FALSE} only queries
#' up-regulated genes and should be preferred when \code{result} comes from
#' marker detection test. When \code{result} comes from group-to-group DE test,
#' it is recommended to set \code{splitReg = TRUE}.
#' @param ... Additional arguments passed to \code{gprofiler2::gost()}. Useful
#' ones are:
#'
#' \describe{
#' \item{\code{organism}}{The organism to be used for the analysis. "hsapiens"
#' for human, "mmusculus" for mouse.}
#' \item{\code{evcodes}}{Whether to include overlapping genes for each term.
#' Default \code{FALSE}.}
#' \item{\code{significant}}{Whether to filter out non-significant terms.
#' Default \code{TRUE}.}
#' }
#' Arguments \code{query}, \code{custom_bg}, \code{domain_scope}, and
#' \code{ordered_query} are pre-specified by this wrapper function.
#' @references Kolberg, L. et al, 2020 and Raudvere, U. et al, 2019
#' @return A list object where each element is a result list for a group. Each
#' result list contains two elements:
#' \item{result}{data.frame of main GO analysis result.}
#' \item{meta}{Meta information for the query.}
#'
#' See \code{gprofiler2::gost()}. for detailed explanation.
#' @export
#' @examples
#' # Setting `significant = FALSE` because it's hard for a gene list obtained
#' # from small test dataset to represent real-life biology.
#' \donttest{
#' if (requireNamespace("gprofiler2", quietly = TRUE)) {
#' go <- runGOEnrich(deg.pw, group = "0.stim", significant = FALSE)
#' }
#' }
runGOEnrich <- function(
result,
group = NULL,
useBg = TRUE,
orderBy = "padj",
logFCThresh = 1,
padjThresh = 0.05,
splitReg = FALSE,
...
) {
if (!requireNamespace("gprofiler2", quietly = TRUE)) # nocov start
cli::cli_abort(
"Package {.pkg gprofiler2} is needed for this function to work.
Please install it by command:
{.code install.packages('gprofiler2')}") # nocov end
group <- group %||% unique(result$group)
if (any(!group %in% result$group)) {
cli::cli_abort("Selected groups not available in {.code result$group}: {.val {group[!group %in% result$group]}}")
}
bg <- NULL
domain_scope <- "annotated" # gprofiler2 default
if (isTRUE(useBg)) {
bg <- unique(result$feature)
domain_scope <- "custom"
}
filter <- result$group %in% group &
abs(result$logFC) > logFCThresh &
result$padj < padjThresh
filter[is.na(filter)] <- FALSE
result <- result[filter, , drop = FALSE]
resultUp <- result[result$logFC > 0,]
if (isTRUE(splitReg)) resultDown <- result[result$logFC < 0,]
ordered_query <- FALSE
if (!is.null(orderBy)) {
ordered_query <- TRUE
if (length(orderBy) > 1) cli::cli_abort("Only one {.code orderBy} metric allowed")
if (!orderBy %in% c("logFC", "pval", "padj")) {
cli::cli_abort("{.code orderBy} should be one of {.val logFC}, {.val pval} or {.val padj}.")
}
if (orderBy == "logFC") {
resultUp <- resultUp[order(resultUp$logFC, decreasing = TRUE),]
if (isTRUE(splitReg))
resultDown <- resultDown[order(resultDown$logFC),]
} else {
resultUp <- resultUp[order(resultUp[[orderBy]]),]
if (isTRUE(splitReg))
resultDown <- resultDown[order(resultDown[[orderBy]]),]
}
}
resultList <- list()
for (g in unique(result$group)) {
query <- list(Up = resultUp$feature[resultUp$group == g])
if (splitReg) query$Down <- resultDown$feature[resultDown$group == g]
resultList[[g]] <- gprofiler2::gost(
query = query, custom_bg = bg, domain_scope = domain_scope,
ordered_query = ordered_query, ...
)
}
return(resultList)
}
#' Visualize GO enrichment test result in dot plot
#' @param result Returned list object from \code{\link{runGOEnrich}}.
#' @param group Character vector of group names, must be available in
#' \code{names(result)}. Default \code{NULL} make plots for all groups.
#' @param query A single string selecting from which query to show the result.
#' Choose from \code{"Up"} for results using up-regulated genes, \code{"Down"}
#' for down-regulated genes. Default \code{"Up"}.
#' @param pvalThresh Numeric scalar, cutoff for p-value where smaller values are
#' considered as significant. Default \code{0.05}.
#' @param n Number of top terms to be shown, ranked by p-value. Default
#' \code{20}.
#' @param termIDMatch Regular expression pattern to match the term ID. Default
#' \code{"^GO"} for only using GO terms from returned results.
#' @param colorPalette,colorDirection Viridis palette options. Default
#' \code{"E"} and \code{1}.
#' @param xlab,ylab Axis title for x and y axis. Default
#' \code{"-log10(P-value)"} and \code{"Term name"}, respectively.
#' @inheritDotParams .ggplotLigerTheme title subtitle legendColorTitle legendSizeTitle showLegend legendPosition baseSize titleSize subtitleSize xTextSize xTitleSize yTextSize yTitleSize legendTextSize legendTitleSize plotly
#' @return A ggplot object if only one group or a list of ggplot objects.
#' @export
#' @examples
#' \donttest{
#' defaultCluster(pbmc) <- pbmcPlot$leiden_cluster
#' # Test the DEG between "stim" and "ctrl", within each cluster
#' result <- runPairwiseDEG(
#' pbmc,
#' groupTest = "stim",
#' groupCtrl = "ctrl",
#' variable1 = "dataset",
#' splitBy = "defaultCluster"
#' )
#' # Setting `significant = FALSE` because it's hard for a gene list obtained
#' # from small test dataset to represent real-life biology.
#' if (requireNamespace("gprofiler2", quietly = TRUE)) {
#' go <- runGOEnrich(result, group = "0.stim", splitReg = TRUE, significant = FALSE)
#' # The toy example won't have significant result.
#' plotGODot(go)
#' }
#' }
plotGODot <- function(
result,
group = NULL,
query = c("Up", "Down"),
pvalThresh = 0.05,
n = 20,
termIDMatch = "^GO",
colorPalette = "E",
colorDirection = 1,
xlab = '-log10(P-value)',
ylab = 'Term name',
...
) {
group <- group %||% names(result)
if (any(!group %in% names(result))) {
cli::cli_abort(
c(x = "Specified group{?s} not available in {.var result}: {.val {group[!group %in% names(result)]}}",
i = "Available one{?s} {?is/are}: {.val {names(result)}}")
)
}
query <- match.arg(query)
plotList <- list()
for (i in seq_along(group)) {
gname <- group[i]
resdf <- result[[gname]]$result
resdf <- resdf[resdf$query == query, , drop = FALSE]
if (is.null(resdf) || nrow(resdf) == 0) {
cli::cli_alert_warning(
"No significant result returned for group {.val {gname}} and query {.val {query}}."
)
next
}
resdf %<>% dplyr::filter(
grepl(termIDMatch, .data[['term_id']]),
.data[['p_value']] <= pvalThresh
)
if (nrow(resdf) == 0) {
cli::cli_alert_warning(
"No enough matching terms ({.field termIDMatch}) nor significant terms (p-value <= {.val {pvalThresh}}) for group {.val {gname}}."
)
next
}
g <- resdf %>%
dplyr::select(dplyr::all_of(c(
'term_name',
'p_value',
'intersection_size'
))) %>%
dplyr::arrange(.data[['p_value']]) %>%
dplyr::slice_head(n = n) %>%
dplyr::mutate(
term_name = factor(
.data[['term_name']],
levels = rev(.data[['term_name']])
)
) %>%
ggplot2::ggplot(ggplot2::aes(
x = -log10(.data[['p_value']]),
y = .data[['term_name']],
size = .data[['intersection_size']],
color = -log10(.data[['p_value']])
)) +
ggplot2::geom_point()
plotList[[gname]] <- .ggplotLigerTheme(
plot = g,
colorPalette = colorPalette,
colorDirection = colorDirection,
xlab = xlab,
ylab = ylab,
panelBorder = TRUE,
...
)
}
if (length(plotList) == 1) {
return(plotList[[1]])
} else {
return(plotList)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.