Nothing
#' Create Diagrams for Enriched Terms
#'
#' @param result_df Data frame of enrichment results. Must-have columns for
#' KEGG human pathway diagrams (\code{hsa_kegg = TRUE}) are: 'ID' and 'Term_Description'.
#' Must-have columns for the rest are: 'Term_Description', 'Up_regulated' and
#' 'Down_regulated'
#' @param input_processed input data processed via \code{\link{input_processing}},
#' not necessary when \code{hsa_KEGG = FALSE}
#' @param hsa_KEGG boolean to indicate whether human KEGG gene sets were used for
#' enrichment analysis or not (default = \code{TRUE})
#' @inheritParams return_pin_path
#' @param ... additional arguments for \code{\link{visualize_hsa_KEGG}} (used
#' when \code{hsa_kegg = TRUE}) or \code{\link{visualize_term_interactions}}
#' (used when \code{hsa_kegg = FALSE})
#'
#' @return Depending on the argument \code{hsa_KEGG}, creates visualization of
#' interactions of genes involved in the list of enriched terms in
#' \code{result_df} and saves them in the folder 'term_visualizations' under
#' the current working directory.
#'
#'
#' @details For \code{hsa_KEGG = TRUE}, KEGG human pathway diagrams are created,
#' affected nodes colored by up/down regulation status.
#' For other gene sets, interactions of affected genes are determined (via a shortest-path
#' algorithm) and are visualized (colored by change status) using igraph.
#'
#'
#' @export
#'
#' @seealso See \code{\link{visualize_hsa_KEGG}} for the visualization function
#' of human KEGG diagrams. See \code{\link{visualize_term_interactions}} for the
#' visualization function that generates diagrams showing the interactions of
#' input genes in the PIN. See \code{\link{run_pathfindR}} for the wrapper
#' function of the pathfindR workflow.
#'
#' @examples
#' \dontrun{
#' visualize_terms(result_df, input_processed)
#' visualize_terms(result_df, hsa_KEGG = FALSE, pin_name_path = 'IntAct')
#' }
visualize_terms <- function(result_df, input_processed = NULL, hsa_KEGG = TRUE, pin_name_path = "Biogrid",
...) {
############ Argument Checks
if (!is.data.frame(result_df)) {
stop("`result_df` should be a data frame")
}
if (!is.logical(hsa_KEGG)) {
stop("the argument `hsa_KEGG` should be either TRUE or FALSE")
}
if (hsa_KEGG) {
nec_cols <- "ID"
} else {
nec_cols <- c("Term_Description", "Up_regulated", "Down_regulated")
}
if (!all(nec_cols %in% colnames(result_df))) {
stop("`result_df` should contain the following columns: ", paste(dQuote(nec_cols),
collapse = ", "))
}
if (hsa_KEGG) {
if (is.null(input_processed)) {
stop("`input_processed` should be specified when `hsa_KEGG = TRUE`")
}
}
############ Generate Diagrams
if (hsa_KEGG) {
visualize_hsa_KEGG(hsa_kegg_ids = result_df$ID, input_processed = input_processed,
...)
} else {
visualize_term_interactions(result_df = result_df, pin_name_path = pin_name_path,
...)
}
}
#' Visualize Interactions of Genes Involved in the Given Enriched Terms
#'
#' @param result_df Data frame of enrichment results. Must-have columns
#' are: 'Term_Description', 'Up_regulated' and 'Down_regulated'
#' @inheritParams return_pin_path
#' @param show_legend Boolean to indicate whether to display the legend (\code{TRUE})
#' or not (\code{FALSE}) (default: \code{TRUE})
#'
#' @return Creates PNG files visualizing the interactions of genes involved
#' in the given enriched terms (annotated in the \code{result_df}) in the PIN used
#' for enrichment analysis (specified by \code{pin_name_path}). The PNG files are
#' saved in the folder 'term_visualizations' under the current working directory.
#'
#' @details The following steps are performed for the visualization of interactions
#' of genes involved for each enriched term: \enumerate{
#' \item shortest paths between all affected genes are determined (via \code{\link[igraph]{igraph}})
#' \item the nodes of all shortest paths are merged
#' \item the PIN is subsetted using the merged nodes (genes)
#' \item using the PIN subset, the graph showing the interactions is generated
#' \item the final graph is visualized using \code{\link[igraph]{igraph}}, colored by changed
#' status (if provided), and is saved as a PNG file.
#' }
#'
#' @export
#'
#' @seealso See \code{\link{visualize_terms}} for the wrapper function
#' for creating enriched term diagrams. See \code{\link{run_pathfindR}} for the
#' wrapper function of the pathfindR enrichment workflow.
#'
#' @examples
#' \dontrun{
#' visualize_term_interactions(result_df, pin_name_path = 'IntAct')
#' }
visualize_term_interactions <- function(result_df, pin_name_path, show_legend = TRUE) {
############ Initial Steps fix naming issue
result_df$Term_Description <- gsub("\\/", "-", result_df$Term_Description)
## load PIN
pin_path <- return_pin_path(pin_name_path)
pin <- utils::read.delim(file = pin_path, header = FALSE, stringsAsFactors = FALSE)
pin$V2 <- NULL
pin[, 1] <- base::toupper(pin[, 1])
pin[, 2] <- base::toupper(pin[, 2])
## pin graph
pin_g <- igraph::graph_from_data_frame(pin, directed = FALSE)
## Create visualization output directory
dir.create("term_visualizations", showWarnings = FALSE)
############ Visualize interactions by enriched term
for (i in base::seq_len(nrow(result_df))) {
current_row <- result_df[i, ]
up_genes <- base::toupper(unlist(strsplit(current_row$Up_regulated, ", ")))
down_genes <- base::toupper(unlist(strsplit(current_row$Down_regulated, ", ")))
current_genes <- c(down_genes, up_genes)
## Add active snw genes if listed
if (!is.null(result_df$non_Signif_Snw_Genes)) {
snw_genes <- unlist(strsplit(current_row$non_Signif_Snw_Genes, ", "))
snw_genes <- base::toupper(snw_genes)
current_genes <- c(current_genes, snw_genes)
} else {
snw_genes <- NULL
}
if (length(current_genes) < 2) {
message(paste0("< 2 genes, skipping visualization of ", current_row$Term_Description))
} else {
cat("Visualizing:", current_row$Term_Description, paste(rep(" ", 200),
collapse = ""), "\r")
## Find genes without direct interaction
cond1 <- pin$V1 %in% current_genes
cond2 <- pin$V3 %in% current_genes
direct_interactions <- pin[cond1 & cond2, ]
tmp <- c(direct_interactions$V1, direct_interactions$V3)
missing_genes <- current_genes[!current_genes %in% tmp]
## Find shortest path between genes without direct interaction and
## other current_genes
s_path_genes <- c()
for (gene in missing_genes) {
tmp <- suppressWarnings(igraph::shortest_paths(pin_g, from = which(names(igraph::V(pin_g)) ==
gene), to = which(names(igraph::V(pin_g)) %in% current_genes),
output = "vpath"))
tmp <- unique(unlist(lapply(tmp$vpath, function(x) names(x))))
s_path_genes <- unique(c(s_path_genes, tmp))
}
final_genes <- unique(c(current_genes, s_path_genes))
cond1 <- pin$V1 %in% final_genes
cond2 <- pin$V3 %in% final_genes
final_interactions <- pin[cond1 & cond2, ]
g <- igraph::graph_from_data_frame(final_interactions, directed = FALSE)
cond1 <- names(igraph::V(g)) %in% up_genes
cond2 <- names(igraph::V(g)) %in% down_genes
cond3 <- names(igraph::V(g)) %in% snw_genes
igraph::V(g)$color <- ifelse(cond1, "green", ifelse(cond2, "red", ifelse(cond3,
"blue", "gray60")))
#### Generate diagram
term_diagram <- magick::image_graph(width = 1200, height = 900, res = 100)
# Plot the tree object
igraph::plot.igraph(g, layout = igraph::layout.fruchterman.reingold,
edge.curved = FALSE, vertex.size = 10, vertex.label.dist = 0, vertex.label.color = "black",
asp = FALSE, vertex.label.cex = 0.8, edge.width = 1.2, edge.arrow.mode = 0,
main = paste(current_row$Term_Description, "\n Involved Gene Interactions in",
pin_name_path))
if (show_legend) {
if (is.null(snw_genes)) {
graphics::legend("topleft", legend = c("Upregulated Input Genes",
"Downregulated Input Genes", "Other"), col = c("green", "red",
"gray60"), pch = 19, cex = 1.5, bty = "n")
} else {
graphics::legend("topleft", legend = c("Non-input Active Snw. Genes",
"Upregulated Input Genes", "Downregulated Input Genes", "Other"),
col = c("blue", "green", "red", "gray60"), pch = 19, cex = 1.5,
bty = "n")
}
}
grDevices::dev.off()
#### Add logo to diagram Read logo
path_logo <- system.file("extdata", "logo.png", package = "pathfindR")
logo_img <- magick::image_read(path_logo)
### Add logo
term_diagram <- magick::image_composite(term_diagram, magick::image_scale(logo_img,
"x100"), gravity = "northeast", offset = "+10+10")
# remove forbidden characters
current_row$Term_Description <- gsub("[, :<>?*|\"/\\]", "_", current_row$Term_Description)
# limit to 100 characters
current_row$Term_Description <- substr(current_row$Term_Description,
1, min(50, nchar(current_row$Term_Description)))
path_to_png <- file.path("term_visualizations", paste0(current_row$Term_Description,
".png"))
#### Save file
magick::image_write(term_diagram, path = path_to_png, format = "png")
}
}
}
#' Visualize Human KEGG Pathways
#'
#' @param hsa_kegg_ids hsa KEGG ids of pathways to be colored and visualized
#' @param input_processed input data processed via \code{\link{input_processing}}
#' @inheritParams color_kegg_pathway
#' @param key_gravity gravity value (character) for the color key legend placement
#' (see \code{\link[magick]{gravity_types}})
#' @param logo_gravity gravity value (character) for the logo placement
#' (see \code{\link[magick]{gravity_types}})
#'
#' @return Creates colored visualizations of the enriched human KEGG pathways
#' and saves them in the folder 'term_visualizations' under the current working
#' directory.
#'
#' @seealso See \code{\link{visualize_terms}} for the wrapper function for
#' creating enriched term diagrams. See \code{\link{run_pathfindR}} for the
#' wrapper function of the pathfindR enrichment workflow.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' visualize_hsa_KEGG(hsa_kegg_ids, input_processed)
#' }
visualize_hsa_KEGG <- function(hsa_kegg_ids, input_processed, scale_vals = TRUE,
node_cols = NULL, quiet = TRUE, key_gravity = "northeast", logo_gravity = "southeast") {
############ Arg checks
### hsa_kegg_ids
if (!is.atomic(hsa_kegg_ids)) {
stop("`hsa_kegg_ids` should be a vector of hsa KEGG IDs")
}
if (!all(grepl("^hsa[0-9]{5}$", hsa_kegg_ids))) {
stop("`hsa_kegg_ids` should be a vector of valid hsa KEGG IDs")
}
### input_processed
if (!is.data.frame(input_processed)) {
stop("`input_processed` should be a data frame")
}
nec_cols <- c("GENE", "CHANGE")
if (!all(nec_cols %in% colnames(input_processed))) {
stop("`input_processed` should contain the following columns: ", paste(dQuote(nec_cols),
collapse = ", "))
}
############ Create change vector Convert gene symbols into NCBI gene IDs
tmp <- AnnotationDbi::mget(input_processed$GENE, AnnotationDbi::revmap(org.Hs.eg.db::org.Hs.egSYMBOL),
ifnotfound = NA)
input_processed$EG_ID <- vapply(tmp, function(x) as.character(x[1]), "EGID")
input_processed <- input_processed[!is.na(input_processed$EG_ID), ]
### A rule of thumb for the 'kegg' ID is entrezgene ID for eukaryote
### species
input_processed$KEGG_ID <- paste0("hsa:", input_processed$EG_ID)
############ Fetch all pathway genes, create vector of change values and
############ Generate colored pathway diagrams for each pathway
change_vec <- input_processed$CHANGE
names(change_vec) <- input_processed$KEGG_ID
cat("Downloading pathway diagrams of", length(hsa_kegg_ids), "KEGG pathways\n\n")
pw_vis_list <- list()
pb <- utils::txtProgressBar(min = 0, max = length(hsa_kegg_ids), style = 3)
for (i in seq_len(length(hsa_kegg_ids))) {
pw_id <- hsa_kegg_ids[i]
pw_vis_list[[pw_id]] <- color_kegg_pathway(pw_id = pw_id, change_vec = change_vec,
scale_vals = scale_vals, node_cols = node_cols, quiet = quiet)
utils::setTxtProgressBar(pb, i)
}
close(pb)
############ Add logo and color key legend per each diagram
### Read logo
path_logo <- system.file("extdata", "logo.png", package = "pathfindR")
logo_img <- magick::image_read(path_logo)
dir.create("term_visualizations", showWarnings = FALSE)
cat("Saving colored pathway diagrams of", length(pw_vis_list), "KEGG pathways\n\n")
if (length(pw_vis_list) != 0) {
pb <- utils::txtProgressBar(min = 0, max = length(pw_vis_list), style = 3)
for (i in seq_len(length(pw_vis_list))) {
### Read image
f_path <- pw_vis_list[[i]]$file_path
pw_diag <- magick::image_read(f_path)
### Add logo
pw_diag <- magick::image_composite(pw_diag, magick::image_scale(logo_img,
"x90"), gravity = logo_gravity, offset = "+10+10")
### Prep for color keys
key_col_df <- data.frame(bin_val = seq_along(pw_vis_list[[i]]$all_key_cols),
color = pw_vis_list[[i]]$all_key_cols, y_val = 1)
key_breaks <- pw_vis_list[[i]]$all_brks
names(key_breaks) <- seq_along(key_breaks)
key_breaks <- c(key_breaks[1], mean(key_breaks[1:6]), key_breaks[6],
mean(key_breaks[6:11]), key_breaks[11])
brks <- c(0.5, 3, 5.5, 8, 10.5)
### Generate color legend image
col_key_legend <- magick::image_graph(width = 200, height = 90, res = 100)
g <- ggplot2::ggplot(key_col_df, ggplot2::aes(.data$bin_val, .data$y_val))
g <- g + ggplot2::geom_tile(fill = key_col_df$color, colour = "black")
g <- g + ggplot2::scale_x_continuous(expand = c(0, 0), breaks = brks,
labels = base::format(key_breaks, digits = 2))
g <- g + ggplot2::scale_y_discrete(expand = c(0, 0))
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::theme(panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(), axis.title.x = ggplot2::element_blank(),
axis.title.y = ggplot2::element_blank(), axis.ticks = ggplot2::element_line(colour = "black",
linewidth = 0.6), axis.ticks.length = ggplot2::unit(0.2, "cm"),
axis.text.x = ggplot2::element_text(size = 14, face = "bold"), panel.border = ggplot2::element_rect(colour = "black",
fill = NA, linewidth = 0.5), plot.margin = ggplot2::unit(c(0, 0.7,
0, 0.7), "cm"))
print(g)
grDevices::dev.off()
### Add color key legend
if (all(change_vec == 1e+06)) {
final_pw_img <- pw_diag
} else {
final_pw_img <- magick::image_composite(pw_diag, magick::image_scale(col_key_legend,
"x45"), gravity = key_gravity, offset = "+10+10")
}
final_path <- file.path("term_visualizations", basename(f_path))
magick::image_write(final_pw_img, path = final_path, format = "png")
utils::setTxtProgressBar(pb, i)
}
close(pb)
}
}
#' Color hsa KEGG pathway
#'
#' @param pw_id hsa KEGG pathway id (e.g. hsa05012)
#' @param change_vec vector of change values, names should be hsa KEGG gene ids
#' @param scale_vals should change values be scaled? (default = \code{TRUE})
#' @param node_cols low, middle and high color values for coloring the pathway nodes
#' (default = \code{NULL}). If \code{node_cols=NULL}, the low, middle and high color
#' are set as 'green', 'gray' and 'red'. If all change values are 1e6 (in case no
#' changes are supplied, this dummy value is assigned by
#' \code{\link{input_processing}}), only one color ('#F38F18' if NULL) is used.
#' @param quiet If \code{TRUE}, suppress status messages (if any), and the
#' progress bar while downloading file(s)
#'
#' @return list containing: \enumerate{
#' \item file_path: path to colored hsa KEGG pathway diagram
#' \item all_key_cols: colors used for each change value bin
#' \item all_brks: breaks used for separating change values into bins
#' }
#'
#' @examples
#' \dontrun{
#' pw_id <- 'hsa00010'
#' change_vec <- c(-2, 4, 6)
#' names(change_vec) <- c('hsa:2821', 'hsa:226', 'hsa:229')
#' result <- pathfindR:::color_kegg_pathway(pw_id, change_vec)
#' }
color_kegg_pathway <- function(pw_id, change_vec, scale_vals = TRUE, node_cols = NULL,
quiet = TRUE) {
############ Arg checks
if (!is.logical(scale_vals)) {
stop("`scale_vals` should be logical")
}
## check node_cols
if (!is.null(node_cols)) {
if (!is.atomic(node_cols)) {
stop("`node_cols` should be a vector of colors")
}
if (!all(change_vec == 1e+06) & length(node_cols) != 3) {
stop("the length of `node_cols` should be 3")
}
isColor <- function(x) {
tryCatch(is.matrix(grDevices::col2rgb(x)), error = function(e) FALSE)
}
if (!all(vapply(node_cols, isColor, TRUE))) {
stop("`node_cols` should be a vector of valid colors")
}
}
############ Set node palette if node_cols not supplied, use default
############ color(s)
if (!is.null(node_cols)) {
if (all(change_vec == 1e+06)) {
message("all `change_vec` values are 1e6, using the first color in `node_cols`")
low_col <- mid_col <- high_col <- node_cols[1]
} else {
low_col <- node_cols[1]
mid_col <- node_cols[2]
high_col <- node_cols[3]
}
} else if (all(change_vec == 1e+06)) {
## NO CHANGES SUPPLIED
low_col <- mid_col <- high_col <- "#F38F18"
} else {
low_col <- "red"
mid_col <- "gray"
high_col <- "green"
}
############ Summarization for genes that are in the same node and handling
############ of non-input pathway genes download KGML to determine gene
############ nodes
pwKGML <- tempfile()
KGML_download_status <- download_KGML_file(pw_id, pwKGML, quiet)
if (is.na(KGML_download_status) | is.na(file.info(pwKGML)$size)) {
return(NULL)
}
current_pw <- KEGGgraph::parseKGML(pwKGML)
all_nodes <- KEGGgraph::nodes(current_pw)
gene_nodes <- list()
for (node in all_nodes) {
if (KEGGgraph::getType(node) == "gene") {
gene_nodes[[KEGGgraph::getEntryID(node)]] <- KEGGgraph::getName(node)
}
}
gene_nodes <- unique(gene_nodes)
gene_nodes <- gene_nodes[order(vapply(gene_nodes, length, 1L))]
## summarize over all pathway gene nodes keeping one unique gene per each
## node
pw_vis_changes <- c()
for (i in seq_len(length(gene_nodes))) {
node <- gene_nodes[[i]]
cond <- names(change_vec) %in% node
tmp_val <- mean(change_vec[cond])
if (length(node) != 1) {
rest_nodes <- unlist(gene_nodes[-i])
chosen_nm <- setdiff(node, rest_nodes)
} else {
chosen_nm <- node
}
chosen_nm <- chosen_nm[!chosen_nm %in% names(pw_vis_changes)][1]
if (is.na(chosen_nm)) {
tmp <- node[!node %in% names(pw_vis_changes)]
chosen_nm <- ifelse(length(tmp) == 0, node[1], tmp[1])
}
names(tmp_val) <- chosen_nm
pw_vis_changes <- c(pw_vis_changes, tmp_val)
}
## if no input genes present in chosen pathway
if (all(is.na(pw_vis_changes))) {
return(NULL)
}
## average over duplicate node names
if (anyDuplicated(names(pw_vis_changes))) {
dup_names <- names(pw_vis_changes)[duplicated(names(pw_vis_changes))]
for (dup_name in dup_names) {
tmp <- mean(pw_vis_changes[names(pw_vis_changes) == dup_name])
names(tmp) <- dup_name
pw_vis_changes <- pw_vis_changes[names(pw_vis_changes) != dup_name]
pw_vis_changes <- c(pw_vis_changes, tmp)
}
}
############ Determine node colors
vals <- pw_vis_changes[!is.na(pw_vis_changes)]
### Scaling
if (!all(vals == 1e+06) & scale_vals) {
pos_idx <- which(vals >= 0)
neg_idx <- which(vals < 0)
UL <- max(abs(vals))
pos_vals <- c(UL, vals[pos_idx])
pos_vals <- (pos_vals - min(pos_vals))/diff(range(pos_vals))
neg_vals <- c(-UL, vals[neg_idx])
neg_vals <- (neg_vals - min(neg_vals))/diff(range(neg_vals)) - 1
vals[pos_idx] <- pos_vals[-1]
vals[neg_idx] <- neg_vals[-1]
}
### determine limit
lim <- round(max(abs(vals)), 2)
### if no vals exist set lim to 1 maybe raise a warning also
lim <- ifelse(is.na(lim), 1, lim)
### generate low colors
low_vals <- vals[vals < 0]
low_brks <- seq(from = -lim, to = 0, length.out = 6)
low_bins <- cut(low_vals, breaks = low_brks, ordered_result = TRUE, include.lowest = TRUE)
key_col_fn <- grDevices::colorRampPalette(c(low_col, mid_col))
low_key_cols <- key_col_fn(5)
names(low_key_cols) <- levels(low_bins)
names(low_bins) <- names(low_vals)
### generate high colors
high_vals <- vals[vals > 0]
high_brks <- seq(from = 0, to = lim, length.out = 6)
high_bins <- cut(high_vals, breaks = high_brks, ordered_result = TRUE, include.lowest = TRUE)
key_col_fn <- grDevices::colorRampPalette(c(mid_col, high_col))
high_key_cols <- key_col_fn(5)
names(high_key_cols) <- levels(high_bins)
names(high_bins) <- names(high_vals)
all_brks <- c(low_brks, high_brks[-1])
all_key_cols <- c(low_key_cols, high_key_cols)
############ Label each pw gene with the appropriate color
fg_cols <- ifelse(names(pw_vis_changes) %in% names(low_bins), low_key_cols[low_bins[names(pw_vis_changes)]],
ifelse(names(pw_vis_changes) %in% names(high_bins), high_key_cols[high_bins[names(pw_vis_changes)]],
"#ffffff"))
bg_cols <- rep("#000000", length(pw_vis_changes))
## if leq than 60, disregard non-input genes
if (length(fg_cols) >= 60) {
keep <- fg_cols != "#ffffff"
fg_cols <- fg_cols[keep]
bg_cols <- bg_cols[keep]
pw_vis_changes <- pw_vis_changes[keep]
}
############ Download colored KEGG pathway diagram
pw_url <- obtain_colored_url(pw_id = pw_id, KEGG_gene_ids = names(pw_vis_changes),
fg_cols = fg_cols, bg_cols = bg_cols)
fname <- paste0(pw_id, "_pathfindR.png")
f_path <- file.path(tempdir(check = TRUE), fname)
dl_stat <- download_kegg_png(pw_url, f_path, quiet)
if (!is.na(dl_stat)) {
return(list(file_path = f_path, all_key_cols = all_key_cols, all_brks = all_brks))
}
}
#' Obtain KGML file for a KEGG pathway (hsa)
#'
#' @param pw_id KEGG pathway ID
#' @param pwKGML destination file
#' @inheritParams color_kegg_pathway
#'
#' @return download status (0 for success), if warning/error returns NA
download_KGML_file <- function(pw_id, pwKGML, quiet = TRUE) {
status <- tryCatch({
utils::download.file(url = KEGGgraph::getKGMLurl(pathwayid = sub("hsa", "",
pw_id), organism = "hsa"), destfile = pwKGML, quiet = quiet)
}, error = function(e) {
message(paste("Cannot download KGML file for:", pw_id))
message("Here's the original error message:")
message(e$message)
return(NA)
}, warning = function(w) {
message(paste("Cannot download KGML file for:", pw_id))
message("Here's the original error message:")
message(w$message)
return(NA)
})
return(status)
}
#' Obtain URL for a KEGG pathway diagram with a given set of genes marked
#'
#' @param pw_id KEGG pathway ID
#' @param KEGG_gene_ids KEGG gene IDs for marking
#' @param fg_cols colors for the text and border
#' @param bg_cols background colors of the objects in a pathway diagram.
#'
#' @return URL for colored KEGG pathway diagram
obtain_colored_url <- function(pw_id, KEGG_gene_ids, fg_cols, bg_cols) {
pw_url <- tryCatch({
KEGGREST::color.pathway.by.objects(pathway.id = pw_id, object.id.list = KEGG_gene_ids,
fg.color.list = bg_cols, bg.color.list = fg_cols)
}, error = function(e) {
message(paste("Cannot retrieve PNG url:", pw_id))
message("Here's the original error message:")
message(e$message)
return(NA)
})
return(pw_url)
}
#' Download Colored KEGG Diagram PNG
#'
#' @param pw_url url to download
#' @param f_path local path to save the file
#' @inheritParams color_kegg_pathway
#'
#' @return download status
download_kegg_png <- function(pw_url, f_path, quiet = TRUE) {
res <- tryCatch({
utils::download.file(url = pw_url, destfile = f_path, mode = "wb", quiet = quiet)
}, error = function(e) {
message(paste("Cannot download PNG file:", pw_url))
message("Here's the original error message:")
message(e$message)
return(NA)
})
return(res)
}
#' Create Bubble Chart of Enrichment Results
#'
#' This function is used to create a ggplot2 bubble chart displaying the
#' enrichment results.
#'
#' @param result_df a data frame that must contain the following columns: \describe{
#' \item{Term_Description}{Description of the enriched term}
#' \item{Fold_Enrichment}{Fold enrichment value for the enriched term}
#' \item{lowest_p}{the lowest adjusted-p value of the given term over all iterations}
#' \item{Up_regulated}{the up-regulated genes in the input involved in the given term's gene set, comma-separated}
#' \item{Down_regulated}{the down-regulated genes in the input involved in the given term's gene set, comma-separated}
#' \item{Cluster(OPTIONAL)}{the cluster to which the enriched term is assigned}
#' }
#' @param top_terms number of top terms (according to the 'lowest_p' column)
#' to plot (default = 10). If \code{plot_by_cluster = TRUE}, selects the top
#' \code{top_terms} terms per each cluster. Set \code{top_terms = NULL} to plot
#' for all terms.If the total number of terms is less than \code{top_terms},
#' all terms are plotted.
#' @param plot_by_cluster boolean value indicating whether or not to group the
#' enriched terms by cluster (works if \code{result_df} contains a
#' 'Cluster' column).
#' @param num_bubbles number of sizes displayed in the legend \code{# genes}
#' (Default = 4)
#' @param even_breaks whether or not to set even breaks for the number of sizes
#' displayed in the legend \code{# genes}. If \code{TRUE} (default), sets
#' equal breaks and the number of displayed bubbles may be different than the
#' number set by \code{num_bubbles}. If the exact number set by
#' \code{num_bubbles} is required, set this argument to \code{FALSE}
#'
#' @return a \code{\link[ggplot2]{ggplot2}} object containing the bubble chart.
#' The x-axis corresponds to fold enrichment values while the y-axis indicates
#' the enriched terms. Size of the bubble indicates the number of significant
#' genes in the given enriched term. Color indicates the -log10(lowest-p) value.
#' The closer the color is to red, the more significant the enrichment is.
#' Optionally, if 'Cluster' is a column of \code{result_df} and
#' \code{plot_by_cluster == TRUE}, the enriched terms are grouped by clusters.
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' g <- enrichment_chart(example_pathfindR_output)
enrichment_chart <- function(result_df, top_terms = 10, plot_by_cluster = FALSE,
num_bubbles = 4, even_breaks = TRUE) {
message("Plotting the enrichment bubble chart")
necessary <- c("Term_Description", "Fold_Enrichment", "lowest_p", "Up_regulated",
"Down_regulated")
if (!all(necessary %in% colnames(result_df))) {
stop("The input data frame must have the columns:\n", paste(necessary, collapse = ", "))
}
if (!is.logical(plot_by_cluster)) {
stop("`plot_by_cluster` must be either TRUE or FALSE")
}
if (!is.numeric(top_terms) & !is.null(top_terms)) {
stop("`top_terms` must be either numeric or NULL")
}
if (!is.null(top_terms)) {
if (top_terms < 1) {
stop("`top_terms` must be > 1")
}
}
# sort by lowest adj.p
result_df <- result_df[order(result_df$lowest_p), ]
## Filter for top_terms
if (!is.null(top_terms)) {
if (plot_by_cluster & "Cluster" %in% colnames(result_df)) {
keep_ids <- tapply(result_df$ID, result_df$Cluster, function(x) {
x[seq_len(min(top_terms, length(x)))]
})
keep_ids <- unlist(keep_ids)
result_df <- result_df[result_df$ID %in% keep_ids, ]
} else if (top_terms < nrow(result_df)) {
result_df <- result_df[seq_len(top_terms), ]
}
}
num_genes <- vapply(result_df$Up_regulated, function(x) length(unlist(strsplit(x,
", "))), 1)
num_genes <- num_genes + vapply(result_df$Down_regulated, function(x) length(unlist(strsplit(x,
", "))), 1)
result_df$Term_Description <- factor(result_df$Term_Description, levels = rev(unique(result_df$Term_Description)))
log_p <- -log10(result_df$lowest_p)
g <- ggplot2::ggplot(result_df, ggplot2::aes(.data$Fold_Enrichment, .data$Term_Description))
g <- g + ggplot2::geom_point(ggplot2::aes(color = log_p, size = num_genes), na.rm = TRUE)
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::theme(axis.text.x = ggplot2::element_text(size = 10), axis.text.y = ggplot2::element_text(size = 10),
plot.title = ggplot2::element_blank())
g <- g + ggplot2::xlab("Fold Enrichment")
g <- g + ggplot2::theme(axis.title.y = ggplot2::element_blank())
g <- g + ggplot2::labs(size = "# genes", color = expression(-log[10](p)))
## breaks for # genes
if (max(num_genes) < num_bubbles) {
g <- g + ggplot2::scale_size_continuous(breaks = seq(0, max(num_genes)))
} else {
if (even_breaks) {
brks <- base::seq(0, max(num_genes), round(max(num_genes)/(num_bubbles +
1)))
} else {
brks <- base::round(base::seq(0, max(num_genes), length.out = num_bubbles +
1))
}
g <- g + ggplot2::scale_size_continuous(breaks = brks)
}
g <- g + ggplot2::scale_color_gradient(low = "#f5efef", high = "red")
if (plot_by_cluster & "Cluster" %in% colnames(result_df)) {
g <- g + ggplot2::facet_grid(result_df$Cluster ~ ., scales = "free_y", space = "free",
drop = TRUE)
} else if (plot_by_cluster) {
message("For plotting by cluster, there must a column named `Cluster` in the input data frame!")
}
return(g)
}
#' Create Term-Gene Graph
#'
#' @param result_df A dataframe of pathfindR results that must contain the following columns: \describe{
#' \item{Term_Description}{Description of the enriched term (necessary if \code{use_description = TRUE})}
#' \item{ID}{ID of the enriched term (necessary if \code{use_description = FALSE})}
#' \item{lowest_p}{the lowest adjusted-p value of the given term over all iterations}
#' \item{Up_regulated}{the up-regulated genes in the input involved in the given term's gene set, comma-separated}
#' \item{Down_regulated}{the down-regulated genes in the input involved in the given term's gene set, comma-separated}
#' }
#' @param num_terms Number of top enriched terms to use while creating the graph. Set to \code{NULL} to use
#' all enriched terms (default = 10, i.e. top 10 terms)
#' @param layout The type of layout to create (see \code{\link[ggraph]{ggraph}} for details. Default = 'stress')
#' @param use_description Boolean argument to indicate whether term descriptions
#' (in the 'Term_Description' column) should be used. (default = \code{FALSE})
#' @param node_size Argument to indicate whether to use number of significant genes ('num_genes')
#' or the -log10(lowest p value) ('p_val') for adjusting the node sizes (default = 'num_genes')
#'
#' @return a \code{\link[ggraph]{ggraph}} object containing the term-gene graph.
#' Each node corresponds to an enriched term (beige), an up-regulated gene (green)
#' or a down-regulated gene (red). An edge between a term and a gene indicates
#' that the given term involves the gene. Size of a term node is proportional
#' to either the number of genes (if \code{node_size = 'num_genes'}) or
#' the -log10(lowest p value) (if \code{node_size = 'p_val'}).
#'
#' @details This function (adapted from the Gene-Concept network visualization
#' by the R package \code{enrichplot}) can be utilized to visualize which input
#' genes are involved in the enriched terms as a graph. The term-gene graph
#' shows the links between genes and biological terms and allows for the
#' investigation of multiple terms to which significant genes are related. The
#' graph also enables determination of the overlap between the enriched terms
#' by identifying shared and distinct significant term-related genes.
#'
#' @import ggraph
#' @export
#'
#' @examples
#' p <- term_gene_graph(example_pathfindR_output)
#' p <- term_gene_graph(example_pathfindR_output, num_terms = 5)
#' p <- term_gene_graph(example_pathfindR_output, node_size = 'p_val')
term_gene_graph <- function(result_df, num_terms = 10, layout = "stress", use_description = FALSE,
node_size = "num_genes") {
############ Argument Checks Check num_terms is NULL or numeric
if (!is.numeric(num_terms) & !is.null(num_terms)) {
stop("`num_terms` must either be numeric or NULL!")
}
### Check use_description is boolean
if (!is.logical(use_description)) {
stop("`use_description` must either be TRUE or FALSE!")
}
### Set column for term labels
ID_column <- ifelse(use_description, "Term_Description", "ID")
### Check node_size
val_node_size <- c("num_genes", "p_val")
if (!node_size %in% val_node_size) {
stop("`node_size` should be one of ", paste(dQuote(val_node_size), collapse = ", "))
}
if (!is.data.frame(result_df)) {
stop("`result_df` should be a data frame")
}
### Check necessary columnns
necessary_cols <- c(ID_column, "lowest_p", "Up_regulated", "Down_regulated")
if (!all(necessary_cols %in% colnames(result_df))) {
stop(paste(c("All of", paste(necessary_cols, collapse = ", "), "must be present in `results_df`!"),
collapse = " "))
}
############ Initial steps set num_terms to NULL if number of enriched
############ terms is smaller than num_terms
if (!is.null(num_terms)) {
if (nrow(result_df) < num_terms) {
num_terms <- NULL
}
}
### Order and filter for top N genes
result_df <- result_df[order(result_df$lowest_p, decreasing = FALSE), ]
if (!is.null(num_terms)) {
result_df <- result_df[1:num_terms, ]
}
### Prep data frame for graph
graph_df <- data.frame()
for (i in base::seq_len(nrow(result_df))) {
up_genes <- unlist(strsplit(result_df$Up_regulated[i], ", "))
down_genes <- unlist(strsplit(result_df$Down_regulated[i], ", "))
genes <- c(up_genes, down_genes)
for (gene in genes) {
graph_df <- rbind(graph_df, data.frame(Term = result_df[i, ID_column],
Gene = gene))
}
}
up_genes <- lapply(result_df$Up_regulated, function(x) unlist(strsplit(x, ", ")))
up_genes <- unlist(up_genes)
############ Create graph object and plot create igraph object
g <- igraph::graph_from_data_frame(graph_df, directed = FALSE)
cond_term <- names(igraph::V(g)) %in% result_df[, ID_column]
cond_up_gene <- names(igraph::V(g)) %in% up_genes
igraph::V(g)$type <- ifelse(cond_term, "term", ifelse(cond_up_gene, "up", "down"))
# Adjust node sizes
if (node_size == "num_genes") {
sizes <- igraph::degree(g)
sizes <- ifelse(igraph::V(g)$type == "term", sizes, 2)
size_label <- "# genes"
} else {
idx <- match(names(igraph::V(g)), result_df[, ID_column])
sizes <- -log10(result_df$lowest_p[idx])
sizes[is.na(sizes)] <- 2
size_label <- "-log10(p)"
}
igraph::V(g)$size <- sizes
igraph::V(g)$label.cex <- 0.5
igraph::V(g)$frame.color <- "gray"
igraph::V(g)$color <- ifelse(igraph::V(g)$type == "term", "#E5D7BF", ifelse(igraph::V(g)$type ==
"up", "green", "red"))
### Create graph
p <- ggraph::ggraph(g, layout = layout)
p <- p + ggraph::geom_edge_link(alpha = 0.8, colour = "darkgrey")
p <- p + ggraph::geom_node_point(ggplot2::aes(color = .data$color, size = .data$size))
p <- p + ggplot2::scale_size(range = c(5, 10), breaks = round(seq(round(min(igraph::V(g)$size)),
round(max(igraph::V(g)$size)), length.out = 4)), name = size_label)
p <- p + ggplot2::theme_void()
p <- p + suppressWarnings(ggraph::geom_node_text(ggplot2::aes(label = .data$name),
nudge_y = 0.2, repel = TRUE, max.overlaps = 20))
p <- p + ggplot2::scale_color_manual(values = unique(igraph::V(g)$color), name = NULL,
labels = c("enriched term", "up-regulated gene", "down-regulated gene"))
if (is.null(num_terms)) {
p <- p + ggplot2::ggtitle("Term-Gene Graph")
} else {
p <- p + ggplot2::ggtitle("Term-Gene Graph", subtitle = paste(c("Top", num_terms,
"terms"), collapse = " "))
}
p <- p + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5), plot.subtitle = ggplot2::element_text(hjust = 0.5))
return(p)
}
#' Create Terms by Genes Heatmap
#'
#' @param result_df A dataframe of pathfindR results that must contain the following columns: \describe{
#' \item{Term_Description}{Description of the enriched term (necessary if \code{use_description = TRUE})}
#' \item{ID}{ID of the enriched term (necessary if \code{use_description = FALSE})}
#' \item{lowest_p}{the highest adjusted-p value of the given term over all iterations}
#' \item{Up_regulated}{the up-regulated genes in the input involved in the given term's gene set, comma-separated}
#' \item{Down_regulated}{the down-regulated genes in the input involved in the given term's gene set, comma-separated}
#' }
#' @param genes_df the input data that was used with \code{\link{run_pathfindR}}.
#' It must be a data frame with 3 columns: \enumerate{
#' \item Gene Symbol (Gene Symbol)
#' \item Change value, e.g. log(fold change) (optional)
#' \item p value, e.g. adjusted p value associated with differential expression
#' } The change values in this data frame are used to color the affected genes
#' @param num_terms Number of top enriched terms to use while creating the plot. Set to \code{NULL} to use
#' all enriched terms (default = 10)
#' @inheritParams term_gene_graph
#' @inheritParams plot_scores
#' @param legend_title legend title (defaut = 'change')
#' @param sort_terms_by_p boolean to indicate whether to sort terms by 'lowest_p'
#' (\code{TRUE}) or by number of genes (\code{FALSE}) (default = \code{FALSE})
#' @param ... additional arguments for \code{\link{input_processing}} (used if
#' \code{genes_df} is provided)
#'
#' @return a ggplot2 object of a heatmap where rows are enriched terms and
#' columns are involved input genes. If \code{genes_df} is provided, colors of
#' the tiles indicate the change values.
#' @export
#'
#' @examples
#' term_gene_heatmap(example_pathfindR_output, num_terms = 3)
term_gene_heatmap <- function(result_df, genes_df, num_terms = 10, use_description = FALSE,
low = "red", mid = "black", high = "green", legend_title = "change", sort_terms_by_p = FALSE,
...) {
############ Arg checks
if (!is.logical(use_description)) {
stop("`use_description` must either be TRUE or FALSE!")
}
### Set column for term labels
ID_column <- ifelse(use_description, "Term_Description", "ID")
if (!is.data.frame(result_df)) {
stop("`result_df` should be a data frame")
}
nec_cols <- c(ID_column, "lowest_p", "Up_regulated", "Down_regulated")
if (!all(nec_cols %in% colnames(result_df))) {
stop("`result_df` should have the following columns: ", paste(dQuote(nec_cols),
collapse = ", "))
}
if (!missing(genes_df)) {
suppressMessages(input_testing(genes_df))
}
if (!is.null(num_terms)) {
if (!is.numeric(num_terms)) {
stop("`num_terms` should be numeric or NULL")
}
if (num_terms < 1) {
stop("`num_terms` should be > 0 or NULL")
}
}
############ Init prep steps
result_df <- result_df[order(result_df$lowest_p), ]
### select num_terms genes
if (!is.null(num_terms)) {
if (num_terms < nrow(result_df)) {
result_df <- result_df[1:num_terms, ]
}
}
### process input genes (if provided)
if (!missing(genes_df)) {
genes_df <- input_processing(input = genes_df, ...)
}
### parse genes from enrichment results
parse_genes <- function(vec, idx) {
return(unname(unlist(strsplit(vec[idx], ", "))))
}
up_genes <- apply(result_df, 1, parse_genes, which(colnames(result_df) == "Up_regulated"))
down_genes <- apply(result_df, 1, parse_genes, which(colnames(result_df) == "Down_regulated"))
if (length(down_genes) == 0) {
down_genes <- rep(NA, nrow(result_df))
}
if (length(up_genes) == 0) {
up_genes <- rep(NA, nrow(result_df))
}
names(up_genes) <- names(down_genes) <- result_df[, ID_column]
############ Create terms-by-genes matrix and order
all_genes <- unique(c(unlist(up_genes), unlist(down_genes)))
all_genes <- all_genes[!is.na(all_genes)]
all_terms <- result_df[, ID_column]
term_genes_mat <- matrix(0, nrow = nrow(result_df), ncol = length(all_genes),
dimnames = list(all_terms, all_genes))
for (i in seq_len(nrow(term_genes_mat))) {
current_term <- rownames(term_genes_mat)[i]
current_genes <- c(up_genes[[current_term]], down_genes[[current_term]])
current_genes <- current_genes[!is.na(current_genes)]
term_genes_mat[i, match(current_genes, colnames(term_genes_mat))] <- 1
}
### Order by column
term_genes_mat <- term_genes_mat[, order(colSums(term_genes_mat), decreasing = TRUE)]
### Order by row
ordering_func <- function(row) {
n <- length(row)
pow <- 2^-(0:(n - 1))
return(row %*% pow)
}
term_genes_mat <- term_genes_mat[order(apply(term_genes_mat, 1, ordering_func),
decreasing = TRUE), ]
### Transform the matrix
var_names <- list()
var_names[["Enriched_Term"]] <- factor(rownames(term_genes_mat), levels = rev(rownames(term_genes_mat)))
var_names[["Symbol"]] <- factor(colnames(term_genes_mat), levels = colnames(term_genes_mat))
term_genes_df <- expand.grid(var_names, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE)
value <- as.vector(term_genes_mat)
value <- data.frame(value)
term_genes_df <- cbind(term_genes_df, value)
term_genes_df$value[term_genes_df$value == 0] <- NA
bg_df <- expand.grid(Enriched_Term = all_terms, Symbol = all_genes)
if (sort_terms_by_p) {
bg_df$Enriched_Term <- factor(bg_df$Enriched_Term, levels = rev(result_df[,
ID_column]))
} else {
bg_df$Enriched_Term <- factor(bg_df$Enriched_Term, levels = rev(rownames(term_genes_mat)))
}
bg_df$Symbol <- factor(bg_df$Symbol, levels = colnames(term_genes_mat))
if (!missing(genes_df)) {
for (i in seq_len(nrow(term_genes_df))) {
if (!is.na(term_genes_df$value[i])) {
if (all(genes_df$CHANGE == 1e+06)) {
term_genes_df$value[i] <- ifelse(term_genes_df$Symbol[i] %in% up_genes[[as.character(term_genes_df$Enriched_Term[i])]],
1, -1)
} else {
term_genes_df$value[i] <- genes_df$CHANGE[genes_df$GENE == term_genes_df$Symbol[i]]
}
}
}
if (all(genes_df$CHANGE == 1e+06)) {
term_genes_df$value <- factor(term_genes_df$value, levels = c(-1, 1))
}
} else {
for (i in seq_len(nrow(term_genes_df))) {
if (!is.na(term_genes_df$value[i])) {
term_genes_df$value[i] <- ifelse(term_genes_df$Symbol[i] %in% unlist(up_genes),
"up", "down")
}
}
}
g <- ggplot2::ggplot(bg_df, ggplot2::aes(x = .data$Symbol, y = .data$Enriched_Term))
g <- g + ggplot2::geom_tile(fill = "white", color = "white")
g <- g + ggplot2::theme(axis.ticks.y = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(angle = 90,
hjust = 1), axis.text.y = ggplot2::element_text(colour = "#000000"), axis.title.x = ggplot2::element_blank(),
axis.title.y = ggplot2::element_blank(), panel.grid.major.x = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(), panel.grid.minor.x = ggplot2::element_blank(),
panel.grid.minor.y = ggplot2::element_blank(), panel.background = ggplot2::element_rect(fill = "#ffffff"))
g <- g + ggplot2::geom_tile(data = term_genes_df, ggplot2::aes(fill = .data$value),
color = "gray60")
if (!missing(genes_df)) {
if (all(genes_df$CHANGE == 1e+06)) {
g <- g + ggplot2::scale_fill_manual(values = c(low, high), na.value = "white",
name = legend_title)
} else {
g <- g + ggplot2::scale_fill_gradient2(low = low, mid = mid, high = high,
na.value = "white", name = legend_title)
}
} else {
g <- g + ggplot2::scale_fill_manual(values = c(low, high), na.value = "white",
name = legend_title)
}
return(g)
}
#' Create UpSet Plot of Enriched Terms
#'
#' @inheritParams term_gene_heatmap
#' @param method the option for producing the plot. Options include 'heatmap',
#' 'boxplot' and 'barplot'. (default = 'heatmap')
#'
#' @return UpSet plots are plots of the intersections of sets as a matrix. This
#' function creates a ggplot object of an UpSet plot where the x-axis is the
#' UpSet plot of intersections of enriched terms. By default (i.e.
#' \code{method = 'heatmap'}) the main plot is a heatmap of genes at the
#' corresponding intersections, colored by up/down regulation (if
#' \code{genes_df} is provided, colored by change values). If
#' \code{method = 'barplot'}, the main plot is bar plots of the number of genes
#' at the corresponding intersections. Finally, if \code{method = 'boxplot'} and
#' if \code{genes_df} is provided, then the main plot displays the boxplots of
#' change values of the genes at the corresponding intersections.
#' @export
#'
#' @examples
#' UpSet_plot(example_pathfindR_output)
UpSet_plot <- function(result_df, genes_df, num_terms = 10, method = "heatmap", use_description = FALSE,
low = "red", mid = "black", high = "green", ...) {
############ Arg checks
if (!is.logical(use_description)) {
stop("`use_description` must either be TRUE or FALSE!")
}
### Set column for term labels
ID_column <- ifelse(use_description, "Term_Description", "ID")
if (!is.data.frame(result_df)) {
stop("`result_df` should be a data frame")
}
nec_cols <- c(ID_column, "lowest_p", "Up_regulated", "Down_regulated")
if (!all(nec_cols %in% colnames(result_df))) {
stop("`result_df` should have the following columns: ", paste(dQuote(nec_cols),
collapse = ", "))
}
if (!missing(genes_df)) {
suppressMessages(input_testing(genes_df))
}
if (!is.null(num_terms)) {
if (!is.numeric(num_terms)) {
stop("`num_terms` should be numeric or NULL")
}
if (num_terms < 1) {
stop("`num_terms` should be > 0 or NULL")
}
}
valid_opts <- c("heatmap", "boxplot", "barplot")
if (!method %in% valid_opts) {
stop("`method` should be one of` ", paste(dQuote(valid_opts), collapse = ", "))
}
########## Init prep steps
result_df <- result_df[order(result_df$lowest_p), ]
### select num_terms genes
if (!is.null(num_terms)) {
if (num_terms < nrow(result_df)) {
result_df <- result_df[1:num_terms, ]
}
}
### process input genes (if provided)
if (!missing(genes_df)) {
genes_df <- input_processing(input = genes_df, ...)
}
### parse genes from enrichment results
parse_genes <- function(vec, idx) {
return(unname(unlist(strsplit(vec[idx], ", "))))
}
up_genes <- apply(result_df, 1, parse_genes, which(colnames(result_df) == "Up_regulated"))
down_genes <- apply(result_df, 1, parse_genes, which(colnames(result_df) == "Down_regulated"))
if (length(down_genes) == 0) {
down_genes <- rep(NA, nrow(result_df))
}
if (length(up_genes) == 0) {
up_genes <- rep(NA, nrow(result_df))
}
names(up_genes) <- names(down_genes) <- result_df[, ID_column]
############ Create terms-by-genes matrix and order
all_genes <- unique(c(unlist(up_genes), unlist(down_genes)))
all_terms <- result_df[, ID_column]
term_genes_mat <- matrix(0, nrow = nrow(result_df), ncol = length(all_genes),
dimnames = list(all_terms, all_genes))
for (i in seq_len(nrow(term_genes_mat))) {
current_term <- rownames(term_genes_mat)[i]
current_genes <- c(up_genes[[current_term]], down_genes[[current_term]])
term_genes_mat[i, match(current_genes, colnames(term_genes_mat))] <- 1
}
### Transform the matrix
var_names <- list()
var_names[["Enriched_Term"]] <- factor(rownames(term_genes_mat), levels = rownames(term_genes_mat))
var_names[["Symbol"]] <- factor(colnames(term_genes_mat), levels = colnames(term_genes_mat))
term_genes_df <- expand.grid(var_names, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE)
value <- as.vector(term_genes_mat)
value <- data.frame(value)
term_genes_df <- cbind(term_genes_df, value)
term_genes_df <- term_genes_df[term_genes_df$value != 0, ]
### Order according to frequencies
term_genes_df$Enriched_Term <- factor(term_genes_df$Enriched_Term, levels = names(sort(table(term_genes_df$Enriched_Term),
decreasing = TRUE)))
term_genes_df$Symbol <- factor(term_genes_df$Symbol, levels = rev(names(sort(table(term_genes_df$Symbol)))))
terms_lists <- rev(split(term_genes_df$Enriched_Term, term_genes_df$Symbol))
plot_df <- data.frame(Gene = names(terms_lists), Term = I(terms_lists), Up_Down = ifelse(names(terms_lists) %in%
unlist(up_genes), "up", "down"), stringsAsFactors = FALSE)
bg_df <- expand.grid(Gene = unique(plot_df$Gene), Term = unique(plot_df$Term))
if (method == "heatmap") {
g <- ggplot2::ggplot(bg_df, ggplot2::aes(x = .data$Term, y = .data$Gene))
g <- g + ggplot2::geom_tile(fill = "white", color = "gray60")
if (missing(genes_df)) {
g <- g + ggplot2::geom_tile(data = plot_df, ggplot2::aes(x = .data$Term,
y = .data$Gene, fill = .data$Up_Down), color = "gray60")
g <- g + ggplot2::scale_fill_manual(values = c(low, high))
} else {
plot_df$Value <- genes_df$CHANGE[match(names(plot_df$Term), genes_df$GENE)]
g <- g + ggplot2::geom_tile(data = plot_df, ggplot2::aes(x = .data$Term,
y = .data$Gene, fill = .data$Value), color = "gray60")
g <- g + ggplot2::scale_fill_gradient2(low = low, mid = mid, high = high)
}
g <- g + ggplot2::theme_minimal()
g <- g + ggplot2::theme(axis.title = ggplot2::element_blank(), panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(), legend.title = ggplot2::element_blank())
} else if (method == "boxplot") {
if (missing(genes_df)) {
stop("For `method = boxplot`, you must provide `genes_df`")
}
plot_df$Value <- genes_df$CHANGE[match(names(plot_df$Term), genes_df$GENE)]
g <- ggplot2::ggplot(plot_df, ggplot2::aes(x = .data$Term, y = .data$Value))
g <- g + ggplot2::geom_boxplot()
g <- g + ggplot2::geom_jitter(width = 0.1)
} else {
g <- ggplot2::ggplot(plot_df, ggplot2::aes(x = .data$Term))
g <- g + ggplot2::geom_bar()
}
g <- g + ggupset::scale_x_upset(order_by = ifelse(missing(genes_df), "freq",
"degree"), reverse = !missing(genes_df))
return(g)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.