#' Plot STRING network of genes colored by enriched pathways
#'
#' To be used in conjunction with map_string
#'
#' @param map List output by map_string
#' @param layout Character string for network layout algorithm. See options in igraph::layout_with_
#' @param edge_min Numeric minimum edges a node must have to be displayed. Default is 0 meaning orphan nodes are included
#' @param edge_max Numeric maximum edges a node must have to be displayed. Default in Inf. Set to 0 to see only orphan nodes
#' @param enriched_only Logical if should include only genes in significantly enriched terms. Default FALSE
#' @param enrichment Data frame output by `BIGprofiler`, `BIGenrichr`, or `BIGsea`. For use in coloring nodes
#' @param overlap Numeric minimum of total significant genes in a enrichment term to be used as colors (`BIGprofiler`, `BIGenrichr`)
#' @param fdr_cutoff Numeric maximum FDR of enrichment terms to be used as colors (`BIGprofiler`, `BIGenrichr`, `BIGsea`)
#' @param colors Character vector of custom colors to use. Must be at least a long as total significant terms plus 1 for the "none" group
#' @param text_size Numeric size of gene labels on network nodes. Default of 2
#' @param node_size Numeric size of network nodes. Default of 1
#'
#' @return ggplot STRING network object
#' @export
#'
#' @examples
#' #NOT RUN
#' # map <- map_string(genes = c("MTHFD2","ISOC1","IL2RB","SAMHD1","NAMPT","NOD1",
#' # "NFKB1","IFIT3","ZBP1","IL15RA","SP110","ITGB7",
#' # "SERPING1","B2M","CXCL11","USP18","MAPKAPK2","DKK1"),
#' # version = 12, score_threshold = 400)
#' # plot_string(map)
#' #
#' # Add enrichment colors
#' # library(dplyr)
#' # library(SEARchways)
#' # genes.OI <- example.model$lmerel %>%
#' # filter(variable == "virus" & FDR < 0.2) %>%
#' # select(variable, gene)
#' # example_enrich <- SEARchways::BIGprofiler(gene_df = genes.OI,
#' # category = "H", ID = "ENSEMBL")
#' #
#' # map2 <- map_string(genes = genes.OI$gene,
#' # version = 11.5, score_threshold = 400)
#' # plot_string(map2, enrichment = example_enrich, fdr_cutoff=0.2)
#' # plot_string(map2, enrichment = example_enrich, fdr_cutoff=0.2, enriched_only=TRUE)
#' #
#' # Add GSEA colors
#' # genes.FC <- example.model$lmerel %>%
#' # filter(variable == "virus") %>%
#' # select(variable, gene, estimate)
#' # example_gsea <- BIGsea(gene_df = genes.FC, category = "H", ID = "ENSEMBL")
#' #
#' # plot_string(map2, enrichment = example_gsea, fdr_cutoff = 0.3,
#' # edge_max = 0, enriched_only=TRUE)
plot_string <- function(map, layout='fr',
edge_min=0, edge_max=Inf, enriched_only = FALSE,
enrichment=NULL, overlap=2, fdr_cutoff=0.2,
colors=NULL, text_size=2, node_size=1){
pathway <- STRING_id <- combined_score <- gene <- none <- total <- value <- group_in_pathway <- FDR <- genes <- leadingEdge <- legend.title <- NULL
#### Format enrichment colors ####
if(!is.null(enrichment)){
if("k/K" %in% colnames(enrichment)){
#Get significant enrichments
col.mat <- enrichment %>%
dplyr::ungroup() %>%
dplyr::filter(group_in_pathway >= overlap & FDR <= fdr_cutoff) %>%
dplyr::select(pathway, genes) %>%
dplyr::rename(gene=genes)
legend.title <- "Enriched pathways"
}
if("NES" %in% colnames(enrichment)){
#Get significant GSEA
col.mat <- enrichment %>%
dplyr::ungroup() %>%
dplyr::filter(FDR <= fdr_cutoff) %>%
dplyr::select(pathway, leadingEdge) %>%
dplyr::rename(gene=leadingEdge)
legend.title <- "GSEA leading edge"
}
#Error if no terms to plot
if(nrow(col.mat) == 0) {stop("No significant enrichment/GSEA terms.
Try increasing fdr_cutoff.")}
#Format enrichment results for scatterpie plotting
col.mat.format <- col.mat %>%
#Split gene lists within terms
tidyr::unnest(gene) %>%
dplyr::distinct(gene, pathway) %>%
dplyr::mutate(value=1) %>%
#Add string ID
dplyr::inner_join(map[["map"]], by = "gene") %>%
dplyr::select(-gene) %>%
dplyr::distinct() %>%
#Calculate terms per ID
dplyr::group_by(STRING_id) %>%
dplyr::mutate(total = sum(value)) %>%
dplyr::ungroup() %>%
#Calculate proportions within terms
dplyr::arrange(match(pathway, unique(enrichment$pathway))) %>%
tidyr::pivot_wider(names_from = pathway, values_fill = 0) %>%
dplyr::mutate(dplyr::across(-c(STRING_id,total), ~./total))
#Add to STRING data and create dummy group for genes without enrichment
map.unique <- map[["map"]] %>%
#collapse gene names
dplyr::group_by(STRING_id) %>%
dplyr::mutate(gene = paste(unique(gene), collapse=" / ")) %>%
#add enrichment color info
dplyr::left_join(col.mat.format, by = "STRING_id") %>%
dplyr::distinct() %>%
#no enrichment group
dplyr::mutate(none = ifelse(is.na(total),1,0)) %>%
dplyr::ungroup() %>%
#fill in 0
dplyr::mutate_if(is.numeric, ~tidyr::replace_na(., 0))
#Remove none is not used, remove it
if(sum(map.unique$none)==0){
map.unique <- map.unique %>%
dplyr::select(-none)
}
} else{
map.unique <- map[["map"]] %>%
#collapse gene names
dplyr::group_by(STRING_id) %>%
dplyr::mutate(gene = paste(unique(gene), collapse=" / ")) %>%
dplyr::ungroup()
}
##Remove nodes not in an enriched pathway (leave only colored nodes)
## all nodes currently
if(enriched_only){
##Nodes without enrichment
unenrich <- map.unique %>%
dplyr::filter(none==1 &
STRING_id %in% igraph::vertex_attr(map[["subgraph"]])$name) %>%
dplyr::distinct(STRING_id) %>%
dplyr::pull(STRING_id)
##Remove unconnected that are also unenriched
subgraph.filter <- igraph::delete.vertices(map[["subgraph"]], unenrich)
} else {
subgraph.filter <- map[["subgraph"]]
}
if(length(igraph::vertex_attr(subgraph.filter)$name)==0){
stop("No genes in network with significant enrichment. Consider increasing fdr_cutoff or setting enriched_only=FALSE")
}
#### Filter nodes by number of edges ####
if(edge_max == edge_min){
nodes_to_remove <- which(igraph::degree(subgraph.filter)!=edge_min)
} else {
nodes_to_remove <- which(igraph::degree(subgraph.filter)<edge_min |
igraph::degree(subgraph.filter)>edge_max)
}
if(length(nodes_to_remove) > 0){
subgraph.filter2 <- igraph::delete.vertices(subgraph.filter, nodes_to_remove)
} else {
subgraph.filter2 <- subgraph.filter
}
if(length(igraph::vertex_attr(subgraph.filter2)$name)==0){
stop("No genes in network remain after edge filtering. Consider changind edge_min and/or edge_max")
}
#### Arrange metadata as in network ####
map.arrange <- map.unique %>%
dplyr::filter(STRING_id %in% igraph::vertex_attr(subgraph.filter2)$name) %>%
dplyr::arrange(match(STRING_id, c(igraph::vertex_attr(subgraph.filter2)$name)))
# Set attributes
## Check order first
if(!identical(igraph::vertex_attr(subgraph.filter2)$name, map.arrange$STRING_id)){
stop("igraph gene order does not match color information.")
}
##gene names
igraph::V(subgraph.filter2)$symbol <- map.arrange$gene
##enrichment colors
for(term in colnames(map.arrange)[-c(1:3)]){
igraph::vertex_attr(subgraph.filter2)[[term]] <- unlist(map.arrange[term])
}
#### Set color values ####
if(!is.null(colors)){
color.vec <- colors
} else if(!is.null(enrichment)){
if("none" %in% colnames(map.arrange)){
#ggplot colors for sets in plot
color <- scales::hue_pal()(ncol(col.mat.format)-2)
#Replace none with grey
all.term <- sort(colnames(map.arrange)[-c(1:3)])
none.index <- match("none", all.term)
color.vec <- c(color[1:none.index-1], "grey70",
color[none.index:length(color)])
color.vec <- color.vec[!is.na(color.vec)]
} else {
color.vec <- scales::hue_pal()(ncol(col.mat.format)-2)
}
} else if (is.null(enrichment) & is.null(colors)){
color.vec <- "#d9d9d9"
}
#### Plot ####
#Get xy of nodes for manual layout
set.seed(8434)
#### set layout ####
if(layout == "fr"){ xy <- igraph::layout_with_fr(subgraph.filter2) } else
if(layout == "bipar"){ xy <- igraph::layout_as_bipartite(subgraph.filter2) } else
if(layout == "star"){ xy <- igraph::layout_as_star(subgraph.filter2) } else
if(layout == "tree"){ xy <- igraph::layout_as_tree(subgraph.filter2) } else
if(layout == "circle"){ xy <- igraph::layout_in_circle(subgraph.filter2) } else
if(layout == "kk"){ xy <- igraph::layout_with_kk(subgraph.filter2) } else
if(layout == "graphopt"){ xy <- igraph::layout_with_graphopt(subgraph.filter2) } else
if(layout == "gem"){ xy <- igraph::layout_with_gem(subgraph.filter2) } else
if(layout == "dh"){ xy <- igraph::layout_with_dh(subgraph.filter2) } else
if(layout == "sphere"){ xy <- igraph::layout_on_sphere(subgraph.filter2) } else
if(layout == "grid"){ xy <- igraph::layout_on_grid(subgraph.filter2) } else
if(layout == "lgl"){ xy <- igraph::layout_with_lgl(subgraph.filter2) } else
if(layout == "mds"){ xy <- igraph::layout_with_mds(subgraph.filter2) } else
if(layout == "sugi"){ xy <- igraph::layout_with_sugiyama(subgraph.filter2) }
#### plot ####
igraph::V(subgraph.filter2)$x <- xy[, 1]
igraph::V(subgraph.filter2)$y <- xy[, 2]
plot <- ggraph::ggraph(subgraph.filter2, layout= "manual",
x = igraph::V(subgraph.filter2)$x,
y = igraph::V(subgraph.filter2)$y) +
#Edges
ggraph::geom_edge_link(ggplot2::aes(width=combined_score), color="grey70") +
ggraph::scale_edge_width(range = c(0.2,2), name="STRING score")
#Add nodes
suppressWarnings(
if(!is.null(enrichment)){
if(length(colnames(map.arrange)[-c(1:3)])>1){
plot.col <- plot +
scatterpie::geom_scatterpie(data=igraph::as_data_frame(subgraph.filter2,
"vertices"),
cols=colnames(map.arrange)[-c(1:3)], color=NA,
pie_scale = node_size) +
ggplot2::scale_fill_manual(values = color.vec, name = legend.title) +
ggnetwork::geom_nodetext(ggplot2::aes(x = igraph::V(subgraph.filter2)$x,
y = igraph::V(subgraph.filter2)$y,
label=igraph::V(subgraph.filter2)$symbol),
size=text_size) +
ggnetwork::theme_blank() + ggplot2::coord_fixed()
} else{
plot.col <- plot +
ggnetwork::geom_nodes(ggplot2::aes(x = igraph::V(subgraph.filter2)$x,
y = igraph::V(subgraph.filter2)$y,
fill=NULL,
color=colnames(map.arrange)[-c(1:3)]),
size = node_size*10) +
ggnetwork::geom_nodetext(ggplot2::aes(x = igraph::V(subgraph.filter2)$x,
y = igraph::V(subgraph.filter2)$y,
label = igraph::V(subgraph.filter2)$symbol),
size=text_size) +
ggnetwork::theme_blank() + ggplot2::coord_fixed() +
ggplot2::scale_color_manual(values = color.vec, name = legend.title)
}
} else{
plot.col <- plot +
ggnetwork::geom_nodes(ggplot2::aes(x = igraph::V(subgraph.filter2)$x,
y = igraph::V(subgraph.filter2)$y,
fill=NULL), size = node_size*10,
color=color.vec) +
ggnetwork::geom_nodetext(ggplot2::aes(x = igraph::V(subgraph.filter2)$x,
y = igraph::V(subgraph.filter2)$y,
label = igraph::V(subgraph.filter2)$symbol),
size=text_size) +
ggnetwork::theme_blank() + ggplot2::coord_fixed()
}
)
return(plot.col)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.