#' imprints_network
#'
#' Function to plot an interactive STRING network with the IMPRINTS barplots in each nodes
#'
#' @param data Dataset after imprints_caldiff
#' @param hits The Uniprots ids from which you want to plot the network.
#' If NULL, it will take all the proteins from the data.
#' @param treatment The treatment from which you want to plot to plot the barplots.
#' If NULL, it will plot all the treatments present in the data.
#' @param GOterm Either a data frame containing the columns 'id', 'GOterm' and eventually 'color';
#' a character corresponding to a data base or a NULL object.
#' If you choose a data base, it will perform an enrichment analysis and assign a GO term
#' with the lowest FDR to each protein.
#' The data bases now supported are: COMPARTMENTS, Process, Component, Function, TISSUES,
#' Keyword, KEGG, SMART, PMID, RCTM, WikiPathway and NetworkNeighborAL
#' @param required_score A numeric between 0 and 1000 to set the required score
#' for the interaction in the STRING network.
#' @param node_wolink Logical to tell if you want to print node without links in the network.
#' Default set to TRUE
#' @param witherrorbar Logical to tell if you want to print the error bar or not on the bar plots.
#' @param FC_border Logical to tell if you want to color the nodes borders according to the
#' maximum Fold-Change from each protein.
#' @param colorbar A vector of colors corresponding to each treatment.
#' @param colorFC If FC_border is set to TRUE, a vector of three color corresponding
#' to the min, mid and max value from the maximum fold change.
#' @param species The species; either human, mouse or rat.
#' @param physics_type A string corresponding to the solver of the network; default is forceAtlas2Based.
#' Possible options: 'barnesHut', 'repulsion', 'hierarchicalRepulsion', 'forceAtlas2Based'.
#' @param physics_enabled Logical to toggle the physics system on or off.
#' @param ... Options for network visualization. Options available are:
#' borderWidth, imagePadding, node_size, edge_length, edge_color
#' font_background, font_color, legend_width, file_type
#'
#'
#' @return visNetwork object
#'
#' @export
imprints_network <- function(data, hits = NULL, treatment = NULL, GOterm = NULL,
required_score = 400, node_wolink = TRUE,
witherrorbar = TRUE, FC_border = TRUE,
colorbar = NULL, colorFC = c("#0041FF", "#FFFFFF", "#FF0000"),
species = c("human", "mouse", "rat"),
physics_type = c("forceAtlas2Based", "barnesHut",
"repulsion", "hierarchicalRepulsion"),
physics_enabled = TRUE, ...){
# load if necessary packages
if(!("STRINGdb" %in% names(sessionInfo()$otherPkgs))){
library(STRINGdb)
}
if(!("visNetwork" %in% names(sessionInfo()$otherPkgs))){
library(visNetwork)
}
# define options for network
opts <- list(borderWidth = 5, imagePadding = 8,
node_size = 40, edge_length = 60,
edge_color = "#00000075", legend_width = 0.2,
font_background = "#A6A6A669", font_color = "#343434",
file_type = "png")
dot_opts <- match.call(expand.dots = FALSE)$...
if(!is.null(dot_opts)){
if(is.null(names(dot_opts))){
warning("The options parameters should be named")
}
else{
opts_to_change <- names(opts)[names(opts) %in% names(dot_opts)]
if(length(opts_to_change)){
opts[opts_to_change] <- dot_opts[opts_to_change]
}
else{
warning(paste("The options parameters can only be", paste(names(opts), collapse = ", ")))
}
}
}
if(is.null(treatment)){
treatment <- get_treat_level(data)
}
else{
treatment_in <- treatment %in% get_treat_level(data)
if(!all(treatment_in)){
message(paste("Error: ", treatment[!treatment_in], "wasn't found in your data."))
return()
}
}
if(!is.null(colorbar)){
if(length(colorbar) != length(treatment)){
message("Error: The number of colors you gave doesn't match the number of conditons.")
return()
}
}
if(length(colorFC) != 3){
message("Error: You need to provide 3 colors for colorFC.")
return()
}
species <- match.arg(species)
if(species == "human"){
species <- 9606
}
else if(species == "mouse"){
species <- 10090
}
else if(species == "rat"){
species <- 10116
}
if(!is.null(hits)){
hits_in <- hits %in% data$id
if(all(!hits_in)){
message("Error: None of the proteins you selected are in your data")
return()
}
if(any(!hits_in)){
message(paste(paste(hits[!hits_in], collapse = ", "), "were not found in your data and hence, have been removed."))
hits <- hits[hits_in]
}
data <- data[which(!is.na(match(data$id, hits))),]
}
# extracting Gene info
data_genes <- data[,c("id", "description")] %>%
dplyr::group_by(id) %>%
dplyr::mutate(description = stringr::word(stringr::str_extract(description, "(?<=GN=).*(?=)"), 1))
colnames(data_genes)[2] <- "Gene"
data_genes <- as.data.frame(data_genes) # STRINGdb cannot handle tibble
run_enrich <- FALSE
if(inherits(GOterm, "data.frame")){
good_col <- c("id", "GOterm") %in% colnames(GOterm)
if(any(!good_col)){
message(paste("Error:", c("id", "GOterm")[!good_col], "wasn't found in your GOterm data.frame"))
return()
}
if(!("color" %in% colnames(GOterm))){
GOterm$color <- scales::col_factor("Spectral", domain = NULL)(GOterm$GOterm)
}
}
else if(inherits(GOterm, "character")){
if(length(GOterm) > 1){
GOterm <- GOterm[1]
message(paste("Only", GOterm, "has been kept; you should provide only one data base."))
}
GOterm_in <- GOterm %in% c("COMPARTMENTS", "Process", "Component",
"Function", "TISSUES", "Keyword",
"KEGG", "SMART", "PMID",
"RCTM", "WikiPathways", "NetworkNeighborAL")
if(GOterm_in){
run_enrich <- TRUE
}
else{
message(paste(GO_term, "is not in the database available yet. Please, choose one from\n
COMPARTMENTS, Process, Component, Function, TISSUES, Keyword, KEGG,\n
SMART, PMID, RCTM, WikiPathways or NetworkNeighborAL"))
return()
}
}
message("Start to map genes...")
if(!file.exists("STRING_data")){
dir.create("STRING_data")
}
string_db <- STRINGdb::STRINGdb$new(species=species, #ID 9606 correspond to human
score_threshold=200,
input_directory= file.path(getwd(), "STRING_data"),
version = ifelse(packageVersion("STRINGdb") >= '2.12.0', "12.0", "11.5"))
string_id <- string_db$map(data_genes, "id", removeUnmappedRows = TRUE)
message("Get STRING network...")
interact <- string_db$get_interactions(string_id$STRING_id)
if(nrow(interact) == 0){
message("Warning: No interactions were found between the proteins you selected !")
return(NULL)
}
interact$from <- sapply(interact$from, function(x) string_id$id[which(string_id$STRING_id == x)][1])
interact$to <- sapply(interact$to, function(x) string_id$id[which(string_id$STRING_id == x)][1])
interact <- interact[-which(duplicated(interact)),] # remove potential duplicated rows
if(length(which(interact$combined_score >= required_score)) == 0){
message("Warning: No interactions passed the required interaction score ! Try to decrease it")
if(!node_wolink){
return(NULL)
}
}
message("Generates barplots...")
if(is.null(colorbar)){
bar <- imprints_barplotting_app(data, treatmentlevel = treatment,
witherrorbar = witherrorbar,
printBothName = FALSE)
}
else{
bar <- imprints_barplotting_app(data, treatmentlevel = treatment,
colorpanel = colorbar, witherrorbar = witherrorbar,
printBothName = FALSE)
}
bar <- lapply(bar, function(g){
g <- g +
theme(legend.position = "none",
plot.title = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.line = element_line(color = NA),
panel.background = element_rect(fill = "transparent", color = NA),
plot.background = element_rect(fill = "transparent", color = NA)
);
list(g)
})
names(bar) <- data$id
# preparing legend if needed
lnodes <- data.frame(matrix(nrow = 0, ncol = 5))
colnames(lnodes) <- c("label", "color.border", "color.background", "shape", "borderWidth")
if(FC_border){
to_rm <- get_treat_level(data)[!(get_treat_level(data) %in% treatment)]
if(length(to_rm)){
data <- data[,-grep(paste0("_", to_rm, "$", collapse = "|"), colnames(data))]
}
if(length(treatment) > 1){
message("More than one treatment has been selected. The biggest maximum FC in absolute value will be taken.")
}
FC <- data
if(length(grep("countNum|sumUniPeps|sumPSMs", colnames(FC)))){
FC <- FC[,-grep("countNum|sumUniPeps|sumPSMs", colnames(FC))]
}
FC <- FC %>%
tidyr::gather("treatment", "value", -id, -description) %>%
tidyr::separate(treatment, into = c("temperature", "biorep", "treatment"), sep = "_") %>%
dplyr::group_by(id, description, temperature, treatment) %>%
dplyr::summarise(value = mean(value, na.rm = TRUE)) %>%
dplyr::group_by(id, description) %>%
dplyr::reframe(FC = value[which.max(abs(value))]) %>%
dplyr::mutate(description = stringr::word(stringr::str_extract(description, "(?<=GN=).*(?=)"), 1)) %>%
dplyr::rename(name = id)
FC$description <- NULL
rng_FC <- round(range(FC$FC, na.rm = TRUE), 1)
rng_FC[1] <- rng_FC[1] - 0.1
rng_FC[2] <- rng_FC[2] + 0.1
rng_FC <- append(rng_FC, 0, 1)
FC$colorFC <- scales::gradient_n_pal(colorFC, rng_FC, "Lab")(FC$FC)
if(length(which(is.na(FC$colorFC)))){
FC$colorFC[which(is.na(FC$colorFC))] <- "#666666"
}
# updating legend
lnodes <- as.data.frame(rbind(lnodes,
data.frame(label = paste(rng_FC),
color.border = colorFC,
color.background = "#CACACA",
shape = "circle",
borderWidth = opts$borderWidth))
)
}
if(run_enrich){
message("Start enrichment analysis...")
enrich <- string_db$get_enrichment(string_id$STRING_id, category = GOterm)
if(nrow(enrich) == 0){
message("Warning: No enrichment were found !")
enrich <- NULL
}
else{
enrich <- enrich[,c("preferredNames", "fdr", "description")] %>%
dplyr::group_by(description, fdr) %>%
dplyr::reframe(Gene = strsplit(preferredNames, ",")[[1]])
colnames(enrich)[1] <- "GOterm"
enrich <- enrich %>%
dplyr::ungroup() %>% dplyr::group_by(Gene) %>%
dplyr::group_modify(~ {
if(nrow(.x) > 1){
y <- .x[which.min(.x$fdr),]
return(y)
}
else{
return(.x)
}
}) %>%
dplyr::select(-fdr)
enrich <- enrich %>%
dplyr::right_join(data_genes, by = "Gene") %>%
dplyr::rename(name = id)
enrich$Gene <- NULL
enrich$GOterm[which(is.na(enrich$GOterm))] <- "none"
enrich$color <- scales::col_factor("Spectral", domain = NULL)(enrich$GOterm)
enrich$color[which(enrich$GOterm == "none")] <- "#CACACA"
}
}
else if(!is.null(GOterm)){
enrich <- GOterm %>%
dplyr::right_join(data_genes, by = "id") %>%
dplyr::rename(name = id)
enrich$Gene <- NULL
enrich$GOterm[which(is.na(enrich$GOterm))] <- "none"
enrich$color[which(enrich$GOterm == "none")] <- "#CACACA"
}
else{
enrich <- NULL
}
### creating graph
graph <- interact %>%
dplyr::filter(combined_score >= required_score) %>%
dplyr::mutate(combined_score = combined_score/1000)
colnames(graph)[3] <- "value"
edges <- data.frame(graph)
itself <- which(edges$from == edges$to)
if(length(itself)){
edges <- edges[-itself,]
}
dup_edge <- duplicated(edges[,c("from", "to")])
if(any(dup_edge)){
edges <- edges[-which(dup_edge),]
}
# adding info
if(node_wolink){
graph <- data.frame(name = data_genes$id,
Gene = data_genes$Gene) %>%
unique() %>%
dplyr::group_by(name) %>%
dplyr::mutate(bar = bar[[name]]
)
}
else{
data_genes <- data_genes %>%
tibble::column_to_rownames(var = "id")
graph <- graph %>% dplyr::select(from, to) %>%
tidyr::gather("key", "name") %>%
dplyr::select(name) %>%
unique() %>%
dplyr::group_by(name) %>%
dplyr::mutate(bar = bar[[name]],
Gene = data_genes[name,"Gene"]
)
}
if(FC_border){
graph <- graph %>%
dplyr::left_join(FC, by = "name")
}
if(!is.null(enrich)){
graph <- graph %>%
dplyr::left_join(enrich, by = "name")
# updating legend
enrich_leg <- unique(graph[,c("GOterm", "color")])
lnodes <- as.data.frame(rbind(lnodes,
data.frame(label = enrich_leg$GOterm,
color.border = enrich_leg$color,
color.background = enrich_leg$color,
shape = "ellipse",
borderWidth = opts$borderWidth))
)
}
# generates img for each barplot
f_rnd <- paste0("img_", paste(sample(c(1:9, letters), 12, TRUE), collapse = ""))
dir.create(f_rnd)
lapply(graph$bar, function(img){
n <- img$data$id[1]
n <- stringr::word(n, 1, sep = "\n")
ggsave(plot = img, paste0(f_rnd, "/", n , ".png"),
width = 1, height = 1)
})
# convert it in base 64 so js can read them
bar_base64 <- sapply(graph$name, function(x){
f <- paste0(f_rnd, "/", x, ".png")
res <- RCurl::base64Encode(readBin(f, "raw", file.info(f)[1, "size"]), "txt")
res <- paste0("data:image/png;base64,", res[1])
})
unlink(f_rnd, recursive = TRUE) # remove directory created
nodes <- data.frame(id = graph$name,
label = graph$Gene,
shape = "circularImage",
image = bar_base64
)
if(FC_border){
nodes$color.border <- graph$colorFC
}
if(!is.null(enrich)){
nodes$group <- graph$GOterm
nodes$color.background <- graph$color
}
physics_type <- match.arg(physics_type)
net <- visNetwork(nodes, edges, width = "100%", height = "800px") %>%
visNodes(borderWidth = opts$borderWidth, shapeProperties = list(useBorderWithImage = TRUE),
imagePadding = opts$imagePadding,
font = list(background = opts$font_background, color = opts$font_color),
size = opts$node_size) %>%
visEdges(color = opts$edge_color, length = opts$edge_length) %>%
visPhysics(solver = physics_type, enabled = physics_enabled) %>%
visInteraction(navigationButtons = TRUE) %>%
visEvents(selectNode = "function(properties) {
if(properties.event.srcEvent.shiftKey){
var win = window.open('https://www.uniprot.org/uniprot/' + properties.nodes, '_blank');
win.focus();
}
}") %>%
visExport(type = opts$file_type, name = paste0(format(Sys.time(), "%y%m%d_%H%M_"), "network"),
float = "left", label = paste("Save network as", opts$file_type),
background = "transparent", style= "") %>%
visOptions(manipulation = TRUE)
if(nrow(lnodes) != 0){
net <- net %>%
visLegend(addNodes = lnodes, stepY = 50,
width = opts$legend_width, useGroups = FALSE)
}
message("Done !")
return(net)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.