############################################################################################
####################### get layers values based on several info ####################################
############################################################################################
#' get group names based on annotation
#'
#'function to get the group names based on the annotation of the nodes (tooltip column)
#' @param data List of two ffdf objects (format generated by the geNet() function )
#' @return Object of class "dataframe", which reports the old names associated with the new names
#' @export
#' @examples
#' \dontrun{get_group_names_based_on_tooltip(data)}
#' @import ff
get_group_names_based_on_tooltip<-function(data){
all_groups<-unique(as.character(data$nodes$group[])) # All groups
# analyze tooltip of each group
new_groups_names<-vapply(1:length(all_groups),function(i){
current_group_name<-all_groups[i]
inds<-which(as.character(data$nodes$group[])==current_group_name)
tooltips_current_group<-as.character(data$nodes$title[])[inds]
n_nodes<-nrow(data$nodes[inds,])
if(length(inds)==1){
n_nodes<-1
}
list_words<-lapply(1:length(tooltips_current_group),function(j){
name<-tooltips_current_group[j]
splitted_name<-strsplit(name," ")
return(splitted_name)
})
vector_words<-unlist(list_words)
# clean the vector of words, removing not meaningful words
vector_words<-vector_words[vector_words != ""] # remove empty characters
vec_words_to_remove<-c("family|protein|subunit|transporter|regulator|transcriptional|domain-containing|of|type")
inds<-grep(vec_words_to_remove,vector_words)
vector_words<-vector_words[-inds]
# measure frequency of the words in the current group
table_counts<-table(vector_words)
table_counts<-sort(table_counts,decreasing = T)
selected_keywords<-names(table_counts)
# the final name of the group contains the 4 most common keywords
final_name<-paste0(selected_keywords[1:4],collapse = "_")
# we replace commas with dots in the final name (commas are reserved characters)
final_name<-gsub(",",".",final_name)
final_name<-sprintf("%s_n%s_group%d",final_name,n_nodes,i)
return(final_name)
},character(1))
df_new_groups_names<-as.data.frame(cbind(all_groups,new_groups_names))
# check correctness of the operation
output<- length(all_groups) == nrow(df_new_groups_names)
if(output!=T){
stop("Number of predicted group names different from the number of original group")
}
return(df_new_groups_names)
}
#' get the group names based on the number of nodes
#'
#'function to get the group names based on the number of nodes in each group.
#' @param data List of two ffdf objects (format generated by the geNet() function )
#' @return Object of class "dataframe", which reports the old names associated with the new names
#' @export
#' @examples \dontrun{get_group_names_based_on_n_nodes(data)}
#' @import ff
get_group_names_based_on_n_nodes<-function(data){
all_groups<-unique(as.character(data$nodes$group[])) # All groups
# analyze each group
new_groups_names<-vapply(1:length(all_groups),function(i){
current_group_name<-all_groups[i]
inds<-which(as.character(data$nodes$group[])==current_group_name)
n_nodes<-nrow(data$nodes[inds,])
if(length(inds)==1){
n_nodes<-1
}
final_name<-sprintf("n%d_group%d",n_nodes,i)
return(final_name)
},character(1))
df_new_groups_names<-as.data.frame(cbind(all_groups,new_groups_names))
# check correctness of the operation
output<- length(all_groups) == nrow(df_new_groups_names)
if(output!=T){
stop("length(all_groups) == nrow(df_new_groups_names)")
}
return(df_new_groups_names)
}
#' get new colors based on the strains
#'
#' function to predict the colors based on the strains occurrence of genes
#' i.e.,show nodes/genes associated with the selected strains (i.e., that gene is present in that strains)
#' @param input_binary_df Object of class "dataframe", containing presence/absence of genes. Mandatory argument.
#' @param data visnetwork dataframe generated by the geNet algorithm. Mandatory argument.
#' @param strains_names the vector containing the names of the strains to visualize. Mandatory argument.
#' @param col_nodes Color of the nodes matched with the strains. Default to "black".
#' @return Object of class "dataframe", which reports the new colors for each node of the network
#' @export
#' @examples
#' \dontrun{get_colors_based_on_strains(input_binary_df,data,strains_names=c("strain_x"),col_nodes="black")}
#' @import ff
get_colors_based_on_strains<-function(input_binary_df,data,strains_names,col_nodes="black"){
splitted_strains_names<-strsplit(strains_names,",")[[1]]
string<-paste0(splitted_strains_names,sep = "$",collapse = "|")
inds<-grep(string,row.names(input_binary_df))
if(length(inds)!=0){
stop("selected strains not found")
}
input_binary_df_selected_strains<-input_binary_df[inds,]
print("------ presence indices extraction------" )
list_all_inds<-lapply(1:nrow(input_binary_df_selected_strains),function(i){
vec_i<-input_binary_df_selected_strains[i,]
inds_1<-which(vec_i==1)
return(inds_1)
})
common_inds<-Reduce(intersect, list_all_inds)
if(length(common_inds)==0){
stop("the selected strains don't have any common nodes")
}
id_reference_common<-colnames(input_binary_df_selected_strains)[common_inds]
out<-as.character(data$nodes$id[]) %in% id_reference_common
inds<-which(out==T)
genes_id_selected<-as.character(data$nodes$id[])[inds]
vec_colors<-rep(col_nodes,length(genes_id_selected))
data_new_colors<-as.data.frame(cbind(genes_id_selected,vec_colors))
return(data_new_colors)
}
#' get_colors_based_on_size
#'
#' function to get the colors based on the size of the nodes
#'
#' @param data List of two ffdf objects (format generated by the geNet() function )
#' @param size_thr size threshold. Mandatory argument.
#' @param direction values greater/less than (or equal) the selected threshold
#' @param col_nodes color of the nodes that match the specified condition. Default to "black".
#' @return data_new_colors: object of class "dataframe", which reports the new colors for each node of the network
#' @export
#' @examples
#' \dontrun{get_colors_based_on_size(data,size_thr=10,direction="less",col_nodes="black")}
#' @import ff
get_colors_based_on_size<-function(data,size_thr,direction="less",col_nodes="black"){
if(direction=="less"){
inds<-which(data$nodes$size[]<=size_thr)
}else if(direction=="greater"){
inds<-which(data$nodes$size[]>=size_thr)
}else{
stop("direction argument not valid, please specify a correct direction argument: greater,less")
}
genes_id_selected<-as.character(data$nodes$id[])[inds]
vec_colors<-rep(col_nodes,length(genes_id_selected))
data_new_colors<-as.data.frame(cbind(genes_id_selected,vec_colors))
return(data_new_colors)
}
#' get_colors_based_on_gene_ID
#'
#' function to get the colors based on the ID of the nodes
#'
#' @param data List of two ffdf objects (format generated by the geNet() function )
#' @param vec_gene_ids vector of genes IDs to color. Mandatory argument.
#' @param col_nodes color of the nodes that match the specified IDs. Default to "black".
#' @return Object of class "dataframe", which reports the new colors for each node of the network
#' @export
#' @examples
#' \dontrun{get_colors_based_on_gene_ID(data,vec_gene_ids="None",col_nodes="black")}
#' @import ff
get_colors_based_on_gene_ID<-function(data,vec_gene_ids="None",col_nodes="black"){
string<-paste0(vec_gene_ids,collapse = "|")
inds<-grep(string,as.character(data$nodes$id[]))
if(length(inds)==0){
stop("selected IDs not found in the visnetwork data object")
}
genes_id_selected<-as.character(data$nodes$id[])[inds]
vec_colors<-rep(col_nodes,length(genes_id_selected))
data_new_colors<-as.data.frame(cbind(genes_id_selected,vec_colors))
return(data_new_colors)
}
#' get_groups_based_on_gene_ID
#'
#' function to get the groups based on the ID of the nodes
#'
#' @param data List of two ffdf objects (format generated by the geNet() function )
#' @param vec_gene_ids vector of genes IDs to which the groups must be matched. Mandatory argument.
#' @return Object of class "dataframe", which reports the groups for the specified genes ID.
#' @export
#' @examples
#' \dontrun{get_groups_based_on_gene_ID(data,vec_gene_ids=c("group_200","group_253"))
#' # Get the groups for the nodes IDs "group_200" and "group_253"
#' }
#' @import ff
get_groups_based_on_gene_ID<-function(data,vec_gene_ids){
string<-paste0(vec_gene_ids,collapse = "|")
inds<-grep(string,as.character(data$nodes$id[]))
if(length(inds)==0){
warning("selected id not found")
return(NULL)
}
groups<-as.character(data$nodes$group[])[inds]
gene_ids<-as.character(data$nodes$id[])[inds]
df<-as.data.frame(cbind(gene_ids,groups))
return(df)
}
#' get_gene_ID_based_on_tooltip
#'
#' function to get the gene IDs based on the annotation (tooltip column)
#'
#' @param data List of two ffdf objects (format generated by the geNet() function )
#' @param vec_tooltip vector of annotation to match with the gene IDs. Mandatory argument.
#' @return Object of class "dataframe", which reports the genes ID for the specified annotation keywords.
#' @export
#' @examples
#' \dontrun{get_gene_ID_based_on_tooltip(data,vec_tooltip=c("protein","secretion"))
#' # Get the nodes IDs whose annotation reports the word "protein" and "secretion"
#' }
#' @import ff
get_gene_ID_based_on_tooltip<-function(data,vec_tooltip){
string<-paste0(vec_tooltip,collapse = "|")
inds<-grep(string,as.character(data$nodes$title[]))
if(length(inds)==0){
stop("selected tooltip not found")
}
gene_ids<-as.character(data$nodes$id[])[inds]
tooltip<-as.character(data$nodes$title[])[inds]
df<-as.data.frame(cbind(gene_ids,tooltip))
return(df)
}
#' get_groups_based_on_clustering
#'
#' function to get the groups based on a clustering algorithm
#' Note: the color of the nodes is based on the group (using the function get_new_colors_based_on_groups())
#' to evaluate the clustering method we use the modularity measure.
#' The modularity of a graph with respect to some division (or vertex types) measures how good the division is,
#' or how separated are the different vertex types from each other.
#' @param igraph_network igraph network generated by the gen_igraph_network() function. Mandatory argument.
#' @param method clustering method:
#' * louvain: predict clusters using the Louvain algorithm.
#' The "modularity" is the objective function to maximize. Small clusters may be hidden. It detects only the bigger clusters.
#' * infomap: predict clusters using the Infomap algorithm.
#' The "map equation" is the objective function to minimize. Compared to the Louvain algorithm, it is able to identify smaller clusters (subclusters)
#' * fastgreedy: predicts clusters using the Fastgreedy algorithm
#' * walktrap: predicts clusters using the Walktrap algorithm
#'
#' Default to "infomap" to detect smaller clusters. The Louvain algorithm is faster than the Infomap algorithm in large networks.
#' @return Object of class "dataframe", which reports the genes ID associated with the new groups
#' @export
#' @examples
#' \dontrun{get_groups_based_on_clustering(igraph_network,method="infomap")}
#' @import ff igraph
get_groups_based_on_clustering<-function(igraph_network,method="infomap"){
if(method=="louvain"){
print("----- generating louvain partitions-----")
lc <- cluster_louvain(igraph_network)
partition<-as.vector(membership(lc))
nodes_id<-vertex_attr(igraph_network,name="name")
# tot_n_clusters<-length(unique(partition))
df_groups<-data.frame(nodes_id,partition)
}else if(method=="infomap"){
print("--- generating infomap partitions")
lc <- cluster_infomap(igraph_network)
partition<-as.vector(membership(lc))
nodes_id<-vertex_attr(igraph_network,name="name")
# tot_n_clusters<-length(unique(partition))
df_groups<-data.frame(nodes_id,partition)
}else if(method=="fastgreedy"){
print("--- generating fastgreedy partitions ----")
lc <- cluster_fast_greedy(igraph_network)
partition<-as.vector(membership(lc))
}else if(method=="walktrap"){
print("--- generating walktrap partitions ------")
lc <- cluster_walktrap(igraph_network)
partition<-as.vector(membership(lc))
nodes_id<-vertex_attr(igraph_network,name="name")
# tot_n_clusters<-length(unique(partition))
df_groups<-data.frame(nodes_id,partition)
}else{
stop("input clustering method not valid")
}
print("---- generate colors based on groups -------")
gc()
df_groups<-get_new_colors_based_on_groups(df_groups=df_groups)
gc()
print("Done")
return(df_groups)
}
#' get_new_colors_based_on_groups
#'
#' function to get the colors based on the groups
#' @param df_groups object of class "dataframe", which reports the genes ID associated with the groups. Mandatory argument.
#' @return Object of class "dataframe", which reports the gene IDs and the groups associated with the colors
#' @export
#' @examples
#' \dontrun{get_new_colors_based_on_groups(df_groups)}
#' @import RColorBrewer grDevices
get_new_colors_based_on_groups<-function(df_groups){
numb_clusters<-length(unique(df_groups[,2]))
# we generate vec of unique colors
qual_col_pals <- brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector <- unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
col_vector <- colorRampPalette(col_vector)(numb_clusters)
# extract random colors
unique_groups<-unique(df_groups[,2])
rand_int<-sample.int(length(col_vector),length(unique_groups))
colors_selected<-col_vector[rand_int]
df_temp<-as.data.frame(cbind(unique_groups,colors_selected))
df_temp$unique_groups<-as.character(df_temp$unique_groups)
final_col_vec<-rep("a",nrow(df_groups))
gc()
# assign colors to the clusters
for(i in 1:nrow(df_temp)){
current_group<-df_temp$unique_groups[i]
current_color<-df_temp$colors_selected[i]
inds<-which(df_groups[,2]==current_group)
final_col_vec[inds]<-current_color
}
df_groups_colors<-cbind(df_groups,final_col_vec)
return(df_groups_colors)
}
#' get groups based on the annotation
#'
#' function to get the groups based on the annotation (tooltip column)
#' @param data List of two ffdf objects (format generated by the geNet() function )
#' @param select_annotation vector of annotation words to match with the groups
#' @return Object of class "vector", which reports the groups selected
#' @export
#' @examples
#' \dontrun{get_groups_based_on_tooltip(data,select_tooltip=c("protein","secretion"))
#' # Get the groups where the annotations words "protein" and "secretion" appear.
#' }
#' @import ff
get_groups_based_on_annotation<-function(data,select_annotation){
string<-paste0(select_annotation,collapse = "|")
inds<-grep(string,as.character(data$nodes$title[]))
if(length(inds)==0){
groups_selected<-"No groups with the selected annotation"
}else{
groups_selected<-unique(as.character(data$nodes$group[])[inds])
}
return(groups_selected)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.