##plot research space
#frebrua
#remember to source config.R and createResearchSpace.R
#config(cl='ucsd')
#' @export
create_rs_ucsd <- function()
{
file_rdata<- "New_rs_sim_pr_1971_2010_n_-1_aw_0_jf_0_awjf_0.1_min_prod_0.RData"
mean_degree <- 10
nodes <- load_taxonomy("ucsd")
#for(pl_seed in c(113,155,799,19,70,72)) to make a search for the most similar to the SCIMago 72
# plot_rs_final(file_rs = file_rdata, mean_degree = mean_degree, mst = mst, pl_seed = pl_seed, mode="max")
plot_map(file_rs = file_rdata, nodes=nodes, mean_degree = mean_degree, mst = mst, pl_seed = 72, mode="max") #pc lab utfsm seed 72
}
#' @title Create Scimago Research Space
#' @description Open RSpace on Scimago taxonomy, plots and returns an iGraph object.
#' @param data A numeric matrix with entities \eqn{i} in the rows and categories \eqn{j} in the columns. Cells show the respective value (value of abundance) of entity \eqn{i} in the category \eqn{j}. It can also be a transpose of the previous matrix, that is, a matrix with categories in the rows and entities in the columns. Yet in that case, the argument "category_row" has to be set to TRUE. The matrix must include names for the rows and the columns. The argument "data", also accepts a dataframe with three columns in the following order: entity, category and value.
#'
#' @param category_row A flag to indicate that categories are in the rows. The analysis assumes that the categories are in the columns of the matrix. If the categories are in the rows and the entities in the columns, then the argument "category_row" has to be set to TRUE. The default value is FALSE.
#' @examples
#' create_rs_scimago()
#' @return An iGraph object.
#' @export
create_rs_simago <- function()
{
file_rdata<- "New_rs_sim_pr_1971_2010_n_-1_aw_0_jf_0_awjf_0.1_min_prod_0.RData"
mean_degree <- 10
nodes <- load_taxonomy("scimago")
plot_map(file_rs = file_rdata, nodes=nodes, mean_degree = mean_degree, mst = mst, pl_seed = 69, v_min_size = 10, cex=1, mode="max")
}
#mode is the mode that you want to use in the graph
#' @export
plot_map <- function(file_rs, nodes, mean_degree=5, mode="max", mst=TRUE, pl_seed=69, prop_to_lab=0.2, cex=1, pl=TRUE, pl_mst=FALSE, title='', subtitle='Subt', v_min_size=10, v_col='orig')
{
title<- toupper(mode)
library("igraph")
#TODO, take this to the place of DATA!!!
#path_rs <- "/Users/mguevara/Dropbox/doctorado/MIT_PROJECT/TESIS_RESEARCH_SPACE/DATA/RESEARCH_SPACE/SCIMAGO/RESEARCH\ SPACE\ OUTPUT" #HARD CODED
path_rs <- "/Users/mguevara/Dropbox/doctorado/MIT_PROJECT/TESIS_RESEARCH_SPACE/DATA/RESEARCH_SPACE/UCSD/RESEARCH\ SPACE\ OUTPUT" #HARD CODED
load(file.path(path_rs, file_rs)) #load complete information of the research space wanted all variables are rs_
#adj <- dis_categories(pantheon)
#pheatmap(adj, cluster_rows=FALSE, cluster_cols=FALSE)
#load(file_rs)
adj_orig <- rs_adj
#diag(adj_orig) <- 0
adj_clean <- adj_orig
diag(adj_clean) <-0
#cleanning NA
adj_clean[is.na(adj_clean)] <- 0
cat_not_connected <- rownames(adj_clean)[rowSums(adj_clean) == 0 | colSums(adj_clean) == 0]
print(paste("Number of categories not conected", length(cat_not_connected)))
cat_connected <- rownames(adj_clean)[rowSums(adj_clean) != 0 & colSums(adj_clean) != 0] #note that in the case of probability the Min of upper triangular and lower triangular is taken, so connection in both upper and lower triangle of the matrix is required!!!
#cleanning un connected categories
adj_cc <- adj_orig[cat_connected, cat_connected]
adj_cc[is.na(adj_cc)] <- 0 #in case NAs
#creating full graph for union future process
#adj_full <- (adj_cc-min(adj_cc,na.rm=TRUE))/(max(adj_cc, na.rm=TRUE)-min(adj_cc, na.rm=TRUE)) #normalizing between zero and one
adj_full <- adj_cc
g_full <- graph.adjacency(adjmatrix = adj_full, mode = mode, weighted = TRUE, diag = FALSE) #note minimum because the min probability ORIGINAL
#g_full <- graph.adjacency(adjmatrix = adj_full, mode = "directed", weighted = TRUE, diag = FALSE) #modified to be directed
print("Computing minimum spnanning tree")
#normalize similarities
#dist_matrix <- 1 - (adj_cc-min(adj_cc,na.rm=TRUE))/(max(adj_cc, na.rm=TRUE)-min(adj_cc, na.rm=TRUE)) #normalizing between zero and one
dist_matrix <- 1 - (adj_cc)/(max(adj_cc, na.rm=TRUE)) #normalizing per le maximum, numerator should not has values of zero!
#MST must use the DISTANCE matrix NOT the similarities!!!!!
mode_mst<- mode
if(mode=="max")
mode_mst<- "min"
if(mode=="min")
mode_mst <- "max"
g_mst <- graph.adjacency(adjmatrix = dist_matrix, mode = mode_mst, weighted = TRUE, diag = FALSE) #max is the worst case ORIGINAL
#g_mst <- graph.adjacency(adjmatrix = dist_matrix, mode = "directed", weighted = TRUE, diag = FALSE) #max is the worst case
#g_mst <- simplify(g_mst, remove.multiple = TRUE, remove.loops = TRUE, edge.attr.comb = list(weight="min", "ignore")) #taking the minimum of the probability
#E(g_mst)$sim <- -1*(E(g_mst)$weight -1)
#g_original <<- g_mst
#g_mst=delete.edges(g_mst, which(E(g_mst)$weight <=1)) # here's my condition.
g_mst <- minimum.spanning.tree(g_mst)
if(pl_mst==TRUE)
{
main <- paste( title, "\n Minimum Spanning Tree")
par(mfrow=c(1,2))
plot_graph_smart_2(g_mst, main=main, lay = 'fr', v_label = 'no', v_size='degree', cex = cex, v_min_size=v_min_size) #force directed
plot_graph_smart_2(g_mst, main=main, lay = 'rt', v_label = 'no', v_size='degree', cex = cex, v_min_size=v_min_size) #tree
par(mfrow=c(1,1))
plot_graph_smart_2(g_mst, main=main, lay = 'rt', v_label = 'com', v_size=2, cex = cex, v_min_size=v_min_size) #tree
}
#flagging edges of Minimum ST for future combination
E(g_mst)$mst <- 1
#######THERSHOLD up to certain mean degree (ideally 4)
print("Computing threshold graph")
filter <- min(rs_adj) #initial filter
#if(rs_sim=='n') increment <- 1
#if(rs_sim=='pr') increment <- 0.001
increment <- 0.001
degree_rs <- mean_degree + 1 #to go into the loop
adj_th <- adj_cc
#adj_th <- (adj_th-min(adj_th,na.rm=TRUE))/(max(adj_th, na.rm=TRUE)-min(adj_th, na.rm=TRUE)) #normalizing between zero and one
while(degree_rs > mean_degree)
#while(kk > kkk )
{
#print("here!!")
adj_th[adj_th < filter] <- 0
cat_non_zero <- rownames(adj_th)[rowSums(adj_th)!=0]
adj_th <- adj_th[cat_non_zero, cat_non_zero]
#print(adj_th)
g <- graph.adjacency(adjmatrix = adj_th, mode = mode, weighted = TRUE, diag = FALSE) #note minimum because the min probability
#g <- graph.adjacency(adjmatrix = adj_th, mode = "directed", weighted = TRUE, diag = FALSE) #note mi
degree_rs <- mean(degree(g))
#print(paste("Degree_RS", degree_rs))
filter <- filter + increment
}
#print(mean(degree(g)))
#print("Degreeeeee")
#print(degree(g))
#print(max(degree(g),na.rm=TRUE))
#print(V(g)$size)
#fc <- fastgreedy.community(g); colors <- rainbow(max(membership(fc)))
#V(g)$color = colors[membership(fc)]
#V(g)$membership <- fc$membership
#if(length(V(g)) < (prop_to_lab * length(rownames(nodes))) )
# V(g)$label = as.character(nodes$subd_name[match(V(g)$name, nodes$Id)]) #watch out with the name of the column!!
#else
#{
#print("Filtering labels...")
# nod_max_com <- get_max_com(g) #get the nodes with max degree per community
#print(nod_max_com)
# V(g)$label <- ''
#V(g)$label[V(g)$name %in% nod_max_com] <- as.character(nodes$subd_name[match(nod_max_com, nodes$Id)])
# nodes_to_label <- V(g)$name[V(g)$name %in% nod_max_com] #to get the order of iGraph
# V(g)$label[V(g)$name %in% nod_max_com] <- as.character(nodes$subd_name[match(nodes_to_label, nodes$Id)])
#}
# set.seed(pl_seed)
# g$layout <- layout.fruchterman.reingold(g)
#title_th <- paste("Only threshold" , title)
#par(mfrow=c(1,1))
#plot_graph_smart_2(g, main = title_th, sub_add = paste(subtitle, "Seed plot: ",
# pl_seed, '| Threshold for links: ', filter-increment),lay = "fr", cex = cex, v_min_size=v_min_size)
g_th <- g
#flagging edges from threshold graph to future combination
E(g_th)$threshold <- 1
#MERGING GRAPHS.....
#E(g)$weight <- E(g)$weight/max(E(g)$weight, na.rm=TRUE) #NORMALIZING BETWEEN 0 AND 1
#first Full + MST
print("Merging graphs")
g <- graph.union(g_full, g_mst, g_th) #note union is computed by NAME, not by ID, so it is correct to our propose
print("Union created")
E(g)$weight <- E(g)$weight_1
E(g)$mst[is.na(E(g)$mst)] <- 0
E(g)$threshold[is.na(E(g)$threshold)] <- 0
E(g)$used <- E(g)$mst + E(g)$threshold #union
#print("Deliting edges ...")
#g <- delete.edges(g, E(g)[E(g)$used < 1]) #podding edges that are not in mst and not in Threshold graph MOVED TO EACH FUNCTION
#E(g)$weight_used[E(g)$used >= 1] <- E(g)$weight[E(g)$used >= 1] #storing weights of edges that are the ones to apply the filter
#print("Edges_deleted...")
#E(g)$weight[!is.na(E(g)$sim)] <- E(g)$sim #adding mst weights in mst they are in sim not in weight since in weight_1 they are distances.
#E(g)$weight[is.na(E(g)$weight)] <- E(g)$weight_2[is.na(E(g)$weight)]
#E(g)$weight <- E(g)$sim #adding mst weights in mst they are in sim not in weight since in weight_1 they are distances.
#E(g)$weight[is.na(E(g)$weight)] <- E(g)$weight_2[is.na(E(g)$weight)]
lab='com'
#nodes = ucsd_nodes #HARD CODED
if(length(V(g)) < (prop_to_lab * length(rownames(nodes))) )
lab='all'
title= paste("MST+Threshold ",title, "\nMean degree:", mean_degree, "SEED", pl_seed)
par(mfrow=c(1,1))
#original community over all the graph before deletion
fc <- infomap.community(g); colors <- rainbow(length(fc))
V(g)$color <- colors[membership(fc)]
print("Ploting threshold graph")
g_to_plot <- delete.edges(g, E(g)[E(g)$used < 1])
plot_graph_smart_2(g_to_plot, nodes=nodes, main = title, sub_add = paste(subtitle, "Seed plot: ",
pl_seed, '| Threshold for links: ', filter-increment),lay = "fr", v_label=lab, cex = cex, v_min_size=v_min_size, v_col='orig', pdf_w = FALSE, file_name=paste('Research Space ', taxo, ' - ClColors', sep=''), pl_seed=pl_seed ) #v_col color based on community com, original = orig, NULL for the community over all the matrix
#ploting with other colors
plot_graph_smart_2(g_to_plot,nodes=nodes, main = title, sub_add = paste(subtitle, "Seed plot: ",
pl_seed, '| Threshold for links: ', filter-increment),lay = "fr", v_label=lab, cex = cex, v_min_size=v_min_size, v_col='com', pdf_w = FALSE, file_name=paste('Research Space ', taxo, ' - ComColors', sep=''), pl_seed=pl_seed ) #v_col color based on community com, original = orig, NULL for the community over all the matrix
#par(mfrow=c(1,2))
#plot_graph_smart_2(g_to_plot, main = paste('Research Space in', taxo, 'classification'), sub = paste("Original Taxonomy Colors \n Communities:",length(unique(nodes$color))),lay = "fr", v_label='no', v_col='orig', cex = cex, v_min_size=v_min_size)
#plot_graph_smart_2(g_to_plot, main = paste('Research Space in', taxo, 'classification'), sub='Autodetected Community Colors Assigned', lay = "fr", v_label='no', v_col='com', cex = cex, v_min_size=v_min_size)
g_plot <<- g_to_plot
return(g)
}
##ploting graphs with smart labeling and colors
#sub_add add text to the default sub or to the sub parameter
#sub subtitle to plot
#main, main title of the plot
#g the graph to plot
#cex a factor to scale all the things in the plot
#pl_seed the seed to use in the plot
#lay the layout to use, a mnemonic fr or drl or cir
#' @export
plot_graph_smart_2 <- function(g, nodes, main='', sub=NULL, sub_add='', cex=1, pl_seed="77", lay='fr', v_label='com', v_size='degree', v_col='com', v_min_size=10, pdf_w=FALSE, file_name=NULL, legend=NULL)
{
#nodes = ucsd_nodes #HARD CODED!!
######GRAPH PROPERTIES
set.seed(pl_seed)
if(lay=='fr') #force directed
g$layout <- layout.fruchterman.reingold(g)
if(lay=='drl')
g$layout <- layout.drl(g)
if(lay=='rt') #tree oriented
g$layout <- layout.reingold.tilford(g)
if(lay=='sph') #sphere
g$layout <- layout.sphere(g)
#fc <- fastgreedy.community(g, weights = NULL); colors <- rainbow(length(fc))
print("Detecting communities...")
#fc <- fastgreedy.community(g); colors <- rainbow(length(fc))
fc <- infomap.community(g); colors <- rainbow(length(fc))
print("Degree computing...")
mean_degree <- trunc(mean(degree(g)))
sub_default <- paste( 'Number of nodes: ', length(V(g)), '/', nrow(nodes), ' | Mean degree: ', mean_degree, ' | Detected communities: ', length(fc))
if(is.null(sub))
sub <- sub_default
#adding sub_add to the subtitle
if(!is.null(sub_add))
sub <- paste(sub_add, '\n', sub)
V(g)$membership <- fc$membership
######## VERTICES PROPERTIES
print("labeling...in")
if(v_label =='com')
{
#g_mst_copy<<- g_mst
nod_max_com <- get_max_com(g) #get the nodes with max degree per community
#print(nod_max_com)
V(g)$label <- ''
nodes_to_label <- V(g)$name[V(g)$name %in% nod_max_com] #to get the order of iGraph
V(g)$label[V(g)$name %in% nod_max_com] <- as.character(nodes$subd_name[match(nodes_to_label, nodes$Id)])
#V(g)$label = as.character(nodes$subd_name[match(V(g)$name, nodes$Id)])
}
if(v_label == 'all')
V(g)$label = as.character(nodes$subd_name[match(V(g)$name, nodes$Id)])
if(v_label == 'no')
V(g)$label <- ''
print("Coloring .... in")
#color #com for community, #orig for size in original, #own to mantain the color of g
if(v_col == 'com')
V(g)$color <- colors[membership(fc)]
if(v_col == 'orig')
V(g)$color <- as.character(nodes$color[match(V(g)$name, nodes$Id)])
#sub <- paste(sub, '\n Number of full communities:', length(unique(V(g)$color)))
#size
print("Sizing Nodes...")
if(is.numeric(v_size)==TRUE) #equal size for every node
V(g)$size <- v_size
if(v_size=='degree') #got size according degree
V(g)$size <- ((degree(g)+v_min_size)/max(degree(g))) * v_min_size
if(v_size == 'orig') #got size according original size in nodes info
{
V(g)$size <- as.numeric(nodes$size[match(V(g)$name, nodes$Id)])
V(g)$size <- (V(g)$size/max(V(g)$size)) *5
} #own does not change the size of the g
if(v_size == 'own')
{
V(g)$size <- (V(g)$size/max(V(g)$size)) *5
}
#EDGES PROPERTIES
E(g)$arrow.size=0.1
E(g)$curved <- TRUE
E(g)$color <- 'lightgray'
#edge.label.color="gray",
print("Plotting Graph....in ")
#id <- tkplot(g)
#tkconfigure(igraph:::.tkplot.get(id)$canvas, "bg"="black")
par(lend = 1) # square line ends for the color legend
#areas_df <- subset(nodes, select=c('Discipline','color'))
#nodes["Discipline"] <- nodes$area_name #HARDCODED! for scimago
nodes["Discipline"] <- nodes$disc_name #HARDCODED! for ucsd
areas_df <- subset(nodes, select=c('Discipline','color'))
areas_df <-unique(areas_df)
areas_df$color <- factor(areas_df$color)
areas_df$Discipline <- factor(areas_df$Discipline)
#legend = c(paste("Inactive", num_ina), paste("Undeveloped", num_und), paste("Growing", num_grw), paste("Developed", num_dev), paste("Opportunity", num_rcm)) # category labels
#deleted rs_plot
plot.igraph(g,
vertex.label.cex=0.05*cex,
main= list(main, cex=1*cex),
vertex.label.font=0,
vertex.label.family='Helvetica',
vertex.label.color='black',
edge.label.cex=0.6*cex,
edge.label.family="Helvetica",
sub=list(sub, cex=0.8*cex),
asp=FALSE
)
if(!is.null(legend))
{
legend("bottomleft", # location of the legend on the heatmap plot
legend = legend,
pch=21, merge=FALSE,
# #pch=19, merge=FALSE,
pt.bg <- c("white", rev(heat.colors(3)), "white"),
col = node.colors <- c("white", rev(heat.colors(3)), "white"),
# #lty= c(1,1,1,1,NA), # line style
lty= NA, # line style
lwd = 1, # line width
# #lwd = NA, # line width
pt.cex = 0.8*cex,
box.col = "lightgrey",
# #fill = "white"
cex= 0.6*cex
)
}
# legend("bottomleft", # location of the legend on the heatmap plot
# legend = as.vector(areas_df$Discipline),
# pch=19, merge=FALSE,
# pt.bg = as.vector(areas_df$color),
# col = as.vector(areas_df$color),
# #lty= NA, # line style
# lwd = 1, # line width
# pt.cex = 0.1*cex,
# box.col = "lightgrey",
# #fill = "white",
# cex= 1*cex
# )
print("Should have printed...")
#EXPORTING PDF file
if(!is.null(file_name))
{
#path_rs =""
dev_file_name <- file.path()
dev_file_name <- file_name
#dev.print(pdf, file=dev_file_name, widht=6, height=3 );
pdf(dev_file_name, width=16, height=12, family='Helvetica', pointsize=8)
plot.igraph(g,
vertex.label.cex=1.1*cex,
main= list(main, cex=1*cex),
vertex.label.font=0,
vertex.label.family='Helvetica',
vertex.label.color='black',
edge.label.cex=0.6*cex,
edge.label.family="Helvetica",
#edge.width=0.1,
# sub=list(sub, cex=0.8*cex),
asp=FALSE
)
if(is.null(legend))
{
legend("bottomleft", # location of the legend on the heatmap plot
legend = as.vector(areas_df$Discipline),
pch=19, merge=FALSE,
pt.bg = 'black',
col = as.vector(areas_df$color),
#lty= NA, # line style
lwd = 1, # line width
pt.cex = 3*cex,
box.col = "lightgrey",
#fill = "white",
cex= 1.5*cex
)
}
else
{ legend("bottomleft", # location of the legend on the heatmap plot
legend = legend,
pch=21, merge=FALSE,
# #pch=19, merge=FALSE,
pt.bg <- c("white", rev(heat.colors(3)), "white"),
col = node.colors <- c("white", rev(heat.colors(3)), "white"),
# #lty= c(1,1,1,1,NA), # line style
lty= NA, # line style
lwd = 1, # line width
# #lwd = NA, # line width
pt.cex = 0.8*cex,
box.col = "lightgrey",
# #fill = "white"
cex= 0.6*cex
)
}
dev.off()
}
rs <- g
#exporting to an R object to use it for overlays and others
#save(rs, rs_plot, file = file.path(path_rs,'RESEARCH SPACE OUTPUT', paste('RS_iGRAPH_', taxo, '.RData', sep='')))
}
#get a vector of nodes with max degree value inside each community
# graph with membership
# n_com_max number of maximum values per category, in case they are equal max values
#' @export
get_max_com <- function(g, n_com_max=1, n_com)
{
V(g)$degree <- degree(g)
#getting betweenness centrality
#g_lcc <- g
# decompose.graph(g)
#g_lcc <- g_lcc[[1]] #largest connected component
#print(E(g_lcc)$weight==0)
#V(g_lcc)$betweenness <- betweenness(g_lcc, directed = FALSE)
#E(g_lcc)[E(g_lcc)$weight<=0] <- NULL
#E(g_lcc)[is.na(E(g_lcc)$weight)] <- NULL
#V(g)$betweenness[V(g_lcc)$name] <- V(g_lcc)$betweenness
V(g)$betweenness <- betweenness(g)
if(is.null(V(g)$membership))
{
#fc <- fastgreedy.community(g)
fc <- infomap.community(g)
V(g)$membership <- fc$membership
}
communities <- unique(V(g)$membership)
max_total <- vector()
#print(communities)
for(com in communities)
{
#print(paste("comm", com))
max_deg <- sort(V(g)$degree[V(g)$membership==com ], decreasing=TRUE)
max_betw <- sort(V(g)$betweenness[V(g)$membership==com], decreasing = TRUE)
n_max <- as.integer(log(length(max_deg), base=2)) #maximum nodes to label as a function of log base 2
max_com <- V(g)$name[V(g)$membership==com & V(g)$degree > max_deg[n_max]]
max_com <- unique(c(max_com, V(g)$name[V(g)$membership==com & V(g)$betweenness > max_betw[n_max]]))
#if(length(max_com) > n_com_max) #avoiding too much max in the same community
#max_com <- max_com[1:n_com_max]
if(length(max_com)==0 && length(max_deg) > 1) #in case no one node has been labaled in the community
{
nodes_com <- V(g)$name[V(g)$membership==com]
max_com <- nodes_com[trunc(length(nodes_com)/2)]
}
max_total <- c(max_total, max_com)
}
return(max_total)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.