##################################################
## R scripts for miRNet
## Description: miRNA-gene network analysis methods
## Author: Jeff Xia, jeff.xia@mcgill.ca
###################################################
#' Create miRNA Network
#' @export
CreateMirNets <- function(net.type){
library(igraph);
dataSet$mirnet <<- net.type;
if(net.type == "multilist" || net.type == "snp2mir2gene" || net.type == "snp2mir2dis" || net.type == "snp2mir2mol" || net.type == "snp2mir2lnc" || net.type == "snp2mir2tf"){
require('igraph');
for(i in 1:length(dataSet$mirtable)){
if(i == 1){
dataSet$mir.res = dataSet[dataSet$mirtable[i]][[1]];
}else{
dataSet$mir.res = rbind(dataSet$mir.res, dataSet[dataSet$mirtable[i]][[1]]);
}
}
my.nodes <- dataSet$mir.res[, c(1, 3)];
my.nodes$direction = rep("unknown", nrow(my.nodes))
#if("trrust" %in% dataSet$tfTargetType){
my.nodes$direction[which(paste0(my.nodes[,1], my.nodes[,2]) %in% dataSet$directionInx)] = dataSet$regDirection
my.nodes = as.data.frame(my.nodes)
#}
nd.nms <- c(dataSet$mir.res[, 1], dataSet$mir.res[, 3]);
nd.ids <- c(dataSet$mir.res[, 2], dataSet$mir.res[, 4]);
names(nd.ids) <- nd.nms;
dups <- duplicated(nd.ids); #note using unique will lose the names attribute
dataSet$node.anot <<- nd.ids[!dups];
colnames(my.nodes) = c("from", "to", "direction");
mir.graph <-simplify(graph_from_data_frame(my.nodes, directed=FALSE, vertices=NULL), edge.attr.comb="first");
}else{
if(data.type == "xeno.mir"){
my.nodes <- dataSet$mir.res[, c("miRNA", "Gene")];
nd.nms <- c(dataSet$mir.res[, "miRNA"], dataSet$mir.res[, "Gene"]);
nd.ids <- c(dataSet$mir.res[, "Accession"], dataSet$mir.res[, "Entrez"]);
}else {
my.nodes <- dataSet$mir.res[, c(1, 3)];
nd.nms <- c(dataSet$mir.res[, 1], dataSet$mir.res[, 3]);
nd.ids <- c(dataSet$mir.res[, 2], dataSet$mir.res[, 4]);
}
names(nd.ids) <- nd.nms;
dups <- duplicated(nd.ids); #note using unique will lose the names attribute
dataSet$node.anot <<- nd.ids[!dups];
library(igraph);
mir.graph <- simplify(graph_from_data_frame(my.nodes, directed=FALSE));
if(length(dataSet$nodeNumbers) == 0){
dataSet$nodeNumber = nrow(dataSet$mir.res);
dataSet<<- dataSet
}
}
# add node expression value
match.index <- match(V(mir.graph)$name, rownames(dataSet$mir.mapped));
expr.vals <- dataSet$mir.mapped[match.index, 1];
mir.graph <- set_vertex_attr(mir.graph, "abundance", index = V(mir.graph), value = expr.vals);
mir.graph <<- mir.graph;
substats <- DecomposeMirGraph(net.type, mir.graph, 2);
if(!is.null(substats)){
mir.graph <<- mir.graph;
mir.query <- nrow(dataSet$mir.mapped);
#mir.query <- nrow(dataSet$mir.orig); #original query
if(net.type == "multilist" || net.type == "snp2mir2gene" || net.type == "snp2mir2dis" || net.type == "snp2mir2mol" || net.type == "snp2mir2lnc" || net.type == "snp2mir2tf"){
mir.count <- length(unique(my.nodes[,1]));#matched mir
tgt.count <- length(unique(my.nodes[,2]));#matched target
}else{
mir.count <- length(unique(my.nodes[,1]));#matched mir
tgt.count <- length(unique(my.nodes[,2]));#matched target
}
if(.on.public.web){
return(c(mir.query, mir.count, tgt.count, ecount(mir.graph), length(mir.nets), substats));
}else{
return(paste("Network files are generated!"))
}
}else{
return(0);
}
}
# decompose to individual connected subnetworks, discard very small ones (defined by minNodeNum)
#' Decompose to Subnetworks
#' @export
DecomposeMirGraph <- function(net.type, gObj, minNodeNum = 2){
comps <-decompose(gObj, min.vertices=minNodeNum);
if(length(comps) == 0){
current.msg <<- paste("No connected nodes found after this filtering!");
return(NULL);
}
# first get stats
queries <- unique(dataSet$seeds);
net.stats <- as.data.frame(matrix(0, ncol = 3, nrow = length(comps)));
dataSet$query.nums <- vector()
dataSet$type.nums <- vector()
for(i in 1:length(comps)){
g <- comps[[i]];
if(vcount(g) > 0){
my.stat <- GetNetStatByType(g);
dataSet$type.nums = c(dataSet$type.nums, my.stat$node.num)
dataSet$query.nums = c(dataSet$query.nums, my.stat$query.num);
net.stats[i,] <- c(
vcount(g),
ecount(g),
sum(queries %in% V(g)$name)
);
}
}
# now sort graph based on node size and add names
ord.inx <- order(net.stats[,1], decreasing=TRUE);
net.stats <- net.stats[ord.inx,];
comps <- comps[ord.inx];
names(comps) <- rownames(net.stats) <- paste("mirnet", 1:length(comps), sep="");
net.stats <- cbind(rownames(net.stats), net.stats);
colnames(net.stats) <- c("Name", "Node", "Edge", "Query");
# note, we report stats for all nets (at least 2 nodes);
# but only contruct at least min node
hit.inx <- net.stats$Node >= minNodeNum;
comps <- comps[hit.inx];
sub.stats <- NULL;
json.res <- rep(list(list()), length(comps));
i <- 0;
for(nm in names(comps)){
sub.stats <- c(sub.stats, vcount(comps[[nm]]));
}
# now save the components
mir.nets <<- comps;
net.stats <<- net.stats[,-1]; # remove the first name col
# update the mir.res edge table
# both side of the edge must present in all.nodes
all.nodes <- V(gObj)$name;
if(net.type == "multilist" || net.type == "snp2mir2gene" || net.type == "snp2mir2dis" || net.type == "snp2mir2mol" || net.type == "snp2mir2lnc" || net.type == "snp2mir2tf"){
hit.inx <- (dataSet$mir.res[,1] %in% all.nodes) & (dataSet$mir.res[,3] %in% all.nodes);
}else{
if(data.type == "xeno.mir"){
hit.inx <- (dataSet$mir.res[, "miRNA"] %in% all.nodes) & (dataSet$mir.res[, "Gene"] %in% all.nodes);
}else{
hit.inx <- (dataSet$mir.res[,1] %in% all.nodes) & (dataSet$mir.res[,3] %in% all.nodes);
}
}
dataSet$mir.filtered <- dataSet$mir.res[hit.inx, ];
dataSet<<-dataSet
return(sub.stats);
}
#' Reduce Edge Density
#' @export
ReduceEdgeDensity <- function(net.type, nd.type="all"){
library(igraph);
all.nms <- V(mir.graph)$name;
edge.mat <- as_edgelist(mir.graph);
dgrs <- degree(mir.graph);
nodes2rm <- NULL;
set.seed(8574);
if(length(all.nms) > 50){
# only get top 50 with highest density (degree)
inx <- rank(-dgrs) < 50;
seed.vec <- all.nms[inx];
}else{
seed.vec <- all.nms;
}
paths.list <-list();
# now calculate the shortest paths only between these densely connected nodes
for(pos in 1:length(seed.vec)){
paths.list[[pos]] <- get.shortest.paths(mir.graph, seed.vec[pos], seed.vec[-pos])$vpath;
}
nds.inxs <- unique(unlist(paths.list));
nodes2rm <- all.nms[-nds.inxs];
# keep queries
if(nd.type == "mir"){ # only apply removing to miRNA nodes
mir.nms <- unique(edge.mat[,1]);
nodes2rm <- nodes2rm[nodes2rm %in% mir.nms];
}else if(nd.type=="other"){
my.nms <- unique(edge.mat[,2]);
nodes2rm <- nodes2rm[nodes2rm %in% my.nms];
}else{
#nothing to do
}
path.list <- NULL; gc();
nodes2rm <- unique(nodes2rm);
mir.graph <- simplify(delete.vertices(mir.graph, nodes2rm));
current.msg <<- paste("A total of", length(nodes2rm) , "was reduced.");
substats <- DecomposeMirGraph(net.type, mir.graph, 2);
if(!is.null(substats)){
mir.graph <<- mir.graph;
#CHECK
return(c(nrow(dataSet$mir.orig), length(unique(dataSet$mir.filtered[,1])), length(unique(dataSet$mir.filtered[,3])), ecount(mir.graph), length(mir.nets), substats));
}else{
return(0);
}
}
#' Filter miRNA Network
#' @export
FilterMirNet <- function(net.type, nd.type, min.dgr, min.btw){
library(igraph);
all.nms <- V(mir.graph)$name;
edge.mat <- as_edgelist(mir.graph);
dgrs <- degree(mir.graph);
nodes2rm.dgr <- nodes2rm.btw <- NULL;
if(nd.type == "mir"){
mir.nms <- unique(edge.mat[,1]);
hit.inx <- all.nms %in% mir.nms;
}else if(nd.type=="other"){
my.nms <- unique(edge.mat[,2]);
hit.inx <- all.nms %in% my.nms;
}else{ # all
hit.inx <- rep(TRUE, length(all.nms));
}
if(min.dgr > 0){
rm.inx <- dgrs <= min.dgr & hit.inx;
nodes2rm.dgr <- V(mir.graph)$name[rm.inx];
}
if(min.btw > 0){
btws <- betweenness(mir.graph);
rm.inx <- btws <= min.btw & hit.inx;
nodes2rm.btw <- V(mir.graph)$name[rm.inx];
}
nodes2rm <- unique(c(nodes2rm.dgr, nodes2rm.btw));
mir.graph <- simplify(delete.vertices(mir.graph, nodes2rm));
current.msg <<- paste("A total of", length(nodes2rm) , "was reduced.");
substats <- DecomposeMirGraph(net.type, mir.graph, 2);
if(!is.null(substats)){
mir.graph <<- mir.graph;
return(c(nrow(dataSet$mir.orig), length(unique(dataSet$mir.filtered[,1])), length(unique(dataSet$mir.filtered[,3])), ecount(mir.graph), length(mir.nets), substats));
}else{
return(0);
}
}
#' Filter miRNA Network by List
#' @export
FilterMirNetByList <- function(net.type, ids, id.type, remove){
library(igraph);
lines <- strsplit(ids, "\r|\n|\r\n")[[1]];
lines<- sub("^[[:space:]]*(.*?)[[:space:]]*$", "\\1", lines, perl=TRUE);
nms.vec = unique(unlist(dataSet$mir.res[, c(1,2,3,4)]))
# need to first convert to correct id used in the graph
hit.inx <- nms.vec %in% lines;
nodes2rm = nms.vec[hit.inx]
if(remove== "true"){
nodes2rm <- nodes2rm[nodes2rm %in% V(mir.graph)$name]; # make sure they are in the igraph object
}else{
nodes2rm <- V(mir.graph)$name[!(V(mir.graph)$name %in% nodes2rm)]; # make sure they are in the igraph object
}
mir.graph <- simplify(delete.vertices(mir.graph, nodes2rm));
current.msg <<- paste("A total of", length(nodes2rm) , "was reduced.");
substats <- DecomposeMirGraph(net.type, mir.graph, 2);
if(!is.null(substats)){
mir.graph <<- mir.graph;
return(c(nrow(dataSet$mir.orig), length(unique(dataSet$mir.filtered[,1])), length(unique(dataSet$mir.filtered[,3])), ecount(mir.graph), length(mir.nets), substats));
}else{
return(0);
}
}
#' Convert Igraph to JSON
#' @export
convertIgraph2JSON <- function(g, filenm){
#g <<- g
#save.image("net.RData");
library(igraph);
nms <- V(g)$name;
lbls <- as.character(dataSet$node.anot[nms]);
# get layers
if(anal.type == "multilist" || anal.type == "snp2mir"){
my.nodes <- dataSet$mir.res[, c(1, 3)];
} else if(anal.type == "snp2mir"){
my.nodes <- dataSet$mir.res[,c(2, 6, 8)];
} else{
my.nodes <- dataSet$mir.res[, c(1, 3)];
}
m <- as.matrix(my.nodes);
layers = ceiling(match(V(g)$name, m)/nrow(m));
# setup shape (mir square, gene circle)
shapes <- rep("circle", length(nms));
# get edge data
edge.mat <- as_edgelist(g);
edge.mat <- cbind(id=1:nrow(edge.mat), source=edge.mat[,1], target=edge.mat[,2], type=rep("arrow", nrow(edge.mat)));
# now get coords
pos.xy <- PerformLayOut(g, layers, "Default");
node.btw <- as.numeric(betweenness(g));
node.dgr <- as.numeric(degree(g));
if(anal.type %notin% c("array", "rnaseq", "qpcr")){
node.exp <- as.character(vertex_attr(g, name="abundance", index = V(g)));
}else{
node.exp <- as.numeric(vertex_attr(g, name="abundance", index = V(g)));
}
if(vcount(g) > 1000){
minSize = 2;
}else if(vcount(g) > 300){
minSize = 2;
}else{
minSize = 3;
}
# node size to betweenness values
node.sizes <- as.numeric(rescale2NewRange(node.btw, minSize, 8));
# update mir node size, shape and color
if(anal.type == "multilist" || anal.type == "snp2mir"){
mir.inx <- nms %in% mir.nmsu;
}else{
mir.inx <- nms %in% edge.mat[,2];
}
shapes[mir.inx] <- "square";
# update snp node size, shape and color
snp.inx <- as.vector(sapply(nms, function(x) substr(x, 1, 2) == "rs"));
shapes[snp.inx] <- "diamond";
#if(anal.type == "multilist"){
# update disease node shape
dis.inx <- nms %in% net.info$dis.nms;
shapes[dis.inx] <- "equilateral";
circ.inx <- nms %in% net.info$circ.nms
pseudo.inx <- nms %in% net.info$pseudo.nms
lnc.inx <- nms %in% net.info$lnc.nms;
tf.inx <- nms %in% net.info$tf.nms;
epi.inx <- nms %in% net.info$epi.nms;
snc.inx <- nms %in% net.info$snc.nms;
mol.inx <- nms %in% net.info$mol.nms;
gene.inx <- nms %in% net.info$gene.nms;
protein.inx <- nms %in% net.info$protein.nms;
# extract other molecule index
#}
# slightly highlight mir node in general
if(substring(anal.type, 1,3) == "mir"){ #highlight miRNA if they are the query
node.sizes[mir.inx] <- node.sizes[mir.inx] + 1;
}else{
node.sizes[mir.inx] <- node.sizes[mir.inx] + 0.4;
}
node.types <- rep("", length(node.dgr));
node.types[mir.inx] <- paste("miRNA", node.types[mir.inx]);
node.types[gene.inx] <- paste("Gene", node.types[gene.inx]);
node.types[snp.inx] <- paste("SNP", node.types[snp.inx]);
node.types[lnc.inx] <- paste("lncRNA", node.types[lnc.inx]);
node.types[snc.inx] <- paste("sncRNA", node.types[snc.inx]);
node.types[circ.inx] <- paste("circRNA", node.types[circ.inx]);
node.types[dis.inx] <-paste("Disease", node.types[dis.inx]);
node.types[tf.inx] <- paste("TF", node.types[tf.inx]);
node.types[mol.inx] <- paste("Compound", node.types[mol.inx]);
node.types[epi.inx] <- paste("Epigenetic", node.types[epi.inx]);
node.types[pseudo.inx] <- paste("Pseudogene", node.types[pseudo.inx]);
node.types[protein.inx] <- paste("Protein", node.types[protein.inx]);
n.types <- rep("", length(node.dgr));
n.types[mir.inx] <- "miRNA";
n.types[gene.inx] <- "Gene";
n.types[snp.inx] <- "SNP";
n.types[lnc.inx] <- "lncRNA";
n.types[snc.inx] <- "sncRNA";
n.types[circ.inx] <- "circRNA";
n.types[dis.inx] <- "Disease";
n.types[tf.inx] <- "TF";
n.types[mol.inx] <- "Compound";
n.types[epi.inx] <- "Epigenetic";
n.types[pseudo.inx] <- "Pseudogene";
n.types[protein.inx] <- "Protein";
node.types = trimws(node.types);
node.types = gsub(" ", "_", node.types);
node.cols = rep("#ff4500", length(node.dgr));
ntype = unique(n.types)
color.vec = gg_color_hue(length(ntype))
for(i in 1:length(ntype)){
#if(ntype[i] != "Gene"){
node.cols[which(n.types ==ntype[i])]=color.vec[i]
#}
}
node.cols[mir.inx] <- "#306EFF"; # dark blue
# update mir node color
topo.colsw <- node.cols;
colVec = unique(node.cols);
node.cols[mir.inx] <- "#98F5FF";
topo.colsb <- node.cols;
freq = table(node.types)
duplicated.types=node.types
for(i in 1:length(unique(node.types))){
duplicated.types[duplicated.types == names(freq[i])]=order(freq)[i]
}
if((length(freq) == 3 && "Gene" %in% names(freq) && "TF" %in% names(freq) && "miRNA" %in% names(freq))){
V(g)$layers = as.numeric(duplicated.types);
duplicated.types[node.types == "Gene"] = 2
duplicated.types[node.types == "TF"] = 3
duplicated.types[node.types == "miRNA"] = 1
}else if(length(freq) == 3 && "SNP" %in% names(freq) && "Gene" %in% names(freq)&& "miRNA" %in% names(freq)){
V(g)$layers = as.numeric(duplicated.types);
duplicated.types[node.types == "miRNA"] = 2
duplicated.types[node.types == "SNP"] = 1
duplicated.types[node.types == "Gene"] = 3
}else if(length(freq) == 3 && "lncRNA" %in% names(freq) && "Gene" %in% names(freq)&& "miRNA" %in% names(freq)){
duplicated.types[node.types == "miRNA"] = 2
duplicated.types[node.types == "lncRNA"] = 1
duplicated.types[node.types == "Gene"] = 3
V(g)$layers = as.numeric(duplicated.types);
}else{
duplicated.types = as.numeric(duplicated.types)
V(g)$layers = duplicated.types
}
V(g)$group = as.numeric(duplicated.types); #concentric circle
node.cols[mir.inx] <- "#306EFF"; # dark blue
# update mir node color
topo.colsw <- node.cols;
node.cols[mir.inx] <- "#98F5FF";
topo.colsb <- node.cols;
# color based on expression
bad.inx <- is.na(node.exp) | node.exp==0;
if(!all(bad.inx)){
exp.val <- node.exp;
node.colsb.exp <- getExpColors(node.exp, c("#78ff4d", "#FA8072", "#ebebeb"));
node.colsw.exp <- getExpColors(node.exp, c("#269b00", "#b30000", "#333333"));
node.colsb.exp[bad.inx] <- "#d3d3d3";
node.colsw.exp[bad.inx] <- "#c6c6c6";
}else{
node.colsb.exp <- rep("#d3d3d3",length(node.exp));
node.colsw.exp <- rep("#c6c6c6",length(node.exp));
}
seed.inx <- nms %in% unique(dataSet$seeds);
seed_arr <- rep("notSeed",length(node.dgr));
seed_arr[seed.inx] <- "seed";
# now create the json object
nodes <- vector(mode="list");
for(i in 1:length(node.sizes)){
nodes[[i]] <- list(
id=nms[i],
size=node.sizes[i],
molType=node.types[i],
type=shapes[i],
seedArr =seed_arr[i],
url=lbls[i],
colorb=topo.colsb[i],
colorw=topo.colsw[i],
x=pos.xy[i,1],
y=pos.xy[i,2],
attributes=list(
expr = node.exp[i],
expcolb=node.colsb.exp[i],
expcolw=node.colsw.exp[i],
degree=node.dgr[i], # actual degree in orginal network
between=node.btw[i])
);
}
current.mirnet <<- g
# save node table
nd.tbl <- data.frame(Id=nms, Label=nms, Degree=node.dgr, Betweenness=node.btw);
fileNm <- paste("node_table_", substring(filenm, 0, nchar(filenm)-5), ".csv", sep="")
fast.write.csv(nd.tbl, file=fileNm, row.names=FALSE);
# covert to json
library(RJSONIO);
edge.color = rep("#d3d3d3",nrow(edge.mat));
up.inx <- E(g)$direction == "+";
down.inx <- E(g)$direction == "-";
edge.color[up.inx] = "#FF0000" #red
edge.color[down.inx] = "#11679A" #blue
edge.mat =cbind(edge.mat, color=edge.color)
netData <- list(mirnet=dataSet$mirnet, mirtarget=dataSet$mirtarget, organism=dataSet$org, nodes=nodes, edges=edge.mat, nodeColors = colVec, nodeTypes=ntype);
sink(filenm);
cat(toJSON(netData));
sink();
# also save to GraphML
write_graph(g, file="mirnet.graphml", format="graphml");
if(!.on.public.web){
library(httr);
r <- POST("localhost:8080/miRNet/faces/R_REQUEST?type=network", body = list(organism = data.org, idtype = "entrez", network = toJSON(netData)))
#TO-DO: need to check org and idtype
}
}
#' Prepare Graph ML
#' @export
PrepareGraphML <- function(net.nm){
write_graph(mir.nets[[net.nm]], file=paste(net.nm, ".graphml", sep=""), format="graphml");
}
#' Prepare CSV
#' @export
PrepareCSV <- function(table.nm){
if(anal.type == "multilist" || anal.type == "snp2mir" || anal.type == "tf2genemir" || anal.type == "gene2tfmir"){
fast.write.csv(dataSet[[table.nm]], file=paste(table.nm, ".csv", sep=""), row.names = FALSE);
} else {
fast.write.csv(dataSet$mir.res, file=paste(dataSet$mirnet, ".csv", sep=""), row.names = FALSE);
}
}
#' Prepare miRNA Network
#' @export
PrepareMirNet <- function(mir.nm, file.nm){
my.mirnet <- mir.nets[[mir.nm]];
current.mirnet <<- my.mirnet;
current.mir.nm <<- mir.nm;
convertIgraph2JSON(my.mirnet, file.nm);
if(.on.public.web){
return(1);
}else{
return(paste("Network files are downloaded!"))
}
}
#' Apply Graph Layout
#' @param g Graph object.
#' @param layer Optional: layer information.
#' @param algo Layout algorithm: Default, FrR, circle, random, lgl, gopt, circular_tripartite, tripartite, concentric, backbone, mds.
#' @param focus Focus node.
#' @export
PerformLayOut <- function(g, layers, algo, focus=""){
vc <- vcount(g);
if(algo == "Default"){
if(vc > 3000) {
pos.xy <- layout.lgl(g, maxiter = 100);
}else if(vc < 150){
pos.xy <- layout.kamada.kawai(g);
}else{
pos.xy <- layout.fruchterman.reingold(g);
}
}else if(algo == "FrR"){
pos.xy <- layout_with_fr(g, area=34*vc^2);
}else if(algo == "circle"){
pos.xy <- layout_in_circle(g);
}else if(algo == "random"){
pos.xy <- layout_randomly (g);
}else if(algo == "lgl"){
pos.xy <- layout_with_lgl(g);
}else if(algo == "gopt"){
pos.xy <- layout_with_graphopt(g)
}else if(algo == "circular_tripartite"){
library(ggforce)
l <- layout_with_sugiyama(g, layers = V(g)$group*(vc/3) +30)
layout <- l$layout
radial <- radial_trans(
r.range = rev(range(layout[,2])),
a.range = range(layout[,1]),
offset = 0
)
coords <- radial$transform(layout[,2], layout[,1])
layout[,1] <- coords$x
layout[,2] <- coords$y
pos.xy= layout
}else if(algo == "tripartite"){
l <- layout_with_sugiyama(g, layers = V(g)$layers*(vc/4))
pos.xy <- -l$layout[,2:1]
}else if(algo == "concentric"){
library(graphlayouts)
# the fist element in the list for concentric is the central node.
if(focus==""){
inx=1;
}else{
inx = which(V(g)$name == focus)
}
coords <- layout_with_focus(g,inx)
pos.xy <- coords$xy
}else if(algo == "backbone"){
library(graphlayouts)
if(length(V(g)$name)<2000){
coords = layout_with_stress(g)
pos.xy = coords
}else{
coords = layout_with_sparse_stress(g,pivots=100)
pos.xy = coords
}
}else if(algo == "mds"){
library(graphlayouts)
coords = layout_with_pmds(g,length(V(g)$name)/10)
pos.xy = coords/100
rownames(pos.xy) = NULL
}
pos.xy;
}
#' Update Network Layout
#' @export
UpdateNetworkLayout <- function(algo, filenm, focus){
# get layers
if(anal.type == "multilist" || anal.type == "snp2mir"){
my.nodes <- dataSet$mir.res[, c(1, 3)];
} else if(anal.type == "snp2mir"){
my.nodes <- dataSet$mir.res[,c(2, 6, 8)];
} else{
my.nodes <- dataSet$mir.res[, c(1, 3)];
}
m <- as.matrix(my.nodes);
layers = ceiling(match(V(current.mirnet)$name, m)/nrow(m));
pos.xy <- PerformLayOut(current.mirnet, layers, algo, focus);
nms <- V(current.mirnet)$name;
nodes <- vector(mode="list");
for(i in 1:length(nms)){
nodes[[i]] <- list(
id=nms[i],
x=pos.xy[i,1],
y=pos.xy[i,2]
);
}
# now only save the node pos to json
library(RJSONIO);
netData <- list(nodes=nodes);
sink(filenm);
cat(toJSON(netData));
sink();
return(filenm);
}
#' Get Network Names
#' @export
GetNetNames <- function(){
rownames(net.stats);
}
#' Get Table Names
#' @export
GetTableNames <- function(){
if(anal.type == "multilist"|| anal.type == "gene" || anal.type == "qpcr"){
res <- unique(dataSet$mirtable);
}else if(anal.type == "gene2tfmir" ){
res <- dataSet$type
}else if(anal.type == "snp2mir" || anal.type == "tf2genemir"){
res <- dataSet$mirtable;
} else {
res <- dataSet$mirnet
}
infoSet <- readSet(infoSet, "infoSet");
infoSet$paramSet$tableNames <- res;
saveSet(infoSet, "infoSet");
return(res)
}
#' Get Seeds Column
#' @export
GetSeedsColumn <- function(){
tbls = unique(dataSet$mirtable);
vec = vector();
for( i in 1:length(tbls)){
nms = strsplit(tbls[i], "2")[[1]];
orignms = nms
nms= gsub("mir", "miRNA",nms)
nms= gsub("tf", "TF",nms)
nms= gsub("rna", "RNA",nms)
nms= gsub("pseudo", "pseudo-gene",nms)
nms= gsub("epi", "Epigenetic Modif.",nms)
nms= gsub("dis", "Disease",nms)
nms= gsub("snp", "SNP",nms)
nms= gsub("mol", "Compounds",nms)
nms= gsub("lnc", "lncRNA",nms)
nms= gsub("snc", "sncRNA",nms)
nms= gsub("circ", "circRNA",nms)
nms= gsub("xeno", "xeno-miRNA",nms)
if(nms[1] == "protein"){
vec[i]=paste0(nms[1],":" ,length(unique(dataSet[tbls[i]][[1]][,1])) + length(unique(dataSet[tbls[i]][[1]][,3])) )
}else if(tbls[i] %in% c("tf2gene", "gene2tf") ){
vec[i]=paste0(nms[1],":" ,length(unique(dataSet[tbls[i]][[1]][,1])),", ",nms[2],": ",length(unique(dataSet[tbls[i]][[1]][,3])))
}else if(tbls[i] %in% c("tf2mir")){
vec[i]=paste0(nms[1],":" ,length(unique(dataSet[tbls[i]][[1]][,3])),", ",nms[2],": ",length(unique(dataSet[tbls[i]][[1]][,1])))
}else if(orignms[1] == "mir"){
vec[i]=paste0(nms[1],":" ,length(unique(dataSet[tbls[i]][[1]][,1])),", ",nms[2],": ",length(unique(dataSet[tbls[i]][[1]][,3])))
}else if(orignms[2] == "mir"){
vec[i]=paste0(nms[1],":" ,length(unique(dataSet[tbls[i]][[1]][,3])),", ",nms[2],": ",length(unique(dataSet[tbls[i]][[1]][,1])))
}else{
vec[i]=paste0(nms[1],":" ,length(unique(dataSet[tbls[i]][[1]][,1])),", ",nms[2],": ",length(unique(dataSet[tbls[i]][[1]][,3])))
}
}
infoSet <- readSet(infoSet, "infoSet");
infoSet$paramSet$seedsColumn <- vec;
saveSet(infoSet, "infoSet");
return(vec)
}
#' Get Network Stats
#' @export
GetNetStats <- function(){
as.matrix(net.stats);
}
#' Get Network Names as String
#' @export
GetNetsNameString <- function(){
paste(rownames(net.stats), collapse="||");
}
#' Get Minimally Connected Graphs
#' @export
GetMinConnectedGraphs <- function(net.type, max.len = 200){
library(igraph);
set.seed(8574);
# first get shortest paths for all pair-wise seeds
my.seeds <- unique(dataSet$seeds);
sd.len <- length(my.seeds);
paths.list <-list();
# first trim mir.graph to remove no-seed nodes of degree 1
dgrs <- degree(mir.graph);
keep.inx <- dgrs > 1 | (names(dgrs) %in% my.seeds);
nodes2rm <- V(mir.graph)$name[!keep.inx];
mir.graph <- simplify(delete.vertices(mir.graph, nodes2rm));
# need to restrict the operation b/c get.shortest.paths is very time consuming
# for top max.len highest degrees
if(sd.len > max.len){
hit.inx <- names(dgrs) %in% my.seeds;
sd.dgrs <- dgrs[hit.inx];
sd.dgrs <- rev(sort(sd.dgrs));
# need to synchronize all (dataSet$seeds) and top seeds (my.seeds)
dataSet$seeds <- names(sd.dgrs);
my.seeds <- dataSet$seeds[1:max.len];
sd.len <- max.len;
current.msg <<- paste("The minimum connected network was computed using the top", sd.len, "seed nodes in the network based on their degrees.");
}else{
current.msg <<- paste("The minimum connected network was computed using all seed nodes in the network.");
}
# now calculate the shortest paths for
# each seed vs. all other seeds (note, to remove pairs already calculated previously)
for(pos in 1:sd.len){
paths.list[[pos]] <- get.shortest.paths(mir.graph, my.seeds[pos], dataSet$seeds[-(1:pos)])$vpath;
}
nds.inxs <- unique(unlist(paths.list));
nodes2rm <- V(mir.graph)$name[-nds.inxs];
g <- simplify(delete.vertices(mir.graph, nodes2rm));
nodeList <- as_data_frame(g, "vertices");
nodeList <- as.data.frame(nodeList[, 1]);
colnames(nodeList) <- c("ID");
fast.write.csv(nodeList, file="orig_node_list.csv", row.names=F);
edgeList <- as_data_frame(g, "edges");
edgeList <- cbind(rownames(edgeList), edgeList);
colnames(edgeList) <- c("Id", "Source", "Target");
fast.write.csv(edgeList, file="orig_edge_list.csv", row.names=F);
path.list <- NULL;
substats <- DecomposeMirGraph(net.type, g);
if(!is.null(substats)){
mir.graph <<- g;
return(c(nrow(dataSet$mir.orig), length(unique(dataSet$mir.filtered[,1])), length(unique(dataSet$mir.filtered[,3])), ecount(mir.graph), length(mir.nets), substats));
}else{
return(0);
}
}
#' Update Subnetwork Stats
#' @export
UpdateSubnetStats <- function(){
old.nms <- names(mir.nets);
net.stats <- ComputeSubnetStats(mir.nets);
ord.inx <- order(net.stats[,1], decreasing=TRUE);
net.stats <- net.stats[ord.inx,];
rownames(net.stats) <- old.nms[ord.inx];
net.stats <<- net.stats;
}
#' Compute Subnetwork Stats
#' @export
ComputeSubnetStats <- function(comps){
net.stats <- as.data.frame(matrix(0, ncol = 3, nrow = length(comps)));
colnames(net.stats) <- c("Node", "Edge", "Query");
queries <- rownames(dataSet$mir.mapped);
for(i in 1:length(comps)){
g <- comps[[i]];
net.stats[i,] <- c(vcount(g),ecount(g),sum(queries %in% V(g)$name));
}
return(net.stats);
}
#' Number of Queries
#' @export
GetQueryNum <-function(){
return(dataSet$query.nums)
}
#' Number of Types
#' @export
GetTypeNum <-function(){
return(dataSet$type.nums)
}
# return information based on node type
#' Get Network Stats by Type
#' @export
GetNetStatByType <- function(g){
nd.queries <- V(g)$name;
uniq.ins <- unique(rownames(dataSet$mir.mapped));
sd.queries <- uniq.ins[uniq.ins %in% nd.queries];
if(length(dataSet$mirtable)>0 && (anal.type %notin% c("qpcr", "rnaseq", "array", "xenomir-search", "xenomir-browse"))){
vec <- dataSet$mirtable
vec <- vec[vec != "protein2protein"]
vec <- strsplit(vec, "2");
vec <- unique(unlist(vec))
sd.res <- "";
nd.res <- "";
for( i in 1:length(vec)){
if(vec[i] == "mir"){
nms = mir.nmsu
lbl = "miRNA"
}else if(vec[i] == "circ"){
nms = net.info$circ.nms
lbl = "circRNA";
}else if(vec[i] == "snc"){
nms = net.info$snc.nms
lbl = "sncRNA";
}else if(vec[i] == "pseudo"){
nms = net.info$pseudo.nms
lbl = "pseudogene";
}else if(vec[i] == "lnc"){
nms = net.info$lnc.nms
lbl = "lncRNA";
}else if(vec[i] == "dis"){
nms = net.info$dis.nms
lbl = "Disease";
}else if(vec[i] == "mol"){
nms = net.info$mol.nms
lbl = "Compound";
}else if(vec[i] == "tf"){
nms = net.info$tf.nms
lbl = "TF";
}else if(vec[i] == "gene"){
nms = net.info$gene.nms
lbl = "Gene";
}else if(vec[i] == "protein"){
nms = net.info$protein.nms
lbl = "Protein";
}else if(vec[i] == "epi"){
nms = net.info$epi.nms
lbl = "Epigenetic modif.";
}else if(vec[i] == "snp"){
nms = unique(c(dataSet$snp2mir$rsID, dataSet$snp2mirbs$rsID))
lbl = "SNP";
}else if(vec[i] == "xeno"){
nms = unique(c(dataSet$xeno2gene[,3]))
lbl = "Xeno-miRNA";
}
nms <- unique(nms);
if(sum(nms %in% nd.queries)>0 && !grepl(lbl, nd.res)){
nd.res <- paste0(lbl,": ", sum(nms %in% nd.queries), "; ", nd.res)
}
if(sum(nms %in% sd.queries)>0){
sd.res <- paste0(lbl,": ", sum(nms %in% sd.queries), "; ", sd.res)
}
}
my.stat <- list(
node.num = nd.res,
query.num = sd.res
);
}else{
my.stat <- list(
query.num = sum(sd.queries %in% V(g)$name),
node.num = length(V(g)$name)
);
}
return(my.stat);
}
#' Number of Unmapped
#' @export
GetUnmappedNum <- function(){
library(igraph);
totalOrig <- 0
allData <- vector()
if(length(dataSet$data)>0){
for(i in 1:length(names(dataSet$data))){
nm=names(dataSet$data)[i]
totalOrig = totalOrig + nrow(dataSet$data[[nm]])
allData = c(allData, as.vector(rownames(dataSet$data[[nm]])));
}
return(totalOrig-sum(unique(allData) %in% V(mir.graph)$name))
}else{
queries <- unique(rownames(dataSet$mir.mapped));
return(length(queries)-sum(queries %in% V(mir.graph)$name))
}
}
# from to should be valid nodeIDs
#' Get Shortest Paths
#' @export
GetShortestPaths <- function(from, to){
paths <- get.all.shortest.paths(current.mirnet, from, to)$res;
if(length(paths) == 0){
return (paste("No connection between the two nodes!"));
}
path.vec <- vector(mode="character", length=length(paths));
for(i in 1:length(paths)){
path.inx <- paths[[i]];
path.sybls <- path.ids <- V(current.mirnet)$name[path.inx];
pids <- paste(path.ids, collapse="->");
psbls <- paste(path.sybls, collapse="->");
path.vec[i] <- paste(c(pids, psbls), collapse=";")
}
if(length(path.vec) > 50){
path.vec <- path.vec[1:50];
}
all.paths <- paste(path.vec, collapse="||");
return(all.paths);
}
#' Extract miRNA Network Module
#' @export
ExtractMirNetModule<- function(nodeids){
set.seed(8574);
nodes <- strsplit(nodeids, ";")[[1]];
g <- current.mirnet;
# try to see if the nodes themselves are already connected
hit.inx <- V(g)$name %in% nodes;
gObj <- induced.subgraph(g, V(g)$name[hit.inx]);
# now find connected components
comps <-decompose(gObj, min.vertices=1);
if(length(comps) == 1){ # nodes are all connected
g <- comps[[1]];
}else{
# extract modules
paths.list <-list();
sd.len <- length(nodes);
for(pos in 1:sd.len){
paths.list[[pos]] <- get.shortest.paths(g, nodes[pos], nodes[-(1:pos)])$vpath;
}
nds.inxs <- unique(unlist(paths.list));
nodes2rm <- V(g)$name[-nds.inxs];
g <- simplify(delete.vertices(g, nodes2rm));
}
nodeList <- as_data_frame(g, "vertices");
if(nrow(nodeList) < 3){
return ("NA");
}
module.count <- module.count + 1;
module.nm <- paste("module", module.count, sep="");
colnames(nodeList) <- c("Id", "Label");
ndFileNm = "mirnet_node_list.csv";
fast.write.csv(nodeList, file=ndFileNm, row.names=F);
edgeList <- as_data_frame(g, "edges");
edgeList <- cbind(rownames(edgeList), edgeList);
colnames(edgeList) <- c("Id", "Source", "Target");
edgFileNm = "mirnet_edge_list.csv";
fast.write.csv(edgeList, file=edgFileNm, row.names=F);
filenm <- paste(module.nm, ".json", sep="");
convertIgraph2JSON(g, filenm);
# record the module
mir.nets[[module.nm]] <<- g;
UpdateSubnetStats();
module.count <<- module.count
return (filenm);
}
# exclude nodes in current.net (networkview)
#' Exlude Nodes
#' @export
ExcludeNodes <- function(nodeids, filenm){
nodes2rm <- strsplit(nodeids, ";")[[1]];
current.mirnet <- delete.vertices(current.mirnet, nodes2rm);
# need to remove all orphan nodes
bad.vs<-V(current.mirnet)$name[degree(current.mirnet) == 0];
current.mirnet <<- delete.vertices(current.mirnet, bad.vs);
# return all those nodes that are removed
nds2rm <- paste(c(bad.vs, nodes2rm), collapse="||");
# update topo measures
node.btw <- as.numeric(betweenness(current.mirnet));
node.dgr <- as.numeric(degree(current.mirnet));
if(anal.type %notin% c("array", "rnaseq", "qpcr")){
node.exp <- as.character(vertex_attr(current.mirnet, name="abundance", index = V(current.mirnet)));
}else{
node.exp <- as.numeric(vertex_attr(current.mirnet, name="abundance", index = V(current.mirnet)));
}
nms <- V(current.mirnet)$name;
nodes <- vector(mode="list");
for(i in 1:length(nms)){
nodes[[i]] <- list(
id=nms[i],
expr = node.exp[i],
degree=node.dgr[i],
between=node.btw[i],
expr = node.exp[i]
);
}
# now only save the node pos to json
library(RJSONIO);
netData <- list(deletes=nds2rm,nodes=nodes);
sink(filenm);
cat(toJSON(netData));
sink();
return(filenm);
}
#' Layout in Circles
#' @export
layout_in_circles <- function(g, group=1) {
layout <- lapply(split(V(g), group), function(x) {
layout_in_circle(induced_subgraph(g,x))
})
layout <- Map(`*`, layout, seq_along(layout))
x <- matrix(0, nrow=vcount(g), ncol=2)
split(x, group) <- layout
x
}
#' Get Network Stats as Matrix
#' @export
GetNetStatMatrix <-function(){
return(signif(as.matrix(res), 5));
}
#' Network Stats Column Names
#' @export
GetNetStatColNames <-function(){
return(signif(as.matrix(res), 5));
}
#' Network Names
#' @export
GetNetNms <-function(){
return(rownames(dataSet$resTable));
}
#' Compute Network based on Prize-collecting Steiner Forest Graph
#' @export
ComputePCSFNet <- function(){
edg <- as.data.frame(as_edgelist(mir.graph));
edg$V3 <- rep(1, nrow(edg));
colnames(edg) <- c("from", "to", "cost");
node_names <- unique(c(as.character(edg[,1]),as.character(edg[,2])))
ppi <- graph_from_data_frame(edg[,1:2],vertices=node_names,directed=F)
E(ppi)$weight <- as.numeric(edg[,3])
ppi <- simplify(ppi)
expr.vec <- as.vector(dataSet$mir.mapped)
expr.vec[expr.vec == "*"] <- 1;
expr.vec <- as.numeric(expr.vec)
names(expr.vec) <- rownames(dataSet$mir.mapped);
g <- Compute.SteinerForest(ppi, expr.vec, w = 5, b = 100, mu = 0.0005);
nodeList <- as_data_frame(g, "vertices");
colnames(nodeList) <- c("Id", "Label");
write.csv(nodeList, file="orig_node_list.csv", row.names=F, quote=F);
edgeList <- as_data_frame(g, "edges");
edgeList <- cbind(rownames(edgeList), edgeList);
colnames(edgeList) <- c("Id", "Source", "Target");
write.csv(edgeList, file="orig_edge_list.csv", row.names=F, quote=F);
path.list <- NULL;
substats <- DecomposeMirGraph(dataSet$mirnet, g);
if(!is.null(substats)){
mir.graph <<- g;
return(c(nrow(dataSet$mir.orig), length(unique(dataSet$mir.filtered[,1])), nrow(nodeList), nrow(edgeList), length(mir.nets), substats));
}else{
return(0);
}
}
# Adapted from PCSF
# https://github.com/IOR-Bioinformatics/PCSF
Compute.SteinerForest <- function(ppi, terminals, w = 2, b = 1, mu = 0.0005, dummies=NA){
ppi <<- ppi;
terminals <<- terminals;
w <<- w;
b <<- b;
mu <<- mu;
dummies <<- dummies;
#save.image("stein.RData");
# Gather the terminal genes to be analyzed, and their scores
terminal_names <- names(terminals)
terminal_values <- as.numeric(terminals)
# Incorporate the node prizes
node_names <- V(ppi)$name
node_prz <- vector(mode = "numeric", length = length(node_names))
index <- match(terminal_names, node_names)
percent <- signif((length(index) - sum(is.na(index)))/length(index)*100, 4)
if (percent < 5){
print("Less than 1% of your terminal nodes are matched in the interactome!");
return(NULL);
}
paste0(" ", percent, "% of your terminal nodes are included in the interactome\n");
terminal_names <- terminal_names[!is.na(index)]
terminal_values <- terminal_values[!is.na(index)]
index <- index[!is.na(index)]
node_prz[index] <- terminal_values
if(missing(dummies)||is.null(dummies)||is.na(dummies)){
dummies <- terminal_names #re-assign this to allow for input
}
## Prepare input file for MST-PCSF implementation in C++
# Calculate the hub penalization scores
node_degrees <- igraph::degree(ppi)
hub_penalization <- - mu*node_degrees
# Update the node prizes
node_prizes <- b*node_prz
index <- which(node_prizes==0)
node_prizes[index] <- hub_penalization[index]
# Construct the list of edges
edges <- ends(ppi,es = E(ppi))
from <- c(rep("DUMMY", length(dummies)), edges[,1])
to <- c(dummies, edges[,2])
cost <- c(rep(w, length(dummies)), E(ppi)$weight)
#PCSF will faill if there are NAs in weights, this will check and fail gracefully
if(any(is.na(E(ppi)$weight))){
print("NAs found in the weight vector!");
return (NULL);
}
## Feed the input into the PCSF algorithm
output <- XiaLabCppLib::call_sr(from,to,cost,node_names,node_prizes)
# Check the size of output subnetwork and print a warning if it is 0
if(length(output[[1]]) != 0){
# Contruct an igraph object from the MST-PCSF output
e <- data.frame(output[[1]], output[[2]], output[[3]])
e <- e[which(e[,2]!="DUMMY"), ]
names(e) <- c("from", "to", "weight")
# Differentiate the type of nodes
type <- rep("Steiner", length(output[[4]]))
index <- match(terminal_names, output[[4]])
index <- index[!is.na(index)]
type[index] <- "Terminal"
v <- data.frame(output[[4]], output[[5]], type)
names(v) <- c("terminals", "prize", "type")
subnet <- graph_from_data_frame(e,vertices=v,directed=F)
dummy_edges <- which(output[[1]] == "DUMMY" | output[[2]] == "DUMMY")
# Check if dummy edges exist
if (length(dummy_edges) > 0) {
# Remove the "DUMMY" edges from the edge lists (output[[1]], output[[2]]) and weights (output[[3]])
output[[1]] <- output[[1]][-dummy_edges]
output[[2]] <- output[[2]][-dummy_edges]
output[[3]] <- output[[3]][-dummy_edges]
}
# Now remove the "DUMMY" vertices from the subnet graph
subnet <- delete_vertices(subnet, "DUMMY")
# Check the number of edges and length of weights
if(length(E(subnet)) == length(output[[3]])){
E(subnet)$weight <- as.numeric(output[[3]])
} else {
print(paste("Mismatch in edges and weights. Edges:", length(E(subnet)), "Weights:", length(output[[3]])))
# Optionally, trim the weight vector
output[[3]] <- output[[3]][1:length(E(subnet))]
E(subnet)$weight <- as.numeric(output[[3]])
}
subnet <- delete_vertices(subnet, names(which(degree(subnet)==0)));
return(subnet)
} else{
print("Subnetwork can not be identified for a given parameter set")
return(NULL);
}
}
# support walktrap, infomap and lab propagation
#' Find Communities
#' @param method Method: walktrap, infomap, labelprop.
#' @param use.weight Logical.
#' @export
FindCommunities <- function(method="walktrap", use.weight=FALSE){
library(igraph)
# make sure this is the connected
current.net <- current.mirnet
g <- current.net;
if(!is.connected(g)){
g <- decompose(current.net, min.vertices=2)[[1]];
}
total.size <- length(V(g));
if(method == "walktrap"){
fc <- walktrap.community(g);
}else if(method == "infomap"){
fc <- infomap.community(g);
}else if(method == "labelprop"){
fc <- label.propagation.community(g);
}else{
print(paste("Unknown method:", method));
return ("NA||Unknown method!");
}
if(length(fc) == 0 || modularity(fc) == 0){
return ("NA||No communities were detected!");
}
# only get communities
communities <- communities(fc);
community.vec <- vector(mode="character", length=length(communities));
gene.community <- NULL;
qnum.vec <- NULL;
pval.vec <- NULL;
rowcount <- 0;
nms <- V(g)$name;
for(i in 1:length(communities)){
# update for igraph 1.0.1
path.ids <- communities[[i]];
psize <- length(path.ids);
if(psize < 5){
next; # ignore very small community
}
hits <- dataSet$seeds %in% path.ids;
qnums <- sum(hits);
if(qnums == 0){
next; # ignor community containing no queries
}
rowcount <- rowcount + 1;
pids <- paste(path.ids, collapse="->");
##path.sybls <- V(g)$name[path.inx];
#path.sybls <- sybls[path.ids];
com.mat <- cbind(path.ids, path.ids, rep(i, length(path.ids)));
gene.community <- rbind(gene.community, com.mat);
qnum.vec <- c(qnum.vec, qnums);
# calculate p values (comparing in- out- degrees)
#subgraph <- induced.subgraph(g, path.inx);
subgraph <- induced.subgraph(g, path.ids);
in.degrees <- degree(subgraph);
#out.degrees <- degree(g, path.inx) - in.degrees;
out.degrees <- degree(g, path.ids) - in.degrees;
ppval <- wilcox.test(in.degrees, out.degrees)$p.value;
ppval <- signif(ppval, 3);
pval.vec <- c(pval.vec, ppval);
# calculate community score
community.vec[rowcount] <- paste(c(psize, qnums, ppval, pids), collapse=";");
}
ord.inx <- order(pval.vec, decreasing=F);
community.vec <- community.vec[ord.inx];
qnum.vec <- qnum.vec[ord.inx];
ord.inx <- order(qnum.vec, decreasing=T);
community.vec <- community.vec[ord.inx];
all.communities <- paste(community.vec, collapse="||");
colnames(gene.community) <- c("Id", "Label", "Module");
fast.write.csv(gene.community, file="module_table.csv", row.names=F);
df <- convertModuleToDF(all.communities);
df <- df[,-c(3,5)];
#record table for report
type = "module";
infoSet <- readSet(infoSet, "infoSet");
infoSet$imgSet$enrTables[[type]] <- list()
infoSet$imgSet$enrTables[[type]]$table <- df;
df1<- data.frame(Size=df$Size,Pval=df$P.value)
infoSet$imgSet$enrTables[[type]]$res.mat <- df1;
infoSet$imgSet$enrTables[[type]]$library <- "";
infoSet$imgSet$enrTables[[type]]$algo <- method;
saveSet(infoSet,"infoSet");
return(all.communities);
}
convertModuleToDF <- function(dataString) {
# Split the string into lines based on the '||' separator
lines <- strsplit(dataString, "\\|\\|")[[1]]
# Initialize an empty list to store each row's data
rows <- list()
# Iterate over each line and split further by ';' to extract individual fields
i <- 1;
for (line in lines) {
parts <- strsplit(line, ";")[[1]]
if (length(parts) >= 4) { # Ensure there are enough parts to form a complete row
# Extract and store the data for this row
rows[[length(rows) + 1]] <- list(
Module = paste0("Module ", i),
Size = parts[1],
Name = as.numeric(parts[2]),
`P-value` = as.numeric(parts[3]),
IDs = parts[4]
)
i = i +1;
}
}
# Convert the list of rows into a dataframe
df <- do.call(rbind, lapply(rows, function(row) as.data.frame(row, stringsAsFactors = FALSE)))
# Column names are already set via the list names, so this line is actually redundant
return(df)
}
#' Get Network Topology
#' @export
GetNetworkTopology <- function(netnm){
library(igraph)
g <- mir.nets[[netnm]];
globalProperties <-list();
globalProperties[["Diameter"]] <-igraph::diameter(g);
globalProperties[["Radius"]] <-igraph::radius(g);
globalProperties[["Average path length"]] <-signif(igraph::mean_distance(g), 3);
globalProperties[["Clustering coefficient"]] <- signif(igraph::transitivity(g, type="global"), 3);
propertiesVector <- c(globalProperties[[1]], globalProperties[[2]], globalProperties[[3]], globalProperties[[4]]);
print(propertiesVector);
return(propertiesVector);
}
#' Plot Degree Histogram
#' @export
PlotDegreeHistogram <- function(imgNm, netNm = "NA", dpi=72, format="png"){
library(igraph)
dpi<-as.numeric(dpi)
#imgNm <- paste(imgNm, "dpi", dpi, ".", format, sep="");
Cairo(file=imgNm, width=400, height=400, type="png", bg="white");
library(ggplot2)
if(netNm != "NA"){
current.mirnet <- mir.nets[[netNm]];
}
G.degrees <- igraph::degree(current.mirnet)
G.degree.histogram <- as.data.frame(table(G.degrees))
G.degree.histogram[,1] <- as.numeric(G.degree.histogram[,1])
p <- ggplot(G.degree.histogram, aes(x = G.degrees, y = Freq)) +
geom_point() +
scale_x_continuous("Degree\n(nodes containing that amount of connections)",
breaks = c(1, 3, 10, 30, 100, 300),
trans = "log10") +
scale_y_continuous("Frequency\n(number of nodes)",
breaks = c(1, 3, 10, 30, 100, 300, 1000),
trans = "log10") +
ggtitle("Degree Distribution (log-log)") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
print(p)
dev.off();
dataSet$imgSet$degreeHistogram <- list();
dataSet$imgSet$degreeHistogram$netName <- netNm;
dataSet <<- dataSet;
}
#' Plot Betweenness Histogram
#' @export
PlotBetweennessHistogram <- function(imgNm, netNm = "NA",dpi=72, format="png"){
library(igraph);
dpi<-as.numeric(dpi)
#imgNm <- paste(imgNm, "dpi", dpi, ".", format, sep="");
Cairo(file=imgNm, width=400, height=400, type="png", bg="white");
library(ggplot2)
if(netNm != "NA"){
current.mirnet <- mir.nets[[netNm]];
}
G.degrees <- igraph::betweenness(current.mirnet)
G.degree.histogram <- as.data.frame(table(G.degrees))
G.degree.histogram[,1] <- as.numeric(G.degree.histogram[,1])
p <- ggplot(G.degree.histogram, aes(x = G.degrees, y = Freq)) +
geom_point() +
scale_x_continuous("Betweenness\n(nodes with that amount of betweenness)",
breaks = c(1, 3, 10, 30, 100, 300,1000,3000,10000,30000),
trans = "log10") +
scale_y_continuous("Frequency\n(number of nodes)",
breaks = c(1, 3, 10, 30, 100, 300, 1000),
trans = "log10") +
ggtitle("Betweenness Distribution (log-log)") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
print(p)
dev.off();
dataSet$imgSet$betwennessHistogram <- list();
dataSet$imgSet$betwennessHistogram$netName <- netNm;
dataSet <<- dataSet;
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.