##################################################
## R scripts for NetworkAnalyst
## Description: biological network analysis methods
## Author: Jeff Xia, jeff.xia@mcgill.ca
###################################################
# table.nm is the org code used for sqlite table (ppi)
# for chem type, table.nm is drugbank or ctd
# note, last two param only for STRING database
BuildMultiNet <- function(tblNm1="NA", tblNm2="NA", tblNm3="NA", tblNm4="NA", order=1){
opts.vec <- c(tblNm1, tblNm2, tblNm3, tblNm4);
opts.vec <- strsplit(opts.vec, "__");
tbl.vec <- vector();
dbNm.vec <- vector();
for(i in 1:length(opts.vec)){
dbNm.vec[i] <- opts.vec[[i]][1];
tbl.vec[i] <- opts.vec[[i]][2];
}
node.resu <<- "empty";
edge.resu <<- "empty";
res.list <- list();
for ( i in 1:length(tbl.vec)){
if(tbl.vec[i] != "NA"){
net.type <<- tbl.vec[i];
if(dbNm.vec[i] == "ppi"){
tblNm <- paste0(data.org, "_", tbl.vec[i]);
}else{
tblNm <- tbl.vec[i];
}
res = SearchNetDB(dbNm.vec[i], tblNm, require.exp=TRUE, min.score = 900, order=1);
res.list[[i]] = res;
}
}
return(res);
}
#' Query database using input list or previously computed network
#'
#' @param db.type Input the name of the created dataSetObj (see Init.Data)
#' @param type Network type (gene, met, mir, tf, mic, peak, m2m, snp)
#' @param dbType Database name (i.e innatedb)
#' @param inputType Omics type of input features (gene, protein, met, tf, mic, peak, snp)
#'
#' @export
#'
SearchNetDB <- function(db.type, table.nm, require.exp=TRUE, min.score = 900, order=1){
regids<<- "";
db.typeu <<- db.type;
result.list <- .prepareSigProteinJSON();
paramSet <- readSet(paramSet, "paramSet");
data.org <- paramSet$data.org;
sqlite.path <- paramSet$sqlite.path;
lib.path <- paramSet$lib.path;
net.type <- gsub(paste0(data.org,"_"),'',table.nm);
SetNetType(net.type);
protein.vec <- result.list$protein.vec; # this actually is entrez IDs?
seed.proteins <<- protein.vec;
# now do the database search
if(db.type == "ppi"){
protein.vec <- doPpiIDMapping(sqlite.path, protein.vec, data.org);
res <- QueryPpiSQLite(sqlite.path, table.nm, protein.vec, require.exp, min.score);
# no hits
if(nrow(res)==0){ return(c(0,0)); }
edge.res <- data.frame(Source=res[,1],Target=res[,2]);
row.names(edge.res) <- 1:nrow(res);
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
node.ids <- c(res[,1], res[,2])
node.nms <- c(res[,3], res[,4]);
node.types <- c(rep("Protein", nrow(res)), rep("Protein", nrow(res)))
}else if(db.type == "tf"){
table.nm <- paste(data.org,table.nm, sep="_");
res <- QueryTFSQLite(sqlite.path, table.nm, protein.vec);
# no hits
if(nrow(res)==0){ return(c(0,0)); }
edge.res <- data.frame(Source=res[,"entrez"],Target=res[,"tfid"]);
row.names(edge.res) <- 1:nrow(res);
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
node.ids <- c(res[,"entrez"], res[,"tfid"])
node.nms <- c(res[,"symbol"], res[,"tfname"]);
node.types <- c(rep("Protein", nrow(res)), rep("TF", nrow(res)))
}else if(db.type == "crossppi"){
res <- QueryOtherPpiSQLite(sqlite.path, table.nm, protein.vec, data.org);
# no hits
if(nrow(res)==0){ return(c(0,0)); }
edge.res <- data.frame(Source=res[,"id1"],Target=res[,"id2"]);
row.names(edge.res) <- 1:nrow(res);
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
node.ids <- c(res[,"id1"], res[,"id2"])
node.nms <- c(res[,"name1"], res[,"name2"]);
node.types <- c(rep("Protein", nrow(res)), rep("Foreign_Protein", nrow(res)))
if(table.nm == "microbiolink1"){
edge.extra.info <- data.frame(domain=c(res[,"PFAM_domain1"], res[,"PFAM_domain2"]))
node.extra.info <- data.frame(id=node.ids, species=c(rep("Human", nrow(res)), res[,"species2"]), domain=c(res[,"PFAM_domain1"], res[,"PFAM_domain2"]))
node.infoU <<- node.extra.info[!duplicated(node.extra.info$id),];
}else if(table.nm == "microbiolink2"){
edge.extra.info <- data.frame(domain=c(res[,"linear_motif1"], res[,"PFAM_domain2"]))
node.extra.info <- data.frame(id=node.ids, species=c(rep("Human", nrow(res)), res[,"species2"]), domain=c(res[,"linear_motif1"], res[,"PFAM_domain2"]))
node.infoU <<- node.extra.info[!duplicated(node.extra.info$id),];
}
}else if(db.type == "signal"){
res <- QueryOtherPpiSQLite(sqlite.path,table.nm, protein.vec, data.org);
# no hits
if(nrow(res)==0){ return(c(0,0)); }
edge.res <- data.frame(Source=res[,"id1"],Target=res[,"id2"], Direction=res[, "EFFECT"], Mechanism=res[,"MECHANISM"]);
row.names(edge.res) <- 1:nrow(res);
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
node.ids <- c(res[,"id1"], res[,"id2"])
node.nms <- c(res[,"name1"], res[,"name2"]);
edge.extra.info <- res[,-c(1:4)];
node.extra.info <- c(res[,"TYPEA"], res[,"TYPEB"])
node.infoU <<- node.extra.info
node.types <- c(res[,"TYPEA"], res[,"TYPEB"])
}else if(db.type == "mir"){ # in miRNA, table name is org code, colname is id type
db.nm <- table.nm;
res <- QueryMirSQLite(sqlite.path, data.org, "entrez", protein.vec, db.nm);
# no hits
if(nrow(res)==0){ return(c(0,0)); }
edge.res <- data.frame(Source=res[,"entrez"],Target=res[,"mir_acc"]);
row.names(edge.res) <- 1:nrow(res);
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
node.ids <- c(res[,"entrez"], res[,"mir_acc"])
node.nms <- c(res[,"symbol"], res[,"mir_id"]);
node.types <- c(rep("Protein", nrow(res)), rep("miRNA", nrow(res)))
}else if(db.type == "drug"){
# note, all drug data is on human,
protein.vec <- doEntrez2UniprotMapping(protein.vec);
protein.vec <- protein.vec[!is.na(protein.vec)];
res <- QueryDrugSQLite(sqlite.path, protein.vec);
# no hits
if(nrow(res)==0){ return(c(0,0)); }
entrez.vec <- .doGeneIDMapping(res[,"upid"], "uniprot", data.org, "vec");
edge.res <- data.frame(Source=entrez.vec,Target=res[,"dbid"]);
row.names(edge.res) <- 1:nrow(res);
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
node.ids <- c(entrez.vec, res[,"dbid"]);
node.nms <- c(res[,"symbol"], res[,"dbname"]);
node.types <- c(rep("Protein", nrow(res)), rep("Drug", nrow(res)));
protein.vec <- .doGeneIDMapping(protein.vec, "uniprot", data.org, "vec");
}else if(db.type == "disease"){
res <- QueryDiseaseSQLite(sqlite.path, protein.vec);
# no hits
if(nrow(res)==0){ return(c(0,0)); }
edge.res <- data.frame(Source=res[,"entrez"],Target=res[,"diseaseId"]);
row.names(edge.res) <- 1:nrow(res);
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
node.ids <- c(res[,"entrez"], res[,"diseaseId"])
node.nms <- c(res[,"symbol"], res[,"diseaseName"]);
node.types <- c(rep("Protein", nrow(res)), rep("Disease", nrow(res)))
}else if(db.type == "tfmir"){
res <- QueryTfmirSQLite(sqlite.path, protein.vec, data.org);
# no hits
if(nrow(res)==0){ return(c(0,0)); }
edge.res <- data.frame(Source=res[,"id1"],Target=res[,"id2"]);
row.names(edge.res) <- 1:nrow(res);
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
node.ids <- c(res[,"id1"], res[,"id2"])
node.nms <- c(res[,"name1"], res[,"name2"]);
node.types <- c(rep("Protein", 2*nrow(res)))
db.path <- paste(lib.path, data.org, "/mirlist.rds", sep="");
db.map <- readRDS(db.path);
tf.inx <- node.ids %in% edge.res[,"Source"];
mir.inx <- node.ids %in% db.map[,1];
node.types[tf.inx] <- "TF";
node.types[mir.inx] <- "miRNA";
}else if(db.type == "cellcoex"){
res <- QueryCellCoexSQLite(sqlite.path, protein.vec, data.org);
# no hits
if(nrow(res)==0){ return(c(0,0)); }
edge.res <- data.frame(Source=res[,"id1"],Target=res[,"id2"]);
row.names(edge.res) <- 1:nrow(res);
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
node.ids <- c(res[,"id1"], res[,"id2"])
node.nms <- c(res[,"name1"], res[,"name2"]);
node.types <- c(rep("Protein", nrow(res)), rep("Protein", nrow(res)))
}else if(db.type == "tissueppi"){
res <- QueryDiffNetSQLite(sqlite.path, protein.vec);
# no hits
if(nrow(res)==0){ return(c(0,0)); }
edge.res <- data.frame(Source=res[,"id1"],Target=res[,"id2"]);
row.names(edge.res) <- 1:nrow(res);
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
node.ids <- c(res[,"id1"], res[,"id2"])
node.nms <- c(res[,"name1"], res[,"name2"]);
node.types <- c(rep("Protein", nrow(res)), rep("Protein", nrow(res)))
}else if(db.type == "tissuecoex"){
# note, all drug data is on human,
res <- QueryTissueCoexSQLite(sqlite.path, protein.vec, data.org);
# no hits
if(nrow(res)==0){ return(c(0,0)); }
edge.res <- data.frame(Source=res[,"id1"],Target=res[,"id2"]);
row.names(edge.res) <- 1:nrow(res);
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
node.ids <- c(res[,"id1"], res[,"id2"])
node.nms <- c(res[,"name1"], res[,"name2"]);
node.types <- c(rep("Protein", nrow(res)), rep("Protein", nrow(res)))
}else if(db.type == "chem"){
res <- QueryChemSQLite(sqlite.path, data.org, protein.vec);
if(nrow(res)==0){ return(c(0,0)); }
edge.res <- data.frame(Source=res[,"entrez"],Target=res[,"ctdid"]);
row.names(edge.res) <- 1:nrow(res);
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
node.ids <- c(res[,"entrez"], res[,"ctdid"])
node.nms <- c(res[,"symbol"], res[,"name"]);
node.types <- c(rep("Protein", nrow(res)), rep("Chemical", nrow(res)))
}else if(db.type == "met"){
table.nm <- paste(data.org, met.type, sep="_");
res <- QueryMetSQLiteNet(sqlite.path, table.nm, protein.vec, netInv);
if(nrow(res)==0){ return(c(0,0)); }
edge.res <- data.frame(Source=res[,"entrez"],Target=res[,"kegg"]);
row.names(edge.res) <- 1:nrow(res);
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
node.ids <- c(res[,"kegg"], res[,"entrez"])
node.nms <- c(res[,"met"], res[,"symbol"]);
node.types <- c(rep("Metabolite", nrow(res)), rep("Protein", nrow(res)))
}
if(!is.null(node.ids)){
seed.inx <- node.ids %in% unique(seed.proteins);
node.types[seed.inx] <- "Seed"
}
if(order == 0 && db.type == "ppi"){
keep.inx <- node.ids %in% protein.vec;
node.ids <- node.ids[keep.inx];
node.nms <- node.nms[keep.inx];
node.types <- node.types[keep.inx];
keep.inx <- edge.res[,1] %in% protein.vec;
edge.res <- edge.res[keep.inx,];
keep.inx <- edge.res[,2] %in% protein.vec;
edge.res <- edge.res[keep.inx,];
}
node.res <- data.frame(Id=node.ids, Label=node.nms, Types=node.types);
node.res <- node.res[!duplicated(node.res$Id),];
if( grepl("microbiolink", table.nm, fixed = TRUE) || db.type == "signal"){
edge.res <- cbind(edge.res, edge.extra.info);
edge.infoU <<- edge.res;
}
nodeListu <<- node.res;
table.nmu <<- table.nm;
fast.write(node.res, file="orig_node_list.csv", row.names=FALSE);
ppi.net <<- list(
db.type=db.type,
order=1,
seeds=protein.vec,
table.nm=table.nm,
node.data = node.res,
edge.data = edge.res,
require.exp = require.exp,
min.score = min.score
);
paramSet$nets[[db.type]] <- ppi.net
saveSet(paramSet, "paramSet");
analSet$ppi.net <- ppi.net;
output <- c(nrow(node.res), nrow(res))
return(saveSet(analSet, "analSet", output))
}
.prepareSigProteinJSON <- function(){
paramSet <- readSet(paramSet, "paramSet");
anal.type <- paramSet$anal.type;
if(anal.type == "genelist"){
result.list <- .prepareListSeeds();
}else{ # single expression data or meta.mat
result.list <- .prepareExpressSeeds();
}
return(result.list);
}
.prepareListSeeds <- function(){
#save.image("seed.RData");
protein.list <- list();
gene.list <- list();
paramSet <- readSet(paramSet, "paramSet");
selectedNetDataset <- paramSet$selectedNetDataset;
msgSet <- readSet(msgSet, "msgSet");
data.org <- paramSet$data.org;
mdata.all <- paramSet$mdata.all;
if(paramSet$numOfLists > 1){
if(selectedNetDataset %in% c("intersect","union")){
dataSet <- list();
dataSet$name <- selectedNetDataset
my.vec <- names(mdata.all);
com.ids <- NULL;
list.vec <- list()
for(i in 1:length(my.vec)){
datSet <- readDataset(my.vec[i]);
if(is.null(com.ids)){
com.ids <- datSet$GeneAnotDB[,"gene_id"];
prot.mat <- datSet$prot.mat
list.vec[[i]] <- com.ids
}else{
if(selectedNetDataset == "intersect"){
com.ids <- datSet$GeneAnotDB[,"gene_id"];
list.vec[[i]] <- com.ids
}else{
com.ids <- union(com.ids, datSet$GeneAnotDB[,"gene_id"]);
}
prot.mat <- rbind(prot.mat, as.matrix(datSet$prot.mat[rownames(datSet$prot.mat) %in% com.ids,]))
}
}
if(selectedNetDataset == "intersect"){
com.ids <- Reduce(intersect, list.vec)
prot.mat <- as.matrix(datSet$prot.mat[rownames(datSet$prot.mat) %in% com.ids,])
}else{
com.ids <- unique(as.character(com.ids[!is.na(com.ids)])); # make sure it is interpreted as name not index
}
com.symbols <- doEntrez2SymbolMapping(com.ids, data.org, paramSet$data.idType);
dataSet$GeneAnotDB <- data.frame(gene_id=com.ids, accession=com.symbols);
dataSet$prot.mat <- prot.mat;
dataSet$sig.mat <- prot.mat
dataSet$seeds.proteins <- com.ids
}else{
my.vec <- names(mdata.all);
# make sure reference is the first
inx <- which(my.vec == selectedNetDataset);
my.vec <- my.vec[-inx];
com.ids <- NULL;
ids.list <- list()
for(i in 1:length(my.vec)){
dataSet <- readDataset(my.vec[i]);
ids.list[[i]]=dataSet$GeneAnotDB[,"gene_id"]
}
dataSet <- readDataset(selectedNetDataset);
ids <- unique(unlist(ids.list));
com.ids <-setdiff(dataSet$GeneAnotDB[,"gene_id"], ids);
prot.mat <- as.matrix(dataSet$prot.mat[which(rownames(dataSet$prot.mat) %in% com.ids),])
com.symbols <- doEntrez2SymbolMapping(com.ids, data.org, paramSet$data.idType);
dataSet$GeneAnotDB <- data.frame(gene_id=com.ids, accession=com.symbols);
dataSet$prot.mat <- prot.mat;
dataSet$sig.mat <- prot.mat
dataSet$seeds.proteins <- com.ids
}
}else{
dataSet <- readDataset("datalist1");
}
# return a json array object
# each object for a single dataset its sig proteins
meta.vec <- meta.gene.vec <- meta.seed.expr <- NULL;
file.create("seed_proteins.txt");
GeneAnotDB <- NULL;
gene.mat <- dataSet$sig.mat;
prot.mat <- dataSet$prot.mat;
write(paste("#DataSet:", dataSet$name),file="sig_genes.txt",append=TRUE);
write.table(dataSet$sig.mat, file="sig_genes.txt", append=TRUE);
meta.gene.vec <- c(meta.gene.vec, rownames(gene.mat));
gene.list[[dataSet$name]] <- list(gene=rownames(gene.mat),logFC=unname(gene.mat[,1]));
GeneAnotDB <- rbind(GeneAnotDB, dataSet$GeneAnotDB);
meta.seed.expr <- c(meta.seed.expr, prot.mat[,1]);
write(paste("#DataSet:", dataSet$name),file="seed_proteins.txt",append=TRUE);
write.table(cbind(Emblprotein=rownames(prot.mat), Expression=prot.mat[,1]), file="seed_proteins.txt", row.names=F, quote=F,append=TRUE);
protein.vec <- prot.mat[,1];
meta.vec <- c(meta.vec, names(protein.vec));
if(length(protein.vec) == 1){
protein.vec <- as.matrix(protein.vec)
}
protein.list[[dataSet$name]] <- signif(protein.vec, 3);
gene.list$name <- dataSet$name;
paramSet$seed.genes <- unique(meta.gene.vec);
meta.seed.df <- as.matrix(meta.seed.expr);
rownames(meta.seed.df) <- names(meta.seed.expr);
res <- RemoveDuplicates(meta.seed.df, "max", quiet=F, paramSet, msgSet);
seed.expr <- res[[1]];
msgSet <- res[[2]];
paramSet$seed.expr <- seed.expr[,1];
protein.vec <- unique(meta.vec);
result <- list(
gene.list = gene.list,
protein.list = protein.list,
protein.vec = protein.vec
);
saveSet(paramSet, "paramSet");
return(result)
}
# 2nd order network, only for PPI
ExpandNetworkSearch <- function(){
paramSet <- readSet(paramSet, "paramSet");
data.org <- paramSet$data.org;
sqlite.path <- paramSet$sqlite.path
if(ppi.net$order == 0){
SearchNetDB(ppi.net$db.type, ppi.net$table.nm);
CreateGraph();
}
protein.vec <- V(overall.graph)$name;
table.nm <- ppi.net$table.nm;
if(db.typeu == "ppi"){
res <- QueryPpiSQLite(sqlite.path, table.nm, protein.vec, ppi.net$require.exp, ppi.net$min.score);
}else if (db.typeu == "tissuecoex"){
res <- QueryTissueCoexSQLite(sqlite.path, protein.vec, data.org);
}else if (db.typeu == "tissueppi"){
res <- QueryDiffNetSQLite(sqlite.path, protein.vec);
}else if (db.typeu == "cellcoex"){
res <- QueryCellCoexSQLite(sqlite.path, protein.vec, data.org);
}
edge.res <- data.frame(Source=res[,1],Target=res[,2]);
#row.names(edge.res) <- res[,5];
node.ids <- c(res[,1], res[,2])
node.nms <- c(res[,3], res[,4]);
node.res <- data.frame(Id=node.ids, Label=node.nms);
node.res <- node.res[!duplicated(node.res$Id),];
if(nrow(node.res) < 10000){ # only overwrite if within the range
fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
fast.write(node.res, file="orig_node_list.csv", row.names=FALSE);
ppi.net <<- list(db.type=db.typeu, order=2, seeds=protein.vec, table.nm=table.nm, node.data = node.res, edge.data = edge.res);
}
analSet$ppi.net <- ppi.net;
output <- nrow(node.res);
return(saveSet(analSet, "analSet", output));
}
# zero-order network - create ppi nets from only input (seeds)
#' Create network from only input (seeds)
#'
#' @export
#'
BuildSeedProteinNet <- function(){
nodes <- V(overall.graph)$name;
hit.inx <- nodes %in% seed.proteins;
nodes2rm <- nodes[!hit.inx];
g <- simplify(delete.vertices(overall.graph, nodes2rm));
nodeList <- get.data.frame(g, "vertices");
nodeList <- nodeList[,1:2];
colnames(nodeList) <- c("Id", "Label");
fast.write(nodeList, file="orig_node_list.csv");
nd.inx <- ppi.net$node.data[,1] %in% nodeList[,1];
edgeList <- get.data.frame(g, "edges");
edgeList <- edgeList[,1:2];
colnames(edgeList) <- c("Source", "Target");
fast.write(edgeList, file="orig_edge_list.csv");
# update ppi.net
ppi.net$order <- 0;
ppi.net$node.data <- nodeList;
ppi.net$edge.data <- edgeList;
ppi.net <<- ppi.net;
analSet$ppi.net <- ppi.net;
return(saveSet(analSet, "analSet"));
}
# create igraph from the edgelist saved from graph DB
# and decompose into subnets
#' Create igraph object from edgelist created from database selection and decompose into connected subnetworks
#'
#' @export
#'
CreateGraph <- function(){
require('igraph');
paramSet <- readSet(paramSet, "paramSet");
data.org <- paramSet$data.org;
seed.expr <- paramSet$seed.expr;
seed.genes <- paramSet$seed.genes;
node.list <- ppi.net$node.data;
edge.list <- ppi.net$edge.data;
seed.proteins <- ppi.net$seeds;
overall.graph <- simplify(graph.data.frame(edge.list, directed=FALSE, vertices=node.list));
if("domain" %in% colnames(edge.list)){
overall.graph <- simplify(graph.data.frame(edge.list, directed=F, vertices=node.list));
overall.graph <- set.vertex.attribute(overall.graph, "species", index = V(overall.graph), value = node.infoU$species[which(V(overall.graph)$name %in% node.infoU$id)]);
overall.graph <- set.vertex.attribute(overall.graph, "domain", index = V(overall.graph), value = node.infoU$domain[which(V(overall.graph)$name %in% node.infoU$id)]);
}else if("Direction" %in% colnames(edge.list)){
overall.graph <- simplify(graph.data.frame(edge.list, directed=F, vertices=node.list));
overall.graph <-set_edge_attr(overall.graph, "direction",index = E(overall.graph), value = as.character(edge.list$Direction))
overall.graph <-set_edge_attr(overall.graph, "mechanism",index = E(overall.graph), value = as.character(edge.infoU$Mechanism))
}else if("TYPEA" %in% colnames(edge.list)){
overall.graph <- simplify(graph.data.frame(edge.list, directed=F, vertices=node.list));
overall.graph <-set_edge_attr(overall.graph, "direction",index = E(overall.graph), value = as.character(edge.infoU$EFFECT))
overall.graph <-set_edge_attr(overall.graph, "mechanism",index = E(overall.graph), value = as.character(edge.list$Mechanism))
overall.graph <- set.vertex.attribute(overall.graph, "type", index = V(overall.graph), value = node.infoU);
}else {
overall.graph <- simplify(graph.data.frame(edge.list, directed=FALSE, vertices=node.list));
overall.graph <-set_edge_attr(overall.graph, "direction",index = E(overall.graph), value = "NA")
}
# add node expression value
if(ppi.net$db.type == "ppi"){
## Temp fix seed.expr all in entrez?
## all converted to new IDs based on PPI from the given organism
newIDs <- doPpiIDMapping(sqlite.path, names(seed.expr), data.org)
}else{# all entrez in mirNet
newIDs <- names(seed.expr);
}
match.index <- match(V(overall.graph)$name, newIDs);
expr.vals <- seed.expr[match.index];
# expr.vec <- abs(expr.vals) # logFC can be negative!!
expr.vec <- expr.vals;
names(expr.vec)<- V(overall.graph)$name;
expr.vec <<- expr.vec[!is.na(expr.vec)]
overall.graph <- set.vertex.attribute(overall.graph, "abundance", index = V(overall.graph), value = expr.vals);
hit.inx <- seed.proteins %in% node.list[,1];
seed.proteins <<- seed.proteins[hit.inx];
analSet <- DecomposeGraph(overall.graph,analSet);
substats <- analSet$substats;
overall.graph <<- overall.graph;
analSet$overall.graph <- overall.graph;
if(!is.null(substats)){
output <- c(length(seed.genes), length(seed.proteins), nrow(node.list), nrow(edge.list), length(ppi.comps), substats);
}else{
output <- 0;
}
return( saveSet(analSet, "analSet", output) );
}
#' Filter bipartite network by degree and/or betweenness
#'
#' @export
#'
FilterBipartiNet <- function(nd.type, min.dgr, min.btw){
paramSet <- readSet(paramSet, "paramSet");
seed.genes <- paramSet$seed.genes;
all.nms <- V(overall.graph)$name;
edge.mat <- get.edgelist(overall.graph);
dgrs <- degree(overall.graph);
nodes2rm.dgr <- nodes2rm.btw <- NULL;
if(nd.type == "gene"){
hit.inx <- all.nms %in% edge.mat[,1];
}else if(nd.type=="other"){
hit.inx <- all.nms %in% edge.mat[,2];
}else{ # all
hit.inx <- rep(TRUE, length(all.nms));
}
if(min.dgr > 0){
rm.inx <- dgrs <= min.dgr & hit.inx;
nodes2rm.dgr <- V(overall.graph)$name[rm.inx];
}
if(min.btw > 0){
btws <- betweenness(overall.graph);
rm.inx <- btws <= min.btw & hit.inx;
nodes2rm.btw <- V(overall.graph)$name[rm.inx];
}
nodes2rm <- unique(c(nodes2rm.dgr, nodes2rm.btw));
overall.graph <- simplify(delete.vertices(overall.graph, nodes2rm));
current.msg <<- paste("A total of", length(nodes2rm) , "was reduced.");
analSet <- DecomposeGraph(overall.graph,analSet);
substats <- analSet$substats;
if(!is.null(substats)){
overall.graph <<- overall.graph;
output <- c(length(seed.genes),length(seed.proteins), vcount(overall.graph), ecount(overall.graph), length(ppi.comps), substats);
}else{
output <- 0;
}
analSet$overall.graph <- overall.graph;
return(saveSet(analSet, "analSet", output));
}
#' Prepare the json file for network visualization
#'
#' @param net.nm Name of subnetwork created from network building (i.e subnetwork1)
#' @param json.nm Name of json file to be exported for network visualization
#'
#' @export
#'
PrepareNetwork <- function(net.nm, json.nm){
analSet <- readSet(analSet, "analSet");
my.ppi <- analSet$ppi.comps[[net.nm]];
nd.nms <- V(my.ppi)$name;
analSet <- convertIgraph2JSON(net.nm, json.nm);
current.net.nm <<- net.nm;
return(saveSet(analSet, "analSet", 1));
}
#' Prepare the json file for network visualization from user uploaded graph file
#'
#' @param net.nm Name of subnetwork created from network building (i.e subnetwork1)
#' @param json.nm Name of json file to be exported for network visualization
#'
#' @export
#'
PrepareNetworkUpload <- function(net.nm, json.nm){
my.ppi <- ppi.comps[[net.nm]];
nd.nms <- V(my.ppi)$name;
expr.vec <<- rep(0, length(nd.nms))
table.nmu <<- "NA"
convertIgraph2JSONFromFile(net.nm, json.nm, data.idType);
current.net.nm <<- net.nm;
return(saveSet(analSet, "analSet", 1));
}
GetNodeIDs <- function(){
V(overall.graph)$name;
}
GetNodeNames <- function(){
V(overall.graph)$Label;
}
GetNodeDegrees <- function(){
degree(overall.graph);
}
GetNodeBetweenness <- function(){
round(betweenness(overall.graph, directed=F, normalized=F), 2);
}
#' Compute minimum connect subnetwork based on input nodes using prize-collecting steiner forest approach
#'
#' @return check overall.graph object for result
#' @export
#'
ComputePCSFNet <- function(){
paramSet <- readSet(paramSet, "paramSet");
seed.genes <- paramSet$seed.genes;
edg <- as.data.frame(get.edgelist(overall.graph));
edg$V3 <- rep(1, nrow(edg));
colnames(edg) <- c("from", "to", "cost");
node_names <- unique(c(as.character(edg[,1]),as.character(edg[,2])))
ppi <- graph.data.frame(edg[,1:2],vertices=node_names,directed=F)
E(ppi)$weight <- as.numeric(edg[,3])
ppi <- simplify(ppi)
if(sum(expr.vec) == 0){ # make sure weights are not 0?!
expr.vec = expr.vec +1
}
g <- Compute.SteinerForest(ppi, expr.vec, w = 5, b = 100, mu = 0.0005);
nodeList <- get.data.frame(g, "vertices");
colnames(nodeList) <- c("Id", "Label");
write.csv(nodeList, file="orig_node_list.csv", row.names=F, quote=F);
edgeList <- get.data.frame(g, "edges");
edgeList <- cbind(rownames(edgeList), edgeList);
colnames(edgeList) <- c("Id", "Source", "Target");
write.csv(edgeList, file="orig_edge_list.csv", row.names=F, quote=F);
path.list <- NULL;
analSet <- DecomposeGraph(g,analSet);
substats <- analSet$substats;
if(!is.null(substats)){
overall.graph <<- g;
current.msg<<- "Steiner Forest was completed successfully";
output <- c(length(seed.genes),length(seed.proteins), nrow(nodeList), nrow(edgeList), length(ppi.comps), substats);
}else{
output <- 0;
}
analSet$overall.graph <- overall.graph;
return(saveSet(analSet, "analSet", output));
}
# Adapted from PCSF
# https://github.com/IOR-Bioinformatics/PCSF
Compute.SteinerForest <- function(ppi, terminals, w = 2, b = 1, mu = 0.0005, dummies){
# Gather the terminal genes to be analyzed, and their scores
terminal_names <- names(terminals)
terminal_values <- as.numeric(terminals)
# Incorporate the node prizes
node_names <- V(ppi)$name
node_prz <- vector(mode = "numeric", length = length(node_names))
index <- match(terminal_names, node_names)
percent <- signif((length(index) - sum(is.na(index)))/length(index)*100, 4)
if (percent < 5){
print("Less than 1% of your terminal nodes are matched in the interactome!");
return(NULL);
}
paste0(" ", percent, "% of your terminal nodes are included in the interactome\n");
terminal_names <- terminal_names[!is.na(index)]
terminal_values <- terminal_values[!is.na(index)]
index <- index[!is.na(index)]
node_prz[index] <- terminal_values
if(missing(dummies)||is.null(dummies)||is.na(dummies)){
dummies <- terminal_names #re-assign this to allow for input
}
## Prepare input file for MST-PCSF implementation in C++
# Calculate the hub penalization scores
node_degrees <- igraph::degree(ppi)
hub_penalization <- - mu*node_degrees
# Update the node prizes
node_prizes <- b*node_prz
index <- which(node_prizes==0)
node_prizes[index] <- hub_penalization[index]
# Construct the list of edges
edges <- ends(ppi,es = E(ppi))
from <- c(rep("DUMMY", length(dummies)), edges[,1])
to <- c(dummies, edges[,2])
cost <- c(rep(w, length(dummies)), E(ppi)$weight)
#PCSF will faill if there are NAs in weights, this will check and fail gracefully
if(any(is.na(E(ppi)$weight))){
print("NAs found in the weight vector!");
return (NULL);
}
## Feed the input into the PCSF algorithm
output <- XiaLabCppLib::call_sr(from,to,cost,node_names,node_prizes)
# Check the size of output subnetwork and print a warning if it is 0
if(length(output[[1]]) != 0){
# Contruct an igraph object from the MST-PCSF output
e <- data.frame(output[[1]], output[[2]], output[[3]])
e <- e[which(e[,2]!="DUMMY"), ]
names(e) <- c("from", "to", "weight")
# Differentiate the type of nodes
type <- rep("Steiner", length(output[[4]]))
index <- match(terminal_names, output[[4]])
index <- index[!is.na(index)]
type[index] <- "Terminal"
v <- data.frame(output[[4]], output[[5]], type)
names(v) <- c("terminals", "prize", "type")
subnet <- graph.data.frame(e,vertices=v,directed=F)
E(subnet)$weight <- as.numeric(output[[3]])
subnet <- delete_vertices(subnet, "DUMMY")
subnet <- delete_vertices(subnet, names(which(degree(subnet)==0)));
return(subnet)
} else{
print("Subnetwork can not be identified for a given parameter set")
return(NULL);
}
}
# for a given graph, obtain the smallest subgraphs that contain
# all the seed nodes. This is acheived by iteratively remove
# the marginal nodes (degree = 1) that are not in the seeds
#' Compute minimum connected network composed of seed nodes using shortest path based approach
#'
#' @param dataSetObj Input the name of the created dataSetObj (see Init.Data)
#' @param max.len Maximum number of seeds, if more than this number, take top 100 seed nodes based on degrees
#'
#' @return check overall.graph object for result
#' @export
#'
GetMinConnectedGraphs <- function(max.len = 200){
set.seed(8574);
paramSet <- readSet(paramSet, "paramSet");
seed.genes <- paramSet$seed.genes;
# first get shortest paths for all pair-wise seeds
my.seeds <- seed.proteins;
sd.len <- length(my.seeds);
paths.list <-list();
# first trim overall.graph to remove no-seed nodes of degree 1
dgrs <- degree(overall.graph);
keep.inx <- dgrs > 1 | (names(dgrs) %in% my.seeds);
nodes2rm <- V(overall.graph)$name[!keep.inx];
overall.graph <- simplify(delete.vertices(overall.graph, nodes2rm));
# need to restrict the operation b/c get.shortest.paths is very time consuming
# for top max.len highest degrees
if(sd.len > max.len){
hit.inx <- names(dgrs) %in% my.seeds;
sd.dgrs <- dgrs[hit.inx];
sd.dgrs <- rev(sort(sd.dgrs));
# need to synchronize all (seed.proteins) and top seeds (my.seeds)
seed.proteins <- names(sd.dgrs);
if(max.len>table(hit.inx)[["TRUE"]]){
sd.len <- table(hit.inx)[["TRUE"]];
}else{
sd.len <- max.len;
}
my.seeds <- seed.proteins[1:sd.len];
current.msg <<- paste("The minimum connected network was computed using the top", sd.len, "seed proteins in the network based on their degrees.");
}else{
current.msg <<- paste("The minimum connected network was computed using all seed proteins in the network.");
}
# now calculate the shortest paths for
# each seed vs. all other seeds (note, to remove pairs already calculated previously)
for(pos in 1:sd.len){
paths.list[[pos]] <- get.shortest.paths(overall.graph, my.seeds[pos], seed.proteins[-(1:pos)])$vpath;
}
nds.inxs <- unique(unlist(paths.list));
nodes2rm <- V(overall.graph)$name[-nds.inxs];
g <- simplify(delete.vertices(overall.graph, nodes2rm));
nodeList <- get.data.frame(g, "vertices");
colnames(nodeList) <- c("Id", "Label");
fast.write(nodeList, file="orig_node_list.csv", row.names=F);
edgeList <- get.data.frame(g, "edges");
edgeList <- cbind(rownames(edgeList), edgeList);
colnames(edgeList) <- c("Id", "Source", "Target");
fast.write(edgeList, file="orig_edge_list.csv", row.names=F);
path.list <- NULL;
analSet <- DecomposeGraph(g,analSet);
substats <- analSet$substats;
net.stats<<-net.stats
if(!is.null(substats)){
overall.graph <<- g;
output <- c(length(seed.genes),length(seed.proteins), nrow(nodeList), nrow(edgeList), length(ppi.comps), substats);
}else{
output <- 0;
}
analSet$overall.graph <- overall.graph;
return(saveSet(analSet, "analSet", output));
}
UpdateSubnetStats <- function(){
old.nms <- names(ppi.comps);
net.stats <- ComputeSubnetStats(ppi.comps);
ord.inx <- order(net.stats[,1], decreasing=TRUE);
net.stats <- net.stats[ord.inx,];
rownames(net.stats) <- old.nms[ord.inx];
net.stats <<- net.stats;
}
# exclude nodes in current.net (networkview)
ExcludeNodes <- function(nodeids, filenm){
nodes2rm <- strsplit(nodeids, ";")[[1]];
current.net <- ppi.comps[[current.net.nm]];
current.net <- delete.vertices(current.net, nodes2rm);
# need to remove all orphan nodes
bad.vs<-V(current.net)$name[degree(current.net) == 0];
current.net <- delete.vertices(current.net, bad.vs);
# return all those nodes that are removed
nds2rm <- paste(c(bad.vs, nodes2rm), collapse="||");
# update topo measures
node.btw <- as.numeric(betweenness(current.net));
node.dgr <- as.numeric(degree(current.net));
node.exp <- as.numeric(get.vertex.attribute(current.net, name="abundance", index = V(current.net)));
nms <- V(current.net)$name;
hit.inx <- match(nms, ppi.net$node.data[,1]);
lbls <- ppi.net$node.data[hit.inx,2];
nodes <- vector(mode="list");
for(i in 1:length(nms)){
nodes[[i]] <- list(
id=nms[i],
label=lbls[i],
degree=node.dgr[i],
between=node.btw[i],
expr = node.exp[i]
);
}
# now only save the node pos to json
netData <- list(deletes=nds2rm,nodes=nodes);
sink(filenm);
cat(RJSONIO::toJSON(netData));
sink();
ppi.comps[[current.net.nm]] <<- current.net;
UpdateSubnetStats();
# remember to forget the cached layout, and restart caching, as this is now different object (with the same name)
#forget(PerformLayOut_mem);
return(filenm);
}
# exclude nodes in overall net (network builder)
ExcludeNodesOverall <- function(nodeids, id.type, vismode){
paramSet <- readSet(paramSet, "paramSet");
seed.genes <- paramSet$seed.genes;
data.org <- paramSet$data.org;
# all convert to uniprot ID
lines <- strsplit(nodeids, "\r|\n|\r\n")[[1]];
lines<- sub("^[[:space:]]*(.*?)[[:space:]]*$", "\\1", lines, perl=TRUE);
if(vismode != "network"){
prot.anots <- .doGeneIDMapping(lines, id.type, data.org, "matrix");
nodes2rm <- unique(prot.anots$accession);
}else{
prot.anots <- lines
nodes2rm <- unique(lines);
}
# now find the overlap
nodes2rm <- nodes2rm[nodes2rm %in% V(overall.graph)$name];
g <- delete.vertices(overall.graph, nodes2rm);
nodeList <- get.data.frame(g, "vertices");
colnames(nodeList) <- c("Id", "Label");
fast.write(nodeList, file="orig_node_list.csv", row.names=F);
edgeList <- get.data.frame(g, "edges");
edgeList <- cbind(rownames(edgeList), edgeList);
colnames(edgeList) <- c("Id", "Source", "Target");
fast.write(edgeList, file="orig_edge_list.csv", row.names=F);
analSet <- DecomposeGraph(g,analSet);
substats <- analSet$substats;
if(!is.null(substats)){
overall.graph <<- g;
output <- c(length(seed.genes),length(seed.proteins), nrow(nodeList), nrow(edgeList), length(ppi.comps), substats);
}else{
output <- 0;
}
analSet$overall.graph <- g;
return(saveSet(analSet,"analSet", output));
}
PrepareSubnetDownloads <- function(nm){
g <- ppi.comps[[nm]];
# need to update graph so that id is compound names rather than ID
V(g)$name <- as.character(doID2LabelMapping(V(g)$name));
saveNetworkInSIF(g, nm);
}
# adapted from BioNet
saveNetworkInSIF <- function(network, name){
edges <- .graph.sif(network=network, file=name);
sif.nm <- paste(name, ".sif", sep="");
if(length(list.edge.attributes(network))!=0){
edge.nms <- .graph.eda(network=network, file=name, edgelist.names=edges);
sif.nm <- c(sif.nm, edge.nms);
}
if(length(list.vertex.attributes(network))!=0){
node.nms <- .graph.noa(network=network, file=name);
sif.nm <- c(sif.nm, node.nms);
}
# need to save all sif and associated attribute files into a zip file for download
zip(paste(name,"_sif",".zip", sep=""), sif.nm);
}
# internal function to write cytoscape .sif file
.graph.sif <- function(network, file){
edgelist.names <- igraph::get.edgelist(network, names=TRUE)
edgelist.names <- cbind(edgelist.names[,1], rep("pp", length(E(network))), edgelist.names[,2]);
write.table(edgelist.names, row.names=FALSE, col.names=FALSE, file=paste(file, ".sif", sep=""), sep="\t", quote=FALSE)
return(edgelist.names)
}
doID2LabelMapping <- function(entrez.vec){
if(exists("nodeListu")){
hit.inx <- match(entrez.vec, nodeListu[, "Id"]);
symbols <- nodeListu[hit.inx, "Label"];
# if not gene symbol, use id by itself
na.inx <- is.na(symbols);
symbols[na.inx] <- entrez.vec[na.inx];
return(symbols);
}else{ # network upload
return(entrez.vec);
}
}
# internal method to write cytoscape node attribute files
.graph.noa <- function(network, file){
all.nms <- c();
attrib <- list.vertex.attributes(network)
for(i in 1:length(attrib)){
if(is(get.vertex.attribute(network, attrib[i]))[1] == "character")
{
type <- "String"
}
if(is(get.vertex.attribute(network, attrib[i]))[1] == "integer")
{
type <- "Integer"
}
if(is(get.vertex.attribute(network, attrib[i]))[1] == "numeric")
{
type <- "Double"
}
noa <- cbind(V(network)$name, rep("=", length(V(network))), get.vertex.attribute(network, attrib[i]))
first.line <- paste(attrib[i], " (class=java.lang.", type, ")", sep="")
file.nm <- paste(file, "_", attrib[i], ".NA", sep="");
write(first.line, file=file.nm, ncolumns = 1, append=FALSE, sep=" ")
write.table(noa, row.names = FALSE, col.names = FALSE, file=file.nm, sep=" ", append=TRUE, quote=FALSE);
all.nms <- c(all.nms, file.nm);
}
return(all.nms);
}
# internal method to write cytoscape edge attribute files
.graph.eda <- function(network, file, edgelist.names){
all.nms <- c();
attrib <- list.edge.attributes(network)
for(i in 1:length(attrib)){
if(is(get.edge.attribute(network, attrib[i]))[1] == "character")
{
type <- "String"
}
if(is(get.edge.attribute(network, attrib[i]))[1] == "integer")
{
type <- "Integer"
}
if(is(get.edge.attribute(network, attrib[i]))[1] == "numeric")
{
type <- "Double"
}
eda <- cbind(cbind(edgelist.names[,1], rep("(pp)", length(E(network))), edgelist.names[,3]), rep("=", length(E(network))), get.edge.attribute(network, attrib[i]))
first.line <- paste(attrib[i], " (class=java.lang.", type, ")", sep="");
file.nm <- paste(file, "_", attrib[i], ".EA", sep="");
write(first.line, file=file.nm, ncolumns=1, append=FALSE, sep =" ")
write.table(eda, row.names = FALSE, col.names = FALSE, file=file.nm, sep=" ", append=TRUE, quote=FALSE);
all.nms <- c(all.nms, file.nm);
}
return(all.nms);
}
SetCellCoexNumber <-function(num){
cellCoexNumber <<- num;
}
SetDiffNetName <-function(nm){
diffNetName <<- nm;
}
SetDiffFilter <-function(pct){
diffPct <<- pct/10;
}
# note: hit.query, resTable must synchronize
#' Perform gene enrichment analysis or identify gene regulatory targets
#'
#' @param file.nm File name of result table to be exported in csv format, do not include file extension
#' @param fun.type Enrichment database type
#' @param IDs String of ids to be tested separated by "; "
#'
#' @export
#'
PerformNetEnrichment <- function(dataName="", file.nm, fun.type, IDs){
dataSet <- readDataset(dataName);
paramSet <- readSet(paramSet, "paramSet");
data.org <- paramSet$data.org;
# prepare query
ora.vec <- NULL;
if(ppi.net$db.type == 'ppi'){
if(data.org == "ath"){
idtype <- "entrez"
}else if(data.org %in% c("bsu", "tbr", "cel", "dme", "eco", "pfa") & net.type == "string"){
idtype <- "string"
}else if(data.org %in% c("bta","dre","rno","gga") & net.type == "string"){
idtype <- "entrez";
}else if(data.org %in% c("hsa","mmu", "pae") & net.type %in% c("string","innate", "huri", "interactome")){
idtype <- "entrez";
}else if(data.org %in% c("hsa","mmu", "cel", "dme","sce") & net.type %in% c("irefinx", "rolland")){
idtype <- "uniprot";
}else if(data.org == "sce" & net.type == "string"){ # only for yeast
idtype <- "embl_gene";
}
id.vec <- unlist(strsplit(IDs, "; "));
if(idtype=="uniprot"){
ora.vec <- .doGeneIDMapping(id.vec, "uniprot", data.org, "vec")
names(ora.vec) <- id.vec;
}else if(idtype=="embl_protein"){
ora.vec <- .doGeneIDMapping(id.vec, "embl_protein", data.org, "vec")
names(ora.vec) <- id.vec;
}else if(idtype=="string"){
ora.vec <- .doGeneIDMapping(id.vec, "string", data.org, "vec")
names(ora.vec) <- id.vec;
}else if(idtype=="embl_gene"){
ora.vec <- .doGeneIDMapping(id.vec, "embl_gene", data.org, "vec")
names(ora.vec) <- id.vec;
}else{
ora.vec <- id.vec;
names(ora.vec) <- ora.vec;
}
#if nothing matches, try original ids;
if(all(is.na(ora.vec))){
ora.vec <- unlist(strsplit(IDs, "; "));
names(ora.vec) <- ora.vec;
}
}else{ # net is tf/mir/drug, they already in entrez
ora.vec <- unlist(strsplit(IDs, "; "));
names(ora.vec) <- as.character(ora.vec);
}
if(fun.type %in% c("trrust", "encode", "jaspar", "mirnet", "met", "drugbank", "disease")){
res <- PerformRegEnrichAnalysis(dataSet, file.nm, fun.type, ora.vec, "inverse");
}else{
res <- .performEnrichAnalysis(dataSet, file.nm, fun.type, ora.vec);
}
#if(checkEntrezMatches(unname(ora.vec)) > 0 && res == 0){
# res=2;
#}
return(res);
}
PlotCorHistogram <- function(imgNm){
Cairo(file=imgNm, width=400, height=400, type="png", bg="white");
library(ggplot2)
cor <- E(overall.graph)$correlation
p<-ggplot(data.frame(Correlation=abs(cor)), aes(x=Correlation)) + geom_histogram() + ggtitle("Correlation distribution")+
theme(plot.title = element_text(hjust = 0.5));
print(p);
dev.off();
}
#' Filter interactions by tissue coexpression data
#'
#' @param type Name of database (encode, gtex, hpa)
#' @param tissue Selected tissue to filter (refer to web server interface for options, replace space with "." (i.e. Adipose.Tissue))
#'
#' @export
#'
FilterByTissue <- function(type, tissue){
paramSet <- readSet(paramSet, "paramSet");
seed.genes <- paramSet$seed.genes;
all.nms <- V(overall.graph)$name;
gene.nms <- all.nms[which(V(overall.graph)$Types %in% c("Gene", "Protein"))]
tissue.mat <- queryFilterDB(type, data.org);
hit.inx <- tissue.mat$Tissue %in% c(tissue, "All");
tissue.genes <- tissue.mat[,1][!hit.inx];
nodes2rm <- gene.nms[which(gene.nms %in% tissue.genes)]
g <- simplify(delete.vertices(overall.graph, nodes2rm));
nodeList <- get.data.frame(g, "vertices");
nodeList <- nodeList[,1:2];
colnames(nodeList) <- c("Id", "Label");
fast.write(nodeList, file="orig_node_list.csv");
nd.inx <- ppi.net$node.data[,1] %in% nodeList[,1];
edgeList <- get.data.frame(g, "edges");
edgeList <- edgeList[,1:2];
colnames(edgeList) <- c("Source", "Target");
fast.write(edgeList, file="orig_edge_list.csv");
# update ppi.net
ppi.net$order <- 1;
ppi.net$node.data <- nodeList;
ppi.net$edge.data <- edgeList;
ppi.net <<- ppi.net;
overall.graph <- g
analSet <- DecomposeGraph(overall.graph,analSet);
substats <- analSet$substats;
if(!is.null(substats)){
overall.graph <<- overall.graph;
output <- c(length(seed.genes),length(seed.proteins), vcount(overall.graph), ecount(overall.graph), length(ppi.comps), substats);
}else{
output <- 0;
}
analSet$overall.graph <- overall.graph;
return(saveSet(analSet, "analSet", output));
}
#' Filter interactions by correlation (only applicable to coexpression network)
#'
#' @param min.cor Correlation threshold (0.5 - 1)
#'
#' @export
#'
FilterNetByCor <- function( min.cor){
paramSet <- readSet(paramSet, "paramSet");
seed.genes <- paramSet$seed.genes;
all.nms <- V(overall.graph)$name;
edge.mat <- get.edgelist(overall.graph);
cor <- E(overall.graph)$correlation;
edges2rm.inx <-NULL;
if(min.cor > 0.5){
rm.inx <- cor <= min.cor;
}
overall.graph <- simplify(delete.edges(overall.graph, which(rm.inx)));
current.msg <<- paste("A total of", length(which(rm.inx)) , "edges was reduced.");
analSet <- DecomposeGraph(overall.graph,analSet);
substats <- analSet$substats;
if(!is.null(substats)){
overall.graph <<- overall.graph;
return(c(length(seed.genes),length(seed.proteins), vcount(overall.graph), ecount(overall.graph), length(ppi.comps), substats));
}else{
return(0);
}
}
regEnrichment <- function(dataName, file.nm, fun.type, IDs, netInv){
regBool <<- "false";
res <- PerformNetEnrichment(dataName, file.nm, fun.type, IDs);
}
PerformRegEnrichAnalysis <- function(dataSet, file.nm, fun.type, ora.vec, netInv){
if(!exists("my.reg.enrich")){ # public web on same user dir
compiler::loadcmp("../../rscripts/networkanalystr/_utils_regenrich.Rc");
}
return(my.reg.enrich(dataSet, file.nm, fun.type, ora.vec, netInv));
}
PrepareTheraNet <- function(net.nm, json.nm, theraids, regids, regnms){
require("igraph")
net.nm = gsub(".json", "", net.nm);
if(is.null(net.nm)){
net.nm <- names(ppi.comps)[1];
}
reg.count <- reg.count + 1;
reg.nm <- paste("addedReg", reg.count, sep="");
my.ppi <- ppi.comps[[net.nm]];
net.nm <<- net.nm;
json.nm <<- json.nm;
theraids <<- theraids
regids<<-regids;
theraids <- strsplit(theraids, ";");
theraids <- theraids[[1]];
theraids <- strsplit(theraids, ",");
regids <- strsplit(regids, ",");
regids <- regids[[1]]
regnms <- strsplit(regnms, ",");
regnms <- regnms[[1]]
regnms<<-regnms;
regids<<-regids;
my.ppi <- ppi.comps[[net.nm]];
nms <- V(my.ppi)$name
remove.inx <-regids %in% nms
nodesToAdd <- regids[!remove.inx];
if(length(nodesToAdd)>0){
my.ppi <- my.ppi + vertices(nodesToAdd);
}
for(i in 1:length(regids)){
for(j in 1:length(theraids[[i]])){
my.ppi <- my.ppi + edge(regids[[i]], theraids[[i]][j]);
}
}
if(!is.null(V(my.ppi)$moltype)){
moltype.vec <- V(my.ppi)$moltype
}else{
moltype.vec <- rep("NA", length(V(my.ppi)))
}
nms <- V(my.ppi)$name
reg.inx <- nms %in% regids
moltype.vec[reg.inx] <- "regulator"
my.ppi <- set_vertex_attr(my.ppi, "moltype", value = moltype.vec)
edge.data <- data.frame(regids, regnms, rep("regulator", length(regids)) ,stringsAsFactors=FALSE);
colnames(edge.data) <- c("Id", "Label", "Types");
colnames(ppi.net$node.data) <- c("Id", "Label", "Types");
ppi.net$node.data <<-rbind(ppi.net$node.data, edge.data);
filenm <- paste(reg.nm, ".json", sep="");
reg.count <<- reg.count
my.ppi <- simplify(my.ppi);
ppi.comps[[reg.nm]] <<- my.ppi;
current.net.nm <<- reg.nm;
ppi.comps <<- ppi.comps;
UpdateSubnetStats();
#net.info$reg.ids <<- regids;
convertIgraph2JSON(reg.nm, filenm);
return(filenm);
}
GetSelectedNetworks <- function(){
paramSet <- readSet(paramSet, "paramSet");
return(names(paramSet$nets));
}
#' Merge or identify shared portions of multiple networks
#'
#' @param type Type of integrative network, either intersect, union or unique
#'
#' @export
#'
ComputeIntegNet <- function(type){
gObjs <- list();
paramSet <- readSet(paramSet, "paramSet");
data.org <- paramSet$data.org;
seed.expr <- paramSet$seed.expr;
seed.genes <- paramSet$seed.genes;
for(i in 1:length(paramSet$nets)){
ppi.net <<- paramSet$nets[[i]];
CreateGraph();
gObj <- overall.graph;
gObjs[[i]] <- gObj;
}
if(type == "intersect"){
g <- do.call(igraph::intersection, gObjs)
}else if(type == "union"){
g <- do.call(igraph::union, gObjs)
}else{
gObjs.union <- list()
gObj.unique <- "";
for(i in 1:length(paramSet$nets)){
gObj <- gObjs[[i]];
nm <- names(paramSet$nets)[i]
if(nm != type){
gObjs.union[[nm]] <- gObj
}else{
gObj.unique <- gObj
}
}
union.graph <- do.call(igraph::union, gObjs.union)
g <- igraph::difference(gObj.unique, union.graph)
}
nms = V(g)$name
labels = rep("NA", length(V(g)$name))
types = rep("NA", length(V(g)$name))
for(i in 1:length(paramSet$nets)){
ppi.net <- paramSet$nets[[i]];
node.ids <- ppi.net$node.data[,1]
node.nms <- ppi.net$node.data[,2]
node.types <- ppi.net$node.data[,3]
hit.inx <- nms %in% node.ids;
match.inx <- which(node.ids %in% nms)
labels[hit.inx] <- node.nms[match.inx]
types[hit.inx] <- node.types[match.inx]
}
g <- set.vertex.attribute(g, "Label", index = V(g), value = labels);
g <- set.vertex.attribute(g, "Types", index = V(g), value = types);
node.res <- data.frame(Id=V(g)$name, Label=V(g)$Label, Types=V(g)$Types)
edge.mat <- as_edgelist(g);
edge.res <- data.frame(Source=edge.mat[,1], Target=edge.mat[,2])
ppi.net$db.type <- "multi"
ppi.net$node.data <- node.res;
ppi.net$edge.data <- edge.res;
node.list <- ppi.net$node.data;
edge.list <- ppi.net$edge.data;
overall.graph <- g
# add node expression value
if(ppi.net$db.type == "ppi"){
## Temp fix seed.expr all in entrez?
## all converted to new IDs based on PPI from the given organism
newIDs <- doPpiIDMapping(sqlite.path, names(seed.expr), data.org);
}else{# all entrez in mirNet
newIDs <- names(seed.expr);
}
match.index <- match(V(overall.graph)$name, newIDs);
expr.vals <- seed.expr[match.index];
expr.vec <- expr.vals;
names(expr.vec)<- V(overall.graph)$name;
expr.vec <<- expr.vec[!is.na(expr.vec)]
overall.graph <- set.vertex.attribute(overall.graph, "abundance", index = V(overall.graph), value = expr.vals);
hit.inx <- seed.proteins %in% node.list[,1];
seed.proteins <<- seed.proteins[hit.inx];
analSet <- DecomposeGraph(overall.graph,analSet);
substats <- analSet$substats;
overall.graph <<- overall.graph;
ppi.net <<- ppi.net
if(!is.null(substats)){
output <- c(length(seed.genes), length(seed.proteins), nrow(node.list), nrow(edge.list), length(ppi.comps), substats);
}else{
output <- 0;
}
analSet$overall.graph <- overall.graph;
analSet$ppi.net <- ppi.net;
return(saveSet(analSet,"analSet", output));
}
ClearNetSelection <- function(type){
paramSet <- readSet(paramSet, "paramSet");
if(type %in% names(paramSet$nets)){
inx = which( names(paramSet$nets) == type)
paramSet$nets[[inx]] = NULL;
}
saveSet(paramSet, "paramSet");
return(1)
}
queryFilterDB <- function(type, org){
paramSet <- readSet(paramSet, "paramSet");
sqlite.path <- paramSet$sqlite.path;
require('RSQLite');
conv.db <- dbConnect(SQLite(), paste(sqlite.path, "tissue_filter.sqlite", sep=""));
db.map <- dbReadTable(conv.db, paste0(data.org,"_",type));
dbDisconnect(conv.db); cleanMem();
return(db.map)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.