R/net_anal_utils.R

Defines functions queryFilterDB ClearNetSelection ComputeIntegNet GetSelectedNetworks PrepareTheraNet PerformRegEnrichAnalysis regEnrichment FilterNetByCor FilterByTissue PlotCorHistogram PerformNetEnrichment SetDiffFilter SetDiffNetName SetCellCoexNumber .graph.eda .graph.noa doID2LabelMapping .graph.sif saveNetworkInSIF PrepareSubnetDownloads ExcludeNodesOverall ExcludeNodes UpdateSubnetStats GetMinConnectedGraphs Compute.SteinerForest ComputePCSFNet GetNodeBetweenness GetNodeDegrees GetNodeNames GetNodeIDs PrepareNetworkUpload PrepareNetwork FilterBipartiNet CreateGraph BuildSeedProteinNet ExpandNetworkSearch .prepareListSeeds .prepareSigProteinJSON SearchNetDB BuildMultiNet

##################################################
## R scripts for NetworkAnalyst
## Description: biological network analysis methods
## Author: Jeff Xia, jeff.xia@mcgill.ca
###################################################

# table.nm is the org code used for sqlite table (ppi)
# for chem type, table.nm is drugbank or ctd
# note, last two param only for STRING database

BuildMultiNet <- function(tblNm1="NA", tblNm2="NA", tblNm3="NA", tblNm4="NA", order=1){

  opts.vec <- c(tblNm1, tblNm2, tblNm3, tblNm4);
  opts.vec <- strsplit(opts.vec, "__");
  tbl.vec <- vector();
  dbNm.vec <- vector();

  for(i in 1:length(opts.vec)){
    dbNm.vec[i] <- opts.vec[[i]][1];
    tbl.vec[i] <- opts.vec[[i]][2];
  }

  node.resu <<- "empty";
  edge.resu <<- "empty";
  res.list <- list();
  for ( i in 1:length(tbl.vec)){
    if(tbl.vec[i] != "NA"){
      net.type <<- tbl.vec[i];
      if(dbNm.vec[i] == "ppi"){
        tblNm <- paste0(data.org, "_", tbl.vec[i]); 
      }else{
        tblNm <- tbl.vec[i];
      }
      res = SearchNetDB(dbNm.vec[i], tblNm, require.exp=TRUE, min.score = 900, order=1);
      res.list[[i]] = res;
    }
  }
  return(res);
}

#' Query database using input list or previously computed network
#'
#' @param db.type Input the name of the created dataSetObj (see Init.Data)
#' @param type Network type (gene, met, mir, tf, mic, peak, m2m, snp)
#' @param dbType Database name (i.e innatedb)
#' @param inputType Omics type of input features (gene, protein, met, tf, mic, peak, snp)
#'
#' @export
#'
SearchNetDB <- function(db.type, table.nm, require.exp=TRUE, min.score = 900, order=1){

    regids<<- "";
    db.typeu <<- db.type;
    result.list <- .prepareSigProteinJSON();

    paramSet <- readSet(paramSet, "paramSet");
    data.org <- paramSet$data.org;
    sqlite.path <- paramSet$sqlite.path;
    lib.path <- paramSet$lib.path;

    net.type <- gsub(paste0(data.org,"_"),'',table.nm);
    SetNetType(net.type);

    protein.vec <- result.list$protein.vec; # this actually is entrez IDs?
    seed.proteins <<- protein.vec;
    # now do the database search
    if(db.type == "ppi"){
        protein.vec <- doPpiIDMapping(sqlite.path, protein.vec, data.org);
        res <- QueryPpiSQLite(sqlite.path, table.nm, protein.vec, require.exp, min.score);
        # no hits
      if(nrow(res)==0){ return(c(0,0)); }
        edge.res <- data.frame(Source=res[,1],Target=res[,2]);
        row.names(edge.res) <- 1:nrow(res);
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
        node.ids <- c(res[,1], res[,2])
        node.nms <- c(res[,3], res[,4]);
        node.types <- c(rep("Protein", nrow(res)), rep("Protein", nrow(res)))
    }else if(db.type == "tf"){
        table.nm <- paste(data.org,table.nm, sep="_");
        res <- QueryTFSQLite(sqlite.path, table.nm, protein.vec);
        # no hits
        if(nrow(res)==0){ return(c(0,0)); }
        edge.res <- data.frame(Source=res[,"entrez"],Target=res[,"tfid"]);
        row.names(edge.res) <- 1:nrow(res);
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
        node.ids <- c(res[,"entrez"], res[,"tfid"])
        node.nms <- c(res[,"symbol"], res[,"tfname"]);
        node.types <- c(rep("Protein", nrow(res)), rep("TF", nrow(res)))
    }else if(db.type == "crossppi"){
        res <- QueryOtherPpiSQLite(sqlite.path, table.nm, protein.vec, data.org);
        # no hits
        if(nrow(res)==0){ return(c(0,0)); }
        edge.res <- data.frame(Source=res[,"id1"],Target=res[,"id2"]);
        row.names(edge.res) <- 1:nrow(res);
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
        node.ids <- c(res[,"id1"], res[,"id2"])
        node.nms <- c(res[,"name1"], res[,"name2"]);
        node.types <- c(rep("Protein", nrow(res)), rep("Foreign_Protein", nrow(res)))
        if(table.nm == "microbiolink1"){
            edge.extra.info <- data.frame(domain=c(res[,"PFAM_domain1"], res[,"PFAM_domain2"]))
            node.extra.info <- data.frame(id=node.ids, species=c(rep("Human", nrow(res)), res[,"species2"]), domain=c(res[,"PFAM_domain1"], res[,"PFAM_domain2"]))
            node.infoU <<- node.extra.info[!duplicated(node.extra.info$id),];
        }else if(table.nm == "microbiolink2"){
            edge.extra.info <- data.frame(domain=c(res[,"linear_motif1"], res[,"PFAM_domain2"]))
            node.extra.info <- data.frame(id=node.ids, species=c(rep("Human", nrow(res)), res[,"species2"]), domain=c(res[,"linear_motif1"], res[,"PFAM_domain2"]))
            node.infoU <<- node.extra.info[!duplicated(node.extra.info$id),];
        }
    }else if(db.type == "signal"){
        res <- QueryOtherPpiSQLite(sqlite.path,table.nm, protein.vec, data.org);
        # no hits
        if(nrow(res)==0){ return(c(0,0)); }
        edge.res <- data.frame(Source=res[,"id1"],Target=res[,"id2"], Direction=res[, "EFFECT"], Mechanism=res[,"MECHANISM"]);
        row.names(edge.res) <- 1:nrow(res);
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
        node.ids <- c(res[,"id1"], res[,"id2"])
        node.nms <- c(res[,"name1"], res[,"name2"]);
        edge.extra.info <- res[,-c(1:4)];
        node.extra.info <- c(res[,"TYPEA"], res[,"TYPEB"])
        node.infoU <<- node.extra.info
        node.types <- c(res[,"TYPEA"], res[,"TYPEB"])

    }else if(db.type == "mir"){ # in miRNA, table name is org code, colname is id type
        db.nm <- table.nm;
        res <- QueryMirSQLite(sqlite.path, data.org, "entrez", protein.vec, db.nm);
        # no hits
        if(nrow(res)==0){ return(c(0,0)); }
        edge.res <- data.frame(Source=res[,"entrez"],Target=res[,"mir_acc"]);
        row.names(edge.res) <- 1:nrow(res);
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
        node.ids <- c(res[,"entrez"], res[,"mir_acc"])
        node.nms <- c(res[,"symbol"], res[,"mir_id"]);
        node.types <- c(rep("Protein", nrow(res)), rep("miRNA", nrow(res)))
    }else if(db.type == "drug"){
        # note, all drug data is on human,
        protein.vec <- doEntrez2UniprotMapping(protein.vec);
        protein.vec <- protein.vec[!is.na(protein.vec)];
        res <- QueryDrugSQLite(sqlite.path, protein.vec);
        # no hits
        if(nrow(res)==0){ return(c(0,0)); }
        entrez.vec <- .doGeneIDMapping(res[,"upid"], "uniprot", data.org, "vec");
        edge.res <- data.frame(Source=entrez.vec,Target=res[,"dbid"]);
        row.names(edge.res) <- 1:nrow(res);
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
        node.ids <- c(entrez.vec, res[,"dbid"]);
        node.nms <- c(res[,"symbol"], res[,"dbname"]);
        node.types <- c(rep("Protein", nrow(res)), rep("Drug", nrow(res)));
        protein.vec <- .doGeneIDMapping(protein.vec, "uniprot", data.org, "vec");
    }else if(db.type == "disease"){
        res <- QueryDiseaseSQLite(sqlite.path, protein.vec);
        # no hits
        if(nrow(res)==0){ return(c(0,0)); }
        edge.res <- data.frame(Source=res[,"entrez"],Target=res[,"diseaseId"]);
        row.names(edge.res) <- 1:nrow(res);
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);

        node.ids <- c(res[,"entrez"], res[,"diseaseId"])
        node.nms <- c(res[,"symbol"], res[,"diseaseName"]);
        node.types <- c(rep("Protein", nrow(res)), rep("Disease", nrow(res)))

    }else if(db.type == "tfmir"){
        res <- QueryTfmirSQLite(sqlite.path, protein.vec, data.org);
        # no hits
        if(nrow(res)==0){ return(c(0,0)); }
        edge.res <- data.frame(Source=res[,"id1"],Target=res[,"id2"]);
        row.names(edge.res) <- 1:nrow(res);
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);

        node.ids <- c(res[,"id1"], res[,"id2"])
        node.nms <- c(res[,"name1"], res[,"name2"]);

        node.types <- c(rep("Protein", 2*nrow(res)))
        db.path <- paste(lib.path, data.org, "/mirlist.rds", sep="");
        db.map <-  readRDS(db.path);
        tf.inx <- node.ids %in% edge.res[,"Source"];
        mir.inx <- node.ids %in% db.map[,1];
        node.types[tf.inx] <- "TF";
        node.types[mir.inx] <- "miRNA";

    }else if(db.type == "cellcoex"){
        res <- QueryCellCoexSQLite(sqlite.path, protein.vec, data.org);
        # no hits
        if(nrow(res)==0){ return(c(0,0)); }
        edge.res <- data.frame(Source=res[,"id1"],Target=res[,"id2"]);
        row.names(edge.res) <- 1:nrow(res);
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);

        node.ids <- c(res[,"id1"], res[,"id2"])
        node.nms <- c(res[,"name1"], res[,"name2"]);
        node.types <- c(rep("Protein", nrow(res)), rep("Protein", nrow(res)))

    }else if(db.type == "tissueppi"){
        res <- QueryDiffNetSQLite(sqlite.path, protein.vec);
        # no hits
        if(nrow(res)==0){ return(c(0,0)); }
        edge.res <- data.frame(Source=res[,"id1"],Target=res[,"id2"]);
        row.names(edge.res) <- 1:nrow(res);
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);

        node.ids <- c(res[,"id1"], res[,"id2"])
        node.nms <- c(res[,"name1"], res[,"name2"]);
        node.types <- c(rep("Protein", nrow(res)), rep("Protein", nrow(res)))
    }else if(db.type == "tissuecoex"){
        # note, all drug data is on human,
        res <- QueryTissueCoexSQLite(sqlite.path, protein.vec, data.org);
        # no hits
        if(nrow(res)==0){ return(c(0,0)); }
        edge.res <- data.frame(Source=res[,"id1"],Target=res[,"id2"]);
        row.names(edge.res) <- 1:nrow(res);
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);

        node.ids <- c(res[,"id1"], res[,"id2"])
        node.nms <- c(res[,"name1"], res[,"name2"]);
        node.types <- c(rep("Protein", nrow(res)), rep("Protein", nrow(res)))
    }else if(db.type == "chem"){
        res <- QueryChemSQLite(sqlite.path, data.org, protein.vec);
        if(nrow(res)==0){ return(c(0,0)); }
        edge.res <- data.frame(Source=res[,"entrez"],Target=res[,"ctdid"]);
        row.names(edge.res) <- 1:nrow(res);
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);

        node.ids <- c(res[,"entrez"], res[,"ctdid"])
        node.nms <- c(res[,"symbol"], res[,"name"]);
        node.types <- c(rep("Protein", nrow(res)), rep("Chemical", nrow(res)))
    }else if(db.type == "met"){
        table.nm <- paste(data.org, met.type, sep="_");
        res <- QueryMetSQLiteNet(sqlite.path, table.nm, protein.vec, netInv);
        if(nrow(res)==0){ return(c(0,0)); }
        edge.res <- data.frame(Source=res[,"entrez"],Target=res[,"kegg"]);
        row.names(edge.res) <- 1:nrow(res);
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);

        node.ids <- c(res[,"kegg"], res[,"entrez"])
        node.nms <- c(res[,"met"], res[,"symbol"]);
        node.types <- c(rep("Metabolite", nrow(res)), rep("Protein", nrow(res)))
    }
    if(!is.null(node.ids)){
        seed.inx <- node.ids %in% unique(seed.proteins);
        node.types[seed.inx] <- "Seed"
    }

    if(order == 0 && db.type == "ppi"){
        keep.inx <- node.ids %in% protein.vec;
        node.ids <- node.ids[keep.inx];
        node.nms <- node.nms[keep.inx];
        node.types <- node.types[keep.inx];
        keep.inx <- edge.res[,1] %in% protein.vec;
        edge.res <- edge.res[keep.inx,];
        keep.inx <- edge.res[,2] %in% protein.vec;
        edge.res <- edge.res[keep.inx,];
    }

    node.res <- data.frame(Id=node.ids, Label=node.nms, Types=node.types);
    node.res <- node.res[!duplicated(node.res$Id),];

    if( grepl("microbiolink", table.nm, fixed = TRUE) || db.type == "signal"){
        edge.res <- cbind(edge.res, edge.extra.info);
        edge.infoU <<- edge.res;
    }

    nodeListu <<- node.res;
    table.nmu <<- table.nm;

    fast.write(node.res, file="orig_node_list.csv", row.names=FALSE);

    ppi.net <<- list(
        db.type=db.type,
        order=1,
        seeds=protein.vec,
        table.nm=table.nm,
        node.data = node.res,
        edge.data = edge.res,
        require.exp = require.exp,
        min.score = min.score
    );

    paramSet$nets[[db.type]] <- ppi.net
    saveSet(paramSet, "paramSet");
    analSet$ppi.net <- ppi.net;
    output <- c(nrow(node.res), nrow(res))
    return(saveSet(analSet, "analSet", output))
}

.prepareSigProteinJSON <- function(){
    paramSet <- readSet(paramSet, "paramSet");
    anal.type <- paramSet$anal.type;
    if(anal.type == "genelist"){
        result.list <- .prepareListSeeds();
    }else{ # single expression data or meta.mat
        result.list <- .prepareExpressSeeds();
    }
    return(result.list);
}

.prepareListSeeds <- function(){
    #save.image("seed.RData");
    protein.list <- list();
    gene.list <- list();
    paramSet <- readSet(paramSet, "paramSet");
    selectedNetDataset <- paramSet$selectedNetDataset;

    msgSet <- readSet(msgSet, "msgSet");

    data.org <- paramSet$data.org;

    mdata.all <- paramSet$mdata.all;

    if(paramSet$numOfLists > 1){
        if(selectedNetDataset %in% c("intersect","union")){
            dataSet <- list();
            dataSet$name <- selectedNetDataset
            my.vec <- names(mdata.all);
            com.ids <- NULL;
            list.vec <- list()
            for(i in 1:length(my.vec)){
                datSet <- readDataset(my.vec[i]);
                if(is.null(com.ids)){
                  com.ids <- datSet$GeneAnotDB[,"gene_id"];
                  prot.mat <- datSet$prot.mat
                  list.vec[[i]] <- com.ids
                }else{
                  if(selectedNetDataset == "intersect"){
                    com.ids <- datSet$GeneAnotDB[,"gene_id"];
                    list.vec[[i]] <- com.ids
                  }else{
                    com.ids <- union(com.ids, datSet$GeneAnotDB[,"gene_id"]);
                  }
                    prot.mat <- rbind(prot.mat, as.matrix(datSet$prot.mat[rownames(datSet$prot.mat) %in% com.ids,]))
                }
           }
            if(selectedNetDataset == "intersect"){
            com.ids <- Reduce(intersect, list.vec)
            prot.mat <- as.matrix(datSet$prot.mat[rownames(datSet$prot.mat) %in% com.ids,])
            }else{
            com.ids <- unique(as.character(com.ids[!is.na(com.ids)])); # make sure it is interpreted as name not index
            }

            com.symbols <- doEntrez2SymbolMapping(com.ids, data.org, paramSet$data.idType);
            dataSet$GeneAnotDB <- data.frame(gene_id=com.ids, accession=com.symbols);
            dataSet$prot.mat <- prot.mat;
            dataSet$sig.mat <- prot.mat
            dataSet$seeds.proteins <- com.ids
        }else{
           my.vec <- names(mdata.all);
           # make sure reference is the first
           inx <- which(my.vec == selectedNetDataset);
           my.vec <- my.vec[-inx];
           com.ids <- NULL;
           ids.list <- list()
           for(i in 1:length(my.vec)){
                dataSet <- readDataset(my.vec[i]);
                ids.list[[i]]=dataSet$GeneAnotDB[,"gene_id"]
           }
            dataSet <- readDataset(selectedNetDataset);
            ids <- unique(unlist(ids.list));
            com.ids <-setdiff(dataSet$GeneAnotDB[,"gene_id"], ids);
            prot.mat <- as.matrix(dataSet$prot.mat[which(rownames(dataSet$prot.mat) %in% com.ids),])
            com.symbols <- doEntrez2SymbolMapping(com.ids, data.org, paramSet$data.idType);
            dataSet$GeneAnotDB <- data.frame(gene_id=com.ids, accession=com.symbols);
            dataSet$prot.mat <- prot.mat;
            dataSet$sig.mat <- prot.mat
            dataSet$seeds.proteins <- com.ids
        }
    }else{
        dataSet <- readDataset("datalist1");
    }
    

    # return a json array object
    # each object for a single dataset its sig proteins
    meta.vec <- meta.gene.vec <- meta.seed.expr <- NULL;
    file.create("seed_proteins.txt");
    GeneAnotDB <- NULL;

    gene.mat <- dataSet$sig.mat;
    prot.mat <- dataSet$prot.mat;
    write(paste("#DataSet:", dataSet$name),file="sig_genes.txt",append=TRUE);
    write.table(dataSet$sig.mat, file="sig_genes.txt", append=TRUE);

    meta.gene.vec <- c(meta.gene.vec, rownames(gene.mat));
    gene.list[[dataSet$name]] <- list(gene=rownames(gene.mat),logFC=unname(gene.mat[,1]));
    GeneAnotDB <- rbind(GeneAnotDB, dataSet$GeneAnotDB);
    meta.seed.expr <- c(meta.seed.expr, prot.mat[,1]);
    write(paste("#DataSet:", dataSet$name),file="seed_proteins.txt",append=TRUE);
    write.table(cbind(Emblprotein=rownames(prot.mat), Expression=prot.mat[,1]), file="seed_proteins.txt", row.names=F, quote=F,append=TRUE);
    protein.vec <- prot.mat[,1];
    meta.vec <- c(meta.vec, names(protein.vec));
    if(length(protein.vec) == 1){
        protein.vec <- as.matrix(protein.vec)
    }
    protein.list[[dataSet$name]] <- signif(protein.vec, 3);

    gene.list$name <- dataSet$name;
    paramSet$seed.genes <- unique(meta.gene.vec);

    meta.seed.df <- as.matrix(meta.seed.expr);
    rownames(meta.seed.df) <- names(meta.seed.expr);

    res <- RemoveDuplicates(meta.seed.df, "max", quiet=F, paramSet, msgSet);
    seed.expr <- res[[1]];
    msgSet <- res[[2]];
    paramSet$seed.expr <- seed.expr[,1];
    protein.vec <- unique(meta.vec);

    result <- list(
        gene.list = gene.list,
        protein.list = protein.list,
        protein.vec = protein.vec
    );

    saveSet(paramSet, "paramSet");
    return(result)
}


# 2nd order network, only for PPI
ExpandNetworkSearch <- function(){
    paramSet <- readSet(paramSet, "paramSet");
    data.org <- paramSet$data.org;
    sqlite.path <- paramSet$sqlite.path
    if(ppi.net$order == 0){
        SearchNetDB(ppi.net$db.type, ppi.net$table.nm);
        CreateGraph();
    }
    protein.vec <- V(overall.graph)$name;
    table.nm <- ppi.net$table.nm;
    if(db.typeu == "ppi"){
        res <- QueryPpiSQLite(sqlite.path, table.nm, protein.vec, ppi.net$require.exp, ppi.net$min.score);
    }else if (db.typeu == "tissuecoex"){
        res <- QueryTissueCoexSQLite(sqlite.path, protein.vec, data.org);
    }else if (db.typeu == "tissueppi"){
        res <- QueryDiffNetSQLite(sqlite.path, protein.vec);
    }else if (db.typeu == "cellcoex"){
        res <- QueryCellCoexSQLite(sqlite.path, protein.vec, data.org);
    }
    edge.res <- data.frame(Source=res[,1],Target=res[,2]);
    #row.names(edge.res) <- res[,5];

    node.ids <- c(res[,1], res[,2])
    node.nms <- c(res[,3], res[,4]);
    node.res <- data.frame(Id=node.ids, Label=node.nms);
    node.res <- node.res[!duplicated(node.res$Id),];
    if(nrow(node.res) < 10000){ # only overwrite if within the range
        fast.write(edge.res, file="orig_edge_list.csv",row.names=FALSE);
        fast.write(node.res, file="orig_node_list.csv", row.names=FALSE);
        ppi.net <<- list(db.type=db.typeu, order=2, seeds=protein.vec, table.nm=table.nm, node.data = node.res, edge.data = edge.res);
    }
    analSet$ppi.net <- ppi.net;
    output <- nrow(node.res);
    return(saveSet(analSet, "analSet", output));
}

# zero-order network - create ppi nets from only input (seeds)

#' Create network from only input (seeds)
#'
#' @export
#'
BuildSeedProteinNet <- function(){

    nodes <- V(overall.graph)$name;
    hit.inx <-  nodes %in% seed.proteins;
    nodes2rm <- nodes[!hit.inx];
    g <- simplify(delete.vertices(overall.graph, nodes2rm));

    nodeList <- get.data.frame(g, "vertices");
    nodeList <- nodeList[,1:2];
    colnames(nodeList) <- c("Id", "Label");

    fast.write(nodeList, file="orig_node_list.csv");
    nd.inx <- ppi.net$node.data[,1] %in% nodeList[,1];

    edgeList <- get.data.frame(g, "edges");
    edgeList <- edgeList[,1:2];
    colnames(edgeList) <- c("Source", "Target");
    fast.write(edgeList, file="orig_edge_list.csv");

    # update ppi.net
    ppi.net$order <- 0;
    ppi.net$node.data <- nodeList;
    ppi.net$edge.data <- edgeList;
    ppi.net <<- ppi.net;
    analSet$ppi.net <- ppi.net;
    return(saveSet(analSet, "analSet"));
}

# create igraph from the edgelist saved from graph DB
# and decompose into subnets

#' Create igraph object from edgelist created from database selection and decompose into connected subnetworks
#'
#' @export
#'
CreateGraph <- function(){
    require('igraph');
    paramSet <- readSet(paramSet, "paramSet");
    data.org <- paramSet$data.org;
    seed.expr <- paramSet$seed.expr;
    seed.genes <- paramSet$seed.genes;

    node.list <- ppi.net$node.data;
    edge.list <- ppi.net$edge.data;

    seed.proteins <- ppi.net$seeds;
    overall.graph <- simplify(graph.data.frame(edge.list, directed=FALSE, vertices=node.list));

    if("domain" %in% colnames(edge.list)){
        overall.graph <- simplify(graph.data.frame(edge.list, directed=F, vertices=node.list));
        overall.graph <- set.vertex.attribute(overall.graph, "species", index = V(overall.graph), value = node.infoU$species[which(V(overall.graph)$name %in% node.infoU$id)]);
        overall.graph <- set.vertex.attribute(overall.graph, "domain", index = V(overall.graph), value = node.infoU$domain[which(V(overall.graph)$name %in% node.infoU$id)]);
    }else if("Direction" %in% colnames(edge.list)){
        overall.graph <- simplify(graph.data.frame(edge.list, directed=F, vertices=node.list));
        overall.graph <-set_edge_attr(overall.graph, "direction",index = E(overall.graph), value = as.character(edge.list$Direction))
        overall.graph <-set_edge_attr(overall.graph, "mechanism",index = E(overall.graph), value = as.character(edge.infoU$Mechanism))
    }else if("TYPEA" %in% colnames(edge.list)){
        overall.graph <- simplify(graph.data.frame(edge.list, directed=F, vertices=node.list));
        overall.graph <-set_edge_attr(overall.graph, "direction",index = E(overall.graph), value = as.character(edge.infoU$EFFECT))
        overall.graph <-set_edge_attr(overall.graph, "mechanism",index = E(overall.graph), value = as.character(edge.list$Mechanism))

        overall.graph <- set.vertex.attribute(overall.graph, "type", index = V(overall.graph), value = node.infoU);
    }else {
        overall.graph <- simplify(graph.data.frame(edge.list, directed=FALSE, vertices=node.list));
        overall.graph <-set_edge_attr(overall.graph, "direction",index = E(overall.graph), value = "NA")
    }

    # add node expression value
    if(ppi.net$db.type == "ppi"){
        ## Temp fix seed.expr all in entrez?
        ## all converted to new IDs based on PPI from the given organism
        newIDs <- doPpiIDMapping(sqlite.path, names(seed.expr), data.org)
    }else{# all entrez in mirNet
        newIDs <- names(seed.expr);
    }
    match.index <- match(V(overall.graph)$name, newIDs);
    expr.vals <- seed.expr[match.index];
 #  expr.vec <- abs(expr.vals) # logFC can be negative!!
    expr.vec <- expr.vals;
    names(expr.vec)<- V(overall.graph)$name;
    expr.vec <<- expr.vec[!is.na(expr.vec)]
    overall.graph <- set.vertex.attribute(overall.graph, "abundance", index = V(overall.graph), value = expr.vals);

    hit.inx <- seed.proteins %in% node.list[,1];
    seed.proteins <<- seed.proteins[hit.inx];

    analSet <- DecomposeGraph(overall.graph,analSet);
    substats <- analSet$substats;
    overall.graph <<- overall.graph;
    analSet$overall.graph <- overall.graph;
    if(!is.null(substats)){
        output <- c(length(seed.genes), length(seed.proteins), nrow(node.list), nrow(edge.list), length(ppi.comps), substats);
    }else{
        output <- 0;
    }
       return( saveSet(analSet, "analSet", output) );

}

#' Filter bipartite network by degree and/or betweenness
#'
#' @export
#'
FilterBipartiNet <- function(nd.type, min.dgr, min.btw){
    paramSet <- readSet(paramSet, "paramSet");
    seed.genes <- paramSet$seed.genes;

    all.nms <- V(overall.graph)$name;
    edge.mat <- get.edgelist(overall.graph);
    dgrs <- degree(overall.graph);
    nodes2rm.dgr <- nodes2rm.btw <- NULL;

    if(nd.type == "gene"){
        hit.inx <- all.nms %in% edge.mat[,1];
    }else if(nd.type=="other"){
        hit.inx <- all.nms %in% edge.mat[,2];
    }else{ # all
        hit.inx <- rep(TRUE, length(all.nms));
    }

    if(min.dgr > 0){
        rm.inx <- dgrs <= min.dgr & hit.inx;
        nodes2rm.dgr <- V(overall.graph)$name[rm.inx];
    }
    if(min.btw > 0){
        btws <- betweenness(overall.graph);
        rm.inx <- btws <= min.btw & hit.inx;
        nodes2rm.btw <- V(overall.graph)$name[rm.inx];
    }

    nodes2rm <- unique(c(nodes2rm.dgr, nodes2rm.btw));
    overall.graph <- simplify(delete.vertices(overall.graph, nodes2rm));
    current.msg <<- paste("A total of", length(nodes2rm) , "was reduced.");
    analSet <- DecomposeGraph(overall.graph,analSet);
    substats <- analSet$substats;
    if(!is.null(substats)){
        overall.graph <<- overall.graph;
        output <- c(length(seed.genes),length(seed.proteins), vcount(overall.graph), ecount(overall.graph), length(ppi.comps), substats);
    }else{
        output <- 0;
    }

    analSet$overall.graph <- overall.graph;
    return(saveSet(analSet, "analSet", output));
}

#' Prepare the json file for network visualization
#'
#' @param net.nm Name of subnetwork created from network building (i.e subnetwork1)
#' @param json.nm Name of json file to be exported for network visualization
#'
#' @export
#'
PrepareNetwork <- function(net.nm, json.nm){
   analSet <- readSet(analSet, "analSet");
   my.ppi <- analSet$ppi.comps[[net.nm]];
   nd.nms <- V(my.ppi)$name;


   analSet <- convertIgraph2JSON(net.nm, json.nm);
   current.net.nm <<- net.nm;
   return(saveSet(analSet, "analSet", 1));

}

#' Prepare the json file for network visualization from user uploaded graph file
#'
#' @param net.nm Name of subnetwork created from network building (i.e subnetwork1)
#' @param json.nm Name of json file to be exported for network visualization
#'
#' @export
#'
PrepareNetworkUpload <- function(net.nm, json.nm){

   my.ppi <- ppi.comps[[net.nm]];
   nd.nms <- V(my.ppi)$name;
   expr.vec <<- rep(0, length(nd.nms))
   table.nmu <<- "NA"
   convertIgraph2JSONFromFile(net.nm, json.nm, data.idType);
   current.net.nm <<- net.nm;
   return(saveSet(analSet, "analSet", 1));

}

GetNodeIDs <- function(){
    V(overall.graph)$name;
}

GetNodeNames <- function(){
    V(overall.graph)$Label;
}

GetNodeDegrees <- function(){
    degree(overall.graph);
}

GetNodeBetweenness <- function(){
    round(betweenness(overall.graph, directed=F, normalized=F), 2);
}

#' Compute minimum connect subnetwork based on input nodes using prize-collecting steiner forest approach
#'
#' @return check overall.graph object for result
#' @export
#'
ComputePCSFNet <- function(){
  paramSet <- readSet(paramSet, "paramSet");
  seed.genes <- paramSet$seed.genes;
  edg <- as.data.frame(get.edgelist(overall.graph));
  edg$V3 <- rep(1, nrow(edg));
  colnames(edg) <- c("from", "to", "cost");

  node_names <- unique(c(as.character(edg[,1]),as.character(edg[,2])))
  ppi <- graph.data.frame(edg[,1:2],vertices=node_names,directed=F)
  E(ppi)$weight <- as.numeric(edg[,3])
  ppi <- simplify(ppi)

  if(sum(expr.vec) == 0){ # make sure weights are not 0?!
     expr.vec = expr.vec +1
  }
  g <- Compute.SteinerForest(ppi, expr.vec, w = 5, b = 100, mu = 0.0005);

  nodeList <- get.data.frame(g, "vertices");
  colnames(nodeList) <- c("Id", "Label");
  write.csv(nodeList, file="orig_node_list.csv", row.names=F, quote=F);
  
  edgeList <- get.data.frame(g, "edges");
  edgeList <- cbind(rownames(edgeList), edgeList);
  colnames(edgeList) <- c("Id", "Source", "Target");
  write.csv(edgeList, file="orig_edge_list.csv", row.names=F, quote=F);
  
  path.list <- NULL;
  analSet <- DecomposeGraph(g,analSet);
  substats <- analSet$substats;
  if(!is.null(substats)){
    overall.graph <<- g;
    current.msg<<- "Steiner Forest was completed successfully";
    output <- c(length(seed.genes),length(seed.proteins), nrow(nodeList), nrow(edgeList), length(ppi.comps), substats);
  }else{
    output <- 0;
  }
  analSet$overall.graph <- overall.graph;
  return(saveSet(analSet, "analSet", output));
}

# Adapted from PCSF
# https://github.com/IOR-Bioinformatics/PCSF
Compute.SteinerForest <- function(ppi, terminals, w = 2, b = 1, mu = 0.0005, dummies){

  # Gather the terminal genes to be analyzed, and their scores
  terminal_names <- names(terminals)
  terminal_values <- as.numeric(terminals)

  # Incorporate the node prizes
  node_names <- V(ppi)$name
  node_prz <- vector(mode = "numeric", length = length(node_names))
  index <- match(terminal_names, node_names)
  percent <- signif((length(index) - sum(is.na(index)))/length(index)*100, 4)
  if (percent < 5){
    print("Less than 1% of your terminal nodes are matched in the interactome!");
    return(NULL);
  }
  paste0("  ", percent, "% of your terminal nodes are included in the interactome\n");
  terminal_names <- terminal_names[!is.na(index)]
  terminal_values <- terminal_values[!is.na(index)]
  index <- index[!is.na(index)]
  node_prz[index] <- terminal_values

  if(missing(dummies)||is.null(dummies)||is.na(dummies)){
    dummies <- terminal_names #re-assign this to allow for input
  }

  ## Prepare input file for MST-PCSF implementation in C++

  # Calculate the hub penalization scores
  node_degrees <- igraph::degree(ppi)
  hub_penalization <- - mu*node_degrees

  # Update the node prizes
  node_prizes <- b*node_prz
  index <- which(node_prizes==0)
  node_prizes[index] <- hub_penalization[index]

  # Construct the list of edges
  edges <- ends(ppi,es = E(ppi))
  from <- c(rep("DUMMY", length(dummies)), edges[,1])
  to <- c(dummies, edges[,2])

  cost <- c(rep(w, length(dummies)), E(ppi)$weight)

  #PCSF will faill if there are NAs in weights, this will check and fail gracefully
  if(any(is.na(E(ppi)$weight))){
    print("NAs found in the weight vector!");
    return (NULL);
  }

  ## Feed the input into the PCSF algorithm
  output <- XiaLabCppLib::call_sr(from,to,cost,node_names,node_prizes)

  # Check the size of output subnetwork and print a warning if it is 0
  if(length(output[[1]]) != 0){

    # Contruct an igraph object from the MST-PCSF output
    e <- data.frame(output[[1]], output[[2]], output[[3]])
    e <- e[which(e[,2]!="DUMMY"), ]
    names(e) <- c("from", "to", "weight")

    # Differentiate the type of nodes
    type <- rep("Steiner", length(output[[4]]))
    index <- match(terminal_names, output[[4]])
    index <- index[!is.na(index)]
    type[index] <- "Terminal"

    v <- data.frame(output[[4]], output[[5]], type)
    names(v) <- c("terminals", "prize", "type")
    subnet <- graph.data.frame(e,vertices=v,directed=F)
    E(subnet)$weight <- as.numeric(output[[3]])
    subnet <- delete_vertices(subnet, "DUMMY")
    subnet <- delete_vertices(subnet, names(which(degree(subnet)==0)));
    return(subnet)

  } else{
    print("Subnetwork can not be identified for a given parameter set")
    return(NULL);
  }
}


# for a given graph, obtain the smallest subgraphs that contain
# all the seed nodes. This is acheived by iteratively remove
# the marginal nodes (degree = 1) that are not in the seeds

#' Compute minimum connected network composed of seed nodes using shortest path based approach
#'
#' @param dataSetObj Input the name of the created dataSetObj (see Init.Data)
#' @param max.len Maximum number of seeds, if more than this number, take top 100 seed nodes based on degrees
#'
#' @return check overall.graph object for result
#' @export
#'
GetMinConnectedGraphs <- function(max.len = 200){
    set.seed(8574);
    paramSet <- readSet(paramSet, "paramSet");
    seed.genes <- paramSet$seed.genes;
    # first get shortest paths for all pair-wise seeds
    my.seeds <- seed.proteins;
    sd.len <- length(my.seeds);
    paths.list <-list();

    # first trim overall.graph to remove no-seed nodes of degree 1
    dgrs <- degree(overall.graph);
    keep.inx <- dgrs > 1 | (names(dgrs) %in% my.seeds);
    nodes2rm <- V(overall.graph)$name[!keep.inx];
    overall.graph <-  simplify(delete.vertices(overall.graph, nodes2rm));

    # need to restrict the operation b/c get.shortest.paths is very time consuming
    # for top max.len highest degrees
    if(sd.len > max.len){
        hit.inx <- names(dgrs) %in% my.seeds;
        sd.dgrs <- dgrs[hit.inx];
        sd.dgrs <- rev(sort(sd.dgrs));
        # need to synchronize all (seed.proteins) and top seeds (my.seeds)
        seed.proteins <- names(sd.dgrs);
        if(max.len>table(hit.inx)[["TRUE"]]){
            sd.len <-  table(hit.inx)[["TRUE"]];
        }else{
            sd.len <-  max.len;
        }
        my.seeds <- seed.proteins[1:sd.len];
        current.msg <<- paste("The minimum connected network was computed using the top", sd.len, "seed proteins in the network based on their degrees.");
    }else{
        current.msg <<- paste("The minimum connected network was computed using all seed proteins in the network.");
    }
    # now calculate the shortest paths for
    # each seed vs. all other seeds (note, to remove pairs already calculated previously)
    for(pos in 1:sd.len){
            paths.list[[pos]] <- get.shortest.paths(overall.graph, my.seeds[pos], seed.proteins[-(1:pos)])$vpath;
    }
    nds.inxs <- unique(unlist(paths.list));
    nodes2rm <- V(overall.graph)$name[-nds.inxs];
    g <- simplify(delete.vertices(overall.graph, nodes2rm));

    nodeList <- get.data.frame(g, "vertices");
    colnames(nodeList) <- c("Id", "Label");
    fast.write(nodeList, file="orig_node_list.csv", row.names=F);

    edgeList <- get.data.frame(g, "edges");
    edgeList <- cbind(rownames(edgeList), edgeList);
    colnames(edgeList) <- c("Id", "Source", "Target");
    fast.write(edgeList, file="orig_edge_list.csv", row.names=F);

    path.list <- NULL;
    analSet <- DecomposeGraph(g,analSet);
    substats <- analSet$substats;
    net.stats<<-net.stats
    if(!is.null(substats)){
        overall.graph <<- g;
        output <- c(length(seed.genes),length(seed.proteins), nrow(nodeList), nrow(edgeList), length(ppi.comps), substats);
    }else{
        output <- 0;
    }
    analSet$overall.graph <- overall.graph;
    return(saveSet(analSet, "analSet", output));
}

UpdateSubnetStats <- function(){
    old.nms <- names(ppi.comps);
    net.stats <- ComputeSubnetStats(ppi.comps);
    ord.inx <- order(net.stats[,1], decreasing=TRUE);
    net.stats <- net.stats[ord.inx,];
    rownames(net.stats) <- old.nms[ord.inx];
    net.stats <<- net.stats;
}

# exclude nodes in current.net (networkview)
ExcludeNodes <- function(nodeids, filenm){

    nodes2rm <- strsplit(nodeids, ";")[[1]];
    current.net <- ppi.comps[[current.net.nm]];
    current.net <- delete.vertices(current.net, nodes2rm);

    # need to remove all orphan nodes
    bad.vs<-V(current.net)$name[degree(current.net) == 0];
    current.net <- delete.vertices(current.net, bad.vs);

    # return all those nodes that are removed
    nds2rm <- paste(c(bad.vs, nodes2rm), collapse="||");

    # update topo measures
    node.btw <- as.numeric(betweenness(current.net));
    node.dgr <- as.numeric(degree(current.net));
    node.exp <- as.numeric(get.vertex.attribute(current.net, name="abundance", index = V(current.net)));
    nms <- V(current.net)$name;
    hit.inx <- match(nms, ppi.net$node.data[,1]);
    lbls <- ppi.net$node.data[hit.inx,2];

    nodes <- vector(mode="list");
    for(i in 1:length(nms)){
        nodes[[i]] <- list(
                  id=nms[i],
                  label=lbls[i],
                  degree=node.dgr[i],
                  between=node.btw[i],
                  expr = node.exp[i]
                );
    }
    # now only save the node pos to json
    netData <- list(deletes=nds2rm,nodes=nodes);
    sink(filenm);
    cat(RJSONIO::toJSON(netData));
    sink();

    ppi.comps[[current.net.nm]] <<- current.net;
    UpdateSubnetStats();

    # remember to forget the cached layout, and restart caching, as this is now different object (with the same name)
    #forget(PerformLayOut_mem);
    return(filenm);
}

# exclude nodes in overall net (network builder)
ExcludeNodesOverall <- function(nodeids, id.type, vismode){
    paramSet <- readSet(paramSet, "paramSet");
    seed.genes <- paramSet$seed.genes;
    data.org <- paramSet$data.org;
    # all convert to uniprot ID
    lines <- strsplit(nodeids, "\r|\n|\r\n")[[1]];
    lines<- sub("^[[:space:]]*(.*?)[[:space:]]*$", "\\1", lines, perl=TRUE);
    if(vismode != "network"){
        prot.anots <- .doGeneIDMapping(lines, id.type, data.org, "matrix");
        nodes2rm <- unique(prot.anots$accession);
    }else{
        prot.anots <- lines
        nodes2rm <- unique(lines);
    }

    # now find the overlap
    nodes2rm <- nodes2rm[nodes2rm %in% V(overall.graph)$name];
    g <- delete.vertices(overall.graph, nodes2rm);

    nodeList <- get.data.frame(g, "vertices");
    colnames(nodeList) <- c("Id", "Label");

    fast.write(nodeList, file="orig_node_list.csv", row.names=F);

    edgeList <- get.data.frame(g, "edges");
    edgeList <- cbind(rownames(edgeList), edgeList);
    colnames(edgeList) <- c("Id", "Source", "Target");
    fast.write(edgeList, file="orig_edge_list.csv", row.names=F);

    analSet <- DecomposeGraph(g,analSet);
    substats <- analSet$substats;
    if(!is.null(substats)){
        overall.graph <<- g;
        output <- c(length(seed.genes),length(seed.proteins), nrow(nodeList), nrow(edgeList), length(ppi.comps), substats);
    }else{
        output <- 0;
    }
    analSet$overall.graph <- g;
    return(saveSet(analSet,"analSet", output));
}

PrepareSubnetDownloads <- function(nm){
  g <- ppi.comps[[nm]];
  # need to update graph so that id is compound names rather than ID
  V(g)$name <- as.character(doID2LabelMapping(V(g)$name));
  saveNetworkInSIF(g, nm);
}

# adapted from BioNet
saveNetworkInSIF <- function(network, name){
    edges <- .graph.sif(network=network, file=name);
    sif.nm <- paste(name, ".sif", sep="");
    if(length(list.edge.attributes(network))!=0){
	edge.nms <- .graph.eda(network=network, file=name, edgelist.names=edges);
        sif.nm <- c(sif.nm, edge.nms);

    }
    if(length(list.vertex.attributes(network))!=0){
	node.nms <- .graph.noa(network=network, file=name);
        sif.nm <- c(sif.nm, node.nms);
    }
    # need to save all sif and associated attribute files into a zip file for download
    zip(paste(name,"_sif",".zip", sep=""), sif.nm);
}

# internal function to write cytoscape .sif file
.graph.sif <- function(network, file){
    edgelist.names <- igraph::get.edgelist(network, names=TRUE)
    edgelist.names <- cbind(edgelist.names[,1], rep("pp", length(E(network))), edgelist.names[,2]);
    write.table(edgelist.names, row.names=FALSE, col.names=FALSE, file=paste(file, ".sif", sep=""), sep="\t", quote=FALSE)
    return(edgelist.names)
}

doID2LabelMapping <- function(entrez.vec){
    if(exists("nodeListu")){
        hit.inx <- match(entrez.vec, nodeListu[, "Id"]);
        symbols <- nodeListu[hit.inx, "Label"];

        # if not gene symbol, use id by itself
        na.inx <- is.na(symbols);
        symbols[na.inx] <- entrez.vec[na.inx];
        return(symbols);
    }else{ # network upload
        return(entrez.vec);
    }
}

# internal method to write cytoscape node attribute files
.graph.noa <- function(network, file){
  all.nms <- c();
  attrib <- list.vertex.attributes(network)
  for(i in 1:length(attrib)){
    if(is(get.vertex.attribute(network, attrib[i]))[1] == "character")
    {
      type <- "String"
    }
    if(is(get.vertex.attribute(network, attrib[i]))[1] == "integer")
    {
      type <- "Integer"
    }
    if(is(get.vertex.attribute(network, attrib[i]))[1] == "numeric")
    {
      type <- "Double"
    }
    noa <- cbind(V(network)$name, rep("=", length(V(network))), get.vertex.attribute(network, attrib[i]))
    first.line <- paste(attrib[i], " (class=java.lang.", type, ")", sep="")
    file.nm <- paste(file, "_", attrib[i], ".NA", sep="");
    write(first.line, file=file.nm, ncolumns = 1, append=FALSE, sep=" ")
    write.table(noa, row.names = FALSE, col.names = FALSE, file=file.nm, sep=" ", append=TRUE, quote=FALSE);
    all.nms <- c(all.nms, file.nm);
  }
  return(all.nms);
}

# internal method to write cytoscape edge attribute files
.graph.eda <- function(network, file, edgelist.names){
  all.nms <- c();
  attrib <- list.edge.attributes(network)
  for(i in 1:length(attrib)){
    if(is(get.edge.attribute(network, attrib[i]))[1] == "character")
    {
      type <- "String"
    }
    if(is(get.edge.attribute(network, attrib[i]))[1] == "integer")
    {
      type <- "Integer"
    }
    if(is(get.edge.attribute(network, attrib[i]))[1] == "numeric")
    {
      type <- "Double"
    }
    eda <- cbind(cbind(edgelist.names[,1], rep("(pp)", length(E(network))), edgelist.names[,3]), rep("=", length(E(network))), get.edge.attribute(network, attrib[i]))
    first.line <- paste(attrib[i], " (class=java.lang.", type, ")", sep="");
    file.nm <- paste(file, "_", attrib[i], ".EA", sep="");
    write(first.line, file=file.nm, ncolumns=1, append=FALSE, sep =" ")
    write.table(eda, row.names = FALSE, col.names = FALSE, file=file.nm, sep=" ", append=TRUE, quote=FALSE);
    all.nms <- c(all.nms, file.nm);
  }
  return(all.nms);
}

SetCellCoexNumber <-function(num){
    cellCoexNumber <<- num;
}

SetDiffNetName <-function(nm){
    diffNetName <<- nm;
}

SetDiffFilter <-function(pct){
    diffPct <<- pct/10;
}

# note: hit.query, resTable must synchronize

#' Perform gene enrichment analysis or identify gene regulatory targets
#'
#' @param file.nm File name of result table to be exported in csv format, do not include file extension
#' @param fun.type Enrichment database type
#' @param IDs String of ids to be tested separated by "; "
#'
#' @export
#'
PerformNetEnrichment <- function(dataName="", file.nm, fun.type, IDs){
    dataSet <- readDataset(dataName);
    paramSet <- readSet(paramSet, "paramSet");
    data.org <- paramSet$data.org;
    # prepare query
    ora.vec <- NULL;
    if(ppi.net$db.type == 'ppi'){
        if(data.org == "ath"){
                idtype <- "entrez"
        }else if(data.org %in% c("bsu", "tbr", "cel", "dme", "eco", "pfa") & net.type == "string"){
                idtype <- "string"
        }else if(data.org %in% c("bta","dre","rno","gga") & net.type == "string"){
                idtype <- "entrez";
        }else if(data.org %in% c("hsa","mmu", "pae") & net.type %in% c("string","innate", "huri", "interactome")){
                idtype <- "entrez";
        }else if(data.org %in% c("hsa","mmu", "cel", "dme","sce") & net.type %in% c("irefinx", "rolland")){
                idtype <- "uniprot";
        }else if(data.org == "sce" & net.type == "string"){ # only for yeast
                idtype <- "embl_gene";
        }
        id.vec <- unlist(strsplit(IDs, "; "));
        if(idtype=="uniprot"){
            ora.vec <- .doGeneIDMapping(id.vec, "uniprot", data.org, "vec")
            names(ora.vec) <- id.vec;
        }else if(idtype=="embl_protein"){
            ora.vec <- .doGeneIDMapping(id.vec, "embl_protein", data.org, "vec")
            names(ora.vec) <- id.vec;
        }else if(idtype=="string"){
            ora.vec <- .doGeneIDMapping(id.vec, "string", data.org, "vec")
            names(ora.vec) <- id.vec;
        }else if(idtype=="embl_gene"){
            ora.vec <- .doGeneIDMapping(id.vec, "embl_gene", data.org, "vec")
            names(ora.vec) <- id.vec;
        }else{
            ora.vec <- id.vec;
            names(ora.vec) <- ora.vec;
        }

        #if nothing matches, try original ids;
        if(all(is.na(ora.vec))){
            ora.vec <- unlist(strsplit(IDs, "; "));
            names(ora.vec) <- ora.vec;
        }

    }else{ # net is tf/mir/drug, they already in entrez
        ora.vec <- unlist(strsplit(IDs, "; "));
        names(ora.vec) <- as.character(ora.vec);
    }

    if(fun.type %in% c("trrust", "encode", "jaspar", "mirnet", "met", "drugbank", "disease")){
        res <- PerformRegEnrichAnalysis(dataSet, file.nm, fun.type, ora.vec, "inverse");
    }else{
        res <- .performEnrichAnalysis(dataSet, file.nm, fun.type, ora.vec);
    }

    #if(checkEntrezMatches(unname(ora.vec)) > 0 && res == 0){
    #    res=2;
    #}
    return(res);
}


PlotCorHistogram <- function(imgNm){
    Cairo(file=imgNm, width=400, height=400, type="png", bg="white");
    library(ggplot2)
    cor <- E(overall.graph)$correlation
    p<-ggplot(data.frame(Correlation=abs(cor)), aes(x=Correlation)) + geom_histogram() + ggtitle("Correlation distribution")+
    theme(plot.title = element_text(hjust = 0.5));
    print(p);
    dev.off();
}

#' Filter interactions by tissue coexpression data
#'
#' @param type Name of database (encode, gtex, hpa)
#' @param tissue Selected tissue to filter (refer to web server interface for options, replace space with "." (i.e. Adipose.Tissue))
#'
#' @export
#'
FilterByTissue <- function(type, tissue){
    paramSet <- readSet(paramSet, "paramSet");
    seed.genes <- paramSet$seed.genes;
    all.nms <- V(overall.graph)$name;
    gene.nms <- all.nms[which(V(overall.graph)$Types %in% c("Gene", "Protein"))]

    tissue.mat <- queryFilterDB(type, data.org);
    hit.inx <- tissue.mat$Tissue %in% c(tissue, "All");

    tissue.genes <- tissue.mat[,1][!hit.inx];
    nodes2rm <- gene.nms[which(gene.nms %in% tissue.genes)]

    g <- simplify(delete.vertices(overall.graph, nodes2rm));

    nodeList <- get.data.frame(g, "vertices");
    nodeList <- nodeList[,1:2];
    colnames(nodeList) <- c("Id", "Label");

    fast.write(nodeList, file="orig_node_list.csv");
    nd.inx <- ppi.net$node.data[,1] %in% nodeList[,1];

    edgeList <- get.data.frame(g, "edges");
    edgeList <- edgeList[,1:2];
    colnames(edgeList) <- c("Source", "Target");
    fast.write(edgeList, file="orig_edge_list.csv");

    # update ppi.net
    ppi.net$order <- 1;
    ppi.net$node.data <- nodeList;
    ppi.net$edge.data <- edgeList;
    ppi.net <<- ppi.net;
    overall.graph <- g
    analSet <- DecomposeGraph(overall.graph,analSet);
    substats <- analSet$substats;
    if(!is.null(substats)){
        overall.graph <<- overall.graph;
        output <- c(length(seed.genes),length(seed.proteins), vcount(overall.graph), ecount(overall.graph), length(ppi.comps), substats);
    }else{
        output <- 0;
    }
    analSet$overall.graph <- overall.graph;
    return(saveSet(analSet, "analSet", output));
}

#' Filter interactions by correlation (only applicable to coexpression network)
#'
#' @param min.cor Correlation threshold (0.5 - 1)
#'
#' @export
#'
FilterNetByCor <- function( min.cor){
    paramSet <- readSet(paramSet, "paramSet");
    seed.genes <- paramSet$seed.genes;
    all.nms <- V(overall.graph)$name;
    edge.mat <- get.edgelist(overall.graph);
    cor <- E(overall.graph)$correlation;
    edges2rm.inx <-NULL;

    if(min.cor > 0.5){
        rm.inx <- cor <= min.cor;
    }

    overall.graph <- simplify(delete.edges(overall.graph, which(rm.inx)));
    current.msg <<- paste("A total of", length(which(rm.inx)) , "edges was reduced.");
    analSet <- DecomposeGraph(overall.graph,analSet);
    substats <- analSet$substats;
    if(!is.null(substats)){
        overall.graph <<- overall.graph;
        return(c(length(seed.genes),length(seed.proteins), vcount(overall.graph), ecount(overall.graph), length(ppi.comps), substats));
    }else{
        return(0);
    }
}

regEnrichment <- function(dataName, file.nm, fun.type, IDs, netInv){

  regBool <<- "false";
  res <- PerformNetEnrichment(dataName, file.nm, fun.type, IDs);
}

PerformRegEnrichAnalysis <- function(dataSet, file.nm, fun.type, ora.vec, netInv){
    if(!exists("my.reg.enrich")){ # public web on same user dir
        compiler::loadcmp("../../rscripts/networkanalystr/_utils_regenrich.Rc");    
    }
    return(my.reg.enrich(dataSet, file.nm, fun.type, ora.vec, netInv));
}

PrepareTheraNet <- function(net.nm, json.nm, theraids, regids, regnms){
  require("igraph")
  net.nm = gsub(".json", "", net.nm);
  
  if(is.null(net.nm)){
    net.nm <- names(ppi.comps)[1];
  }
  
  reg.count <- reg.count + 1;
  reg.nm <- paste("addedReg", reg.count, sep="");
  
  my.ppi <- ppi.comps[[net.nm]];
  net.nm <<- net.nm;
  json.nm <<- json.nm;
  theraids <<- theraids
  regids<<-regids;

  theraids <- strsplit(theraids, ";");
  theraids <- theraids[[1]];
  theraids <- strsplit(theraids, ",");
  
  regids <- strsplit(regids, ",");
  regids <- regids[[1]]
  regnms <- strsplit(regnms, ",");
  regnms <- regnms[[1]]
  regnms<<-regnms;
  regids<<-regids;
  my.ppi <- ppi.comps[[net.nm]];
               
  nms <- V(my.ppi)$name                           
  remove.inx <-regids %in% nms
  nodesToAdd <- regids[!remove.inx];
    if(length(nodesToAdd)>0){
      my.ppi <- my.ppi + vertices(nodesToAdd);
    }
  for(i in 1:length(regids)){
    for(j in 1:length(theraids[[i]])){
      my.ppi <- my.ppi + edge(regids[[i]], theraids[[i]][j]);
    }
  }
  if(!is.null(V(my.ppi)$moltype)){
    moltype.vec <- V(my.ppi)$moltype
  }else{
    moltype.vec <- rep("NA", length(V(my.ppi)))
  }
    nms <- V(my.ppi)$name
    reg.inx <- nms %in% regids
    moltype.vec[reg.inx] <- "regulator"
    my.ppi <- set_vertex_attr(my.ppi, "moltype", value = moltype.vec)

  edge.data <- data.frame(regids, regnms, rep("regulator", length(regids)) ,stringsAsFactors=FALSE);
  
  colnames(edge.data) <- c("Id", "Label", "Types");
  colnames(ppi.net$node.data) <- c("Id", "Label", "Types");
  ppi.net$node.data <<-rbind(ppi.net$node.data, edge.data);
  filenm <- paste(reg.nm, ".json", sep="");
  reg.count <<- reg.count

  my.ppi <- simplify(my.ppi);
  ppi.comps[[reg.nm]] <<- my.ppi;
  current.net.nm <<- reg.nm;
  ppi.comps <<- ppi.comps;
  UpdateSubnetStats();
  #net.info$reg.ids <<- regids;
  convertIgraph2JSON(reg.nm, filenm);

  return(filenm);
  
}

GetSelectedNetworks <- function(){
    paramSet <- readSet(paramSet, "paramSet");
    return(names(paramSet$nets));
}

#' Merge or identify shared portions of multiple networks
#'
#' @param type Type of integrative network, either intersect, union or unique
#'
#' @export
#'
ComputeIntegNet <- function(type){
    gObjs <- list();
    paramSet <- readSet(paramSet, "paramSet");
    data.org <- paramSet$data.org;
    seed.expr <- paramSet$seed.expr;
    seed.genes <- paramSet$seed.genes;

    for(i in 1:length(paramSet$nets)){
        ppi.net <<- paramSet$nets[[i]];
        CreateGraph();
        gObj <- overall.graph;
        gObjs[[i]] <- gObj;
    }

    if(type == "intersect"){
       g <- do.call(igraph::intersection, gObjs)
    }else if(type == "union"){
       g <- do.call(igraph::union, gObjs)
    }else{
        gObjs.union <- list()
        gObj.unique <- "";
        for(i in 1:length(paramSet$nets)){
            gObj <- gObjs[[i]];
            nm <- names(paramSet$nets)[i]
            if(nm != type){
                gObjs.union[[nm]] <- gObj
            }else{
                gObj.unique <- gObj
            }
        }
        union.graph <- do.call(igraph::union, gObjs.union)
        g <- igraph::difference(gObj.unique, union.graph)  
    }

    nms = V(g)$name
    labels = rep("NA", length(V(g)$name))
    types = rep("NA", length(V(g)$name))

    for(i in 1:length(paramSet$nets)){
        ppi.net <- paramSet$nets[[i]];
        node.ids <- ppi.net$node.data[,1]
        node.nms <- ppi.net$node.data[,2]
        node.types <- ppi.net$node.data[,3]
        hit.inx <- nms %in% node.ids;
        match.inx <- which(node.ids %in% nms)
        labels[hit.inx] <- node.nms[match.inx]
        types[hit.inx] <- node.types[match.inx]
    }
    g <- set.vertex.attribute(g, "Label", index = V(g), value = labels);
    g <- set.vertex.attribute(g, "Types", index = V(g), value = types);

    node.res <- data.frame(Id=V(g)$name, Label=V(g)$Label, Types=V(g)$Types)
    edge.mat <- as_edgelist(g);
    edge.res <- data.frame(Source=edge.mat[,1], Target=edge.mat[,2])
    
    ppi.net$db.type <- "multi"
    ppi.net$node.data <- node.res;
    ppi.net$edge.data <- edge.res;

    node.list <- ppi.net$node.data;
    edge.list <- ppi.net$edge.data;
        
    overall.graph <- g

 # add node expression value
    if(ppi.net$db.type == "ppi"){
        ## Temp fix seed.expr all in entrez?
        ## all converted to new IDs based on PPI from the given organism
        newIDs <- doPpiIDMapping(sqlite.path, names(seed.expr), data.org);
    }else{# all entrez in mirNet
        newIDs <- names(seed.expr);
    }
    match.index <- match(V(overall.graph)$name, newIDs);
    expr.vals <- seed.expr[match.index];
    expr.vec <- expr.vals;
    names(expr.vec)<- V(overall.graph)$name;
    expr.vec <<- expr.vec[!is.na(expr.vec)]
    overall.graph <- set.vertex.attribute(overall.graph, "abundance", index = V(overall.graph), value = expr.vals);

    hit.inx <- seed.proteins %in% node.list[,1];
    seed.proteins <<- seed.proteins[hit.inx];

    analSet <- DecomposeGraph(overall.graph,analSet);
    substats <- analSet$substats;
    overall.graph <<- overall.graph;
    ppi.net <<- ppi.net
    if(!is.null(substats)){
        output <- c(length(seed.genes), length(seed.proteins), nrow(node.list), nrow(edge.list), length(ppi.comps), substats);
    }else{
        output <- 0;
    } 
    analSet$overall.graph <- overall.graph;
    analSet$ppi.net <- ppi.net;
    return(saveSet(analSet,"analSet", output));
}

ClearNetSelection <- function(type){
    paramSet <- readSet(paramSet, "paramSet");
    if(type %in% names(paramSet$nets)){
        inx = which( names(paramSet$nets) == type)
         paramSet$nets[[inx]] = NULL;
    }
    saveSet(paramSet, "paramSet");
    return(1)
}

queryFilterDB <- function(type, org){
  paramSet <- readSet(paramSet, "paramSet");
  sqlite.path <- paramSet$sqlite.path;
  require('RSQLite');
  conv.db <- dbConnect(SQLite(), paste(sqlite.path, "tissue_filter.sqlite", sep="")); 
  db.map <- dbReadTable(conv.db, paste0(data.org,"_",type));
  dbDisconnect(conv.db); cleanMem();

  return(db.map)
}
xia-lab/NetworkAnalystR documentation built on Jan. 10, 2023, 4:47 a.m.