R/networkBuilder.R

Defines functions polishNetwork filterNetwork buildNetwork convertID uniqueExprsData

Documented in buildNetwork convertID filterNetwork polishNetwork uniqueExprsData

#' unique the microarray data
#' 
#' @description get unique the microarray data for each gene id.
#' 
#' @param exprsData dataset of expression comparison data
#' @param method method must be Max, Median or Min
#' @param condenseName column names to be condensed
#' 
#' @return a dataframe of expression data without duplicates
#' @keywords network
#' @examples 
#' data("example.data")
#' example.microarrayData<-uniqueExprsData(example.data$example.microarrayData,
#'                                         method="Max", condenseName='logFC')
#' @export
#' @importFrom plyr . ddply
#' 
uniqueExprsData<-function(exprsData, method='Max', condenseName='logFC'){
    if(!(method %in% c("Max", "Median", "Min"))){
        stop("method must be Max, Median or Min")
    }
    if(!GeneNetworkBuilder:::checkCName("symbols", exprsData)){
        stop("symbols is not a valide colname of exprsData")
    }
    if(!GeneNetworkBuilder:::checkCName(condenseName, exprsData)){
        stop(paste(condenseName," is not a valide colname of exprsData"))
    }
    if(!is.data.frame(exprsData)){
        exprsData<-as.data.frame(exprsData)
    }
    if(!is.numeric(exprsData[ , condenseName])){
        stop(paste("class of", condenseName, "is not a numeric column"))
    }
    exprsData<-switch(method,
                      Max   =plyr::ddply(exprsData, plyr::.(symbols), 
                                         GeneNetworkBuilder:::getMax, 
                                         condenseName),
                      Median=plyr::ddply(exprsData, plyr::.(symbols), 
                                         GeneNetworkBuilder:::getMedian, 
                                         condenseName),
                      Min   =plyr::ddply(exprsData, plyr::.(symbols),
                                         GeneNetworkBuilder:::getMin, 
                                         condenseName)
                           )
    exprsData
}

#' convert gene IDs by id map
#' @description For same gene, there are multple gene alias. 
#' In order to eliminate the possibility of missing any connections, 
#' convert the gene symbols to unique gene ids is important. 
#' This function can convert the gene symbols to unique ids and 
#' convert it back according a giving map.
#' @param x a matrix or dataframe contain the columns to be converted.
#' @param IDsMap a character vector of the identifier map
#' @param ByName the column names to be converted
#' @return a matrix or dataframe with converted gene IDs
#' @examples 
#' data("ce.IDsMap")
#' bind<-cbind(from="daf-16", to=c("fkh-7", "hlh-13", "mxl-3", "nhr-3", "lfi-1"))
#' convertID(toupper(bind), ce.IDsMap, ByName=c("from", "to"))
#' @keywords convert
#' @export
#' 
convertID<-function(x, IDsMap, ByName=c("from", "to")){
    if((!is.character(IDsMap)) | (is.null(IDsMap))){
        stop("invalide IDsMap")
    }
    for(i in 1:length(ByName)){
        if(!GeneNetworkBuilder:::checkCName(ByName[i],x)){
            stop(paste(ByName[i],"is not a valide colname of x"))
        }
        x[,ByName[i]]<-IDsMap[as.character(x[,ByName[i]])]
    }
    x
}

#' construct the regulatory network
#' @description Get all the connections of interesting genes from regulatory map.
#' @param TFbindingTable a matrix or data.frame with interesting genes. 
#'                       Column names must be 'from', 'to'
#' @param interactionmap Transcription regulatory map. 
#'                       Column names of interactionmap must be 'from','to'
#' @param level Depth of node path
#' 
#' @return a dataframe or matrix of all the connections of interesting genes
#' @keywords network
#' @examples 
#' data("ce.interactionmap")
#' data("example.data")
#' xx<-buildNetwork(example.data$ce.bind, ce.interactionmap, level=2)
#' @export

buildNetwork<-function(TFbindingTable, interactionmap, level=3){
    GeneNetworkBuilder:::checkMap(interactionmap, TFbindingTable)
    if(level>0){
        y<-interactionmap[interactionmap[ , "from"] %in% unique(as.character(TFbindingTable[ , "to"])), 1:2,drop=F]
        y<-unique(y)
        z<-y[!(y[,"to"] %in% TFbindingTable[,"to"]), , drop=F]
        nrow1<-nrow(TFbindingTable)
        TFbindingTable<-rbind(TFbindingTable, y)
        TFbindingTable<-unique(TFbindingTable)
        level<-level-1
        if(level>0){
            nrow2<-nrow(TFbindingTable)
            if(nrow2>nrow1){
                y<-buildNetwork(z, interactionmap, level)
                TFbindingTable<-rbind(TFbindingTable, y)
            }
            TFbindingTable<-unique(TFbindingTable)
        }
    }
    TFbindingTable
}

#' filter the regulatory network table by expression profile
#' @description verify every nodes in the regulatory network by expression profile
#' @param rootgene name of root gene. It must be the ID used in xx regulatory network
#' @param sifNetwork Transcription regulatory network table. 
#'                   Column names of xx must be 'from','to'
#' @param exprsData dataset of expression comparison data, 
#'                  which should contain column logFC and column given by exprsDataByName
#' @param mergeBy The column name contains ID information used to merge with 
#'                'to' column of sifNetwork in exprsData
#' @param miRNAlist vector of microRNA ids.
#' @param remove_miRNA remove miRNA from the network or not. 
#'                     Bool value, TRUE or FALSE
#' @param tolerance maximum number of unverified nodes in each path
#' @param cutoffPVal cutoff p value of valid differential expressed gene/miRNA
#' @param cutoffLFC cutoff log fold change value of a valid differential 
#'                  expressed gene/miRNA
#' @param minify Only keep the best path if multiple paths exists for single node? 
#'               Bool value, TRUE or FALSE
#' @param miRNAtol take miRNA expression into account for tolerance calculation. 
#'                 Bool value, TRUE or FALSE
#' @return a dataframe of filtered regulatory network by expression profile
#' @import Rcpp
#' @useDynLib GeneNetworkBuilder
#' @export
#' 
#' @examples 
#' data("ce.miRNA.map")
#' data("example.data")
#' data("ce.interactionmap")
#' data("ce.IDsMap")
#' sifNetwork<-buildNetwork(example.data$ce.bind, ce.interactionmap, level=2)
#' cifNetwork<-filterNetwork(rootgene=ce.IDsMap["DAF-16"], sifNetwork=sifNetwork, 
#'                          exprsData=uniqueExprsData(example.data$ce.exprData, "Max", condenseName='logFC'),
#'                          mergeBy="symbols",
#'                          miRNAlist=as.character(ce.miRNA.map[ , 1]), tolerance=1)
#' @keywords network
#' 
filterNetwork<-function(rootgene, sifNetwork, exprsData, mergeBy="symbols", miRNAlist, remove_miRNA=FALSE,
                    tolerance=0, cutoffPVal=0.01, cutoffLFC=0.5, minify=TRUE, miRNAtol=FALSE)
{
    GeneNetworkBuilder:::checkMCName(sifNetwork)
    if(!missing(miRNAlist)){
        if(!is.vector(miRNAlist)){
            stop("miRNAlist should be a vector")
        }
    }
    if(!GeneNetworkBuilder:::checkCName(mergeBy, exprsData)){
        stop(paste(mergeBy, "is not a column name of exprsData"))
    }
    if(!GeneNetworkBuilder:::checkCName("logFC", exprsData)){
        stop("logFC is not a column name of exprsData")
    }
    if(!is.numeric(exprsData[ , "logFC"])){
        stop("class of exprsData[ , \"logFC\"] is not a numeric column")
    }
    if(!GeneNetworkBuilder:::checkCName("P.Value", exprsData)){
        stop("P.Value is not a column name of exprsData")
    }
    if(!is.numeric(exprsData[ , "P.Value"])){
        stop("class of exprsData[ , \"P.Value\"] is not a numeric column")
    }
    if(!is.numeric(cutoffLFC)){
        stop("cutoffLFC is not a numeric")
    }
    if(!is.numeric(cutoffPVal)){
        stop("cutoffPVal is not a numeric")
    }
    if(any(duplicated(exprsData[,mergeBy]))){
        stop("expresData has multiple logFC for same ID. Please try ?uniqueExprsData")
    }
    if(!is.logical(minify)){
        stop("minify is not a logical")
    }
    if(!is.logical(miRNAtol)){
        stop("miRNAtol is not a logical")
    }
    tolerance<-round(tolerance)
    cifNetwork<-merge(sifNetwork, exprsData, by.x="to", by.y=mergeBy, all.x=TRUE)
##   convert NA to 0 for logFC
    cifNetwork.logFC<-cifNetwork[,"logFC"]
    cifNetwork.logFC[is.na(cifNetwork.logFC)]<-0.0
    cifNetwork.pValue<-cifNetwork[,"P.Value"]
    cifNetwork.pValue[is.na(cifNetwork.pValue)]<-0.0
##   label microRNA
    cifNetwork$miRNA<-FALSE
    cifNetwork$dir<-2
    if(!missing(miRNAlist)){
        if(length(miRNAlist)>0){
            cifNetwork$miRNA<-ifelse(cifNetwork$to %in% miRNAlist, TRUE, FALSE)
            cifNetwork$dir<-ifelse(cifNetwork$from %in% miRNAlist, 0, 2)
        }
    }
##   remove micorRNA
    if(remove_miRNA){
        cifNetwork<-cifNetwork[!cifNetwork$miRNA, ]
        cifNetwork.logFC<-cifNetwork.logFC[!cifNetwork$miRNA]
    }
    rootlogFC<-exprsData[exprsData[ , mergeBy] == rootgene, "logFC"]
    rootlogFC<-rootlogFC[!is.na(rootlogFC)]
    rootlogFC<-ifelse(length(rootlogFC) < 1, 0.0, rootlogFC[1])
    cifNetwork.list <- .Call("filterNodes",
                         as.character(cifNetwork$from), 
                         as.character(cifNetwork$to), 
                         cifNetwork$miRNA, 
                         cifNetwork.logFC,
                         cifNetwork.pValue,
                         cifNetwork$dir,
                         nrow(cifNetwork), 
                         rootgene[1],
                         rootlogFC[1],
                         tolerance[1],
                         minify[1],
                         miRNAtol[1],
                         cutoffLFC[1],
                         cutoffPVal[1]
                         )
    cifNetwork.list <- do.call(rbind, lapply(names(cifNetwork.list),
                                           function(.name, .ele){
                                              if(length(.ele[[.name]])>0){
                                                cbind(from=.ele[[.name]], to=.name)
                                              }else{
                                                cbind(from=NA, to=.name)
                                              }
                                            },
                                           cifNetwork.list)
                            )
    cifNetwork <- merge(cifNetwork, cifNetwork.list)
    cifNetwork
}

#' generate an object of grahpNEL to represent the regulation network
#' @description generate an object of grahpNEL to represent the regulation network. 
#' Each node will has three attributes: size, borderColor and fill.
#' @param cifNetwork dataframe used to draw network graph. column names of 
#'                   cifNetwork must contain 'from', 'to', 'logFC' and 'miRNA'
#' @param nodesDefaultSize nodes default size
#' @param useLogFCAsWeight how to determine the weights for each nodes. 
#'                         If TURE, use logFC value as weight.
#'                         If FALSE, use constant 1 as weight.
#' @param nodecolor a character vector of color set. 
#'                  The node color will be mapped to color set by log fold change
#' @param nodeBg background of node
#' @param nodeBorderColor a list of broder node color set. 
#'                        nodeBorderColor's element must be gene and miRNA
#' @param edgelwd the width of edge
#' @param ... any parameters can be passed to \link[graph:graph.par]{graph.par}
#' @return An object of graphNEL class of the network
#' @import graph
#' @export
#' @examples 
#' data("ce.miRNA.map")
#' data("example.data")
#' data("ce.interactionmap")
#' data("ce.IDsMap")
#' sifNetwork<-buildNetwork(example.data$ce.bind, ce.interactionmap, level=2)
#' cifNetwork<-filterNetwork(rootgene=ce.IDsMap["DAF-16"], sifNetwork=sifNetwork, 
#'                          exprsData=uniqueExprsData(example.data$ce.exprData, "Max", condenseName='logFC'),
#'                          mergeBy="symbols",
#'                          miRNAlist=as.character(ce.miRNA.map[ , 1]), tolerance=1)
#' gR<-polishNetwork(cifNetwork)
#' ##	browseNetwork(gR)
#' @keywords network
#' 
polishNetwork<-function(cifNetwork, 
                        nodesDefaultSize=48, useLogFCAsWeight=FALSE, 
                        nodecolor=colorRampPalette(c("green", "yellow", "red"))(5), nodeBg="white",
                        nodeBorderColor=list(gene='darkgreen',miRNA='darkblue'), 
                        edgelwd=0.25, ...)
{
  cname<-c("from", "to")
  if(!is.data.frame(cifNetwork)){
    stop("cifNetwork should be a data.frame")
  }
  if(length(intersect(c("from", "to", "logFC", "miRNA"), colnames(cifNetwork)))<4){
    stop("colnames of cifNetwork must contain 'from', 'to', 'logFC' and 'miRNA'");
  }
  if(length(nodecolor) < 2){
    stop("nodecolor should have more than 1 elements")
  }
  if(length(setdiff(c('gene', 'miRNA'), names(nodeBorderColor))) > 0){
    stop("nodeBorderColor's element must be 'gene' and 'miRNA'")
  }
  cifNetwork<-cifNetwork[!duplicated(cifNetwork[,cname]), ]
  edge<-cifNetwork[cifNetwork$from!="" & cifNetwork$to!="", cname]
  node<-c(as.character(unlist(edge)))
  node<-node[!is.na(node)]
  node<-unique(node)
  if(length(node) <= 1){
    stop("Can not built network for the inputs. Too less connections.")
  }
  edL<-split(cifNetwork[,c("to","logFC")],cifNetwork[,"from"])
  edL<-lapply(node,function(.ele,edL,useLogFCAsWeight){
    .ele<-edL[[.ele]]
    if(is.null(.ele)){
      .ele<-list(edges=c(),weights=c())
    }else{
      if(useLogFCAsWeight){
        .ele<-list(edges=as.character(.ele$to),weights=abs(.ele$logFC))
      }else{
        .ele<-list(edges=as.character(.ele$to),weights=rep(1,length(.ele$to)))
      }
    }
  },edL,useLogFCAsWeight)
  names(edL)<-node
  gR<-new("graphNEL", nodes=node, edgeL=edL, edgemode="directed")
  ## set node default data
  nodeDataDefaults(gR, attr="label") <- NA
  nodeDataDefaults(gR, attr="logFC") <- 0
  nodeDataDefaults(gR, attr="miRNA") <- FALSE
  for(i in node) {
    nodeData(gR, n=i, attr="label") <- i
    nodeData(gR, n=i, attr="logFC") <- cifNetwork[match(i, cifNetwork$to), "logFC"]
    nodeData(gR, n=i, attr="miRNA") <- cifNetwork[match(i, cifNetwork$to), "miRNA"]
  }
  ## set node size
  nodeDataDefaults(gR, attr="size")<-nodesDefaultSize
  for(i in unique(as.character(cifNetwork$from))){
    nodeData(gR, n=i, attr="size")<-ceiling(5*length(edL[[i]]$edges)/length(node)) * nodesDefaultSize/2 + nodesDefaultSize
  }
  ## set node color    
  nodeDataDefaults(gR, attr="fill")<-nodeBg
  lfcMax<-ceiling(max(abs(cifNetwork[!is.na(cifNetwork$logFC),"logFC"])))
  lfcSeq<-seq(-1*lfcMax,lfcMax,length.out=length(nodecolor)+1)
  colset<-unique(cifNetwork[!is.na(cifNetwork$logFC),c("to","logFC")])
  colset<-apply(colset, 1, function(.ele,color,lfcSeq){
    id=0
    for(i in 1:length(lfcSeq)){
      .elelfc<-as.numeric(as.character(.ele[2]))
      if(lfcSeq[i]<=.elelfc & lfcSeq[i+1]>=.elelfc){
        id=i
        break
      }
    }
    if(id!=0){
      c(.ele,nodecolor[id])
    }else{
      c(.ele,nodeBg)
    }
  },nodecolor,lfcSeq)
  colors<-colset[3,]
  names(colors)<-colset[1,]
  for(i in names(colors)){
    nodeData(gR, n=i, attr="fill")<-colors[i]
  }
  colset<-node[!node %in% names(colors)]
  names(colset)<-colset
  colset<-nodeBg
  colors<-c(colors,colset)
  ## set node border color    
  miRNAs<-unique(as.character(cifNetwork[cifNetwork[,"miRNA"],"to"]))
  nodeBC<-character(length(node))
  names(nodeBC)<-node
  nodeDataDefaults(gR, attr="borderColor")<-nodeBorderColor$gene
  for(i in node) {
    if(i %in% miRNAs){
      nodeBC[i]<-nodeBorderColor$miRNA
      nodeData(gR, n=i, attr="borderColor")<-nodeBorderColor$miRNA
    }else{
      nodeBC[i]<-nodeBorderColor$gene
    }
  }
  graph::nodeRenderInfo(gR) <- list(col=nodeBC, fill=colors, ...)
  graph::edgeRenderInfo(gR) <- list(lwd=edgelwd)
  gR
}

Try the GeneNetworkBuilder package in your browser

Any scripts or data that you put into this service are public.

GeneNetworkBuilder documentation built on Nov. 17, 2017, 12:12 p.m.