# R/computeNwWordCloud.R

#'Compute wordCloud of the input network
#'@description compute wordCloud of the network nodes.
#'The input network is generated from any function such as \code{\link{computeSimilarity}}, \code{\link{computeCorrelation}}, \code{\link{computeParCorrelation}}, \code{\link{computeSubnetwork}},
#'\code{\link{fetchHetNetwork}}, \code{\link{fetchHetNetworkByGID}}, \code{\link{fetchNetwork}} and \code{\link{fetchNetworkByGID}}.
#'The function wraps around the main steps of \pkg{\link{tm}} to wordCloud.
#'@usage computeNwWordCloud(edgelist, nodelist, annotation, internalid)
#'@param edgelist a data frame of edges contains at least a source column (1st column) and a target column (2nd column).
#'@param nodelist a data frame of nodes contains at least two columns of node attributes.
#'1st column is id or neo4j id, 2nd column is id or grinn id. The 2nd column is used for Mesh annotation.
#'@param annotation a string specifying the annotation type e.g. pathway (default) and mesh. Pathway annotation requires the database.
#'Mesh annotation doesn't require the database but it is available for PubChem compounds only.
#'@param internalid a logical value indicating whether the network nodes are neo4j ids, if TRUE (default). If not, the network nodes are expected to be any ids.
#'See details and see \code{\link{convertId}} for how to convert ids. It has no effect on Mesh annotation.
#'@details The database uses two id systems. The neo4j id is a numeric, internal id automatically generated by the database system.
#'The grinn id (gid) is an id system of Grinn database that uses main ids of standard resources
#'i.e. ENSEMBL for genes (e.g. ENSG00000139618), UniProt for proteins (e.g. P0C9J6), PubChem CID for compounds (e.g. 5793), KEGG for pathways (e.g. hsa00010).
#'@return list of data frame of nodes, edges, wordcloud and pairs. The pairs data frame contains annotation pairs. The data frame of wordcloud contains the following components:
#'
#'\code{rank} = rank sort by freq
#'
#'\code{id} = annotation id or annotation neo4j id
#'
#'\code{gid} = annotation id or annotation grinn id
#'
#'\code{nodename} = annotation name
#'
#'\code{nodelabel} = annotation type
#'
#'\code{nodexref} = cross references
#'
#'\code{freq} = frequency or number of input entities in each annotation term
#'
#'\code{membername} = list of names of the input nodes that are members of the annotation term
#'
#'Return list of empty data frame if error or found nothing.
#'@author Kwanjeera W \email{kwanich@@ucdavis.edu}
#'@references http://www.sthda.com/english/wiki/text-mining-and-word-cloud-fundamentals-in-r-5-simple-steps-you-should-know
#'@seealso \pkg{\link{tm}}, \pkg{\link{wordcloud}}
#'@examples
#'#simnw <- computeSimilarity(c(1110,10413,196,51,311,43,764,790)) #compute similarity network for given pubchem compounds
#'#result <- computeNwWordCloud(simnw$edges, simnw$nodes, annotation="mesh", internalid=FALSE)
#'#wordcloud::wordcloud(words = result$wordcloud$nodename, freq = result$wordcloud$freq, scale=c(2,.1),min.freq = 1,max.words=50, random.order=FALSE, rot.per=0.5, colors=RColorBrewer::brewer.pal(8, "Dark2"))
#'#barplot(result$wordcloud$freq[1:10], las = 2, names.arg = result$wordcloud$nodename[1:10], col ="lightblue", main ="Most frequent words", ylab = "Word frequencies")
#'@export
computeNwWordCloud <- function(edgelist, nodelist, annotation="pathway", internalid = TRUE) {
  # S3 generic: dispatches on the class of 'edgelist' (the first argument).
  UseMethod("computeNwWordCloud")
}
#'@export
computeNwWordCloud.default <- function (edgelist, nodelist, annotation="pathway", internalid = TRUE){
  # Default method: query annotation terms (pathway or Mesh) for every node in
  # 'nodelist', count how many input nodes fall under each term and return
  # list(nodes, edges, wordcloud, pairs). Any error, a missing database or an
  # empty query result yields a list of four empty data frames instead.

  # Single definition of the "nothing to report" result used by every fallback path.
  emptyResult <- function() {
    list(nodes=data.frame(), edges=data.frame(), wordcloud=data.frame(), pairs=data.frame())
  }

  # Value of column 'col' in row 'i' of nodelist, as a plain character scalar.
  # Rows are read column-wise on purpose: apply() on a data frame goes through
  # as.matrix(), which format()s numeric columns to a common width and would
  # hand space-padded ids (e.g. "  51") to the query functions.
  rowValue <- function(nodes, col, i) {
    if (!col %in% names(nodes)) {
      return(NA_character_) # same value apply() gave for a missing column
    }
    as.character(nodes[[col]][i])
  }

  # Run 'queryOne(i, nodes)' once per node row and collect the results in a list.
  # A zero-row nodelist gives an empty list, so no query is issued at all.
  queryNodes <- function(queryOne) {
    nodes <- as.data.frame(nodelist, stringsAsFactors = FALSE)
    lapply(seq_len(nrow(nodes)), function(i) queryOne(i, nodes))
  }

  # Sort by frequency (highest first) and number the rows.
  rankByFreq <- function(wc) {
    wc <- wc[order(wc$freq, decreasing = TRUE),]
    wc$rank <- seq_len(nrow(wc))
    wc
  }

  # Add the 'membername' list column: names of the input nodes under each term.
  # ddply() returns one row per 'source' sorted by source, whereas 'wc' is
  # sorted by freq, so the lists are aligned through wc$id when every id can be
  # matched. NOTE(review): assumes wc$id holds the same ids as the 'source'
  # column of 'pairs' -- confirm against callWordCloud(). If that does not
  # hold, the original positional assignment is used.
  addMemberNames <- function(wc, pairs) {
    meminfo <- merge(pairs, nodelist, by.x='target', by.y='id')
    members <- plyr::ddply(meminfo,c('source'),plyr::summarise,membername=list(nodename))
    idx <- if ("id" %in% names(wc)) match(wc$id, members$source) else NA
    if (length(idx) == nrow(wc) && !anyNA(idx)) {
      wc$membername <- members$membername[idx]
    } else {
      wc$membername <- members$membername
    }
    wc
  }

  out <- tryCatch(
    {
      tmparg <- try(annotation <- match.arg(tolower(annotation), c("pathway","mesh"), several.ok = FALSE), silent = TRUE)
      if (inherits(tmparg, "try-error")) {
        stop("argument 'annotation' is not valid, choose one from the list: pathway,mesh")
      }
      if(annotation == 'pathway' && foundDb()){#pathway wordcloud
        cat("Querying database ...\n")
        if(internalid){
          #query annotation pairs by neo4j id
          annols <- queryNodes(function(i, nodes) {
            fetchNetwork(to=rowValue(nodes, "id", i), fromtype="pathway", totype = rowValue(nodes, "nodelabel", i), reltype = "ANNOTATION")
          })
        }else{
          #query annotation pairs by grinn id
          annols <- queryNodes(function(i, nodes) {
            fetchNetworkByGID(to=rowValue(nodes, "gid", i), fromtype="pathway", totype = rowValue(nodes, "nodelabel", i), reltype = "ANNOTATION")
          })
        }
        if(!is.null(unlist(annols))){#found annotation
          annonws <- combineNetworks(annols) #combine annotation pairs
          wc <- callWordCloud(edgelist = annonws$edges, nodelist = annonws$nodes) #compute wordcloud
          wc <- rankByFreq(wc)
          wc <- wc[,c(ncol(wc),1:(ncol(wc)-1))] #rearrange columns: rank first
          wc <- addMemberNames(wc, annonws$edges)
          list(nodes=nodelist, edges=edgelist, wordcloud=wc, pairs=annonws$edges) #output
        }
        else{#no annotation found
          emptyResult()
        }
      }else if(annotation == 'mesh'){#mesh wordcloud
        cat("Connecting PubChem ...\n")
        #query annotation pairs
        annols <- queryNodes(function(i, nodes) callMesh(pcid=rowValue(nodes, "gid", i)))
        if(!is.null(unlist(annols))){#found annotation
          annonws <- combineNetworks(annols) #combine annotation pairs
          #format edge, change gid to id, fix edge row order
          annopair <- dplyr::right_join(nodelist[,1:2],annonws$edges[,1:2],by=c('gid' = 'target'))[,c(3,1)]
          colnames(annopair) <- c('source','target')
          wc <- callWordCloud(edgelist = annopair, nodelist = annonws$nodes) #compute wordcloud
          wc <- rankByFreq(wc)
          #rearrange columns: rank first; the index arithmetic keeps only part of the
          #trailing columns (exact layout depends on callWordCloud's output)
          wc <- wc[,c(ncol(wc),1:(ncol(wc)-5),(ncol(wc)-2),(ncol(wc)-1))]
          wc <- addMemberNames(wc, annopair)
          list(nodes=nodelist, edges=edgelist, wordcloud=wc, pairs=annopair) #output
        }
        else{#no annotation found
          emptyResult()
        }
      }else{#pathway requested but foundDb() returned FALSE
        cat('Error: No database installed, returning no data ..\n')
        emptyResult()
      }
    },error=function(e) {
      message(e)
      cat("\nError: RETURN no data ..\n")
      emptyResult()
    })
  return(out)
}
# kwanjeeraw/metabox documentation built on May 20, 2019, 7:07 p.m.