R/read.gpml.R

Defines functions read.gpml

Documented in read.gpml

##' This function parses a WikiPathways GPML file into a list format
##' for enrichment analysis.
##'
##' The WikiPathways ID is taken from the file name: the base name is split
##' on underscores and the second-to-last token is used (for example
##' \code{WP3622} in \code{At_AtMetExpress_overview_WP3622_89229.gpml}).
##' The IDs are the non-empty \code{ID} attributes of the \code{Xref} child
##' of every \code{DataNode} element; DataNodes are not filtered by type.
##'
##' @title parse Wikipathways gpml file to a list format
##'
##' @param file Path to a \code{*.gpml} file. If a vector is supplied, only
##'     its first element is used.
##' @param minnum The minimal number of DataNode Xref IDs the pathway must
##'     contain for a result to be returned (Default 5).
##' @return An unnamed list of three elements: the WikiPathways ID derived
##'     from the file name, the \code{Name} attribute of the parsed top-level
##'     element, and a character vector of Xref IDs. If fewer than
##'     \code{minnum} IDs are found, a message is printed and that message
##'     string is returned invisibly instead.
##' @export
##' @examples
##'
##' file <- system.file('extdata', 
##'     'At_AtMetExpress_overview_WP3622_89229.gpml', package = 'MSEAp')
##' wikip <- read.gpml(file)
##'
##' @author Kozo Nishida
## Simple function to read in a Wikipathways .gpml file and return a list of
## metabolite set
read.gpml <- function(file, minnum = 5) {
    # Fail early with a clear message instead of an obscure error from
    # `if (NA)` or strsplit() when `file` is NULL or not a character vector.
    if (!is.character(file) || length(file) < 1L) 
        stop("'file' must be a character string naming a .gpml file")
    
    # Use the first element consistently for the extension check, the ID
    # extraction and the parsing.
    file <- file[[1]]
    if (is.na(file) || !grepl("\\.gpml$", file)) 
        stop("Wikipathways Metabolite set information must be a .gpml file")
    
    # basename() strips the directory part using the platform's path rules.
    name_parts <- strsplit(basename(file), "_", fixed = TRUE)[[1]]
    # Second-to-last underscore-separated token of the file name.
    wikipId <- utils::tail(name_parts, n = 2)[[1]]
    
    doc <- XML::xmlTreeParse(file)
    gpml <- XML::xmlToList(doc)
    
    # Return the Xref ID of one converted DataNode, or NULL when the node has
    # no usable Xref/ID, so a single incomplete node does not abort the parse.
    xref_id <- function(node) {
        if (!is.list(node)) 
            return(NULL)
        xref <- node[["Xref"]]
        if (is.null(xref) || !("ID" %in% names(xref))) 
            return(NULL)
        xref[["ID"]]
    }
    
    data_nodes <- gpml[names(gpml) == "DataNode"]
    ids <- unlist(lapply(data_nodes, xref_id))
    # Drop missing and empty IDs.
    ids <- ids[!is.na(ids) & nzchar(ids)]
    
    if (length(ids) < minnum) {
        # NOTE(review): kept for backward compatibility -- the message is
        # printed and the same string is returned invisibly (not NULL).
        return(invisible(
            print("This GPML does not have the minimum number of metabolites.")
        ))
    }
    
    list(wikipId, gpml$.attrs[["Name"]], ids)
}
afukushima/MSEAp documentation built on Sept. 18, 2019, 7:12 p.m.