R/read.gmt.R

Defines functions read.gmt

Documented in read.gmt

##' This function parses a gmt file into a list format
##' for enrichment analysis.
##'
##' Each non-empty line of the file is expected to hold, separated by TABs,
##' a set identifier, a set name and then one or more compound identifiers.
##' Only the compound fields (third field onwards) are searched for
##' identifiers of the requested format; empty lines are skipped.
##'
##' @title parse gmt file to a list format
##'
##' @param file *.gmt file (a TAB-delimited file)
##' @param format select the compound DB-IDs to keep: 'KEGG' (C + 5 digits),
##'   'HMDB' (HMDB + 5 or 7 digits), 'KNApSAcK' (C + 8 digits), or
##'   'general' to keep every compound field unchanged
##' @return a list with one element per metabolite set; each element is a
##'   list holding the set identifier, the set name and a character vector
##'   of the selected compound identifiers. An error is raised if a set
##'   contains no compound of the requested format.
##' @export
##' @examples
##'
##' file <- system.file('extdata', 'sample_SMPDB.gmt', package = 'MSEAp')
##' smp <- read.gmt(file)
##'
##' @author Atsushi Fukushima
## Simple function to read in a .gmt file and return a list of metabolite set
read.gmt <- function(file, format = "KEGG") {
    if (!grepl("\\.gmt$", file)[1]) 
        stop("Metabolite set information must be a .gmt file")
    
    ## Patterns for the database-specific identifier formats. HMDB accepts
    ## both the legacy 5-digit and the current 7-digit accession numbers.
    id.patterns <- c(KEGG = "^C\\d{5}$",
                     HMDB = "^HMDB\\d{5}(\\d{2})?$",
                     KNApSAcK = "^C\\d{8}$")
    
    ## Validate the format once, before touching the file, so that an
    ## invalid value is reported even when the file has no lines.
    if (!(format %in% c("general", names(id.patterns)))) {
        stop("Identifier format must be KEGG or HMDB or KNApSAcK.")
    }
    
    ## read in the gmt file as a vector of lines
    metSetDB <- readLines(file, encoding = "UTF-8")
    ## empty lines (e.g. a trailing blank line) carry no metabolite set
    metSetDB <- metSetDB[nzchar(metSetDB)]
    metSetDB <- strsplit(metSetDB, "\t")
    
    lapply(metSetDB, function(x) {
        ## Negative indexing yields character(0) for lines with fewer than
        ## three fields, whereas 3:length(x) would count backwards and
        ## return NA / the set name as if they were compounds.
        path.cpds <- x[-(1:2)]
        
        ## Only the compound fields are filtered, so a set identifier that
        ## happens to look like a compound ID is never reported as a member.
        if (format != "general") {
            path.cpds <- grep(id.patterns[[format]], path.cpds, value = TRUE)
        }
        
        if (length(path.cpds) == 0) 
            stop("Identifier format must be same.")
        
        list(x[1], x[2], path.cpds)
    })
}
afukushima/MSEAp documentation built on Sept. 18, 2019, 7:12 p.m.