R/loadKEGGdata.R

Defines functions loadKEGGdata

Documented in loadKEGGdata

#' @include buildDataFromGraph.R
#' 
#' @description 
#' Function \code{loadKEGGdata} loads the internal files 
#' containing the KEGG knowledge model into a 
#' \code{\link{FELLA.DATA}} object. 
#' 
#' In general, \code{buildGraphFromKEGGREST} and 
#' \code{buildDataFromGraph} are one-time executions 
#' for a given organism and knowledge model, 
#' in this precise order. 
#' On the other hand, the user needs to run \code{loadKEGGdata} 
#' in every new R session to load such model into a 
#' \code{\link{FELLA.DATA}} object. 
#' 
#' @details 
#' Function \code{loadKEGGdata} returns a 
#' \code{\link{FELLA.DATA}} object from any of the 
#' databases generated by \code{\link{FELLA.DATA}}.
#' This object is the starting point of any enrichment 
#' using \code{\link{FELLA}}.
#' In case the user built the matrices for "diffusion" and "pagerank", 
#' they can be loaded through the \code{loadMatrix} argument.  
#' Further detail on the methods can be found in [Picart-Armada, 2017].
#' The matrices allow a faster computation and the definition 
#' of a custom background, but use up to 250MB of memory each.
#'
#' @inheritParams .params
#'
#' @return \code{loadKEGGdata} returns the 
#' \code{\link{FELLA.DATA}} object 
#' that contains the KEGG knowledge representation.
#'  
#' @examples 
#' ## Toy example
#' ## In this case, the graph is not built from current KEGG. 
#' ## It is loaded from sample data in FELLA
#' data("FELLA.sample")
#' ## Graph to build the database (this example is a bit hacky)
#' g.sample <- FELLA:::getGraph(FELLA.sample)
#' dir.tmp <- paste0(tempdir(), "/", paste(sample(letters), collapse = ""))
#' ## Build internal files in a temporary directory
#' buildDataFromGraph(
#' keggdata.graph = g.sample, 
#' databaseDir = dir.tmp, 
#' internalDir = FALSE, 
#' matrices = NULL, 
#' normality = NULL, 
#' dampingFactor = 0.85,
#' niter = 10)
#' ## Load database
#' myFELLA.DATA <- loadKEGGdata(
#' dir.tmp, 
#' internalDir = FALSE)
#' myFELLA.DATA
#'
#' ######################
#'
#' \dontrun{
#' ## Full example
#' 
#' ## First step: graph for Mus musculus discarding the mmu01100 pathway
#' ## (an analog example can be built from human using organism = "hsa")
#' g.mmu <- buildGraphFromKEGGREST(
#' organism = "mmu", 
#' filter.path = "mmu01100")
#' summary(g.mmu)
#' cat(comment(g.mmu))
#' 
#' ## Second step: build internal files for this graph
#' ## (consumes some time and memory, especially if we compute 
#' ## "diffusion" and "pagerank" matrices)
#' buildDataFromGraph(
#' keggdata.graph = g.mmu, 
#' databaseDir = "example_db_mmu", 
#' internalDir = TRUE, 
#' matrices = c("hypergeom", "diffusion", "pagerank"), 
#' normality = c("diffusion", "pagerank"), 
#' dampingFactor = 0.85,
#' niter = 1e3)
#' ## Third step: load the internal files into a FELLA.DATA object
#' FELLA.DATA.mmu <- loadKEGGdata(
#' "example_db_mmu", 
#' internalDir = TRUE, 
#' loadMatrix = c("diffusion", "pagerank"))
#' FELLA.DATA.mmu
#' }
#' 
#' @seealso class \code{\link{FELLA.DATA}}
#' 
#' @template refs_fella_data
#' 
#' @rdname data-funs
#' 
#' @import igraph
#' @export
loadKEGGdata <- function(
    databaseDir = tail(listInternalDatabases(), 1), 
    internalDir = TRUE, 
    loadMatrix = NULL) { 
    # Build a FELLA.DATA object from the .RData files of a database folder.
    #
    # databaseDir: name (or path) of the folder holding the .RData files
    # internalDir: if TRUE, databaseDir is looked up inside the "database" 
    #   directory of the installed FELLA package; otherwise it is used as is
    # loadMatrix: character vector; the "diffusion" and/or "pagerank" 
    #   matrices are loaded only if they are named here
    #
    # Returns the populated FELLA.DATA object, with status "loaded". 
    # Stops if the arguments are invalid, the directory does not exist or 
    # 'keggdata.graph.RData' is missing. Every other file is optional: 
    # its absence is only reported through a message.
    
    message("Loading KEGG graph data...")
    
    # Checking the input
    ########################
    checkArgs <- checkArguments(
        loadMatrix = loadMatrix, 
        databaseDir = databaseDir, 
        internalDir = internalDir)
    if (!checkArgs$valid)
        stop("Bad argument when calling function 'loadKEGGdata'.")  
    
    ##########################
    F.DATA <- new("FELLA.DATA")
    
    # Folder prefix for all the files; it always ends with a slash
    if (internalDir) {
        path <- paste0(
            system.file("database", package = "FELLA"), 
            "/", databaseDir, "/")
    } else {
        path <- paste0(databaseDir, "/")
    }
    
    # Does the dir exist?
    if (!dir.exists(path)) {
        stop(
            "Directory ", path, " does not exist. ", 
            "Database '", databaseDir, "' cannot be found. ",
            "Aborting...")
    }
    
    # Full path of a file inside the database folder
    fileIn <- function(fileName) paste0(path, fileName)
    
    # Load an .RData file into a throwaway environment and return the 
    # requested objects as a named list. 
    # Keeping the loaded objects out of this function's frame prevents 
    # them from overwriting local variables, and the lookup never falls 
    # back to objects with the same name defined elsewhere.
    readObjects <- function(fileName, objects) {
        env <- new.env(parent = emptyenv())
        load(fileIn(fileName), envir = env)
        absent <- setdiff(objects, ls(env, all.names = TRUE))
        if (length(absent) > 0) {
            stop(
                "File '", fileIn(fileName), "' does not contain: ", 
                paste(absent, collapse = ", "), ".")
        }
        mget(objects, envir = env)
    }
    
    # Load the graph and the identifiers (required)
    if (!file.exists(fileIn("keggdata.graph.RData"))) {
        stop(
            "'keggdata.graph.RData' not present in:", 
            paste0(path, "keggdata.graph.RData"), 
            ". Please check that KEGG data is available.")
    }
    kegg <- readObjects(
        "keggdata.graph.RData", 
        c("keggdata.graph", "keggdata.pvalues.size"))
    keggdata.graph <- kegg[["keggdata.graph"]]
    vertexNames <- V(keggdata.graph)$name
    vertexCom <- V(keggdata.graph)$com
    
    F.DATA@keggdata@graph <- keggdata.graph
    F.DATA@keggdata@pvalues.size <- kegg[["keggdata.pvalues.size"]]
    F.DATA@keggdata@id2name <- V(keggdata.graph)$NAME
    names(F.DATA@keggdata@id2name) <- vertexNames
    
    # Vertex indices of each category, selected through the value of the 
    # 'com' vertex attribute and named with the vertex names
    categories <- c(
        pathway = 1, module = 2, enzyme = 3, reaction = 4, compound = 5)
    for (categ in names(categories)) {
        ids <- which(vertexCom == categories[[categ]])
        names(ids) <- vertexNames[ids]
        F.DATA@keggdata@id[[categ]] <- ids
    }
    message("Done.")
    
    # Load matrix for hypergeometric test
    message("Loading hypergeom data...")
    message("Loading matrix...")
    if (file.exists(fileIn("hypergeom.matrix.RData"))) {
        F.DATA@hypergeom@matrix <- readObjects(
            "hypergeom.matrix.RData", "hypergeom.matrix")[[1]]
    } else {
        message(
            "'hypergeom.matrix.RData' not present in:", 
            paste0(path, "hypergeom.matrix.RData"), 
            ". Hypergeometric test won't execute.")
    }
    message("Done.")
    
    # Diffusion and pagerank share the same file layout: 
    # '<method>.matrix.RData' (optional, loaded only on request) and 
    # '<method>.rowSums.RData' (loaded whenever it is present)
    for (method in c("diffusion", "pagerank")) {
        methodData <- slot(F.DATA, method)
        matrixFile <- paste0(method, ".matrix.RData")
        rowSumsFile <- paste0(method, ".rowSums.RData")
        
        # Load matrix
        message("Loading ", method, " data...")
        message("Loading matrix...")
        if (!(method %in% loadMatrix)) {
            message(
                "'", matrixFile, "' not loaded. ", 
                "Simulated permutations may execute slower for ", 
                method, ".")
        } else if (!file.exists(fileIn(matrixFile))) {
            message(
                "'", matrixFile, "' not present in:", 
                fileIn(matrixFile), 
                ". Simulated permutations may execute slower for ", 
                method, ".")
        } else {
            methodData@matrix <- readObjects(
                matrixFile, paste0(method, ".matrix"))[[1]]
        }
        message("Done.")
        
        # Load rowsums for z-score calculation 
        message("Loading rowSums...")
        if (file.exists(fileIn(rowSumsFile))) {
            sums <- readObjects(
                rowSumsFile, 
                paste0(method, c(".rowSums", ".squaredRowSums")))
            methodData@rowSums <- sums[[1]]
            methodData@squaredRowSums <- sums[[2]]
        } else {
            message(
                "'", rowSumsFile, "' not present in:", 
                fileIn(rowSumsFile), 
                ". Z-scores won't be available for ", method, ".")
        }
        message("Done.")
        
        slot(F.DATA, method) <- methodData
    }
    
    F.DATA@keggdata@status <- "loaded"
    
    message("Data successfully loaded.")
    return(F.DATA)
}
b2slab/FELLA documentation built on March 3, 2021, 2:22 p.m.