Nothing
## imports the XML format of MSigDB
.importMsigDBXML <- function(file, fields, organism) {
msig <- list()
fields <- unique(c("SYSTEMATIC_NAME", "STANDARD_NAME", "CATEGORY_CODE", "SUB_CATEGORY_CODE", fields))
field.names <- c( "ID", "Title", "Category", "Subcategory", fields[-c(1:4)] )
foo <- xmlParse(file)
foo <- xmlToList(foo)
if(organism != "all") {
orgs <- sapply(foo, function(x) x["ORGANISM"])
foo <- foo[ orgs == organism ]
}
# remove NULLs
foo <- foo[ ! sapply(foo, is.null) ]
msig$gs <- t(sapply(foo, function(x) x[ fields ]))
colnames(msig$gs) <- field.names
msig$gs <- as_tibble(msig$gs)
if(any(duplicated(msig$gs$ID))) {
warning("Duplicated IDs found; automatic IDs will be generated")
msig$gs$oldID <- msig$gs$ID
msig$gs$ID <- make.unique(as.character(msig$gs$ID))
}
#rownames(msig$gs) <- msig$MODULES[,"ID"]
msig$gs2gv <- sapply(foo, function(x) x[ "MEMBERS_SYMBOLIZED" ])
msig$gs2gv <- strsplit(msig$gs2gv, ",")
msig$gv <- unique(unlist(msig$gs2gv))
msig$gs2gv <- lapply(msig$gs2gv, function(x) match(x, msig$gv))
#names(msig$gs2gv) <- msig$gs$ID
as_tmodGS(msig)
}
## imports the GMT format of MSigDB
.importMsigDBGMT <- function(file) {
stop("This does not work at the present.")
msig <- list()
con <- file(file, open="r")
lines <- readLines(con)
close(con)
ids <- gsub( "\t.*", "", lines)
desc <- gsub( "^[^\t]*\t([^\t]*)\t.*", "\\1", lines )
genes <- gsub( "^[^\t]*\t[^\t]*\t(.*)", "\\1", lines )
msig$MODULES <- data.frame(
ID=ids, Title=desc, stringsAsFactors=FALSE)
if(any(duplicated(msig$MODULES$ID))) {
warning("Duplicated IDs found; automatic IDs will be generated")
msig$MODULES$oldID <- msig$MODULES$ID
msig$MODULES$ID <- make.unique(as.character(msig$MODULES$ID))
}
rownames(msig$MODULES) <- msig$MODULES[,"ID"]
msig$MODULES2GENES <- strsplit(genes, "\t")
names(msig$MODULES2GENES) <- ids
msig$GENES <- data.frame( ID=unique(unlist(msig$MODULES2GENES)))
msig <- new("tmod", msig)
msig
}
#' Import data from MSigDB
#'
#' Import data from an MSigDB file in either XML or GMT format
#'
#' This command parses a file from MSigDB. Both XML and the MSigDB-specific
#' "GMT" format are supported (however, the latter is discouraged, as it
#' contains less information).
#' @param file The name of the file to parse
#' @param format Format (either "xml" or "gmt")
#' @param organism Select the organism to use. Use "all" for all organisms in the file (only for "xml" format; default: "Homo sapiens")
#' @param fields Which fields to import to the MODULES data frame (only for "xml" format)
#' @return A tmod object
#' @importFrom XML xmlParse xmlToList
#' @importFrom tibble as_tibble
#' @examples
#' \dontrun{
#' ## First, download the file "msigdb_v7.5.1.xml"
#' ## from http://www.broadinstitute.org/gsea/downloads.jsp
#' msig <- tmodImportMSigDB("msigdb_v7.5.1.xml")
#' }
#' @export
tmodImportMSigDB <- function( file=NULL, format="xml", organism="Homo sapiens",
fields=c( "STANDARD_NAME", "CATEGORY_CODE", "SUB_CATEGORY_CODE", "EXACT_SOURCE", "EXTERNAL_DETAILS_URL") ) {
if(length(file) != 1) stop("Incorrect file parameter")
if(!file.exists(file)) stop( sprintf("File %s does not exist", file))
format <- match.arg(format, c( "xml", "gmt"))
msig <- switch(format,
xml=.importMsigDBXML(file, fields, organism),
gmt=.importMsigDBGMT(file))
s <- msig$gs$Title
msig$gs$Title <- paste0(toupper(substring(s, 1,1)), tolower(substring(s, 2)) )
msig$gs$Title <- gsub( "^Gse([0-9])", "GSE\\1", msig$gs$Title )
msig$gs$Title <- gsub( "_", " ", msig$gs$Title )
msig$gs$B <- sapply(msig$gs2gv, length)
msig
}
.tmod2DataFrameRowsModule <- function(mset, module_col, feature_col, sep) {
ret <- mset$gs
ret[ , feature_col ] <- sapply(mset$gs2gv, function(x) paste(mset$gv[ x ], collapse=sep))
colnames(ret)[ colnames(ret) == "ID" ] <- module_col
ret
}
.tmod2DataFrameRowsFeatures <- function(mset, module_col, feature_col, sep) {
ret <- tibble(ID=mset$gv)
colnames(ret) <- feature_col
gv2gs <- split(rep(mset$gs$ID, lengths(mset$gs2gv)), mset$gv[ unlist(mset$gs2gv) ])
ret[ , module_col ] <- sapply(gv2gs, function(x) paste(x, collapse=sep))
ret
}
#' Convert a tmod module set into a data frame
#'
#' Convert a tmod module set into a data frame
#'
#' @param mset a tmod object (e.g. generated by makeTmod)
#' @param rows if "modules", then there will be a row corresponding to each
#' module (gene set); if "features", then there will be a row corresponding to
#' each gene.
#' @param module_col Name of the column with module (gene set) IDs
#' @param feature_col Name of the column with feature (gene) IDs
#' @param sep separator used to collate module IDs (if rows=="features") or feature IDs (if rows=="modules")
#' @seealso \code{\link{makeTmodGS}}, \code{\link{makeTmod}}
#' @export
tmod2DataFrame <- function(mset, rows="modules", module_col="module_id", feature_col="feature_id", sep=",") {
mset <- .getmodules_gs(NULL, mset)
rows <- match.arg(rows, c("modules", "features"))
ret <- switch(rows,
modules = .tmod2DataFrameRowsModule(mset, module_col, feature_col, sep),
features = .tmod2DataFrameRowsFeatures(mset, module_col, feature_col, sep)
)
return(ret)
}
#' Convert a data frame to a tmod object
#'
#' Convert a data frame to a tmod object
#'
#' `makeTmodFromFeatureDataFrame` converts mapping information from features (genes) to modules (gene
#' sets). The data frame has a row for each feature-module pair.
#'
#' `makeTmodFromModuleDataFrame` converts mapping information from features
#' (genes) to modules (gene sets). The data frame has a row for each module,
#' and all gene IDs corresponding to a module are stored as a comma separated string, e.g.
#'
#' Vice versa, `tmod2DataFrame` converts a tmod object to a data frame.
#' @param df A data frame
#' @param feature_col Which column contains the feature (gene) IDs
#' @param module_col Which column contains the module (gene set) IDs
#' @param title_col Description of the modules (if NULL, the description will
#' be taken from the module_col)
#' @param extra_module_cols Additional columns to include in the module data frame
#' @param extra_gene_cols Additional gene columns to include in the genes data frame
#' @seealso \code{\link{makeTmodGS}}, \code{\link{makeTmodGS}}
#' @return A tmod object
#' @examples
#' df <- data.frame(
#' gene_id=LETTERS[1:10],
#' geneset_id=rep(letters[1:2], each=5),
#' geneset_description=rep(paste0("Gene set ", letters[1:2]), each=5))
#' res <- makeTmodFromDataFrame(df,
#' feature_col="gene_id",
#' module_col="geneset_id",
#' title_col="geneset_description")
#' @export
makeTmodFromDataFrame <- function(df, feature_col=1, module_col=2, title_col=NULL, extra_module_cols=NULL, extra_gene_cols=NULL) {
if(!is.data.frame(df)) stop("df must be a data.frame")
df <- df[ !is.na(df[, feature_col]) & !is.na(df[, module_col]), ]
df <- as.data.frame(df) ## in case it is a tibble
df_unique <- df[ !duplicated(df[, module_col ]), ]
mods <- tibble(ID=df_unique[ , module_col ])
if(is.null(title_col)) {
title_col <- module_col
}
mods[ , "Title"] <- df_unique[ , title_col ]
if(!is.null(extra_module_cols)) {
extr <- df_unique[ , extra_module_cols, drop=FALSE ]
mods <- cbind(mods, extr)
}
#m2g <- lapply(mods[ , "ID" ], function(m) df[ df[ , module_col ] == m, feature_col])
gv <- unique(df[ , feature_col ])
df[ , feature_col ] <- match(df[ , feature_col ], gv)
gs2gv <- tapply(df[ , feature_col ], df[ , module_col ], unique, simplify=FALSE)
## convert from array list to list
gs2gv <- lapply(gs2gv, function(.) .)
mods <- mods[ match(names(gs2gv), mods$ID), ]
message("making Tmod")
as_tmodGS(list(gs=mods, gs2gv=gs2gv, gv=gv))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.