Nothing
#' Creation of the object to use in gene set enrichment meta-analysis
#'
#' It allows the creation of an object to perform gene set
#' enrichment meta-analysis.
#'
#' @param listEX A list of dataframes or matrix (genes in rows and sample
#' in columns). A list of ExpressionSets can be used too
#'
#' @param listPheno A list of phenodatas (dataframes or matrix). If the object
#' listEX is a list of ExpressionSets this element can be null.
#'
#' @param namePheno A list or vector of the different colunm names or
#' positions from the phenodatas where the experimental and reference groups
#' are identified. Each element of namePheno correspont to its equivalent
#' element in the listPheno (default a vector of 1, all the first columns of
#' each elements of listPheno are selected).
#'
#' @param expGroups A list of vectors or a vector containing the names or the
#' positions with which we identify the elements of the experiment groups
#' (cases) of the namePheno element (default a vector of 1, all the first
#' groups are selected)
#'
#' @param refGroups A list of vectors or a vector containing the names or the
#' positions with which we identify the elements of the reference groups
#' (control) of the namePheno elements (default a vector of 1, all the first
#' groups are selected)
#'
#' @param geneSets List of gene sets to check. Object similar
#' to the one used in the fgsea package
#'
#' @param pathMethod The single sample enrichment method used to obtain
#' the enrichment score of each sample and gene set.
#' See details for more information
#'
#' @param minSize Minimum size of the resulting gene sets after gene
#' identifier mapping. By default, the minimum size is 7.
#'
#' @param kcdf Only neccesary for the GSVA method. Character vector of length 1
#' denoting the kernel to use during the non-parametric estimation of the
#' cumulative distribution function of expression levels across samples.
#' By default, kcdf="Gaussian" which is suitable when input expression values
#' are continuous, such as microarray fluorescent units in logarithmic scale,
#' RNA-seq log-CPMs, log-RPKMs or log-TPMs. When input expression values are
#' integer counts, such as those derived from RNA-seq experiments, then this
#' argument should be set to kcdf="Poisson".
#'
#' @param normalize boolean specifying if the gen set matrices should be
#' normalized. Default value "TRUE".
#'
#' @param n.cores Number of cores to use in the parallelization of the datsets.
#' By default, n.cores=1.
#'
#' @param internal.n.cores Number of cores to use in the parallelization of the
#' single sample enrichment methods. By default internal.n.cores= 1.
#'
#' @details The single sample scoring methods that can be used to obtain the
#' enrichment score of each sample and gene set are:
#'\enumerate{
#' \item "GSVA": Gene Set Variation method (Hänzelmann S, 2013)
#' \item "Zscore": Z-score method (Lee E, 2008)
#' \item "ssGSEA": Single Sample Gene Set Enrichment Analysis method
#' (Barbie DA, 2009)
#' \item "Singscore": Single sample scoring of molecular phenotypes
#' (Foroutan M, 2018)
#' }
#'
#' In parallelization, several aspects must be considered.
#' n.cores refers to the parallelization of studies or datasets.
#' Therefore, if we have 3 studies, the maximum number for n.cores will be 3.
#' internal.n.cores refers to the parallelization of single sample enrichment
#' methods. This is especially recommended for the ssGSEA method. For Singscore
#' and GSVA, it may also be advisable. The process is parallelized based on the
#' samples in each study. Therefore, the larger the number of samples, the
#' slower the process will be.
#' The number of cores that the computer will use is the multiplication of both
#' parameters n.cores * internal.n.cores = total cores.
#'
#'
#' @return The object needed to perform gene set enrichment meta-analysis.
#' Each list contains two elements: The first element is the gene set matrix
#' (gene sets in rows
#' and samples in columns) The second element is a vector of zeros and ones
#' that represents the state of the diffenrent samples of the gene sets
#' matrix. 0 represents reference group (controls) and 1 represents
#' experimental group (cases).
#'
#' @references
#'
#' Hänzelmann S, Castelo R, Guinney J. (2013)
#' GSVA: gene set variation analysis for microarray and RNA-Seq data.
#' BMC Bioinformatics. 2013;14: 7. doi:10.1186/1471-2105-14-7
#'
#' Lee E, Chuang H-Y, Kim J-W, Ideker T, Lee D. (2008)
#' Inferring Pathway Activity toward Precise Disease Classification.
#' PLOS Computational Biology. 2008;4: e1000217.
#' doi:10.1371/journal.pcbi.1000217
#'
#' Barbie DA, Tamayo P, Boehm JS, Kim SY, Moody SE, Dunn IF, et al. (2009)
#' Systematic RNA interference reveals that oncogenic KRAS-driven cancers
#' require TBK1. Nature. 2009;462: 108–112. doi:10.1038/nature08460
#'
#' Foroutan M, Bhuva DD, Lyu R, Horan K, Cursons J, Davis MJ. (2018)
#' Single sample scoring of molecular phenotypes. BMC Bioinformatics.
#' 2018;19: 404. doi:10.1186/s12859-018-2435-4
#'
#' Korotkevich G, Sukhov V, Budin N, Shpak B, Artyomov MN,
#' Sergushichev A. (2021)
#' Fast gene set enrichment analysis. bioRxiv; 2021. p. 060012.
#' doi:10.1101/060012
#'
#'
#' @examples
#'
#' data("simulatedData")
#' listMatrices <- list(study1Ex, study2Ex)
#' listPhenodata <- list(study1Pheno, study2Pheno)
#' phenoGroups <- c("Condition","Condition")
#' phenoCases <- list("Case", "Case")
#' phenoControls <- list("Healthy", "Healthy")
#' objectMApathSim <- createObjectMApath(
#' listEX = listMatrices,
#' listPheno = listPhenodata, namePheno = phenoGroups,
#' expGroups = phenoCases, refGroups = phenoControls,
#' geneSets = GeneSets,
#' pathMethod = "Zscore")
#'
#'
#'
createObjectMApath <- function(listEX, listPheno = NULL,
namePheno = c(rep(1, length(listEX))),
expGroups= c(rep(1, length(listEX))),
refGroups = c(rep(2, length(listEX))),
geneSets,
pathMethod = c("GSVA", "Zscore", "ssGSEA", "Singscore"), minSize = 7,
kcdf = "Gaussian", normalize = TRUE,
n.cores = 1, internal.n.cores = 1){
pathMethod <- match.arg(pathMethod)
objectExMA <- .createObjectMA(listEX, listPheno, namePheno, expGroups,
refGroups)
objectExMApath <- .objectExMA.to.objectMApath(objectExMA, geneSets,
pathMethod, minSize, kcdf, normalize = normalize,
n.cores, internal.n.cores)
return(objectExMApath)
}
#Create objectMa similar to DExMA
.createObjectMA <- function(listEX, listPheno = NULL,
namePheno = c(rep(1, length(listEX))),
expGroups= c(rep(1, length(listEX))),
refGroups = c(rep(2, length(listEX)))){
#check input objects
if(!is.list(listEX)){
stop("listEX must be a list")
}
if(length(listEX)<2){stop("There must be at least 2 studies")}
if (!any(is.list(listPheno) | is.null(listPheno))){
stop("listPheno must be a List or NULL")
}
if(!any(is.list(namePheno) | is.numeric(namePheno) |
is.character(namePheno))){
stop("namePheno must be a list or numeric or character")
}
if(!any(is.list(expGroups) | is.numeric(expGroups) |
is.character(expGroups))){
stop("expGroups must be a list or numeric or character")
}
if(!any(is.list(refGroups) | is.numeric(refGroups) |
is.character(refGroups))){
stop("refGroups must be a list or numeric or character")
}
#Obtaining the object
container <- list(0)
for (i in seq_len(length(listEX))) {
container[[i]] <- .elementObjectMA(
expressionMatrix=as.matrix(listEX[[i]]),
pheno=listPheno[[i]],
groupPheno=namePheno[[i]],
expGroup=expGroups[[i]],
refGroup=refGroups[[i]])
}
#Names of the results
if(is.null(names(listEX))){
names(container) <- paste("Study",seq_len(length(listEX)),sep="")}
else{
if(length(names(listEX)) >
length(names(listEX)[is.na(names(listEX))==FALSE])){
names(container) <- paste("Study",seq_len(length(listEX)),sep="")}
else{names(container) <- names(listEX)}}
return(container)
}
#Element objectMA similar to DExMA
.elementObjectMA <- function(expressionMatrix, pheno=NULL, groupPheno,
expGroup=1, refGroup=2){
if (!any(is.data.frame(expressionMatrix) | is.matrix(expressionMatrix) |
is(expressionMatrix,"ExpressionSet"))){
stop("Elements of listEX must be a dataframe, matrix or
ExpressionSet object")
}
if (!any(is.data.frame(pheno) | is.matrix(pheno) | is.null(pheno))){
stop("Elements of listPheno must be a dataframe, a matrix or NULL")}
if (!any(is.character(groupPheno) |is.numeric(groupPheno))){
stop("groupPheno must be a character or numeric object")}
if (!any(is.character(expGroup) | is.numeric(expGroup))){
stop("expGroup must be a character or numeric object")}
if (!any(is.character(refGroup) | is.numeric(refGroup))){
stop("refGroup must be a character or numeric object")}
## Get data from expression set
if(is(expressionMatrix, "ExpressionSet")){
if (is.null(pheno)){
pheno <- pData(expressionMatrix)}
expressionMatrix <- exprs(expressionMatrix)
}
else{
if(is.null(pheno)) {
stop("If DATA is not a ExpressionSet, you must provide
pheno parameter")}
}
colnames(expressionMatrix) <- as.character(colnames(expressionMatrix))
rownames(pheno) <- as.character(rownames(pheno))
pheno <- pheno[colnames(expressionMatrix),,drop=FALSE]
if (is.numeric(groupPheno)){
groupPheno <- colnames(pheno)[groupPheno]}
if (is.numeric(expGroup)){
pheno[,groupPheno] <- factor(pheno[,groupPheno])
expGroup <- levels(pheno[,groupPheno])[expGroup]}
if (is.numeric(refGroup)){
pheno[,groupPheno] <- factor(pheno[,groupPheno])
refGroup <- levels(pheno[,groupPheno])[refGroup]}
Inclusion <- rownames(pheno)[pheno[,groupPheno] %in%
c(expGroup, refGroup)]
expressionMatrix <- expressionMatrix[,Inclusion]
pheno <- data.frame(pheno[Inclusion, groupPheno, drop=FALSE])
colnames(pheno) <- groupPheno
pheno <- droplevels(pheno)
## Convert pheno to 0 and 1 for use the rest of functions
store<-rep(0, length(pheno))
for (k in seq_len(length(pheno[,groupPheno]))) {
if(pheno[,groupPheno][k] %in% expGroup){store[k]=1}
if(pheno[,groupPheno][k] %in% refGroup){store[k]=0}}
metalObject <- list(mExpres=expressionMatrix, condition=store)
return(metalObject)
}
#Create objectMAEXpath
.objectExMA.to.objectMApath <- function(objectExMA, geneSets,
pathMethod = c("GSVA", "Zscore", "ssGSEA", "Singscore"), minSize = 7,
kcdf = "Gaussian", n.cores = 1, internal.n.cores = 1, normalize = TRUE){
pathMethod <- match.arg(pathMethod)
if(Sys.info()["sysname"] == "Windows"){
param <- SnowParam(workers = n.cores)
}
else{
param <- MulticoreParam(workers = n.cores)
}
#GSVA method
if(pathMethod == "GSVA"){
message("Applying the GSVA method")
objectMApath <- bplapply(objectExMA, function(x){
x[[1]] <- .applyGSVA(x[[1]], geneSets = geneSets,
minSize = minSize, kcdf = kcdf, normalize = normalize,
internal.n.cores = internal.n.cores)
x[[2]] <- x[[2]]
names(x) <- c("mPath", "condition")
return(x)},
BPPARAM = param
)
names(objectMApath) <- names(objectExMA)
}
#ssGSEA method
if(pathMethod == "ssGSEA"){
message("Applying the ssGSEA method")
objectMApath <- bplapply(objectExMA, function(x){
x[[1]] <- .applyssGSEA(x[[1]], geneSets = geneSets,
minSize = minSize, normalize = normalize,
internal.n.cores = internal.n.cores)
x[[2]] <- x[[2]]
names(x) <- c("mPath", "condition")
return(x)},
BPPARAM = param
)
names(objectMApath) <- names(objectExMA)
}
#Z-score method
if(pathMethod == "Zscore"){
message("Applying the Zscore method")
objectMApath <- bplapply(objectExMA, function(x) {
x[[1]] <- .applyZscore(x[[1]], geneSets = geneSets,
minSize = minSize,
internal.n.cores = internal.n.cores)
x[[2]] <- x[[2]]
names(x) <- c("mPath", "condition")
return(x)},
BPPARAM = param
)
names(objectMApath) <- names(objectExMA)
}
#singscore method
if(pathMethod == "Singscore"){
message("Applying the singscore method")
objectMApath <- bplapply(objectExMA, function(x) {
x[[1]] <- .applySingscore(x[[1]], geneSets = geneSets,
normalize = normalize, minSize = minSize)
x[[2]] <- x[[2]]
names(x) <- c("mPath", "condition")
return(x)},
BPPARAM = param
)
names(objectMApath) <- names(objectExMA)
}
return(objectMApath)
}
#GSVA
.applyGSVA <- function(exMatrix, geneSets, minSize = 7, kcdf = "Gaussian",
internal.n.cores = 1, normalize = TRUE){
if(Sys.info()["sysname"] == "Windows"){
internalparam <- SnowParam(workers = internal.n.cores)
}
else{
internalparam <- MulticoreParam(workers = internal.n.cores)
}
paramMatrix <- gsvaParam(exMatrix, geneSets, minSize = minSize, kcdf = kcdf)
gsvaMatrix <- gsva(paramMatrix,
BPPARAM = internalparam, verbose = FALSE)
#gsvaMatrix normalization
if(normalize == TRUE){
gsvaMatrix <- (gsvaMatrix - mean(gsvaMatrix)) / sd(gsvaMatrix)
}
return(gsvaMatrix)
}
#Zscore
.applyZscore <- function(exMatrix, geneSets, minSize = 7, internal.n.cores = 1){
if(Sys.info()["sysname"] == "Windows"){
internalparam <- SnowParam(workers = internal.n.cores)
}
else{
internalparam <- MulticoreParam(workers = internal.n.cores)
}
paramMatrix <- zscoreParam(exMatrix, geneSets, minSize = minSize)
ZscoreMatrix <- gsva(paramMatrix, BPPARAM = internalparam, verbose = FALSE)
return(ZscoreMatrix)
}
#ssGSEA
.applyssGSEA <- function(exMatrix, geneSets, minSize = 7, normalize = TRUE,
internal.n.cores = 1){
if(Sys.info()["sysname"] == "Windows"){
internalparam <- SnowParam(workers = internal.n.cores)
}
else{
internalparam <- MulticoreParam(workers = internal.n.cores)
}
paramMatrix <- ssgseaParam(exMatrix, geneSets, minSize = minSize)
ssGSEAMatrix <- gsva(paramMatrix, BPPARAM = internalparam, verbose = FALSE)
#ssGSEAMatrix normalization
if(normalize == TRUE){
ssGSEAMatrix <- (ssGSEAMatrix - mean(ssGSEAMatrix)) / sd(ssGSEAMatrix)
}
return(ssGSEAMatrix)
}
#Singscore
.applySingscore <- function(exMatrix, geneSets, minSize = 7, normalize = TRUE,
internal.n.cores = 1){
exMatrixgenes <- rownames(exMatrix)
pathways_filtered <- lapply(geneSets, function(pathway) {
genes_com <- intersect(pathway, exMatrixgenes)
if (length(genes_com) >= minSize) {
return(pathway)
} else {
return(NULL)
}
})
pathways_filtered <- pathways_filtered[!sapply(pathways_filtered, is.null)]
geneSets <- pathways_filtered
rankMatrix <- rankGenes(exMatrix, tiesMethod = "average")
message("Estimating singscore values")
listSign <- suppressWarnings(lapply(geneSets,
function(x) simpleScore(rankData = rankMatrix, upSet = x)))
listScores <- sapply(listSign, function(x) x$TotalScore)
if(is.list(listScores)){
pathMatrix <- do.call(rbind, listScores)}
else{
pathMatrix <- t(listScores)
}
colnames(pathMatrix) <- colnames(exMatrix)
#pathMatrix normalization
if(normalize == TRUE){
pathMatrix <- (pathMatrix - mean(pathMatrix)) / sd(pathMatrix)
}
return(pathMatrix)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.