#' GSEA
#'
#' This function performs Gene Set Enrichment Analysis
#' @param reflist named vector of reference scores
#' @param set element set
#' @param method one of 'permutation' or 'pareto'
#' @param w exponent used to raise the supplied scores. Default is 1 (original
#' scores unchanged)
#' @param np Number of permutations (Default: 1000)
#' @param gsea_null a GSEA null distribution (Optional)
#' @return A GSEA object. Basically a list of s components:
#' \describe{
#' \item{ES}{The enrichment score}
#' \item{NES}{The normalized enrichment socre}
#' \item{ledge}{The items in the leading edge}
#' \item{p.value}{The permutation-based p-value}
#' }
#' @examples
#' reflist<-setNames(-sort(rnorm(1000)),paste0('gene',1:1000))
#' set<-paste0('gene',sample(1:200,50))
#' obj<-gsea(reflist,set,method='pareto',np=1000)
#' obj$p.value
#' @export
gsea <- function(reflist,
set,
method=c("permutation","pareto"),
np=1000,
w=1,
gsea_null=NULL) {
# Get elements in set that are in the ref list
set <- intersect(names(reflist), set)
# Sort the reference list
# Get the list order, from higher (1)to smaller (n)
ix <- order(reflist, decreasing=TRUE)
reflist <- reflist[ix] # Reorder the reference list
# Initialize variables for running sum
es <- 0
nes <- 0
p.value <- 1
# Identify indexes of set within the sorted reference list
inSet <- rep(0, length(reflist))
inSet[which(names(reflist) %in% set)] <- 1
### Compute Enrichment Score
# Compute running sum for hits
hits<-abs(reflist*inSet) # Get the values for the elements in the set
hits<-hits^w # Raise this score to the power of w
score_hit <- cumsum(hits) # Cumulative sum of hits' scores
# The cumulative sum is divided by the final sum value
score_hit <- score_hit / score_hit[length(score_hit)]
# Compute running sum for non-hits
score_miss <- cumsum(1-inSet)
score_miss <- score_miss/score_miss[length(score_miss)]
# The Running Score is the difference between the two scores! Hits - nonhits
running_score <- score_hit - score_miss
# Safety measure, in the case the random genes have all a weight of 0
if(all(is.na(running_score))){
running_score<-rep(0,length(running_score))
}
# The ES is actually the minimum or maximum Running Scores
if(abs(max(running_score))>abs(min(running_score))){
es<-max(running_score)
} else {
es<-min(running_score)
}
### Identify leading edge
# Create a vector of 0s long as the reference list
ledge_indeces <- rep(0, length(running_score))
# Case 1: negative ES
if (es<0){
peak <- which(running_score==min(running_score))[1]
# Leading edge is stuff AFTER the peak point (ES is negative)
ledge_indeces[peak:length(ledge_indeces)] <- 1
ledge_indeces <- which(ledge_indeces == 1)
ledge_names <- names(reflist[ledge_indeces])
} else{ # Case 2: positive ES
peak <- which(running_score==max(running_score)) # Define the peak point
# Leading edge is stuff BEFORE the peak point (ES is positive)
ledge_indeces[1:peak] <- 1
ledge_indeces <- which(ledge_indeces == 1)
ledge_names <- names(reflist[ledge_indeces])
}
### Compute p-value by permutation
if(is.null(gsea_null)){
null_es<-null_gsea(set=set,reflist=reflist,np=np,w=w)
} else{
### If a null list is provided, use it
if(is(gsea_null,"gsea_nullist")){
null_es<-gsea_null[as.character(length(set))][[1]]
}else{
null_es<-gsea_null
}
}
if (es<0){
p.value <- sum(null_es<=es)/length(null_es)
} else {
p.value <- sum(null_es>=es)/length(null_es)
}
# }
# If we are in the tail, the p-value can be calculated in two ways
if(is.na(p.value) || p.value<0.05) {
if(p.value==0){
p.value <- 1/np
}
if (method=="pareto"){
# Extract the absolute null ESs above the 95th percentile
q95<-as.numeric(quantile(abs(null_es),0.95))
fit<-pareto.fit(abs(null_es),threshold=q95)
newp.value<-ppareto(abs(es), threshold=q95,
exponent=fit$exponent, lower.tail=FALSE)/20
# If Pareto cannot infer small ESs take the permutation p-value
if(is.na(newp.value)){
newp.value<-p.value
}
p.value<-newp.value
}
}
# Calculate the normalized enrichment score
nes<-p2z(p.value)*sign(es)
# Filter leading edge for only genes in set
ledge_names<-intersect(set,ledge_names)
gsea.obj<-list(
es=es,
nes=nes,
p.value=p.value,
ledge=ledge_names,
running_score=running_score,
set=set,
reflist=reflist,
inSet=inSet
)
class(gsea.obj)<-"gsea"
return(gsea.obj)
}
# Calculate Null Distribution for GSEA
null_gsea<-function(set,reflist,w=1,np=1000){
gsea_null <- rep(0, np)
gsea_null <- sapply(1:np, function(i) {
# Identify indexes of set within the sorted reference list
inSet <- rep(0, length(reflist))
inSet[which(names(reflist) %in% set)] <- 1
# By sampling the order of the set elements, we get the real permutation
null_inSet <- inSet[sample(1:length(inSet))]
# Same as before, cumulative sums of hits and nonhits
null_hit<-abs(reflist*null_inSet)
null_hit<-null_hit^w
null_hit <- cumsum(null_hit)
null_hit <- null_hit/null_hit[length(null_hit)]
null_miss <- cumsum(1-null_inSet)
null_miss <- null_miss/null_miss[length(null_miss)]
# And dependending on the cumulative sums, null running sum and
# null enrichment score
null_running_score <- null_hit - null_miss
# The ES is just he maximum or the minimum
if(abs(max(null_running_score))>abs(min(null_running_score))){
null_es<-max(null_running_score)
} else {
null_es<-min(null_running_score)
}
return(null_es)
})
class(gsea_null)<-"gsea_null"
return(gsea_null)
}
#' Plot GSEA results
#'
#' This function generates a GSEA plot from a gsea object
#'
#' @param gsea.obj GSEA object produced by the \code{gsea} function
#' @param twoColors the two colors to use for positive[1] and negative[2]
#' enrichment scores
#' @param colBarcode The color of the barcode
#' @param plotNames Logical. Should the set names be plotted?
#' @param title String to be plotted above the Running Enrichment Score
#' @param bottomYlabel String for the Y label of the bottom plot
#' @param bottomTitle String for the title of the bottom part of the plot
#' @param ext_nes Provide a NES from an external calculation
#' @param ext_es Provide an ES from an external calculation
#' @param ext_pvalue Provide a pvalue from an external calculation
#' @param omit_middle If TRUE, will not plot the running score
#' (FALSE by default)
#' @return Nothing, a plot is generated in the default output device
#' @examples
#' reflist<-setNames(-sort(rnorm(1000)),paste0('gene',1:1000))
#' set<-paste0('gene',sample(1:200,50))
#' obj<-gsea(reflist,set,method='pareto',np=1000)
#' plot_gsea(obj)
#' @export
plot_gsea <- function(gsea.obj, twoColors = c("red",
"blue"),
plotNames = FALSE, colBarcode = "black",
title = "Running Enrichment Score",
bottomTitle = "List Values",
bottomYlabel = "Signature values",
ext_nes = NULL, ext_pvalue = NULL, ext_es =NULL,
omit_middle = FALSE) {
# Keep original par device settings and layout
on.exit(layout(1))
opar<-par(no.readonly=TRUE)
on.exit(par(opar),add=TRUE,after=FALSE)
# Extract parameters from the gsea object
es <- gsea.obj$es
nes <- gsea.obj$nes
p.value <- gsea.obj$p.value
ledge <- gsea.obj$ledge
running_score <- gsea.obj$running_score
set <- gsea.obj$set
reflist <- gsea.obj$reflist
inSet <- gsea.obj$inSet
# Define plot borders
min.RES <- min(running_score)
max.RES <- max(running_score)
delta <- (max.RES - min.RES) * 0.5
min.plot <- min.RES
max.plot <- max.RES
max.corr <- max(reflist)
min.corr <- min(reflist)
corr0.line <- (-min.corr/(max.corr - min.corr)) * 1.25 * delta + min.plot
# if (es < 0) {
# l.ledge.ref.plot <- length(reflist) - length(ledge)
# } else {
# l.ledge.ref.plot <- length(ledge)
# }
# Define colors (red is positive nes,
# blue is negative nes)
if (nes > 0) {
col.f <- twoColors[1]
} else {
col.f <- twoColors[2]
}
N <- length(reflist)
ind <- 1:N
### Define layout, let's end this
### destructive putting everything in a
### single window
if (omit_middle) {
layoutMatrix<-rbind(1,2)
layout(layoutMatrix, heights=c(1,2))
} else {
layoutMatrix<-rbind(1,2,3)
layout(layoutMatrix, heights = c(1,4,2))
}
# layout.show(n=3)
### PLOT 1: barcode-like enrichment tags
par(mar=c(0,4.1,2,2.1))
plot(0, col = "white", xlim = c(1, N),
ylim = c(0, 10), xaxt = "n", yaxt = "n",
type = "n", frame.plot = FALSE, xlab = "",
ylab = "", xaxs = "r", yaxs = "r",
main = paste("Number of elements: ",
N, " (in full list), ", length(set),
" (in element set)", sep = "",
collapse = ""))
for (position in 1:N) {
if (inSet[position] == 1) {
if (N < 50 & length(set) <= 10) {
rect(xleft = position - 0.2,
ybottom = 0, xright = position +
0.2, ytop = 10, col = colBarcode,
border = NA)
} else {
abline(v = position, lwd = 2,
col = colBarcode)
}
if (plotNames) {
text(labels = names(reflist[position]),
x = position - 0.2, y = 0,
srt = 90, offset = 0, pos = 4,
font = 2)
}
}
}
### PLOT 2: The running sum plot
if (!omit_middle) {
par(mar=c(2.1,4.1,2,2.1))
plot(ind, running_score, sub = "",
xlab = "", ylab = "Enrichment Score",
xlim = c(1, N), ylim = c(min.plot,
max.plot), type = "l", pch = 20,
lwd = 4, cex = 1, col = col.f,
xaxs = "r", yaxs = "r", main = title)
grid(col = "dark grey", lty = 2)
# This is important: zero running score
# line
lines(c(1, N), c(0, 0), lwd = 1,
lty = 1, cex = 1)
# This is also important: it's the max
# enrichment vertical line (aka LEADING
# EDGE)
l.ledge.ref.plot<-which.max(abs(running_score))
lines(c(l.ledge.ref.plot, l.ledge.ref.plot),
c(min.plot, max.plot), lwd = 1,
lty = 1, cex = 1)
if (es >= 0) {
legend_position <- "topright"
} else {
legend_position <- "bottomleft"
}
# If external NES/ES/pvalue are not provided, the
# standard GSEA one is shown
if (is.null(ext_nes)) {
toshow_nes<-signif(nes,3)
}else{toshow_nes<-ext_nes}
if (is.null(ext_es)) {
toshow_es<-signif(es,3)
}else{toshow_es<-ext_es}
if (is.null(ext_pvalue)) {
toshow_pvalue<-signif(p.value,3)
}else{toshow_pvalue<-ext_pvalue}
legend(legend_position,
legend = c(paste0("ES = ",toshow_es),
paste0("NES = ",toshow_nes),
paste0("p-value = ",toshow_pvalue)
),
bg = "white")
}
### Plot3: weight values
if (omit_middle) {
bottomMain <- title
} else {
bottomMain <- bottomTitle
}
par(mar=c(2.1,4.1,2,2.1))
plot(ind, reflist, type = "l", pch = 20,
lwd = 3, xlim = c(1, N), cex = 1,
col = 1, xaxs = "r", yaxs = "r",
main = bottomMain, ylab = bottomYlabel,
cex.axis = 0.8, xlab="")
grid(col = "dark grey", lty = 2)
# zero correlation horizontal line
lines(c(1, N), c(corr0.line, corr0.line),
lwd = 1, lty = 1, cex = 1, col = 1)
if (omit_middle) {
# If external NES/ES/pvalue are not provided, the
# standard GSEA one is shown
if (is.null(ext_nes)) {
toshow_nes<-signif(nes,3)
}else{toshow_nes<-ext_nes}
if (is.null(ext_es)) {
toshow_es<-signif(es,3)
}else{toshow_es<-ext_es}
if (is.null(ext_pvalue)) {
toshow_pvalue<-signif(p.value,3)
}else{toshow_pvalue<-ext_pvalue}
legend(legend_position,
legend = c(paste0("ES = ",toshow_es),
paste0("NES = ",toshow_nes),
paste0("p-value = ",toshow_pvalue)
),
bg = "white")
}
}
# Probability density of Pareto distributions
dpareto <- function(x, threshold = 1, exponent,
log = FALSE) {
# Avoid doing limited-precision
# arithmetic followed by logs if we want
# the log!
if (!log) {
prefactor <- (exponent - 1)/threshold
f <- function(x) {
prefactor * (x/threshold)^(-exponent)
}
} else {
prefactor.log <- log(exponent - 1) -
log(threshold)
f <- function(x) {
prefactor.log - exponent * (log(x) -
log(threshold))
}
}
d <- ifelse(x < threshold, NA, f(x))
return(d)
}
# Cumulative distribution function of the Pareto distributions
ppareto <- function(x, threshold = 1, exponent,
lower.tail = TRUE) {
if (!lower.tail) {
f <- function(x) {
(x/threshold)^(1 - exponent)
}
}
if (lower.tail) {
f <- function(x) {
1 - (x/threshold)^(1 - exponent)
}
}
p <- ifelse(x < threshold, NA, f(x))
return(p)
}
# Estimate parameters of Pareto distribution
pareto.fit <- function(data, threshold) {
data <- data[data >= threshold]
n <- length(data)
x <- data/threshold
alpha <- 1 + n/sum(log(x))
# Calculate Log-Likelihood
loglike <- sum(dpareto(data, threshold = threshold,
exponent = alpha, log = TRUE))
# KS distance
newdata <- data[data >= threshold]
d <- suppressWarnings(ks.test(newdata,
ppareto, threshold = threshold,
exponent = alpha))
ks.dist <- as.vector(d$statistic)
fit <- list(type = "pareto", exponent = alpha,
xmin = threshold, loglike = loglike,
ks.dist = ks.dist, samples.over.threshold = n)
return(fit)
}
#' z2p
#'
#' This function gives a gaussian p-value corresponding to the provided Z-score
#'
#' @param z a Z score
#' @return a p-value
#' @examples
#' z<-1.96
#' z2p(z)
#' @export
z2p <- function(z) {
pnorm(abs(z), lower.tail = FALSE) * 2
}
#' p2z
#'
#' This function gives a gaussian Z-score corresponding to the provided p-value
#' Careful: sign is not provided
#'
#' @param p a p-value
#' @return z a Z score
#' @examples
#' p<-0.05
#' p2z(p)
#' @export
p2z <- function(p) {
qnorm(p/2, lower.tail = FALSE)
}
#' Stouffer integration of Z scores
#'
#' This function gives a gaussian Z-score corresponding to the provided p-value
#' Careful: sign is not provided
#'
#' @param x a vector of Z scores
#' @return Z an integrated Z score
#' @examples
#' zs<-c(1,3,5,2,3)
#' stouffer(zs)
#' @export
stouffer <- function(x) {
Z <- sum(x)/sqrt(length(x))
return(Z)
}
#' Weighted Stouffer integration of Z scores
#'
#' This function gives a gaussian Z-score corresponding to the provided p-value
#' Careful: sign is not provided
#'
#' @param x a vector of Z scores
#' @param w weight for each Z score
#' @return Z an integrated Z score
#' @examples
#' zs<-c(1,-3,5,2,3)
#' ws<-c(1,10,1,2,1)
#' wstouffer(zs,ws)
#' @export
wstouffer <- function(x, w) {
Z <- sum(x * w)/sqrt(sum(w^2))
return(Z)
}
#' Fisher integration of p-values
#'
#' This function applies the Fisher integration of pvalues
#'
#' @param ps a vector of p-values
#' @return p.val an integrated p-value
#' @examples
#' ps<-c(0.01,0.05,0.03,0.2)
#' fisherp(ps)
#' @export
fisherp <- function(ps) {
Xsq <- -2 * sum(log(ps))
p.val <- pchisq(Xsq, df = 2 * length(ps),
lower.tail = FALSE)
# p<-c(Xsq = Xsq, p.value = p.val)
return(p.val)
}
#' Slice
#'
#' This function prints a slice of a matrix
#'
#' @param matrix A matrix
#' @return A visualization of the first 5 rows and columns of the input matrix
#' @examples
#' set.seed(1)
#' example<-matrix(rnorm(1000),nrow=100,ncol=10)
#' slice(example)
#' @export
slice <- function(matrix) {
if (nrow(matrix) < 5) {
stop("Input matrix has less than 5 rows")
}
if (ncol(matrix) < 5) {
stop("Input matrix has less than 5 columns")
}
print(matrix[1:5, 1:5])
}
#' 2-way GSEA
#' GSEA Gene set enrichment analysis of two complementary gene sets using gsea
#' @param reflist named vector of reference scores
#' @param set1 element set 1
#' @param set2 element set 1
#' @param method one of 'permutation' or 'pareto'
#' @param w exponent used to raise the supplied scores. Default is 1 (original
#' scores unchanged)
#' @param np Number of permutations (Default: 1000)
#' @param gsea_null a GSEA null distribution (Optional)
#' @return A list of 2 GSEA objects. Each of which is a list of components:
#' \describe{
#' \item{ES}{The enrichment score}
#' \item{NES}{The normalized enrichment socre}
#' \item{ledge}{The items in the leading edge}
#' \item{p.value}{The permutation-based p-value}
#' }
#' @examples
#' reflist<-setNames(-sort(rnorm(1000)),paste0('gene',1:1000))
#' set1<-paste0('gene',sample(1:200,50))
#' set2<-paste0('gene',sample(801:1000,50))
#' obj<-gsea2(reflist,set1,set2,method='pareto',np=1000)
#' obj$p.value
#' @export
gsea2 <- function (reflist, set1,set2, method = c("permutation", "pareto"),
np = 1000, w = 1, gsea_null = NULL) {
g1 <- gsea(reflist,set1,method=method,np = np, w = w, gsea_null = gsea_null)
g2 <- gsea(reflist,set2,method=method,np = np, w = w, gsea_null = gsea_null)
ix <- order(reflist, decreasing = T)
reflist <- reflist[ix]
gsea.obj <- list(
es1 = g1$es,es2 = g2$es,
nes1 = g1$nes, nes2 = g2$nes,
p.value1 = g1$p.value,p.value2 = g2$p.value,
ledge1 = g1$ledge,ledge2 = g2$ledge,
running_score1 = g1$running_score,running_score2 = g2$running_score,
set1 = set1,set2 = set2,
reflist = reflist,
inSet1 = g1$inSet,inSet2 = g2$inSet)
return(gsea.obj)
}
#' Plot 2-way GSEA results
#'
#' This function generates a GSEA plot from a gsea object
#'
#' @param gsea.obj GSEA object produced by the \code{gsea} function
#' @param twoColors the two colors to use for positive[1] and negative[2]
#' enrichment scores, and of the barcodes
#' @param plotNames Logical. Should the set names be plotted?
#' @param title String to be plotted above the Running Enrichment Score
#' @param bottomTitle String for the title of the bottom part of the plot
#' @param bottomYlabel String for the Y label of the bottom plot
#' (FALSE by default)
#' @return Nothing, a plot is generated in the default output device
#' @examples
#' reflist<-setNames(-sort(rnorm(1000)),paste0('gene',1:1000))
#' set1<-paste0('gene',sample(1:200,50))
#' set2<-paste0('gene',sample(801:1000,50))
#' obj<-gsea2(reflist,set1,set2,method='pareto',np=1000)
#' plot_gsea2(obj)
#' @export
plot_gsea2 <- function (gsea.obj, twoColors = c("red", "blue"),
plotNames = FALSE,
title = "Running Enrichment Score",
bottomTitle = "List Values",
bottomYlabel = "Signature values") {
# Keep original par device settings and layout
on.exit(layout(1))
opar<-par(no.readonly=TRUE)
on.exit(par(opar),add=TRUE,after=FALSE)
# Extract parameters from the gsea object
es1 <- gsea.obj$es1; es2 <- gsea.obj$es2;
nes1 <- gsea.obj$nes1;nes2 <- gsea.obj$nes2;
p.value1 <- gsea.obj$p.value1; p.value2 <- gsea.obj$p.value2;
ledge1 <- gsea.obj$ledge1; ledge2 <- gsea.obj$ledge2;
running_score1 <- gsea.obj$running_score1;running_score2 <- gsea.obj$running_score2;
set1 <- gsea.obj$set1;set2 <- gsea.obj$set2;
inSet1 <- gsea.obj$inSet1; inSet2 <- gsea.obj$inSet2;
reflist <- gsea.obj$reflist
min.plot <- min(running_score1,running_score2)
max.plot <- max(running_score1,running_score2)
if (es1 < 0) {
l.ledge.ref.plot1 <- length(reflist) - length(ledge1)
}else {
l.ledge.ref.plot1 <- length(ledge1)
}
if (es2 < 0) {
l.ledge.ref.plot2 <- length(reflist) - length(ledge2)
}else {
l.ledge.ref.plot2 <- length(ledge2)
}
N<-length(reflist)
ind<- 1:N
layoutMatrix<-rbind(1,2,3,4)
layout(layoutMatrix,heights=c(1,5,1,2))
##-----------------------------------------
par(mar=c(0,4.1,2,2.1))
plot(0, col = "white", xlim = c(1, N), ylim = c(0, 10), xaxt = "n",
yaxt = "n", type = "n", frame.plot = FALSE, xlab = "",
ylab = "", xaxs = "r", yaxs = "r", main = paste("Number of elements: ",
N, " (in full list), ", length(set1), " (in element set1)",
sep = "", collapse = ""))
for (position in 1:N) {
if (inSet1[position] == 1) {
if (N < 50 | length(set1) <= 10) {
rect(xleft = position - 0.2, ybottom = 0, xright = position +
0.2, ytop = 10, col = twoColors[1], border = NA)
}
else {
abline(v = position, lwd = 1, col = twoColors[1])
}
if (plotNames) {
text(labels = names(reflist[position]), x = position -
0.2, y = 0, srt = 90, offset = 0, pos = 4,
font = 2)
}
}
}
##--------------------------------
plot(x=NULL,y=NULL,xlim=range(c(1,N)), ylim=range(c(min.plot, max.plot)), sub = "", xlab = "", ylab = "Running Enrichment Score",
pch = 20, lwd = 2, cex = 1, xaxt = "n",
yaxs = "r", main = title)
grid(col = "light grey", lty = 2)
lines(ind, running_score1, sub = "", xlab = "", ylab = "Enrichment Score",
lwd = 2, cex = 1, col = twoColors[1])
lines(ind, running_score2, sub = "", xlab = "", ylab = "Enrichment Score",
lwd = 2, cex = 1, col = twoColors[2])
lines(c(1, N), c(0, 0), lwd = 1, lty = 1, cex = 1)
lines(c(l.ledge.ref.plot1, l.ledge.ref.plot1), c(min(running_score1),
max(running_score1)), lwd = 1, lty = 1, cex = 1)
lines(c(l.ledge.ref.plot2, l.ledge.ref.plot2), c(min(running_score2),
max(running_score2)), lwd = 1, lty = 1, cex = 1)
if(es1 >=0){legside1="topright"
}else{legside1="topleft"}
legend(legside1, legend = c(paste("NES1 = ", signif(nes1, 3), sep = ""),
paste("P1 = ", sprintf("%.2e",p.value1), sep = "")),
bg = "white",box.col=twoColors[1],cex=1.5)
if(es2 >=0){legside2="bottomright"
}else{legside2="bottomleft"}
legend(legside2, legend = c(paste("NES2 = ", signif(nes2, 3), sep = ""),
paste("P2 = ", sprintf("%.2e",p.value2), sep = "")),
bg = "white",box.col=twoColors[2],cex=1.5)
# Integration
p.value.int<-fisherp(c(p.value1,p.value2))
legend("center",legend=paste0("pFisher = ",sprintf("%.2e",p.value.int)),cex=1.5)
##----------------------------------
plot(0, col = "white", xlim = c(1, N), ylim = c(0, 10), xaxt = "n",
yaxt = "n", type = "n", frame.plot = FALSE, xlab = "",
ylab = "", xaxs = "r", yaxs = "r", main = paste("Number of elements: ",
N, " (in full list), ", length(set2), " (in element set2)",
sep = "", collapse = ""))
for (position in 1:N) {
if (inSet2[position] == 1) {
if (N < 50 | length(set2) <= 10) {
rect(xleft = position - 0.2, ybottom = 0, xright = position +
0.2, ytop = 10, col = twoColors[2], border = NA)
}
else {
abline(v = position, lwd = 1, col = twoColors[2])
}
if (plotNames) {
text(labels = names(reflist[position]), x = position -
0.2, y = 0, srt = 90, offset = 0, pos = 4,
font = 2)
}
}
}
#-----------------------------------
par(mar=c(2.1,4.1,2,2.1))
plot(x=NULL,y=NULL,xlim=range(c(0,N)),
ylim=range(c(min(reflist),max(reflist))), type = "b", lwd = 2,
cex = 1, col = 1, xaxs = "r", yaxs = "r",main=bottomTitle,
ylab=bottomYlabel,xlab="")
grid(col = "light grey", lty = 2)
abline(h=0, lwd = 1,lty = 1, cex = 1, col = 1)
lines(ind,reflist,lty=1,lwd=2)
}
#' ssGSEA
#'
#' This function performs single sample GSEA
#' @param inmat A numeric matrix, with rownames/rows as genes or features, and
#' colnames/columns as sample names
#' @param groups a named list. Names are names of the groups (e.g. pathways) and
#' elements are character vectors indicating gene or feature names (that should
#' match, at least partially, with the rownames of inmat)
#' @param scale Boolean. Wheter the matrix should be row-scaled.
#' @param minsize Numeric. Include only groups with at least this many elements
#' Default is 10
#' @return A matrix of Normalized Enrichment Scores (NES), which can be converted to
#' p-values using the function _corto::z2p_
#' @examples
#' # A random matrix
#' set.seed(1)
#' inmat<-matrix(rnorm(200*50),nrow=200,ncol=50)
#' rownames(inmat)<-paste0("gene",1:nrow(inmat))
#' # A random list of groups
#' groups<-list()
#' for(i in 1:10){
#' somegenes<-sample(rownames(inmat),30)
#' groups[[paste0("pathway_",i)]]<-somegenes
#' }
#' # Run ssGSEA
#' nesmat<-ssgsea(inmat,groups)
#'
#' @export
ssgsea<-function(inmat,groups,scale=TRUE,minsize=10){
if(scale){
inmat<-t(apply(inmat,1,scale))
}
nesmat<-arena(inmat,groups,minsize=minsize)
return(nesmat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.