#' Perform Master Regulator Analysis (mra).
#'
#' The analysis is performed between two groups of samples in the form of expression matrices,
#' with genes/features as rows and samples as columns.
#' @param expmat1 A numeric expression matrix, with genes/features as rows and samples as columns.
#' If only expmat1 is provided (without expmat2), the function will perform a sample-by-sample
#' master regulator analysis, with the mean of the dataset as a reference. If expmat2 is provided,
#' expmat1 will be considered the "treatment" sample set. If a named vector is provided, with names
#' as genes/features and values as signature values (e.g. T-test statistics), signature
#' master regulator analysis is performed.
#' @param expmat2 A numeric expression matrix, with genes/features as rows and samples as columns.
#' If provided, it will be considered as the "control" or "reference" sample set for expmat1.
#' @param regulon A _regulon_ object, output of the _corto_ function.
#' @param minsize A minimum network size for each centroid/TF to be analyzed. Default is 10.
#' @param nperm The number of times the input data will be permuted to generate null signatures.
#' Default is 1000 if expmat2 is provided, and 10 if expmat2 is not provided (single sample mra).
#' @param nthreads The number of threads to use for generating null signatures. Default is 1
#' @param atacseq An optional 3 column matrix derived from an ATAC-Seq analysis, indicating
#' 1) gene symbol, 2) -log10(FDR)*sing(log2FC) of an ATAC-Seq design, 3) distance from TSS.
#' If provided, the output will contain an _atacseq_ field.
#' @param verbose Boolean, whether to print full messages on progress analysis. Default is FALSE
#' @return A list summarizing the master regulator analysis
#' \itemize{
#' \item nes: the normalized enrichment score: positive if the centroid/TF network is upregulated
#' in expmat1 vs expmat2 (or in expmat1 vs the mean of the dataset), negative if downregulated. A
#' vector in multisample mode, a matrix in sample-by-sample mode.
#' \item pvalue: the pvalue of the enrichment.
#' \item sig: the calculated signature (useful for plotting).
#' \item regulon: the original regulon used in the analysis (but filtered for _minsize_)
#' \item atac: Optionally present if atacseq data is provided. For each centroid/TF a number
#' ranging from 0 to 1 will indicate the fraction of changes in activity due to promoter effects
#' rather than distal effects.
#' }
#' @export
mra<-function(expmat1,expmat2=NULL,regulon,minsize=10,nperm=NULL,nthreads=2,verbose=FALSE,
atacseq=NULL){
# Setting default nperm if not set by the user
if(is.null(nperm)){
if(is.null(expmat2)){
nperm<-10
} else{
nperm<-1000
}
}
# Filter by minsize
regsizes<-sapply(regulon,function(x){length(x$likelihood)})
regulon<-regulon[regsizes>=minsize]
regsizes<-regsizes[names(regulon)]
# First we create a centroid/target matrix
centroids<-sort(names(regulon))
targets<-sort(unique(unlist(sapply(regulon,function(x){names(x$tfmode)}))))
netmat<-matrix(0,nrow=length(centroids),ncol=length(targets),dimnames=list(centroids,targets))
# Then we fill it
for(centroid in centroids){
vec<-sign(regulon[[centroid]]$tfmode)*regulon[[centroid]]$likelihood
netmat[centroid,names(vec)]<-vec
}
# Case 0: expmat1 is a signature
if(is.vector(expmat1)){
stop("Input data is provided as vector, Calculating Signature Master Regulator Analysis")
}
# Case 1: expmat2 is provided as control
if(!is.null(expmat2)){
# Check for zero-variance genes
vargenes<-apply(expmat1,1,var)
expmat11<-expmat1[which(vargenes>0),]
rm(vargenes)
vargenes<-apply(expmat2,1,var)
expmat22<-expmat2[which(vargenes>0),]
rm(vargenes)
if(nrow(expmat11)<nrow(expmat1)){
message("Removed ",nrow(expmat1)-nrow(expmat11)," rows with zero variance")
}
if(nrow(expmat22)<nrow(expmat2)){
message("Removed ",nrow(expmat2)-nrow(expmat22)," rows with zero variance")
}
common<-intersect(rownames(expmat11),rownames(expmat22))
expmat1<-expmat11[common,]
expmat2<-expmat22[common,]
rm(expmat11,expmat22)
# We create a signature
sig<-setNames(apply(cbind(expmat1,expmat2),1,function(x){
x1<-x[1:ncol(expmat1)]
x2<-x[(ncol(expmat1)+1):length(x)]
tt<-t.test(x1,x2)
return(tt$statistic)
}),rownames(expmat1))
sig<-sig[!is.na(sig)]
common<-intersect(names(sig),colnames(netmat))
sig<-sig[common]
netmat<-netmat[,common]
# Then we slightly reduce the contribution of targets shared by multiple centroids
# But only targets in the signature
netmat2<-apply(netmat,2,function(x){x/(sum(x!=0)^0.5)})
# plot(netmat[,1:20],netmat2[,1:20])
# abline(h=0,v=0)
netmat<-netmat2
rm(netmat2)
# # Then we normalize by regulon length, otherwise TFs with small networks will be penalized
# netmat<-t(apply(netmat,1,function(x){10*x/sum(abs(x))}))
# netmat[is.na(netmat)]<-0
# Calculate unnormalized scores
scores<-(netmat%*%sig)[,1]
# # Check Correlation of scores with regsizes
# regsizes<-regsizes[names(scores)]
# plot(regsizes,scores)
# mtext(paste0("Cor=",cor(regsizes,scores)))
# # Interestingly here there is no correlation
#cands<-c("BAZ1A","HCFC1","HDGF","MAZ","ZNF146","ZNF532")
#scores[cands]
# Permuted signatures
nullsigperm1<-function(seed=0,expmat1,expmat2,netmat){
permat<-cbind(expmat1,expmat2)
set.seed(seed)
permat<-permat[sample(nrow(permat)),]
permat<-permat[,sample(ncol(permat))]
nullsig<-setNames(apply(permat,1,function(x){
if(!is.null(expmat2)){
x1<-x[1:ncol(expmat1)]
x2<-x[(ncol(expmat1)+1):length(x)]
tt<-t.test(x1,x2)
} else {
x<-x-mean(x)
tt<-t.test(x)
}
return(tt$statistic)
}),rownames(permat))
nullsig<-nullsig[colnames(netmat)]
nullscores<-(netmat%*%nullsig)[,1]
return(nullscores)
}
# The pbapply snippet
cl<-parallel::makeCluster(nthreads)
#invisible(parallel::clusterExport(cl=cl,varlist=c("nthreads")))
#invisible(parallel::clusterEvalQ(cl= cl,library(dplyr)))
nullscores<-pbsapply(cl=cl,
X=1:nperm,
FUN=nullsigperm1,
expmat1=expmat1,expmat2=expmat2,netmat=netmat
)
parallel::stopCluster(cl)
# Compare scores with nullscores
# Fit gaussians
nes<-apply(cbind(scores,nullscores),1,function(x){
myscore<-x[1]
morescores<-x[2:length(x)]
mu<-mean(morescores)
sigma<-sd(morescores)
p<-pnorm(abs(myscore),mean=mu,sd=sigma,lower.tail=FALSE)*2
if(p==0){p<-.Machine$double.xmin}
mynes<-p2z(p)*sign(myscore)
if(myscore==0){mynes<-0}
return(mynes)
})
outlist<-list(nes=nes,pvalue=z2p(nes),sig=sig,regulon=regulon)
return(outlist)
# # Check correlation regsize nes
# regsizes<-regsizes[names(nes)]
# plot(regsizes,nes,col="grey")
# mtext(paste0("Cor=",cor(regsizes,nes)))
# text(regsizes[cands],nes[cands],labels=cands,col="black",cex=2,font=2)
#
# # Check correlation scores nes
# scores<-scores[names(nes)]
# plot(scores,nes)
# mtext(paste0("Cor=",cor(scores,nes)))
} else { # Case 2: expmat2 is not provided
# Create signatures
sigmat<-t(apply(expmat1,1,function(x){
y<-(x-mean(x))/sd(x)
return(y)
}))
# sigs<-t(scale(t(expmat1))) # Exactly the same
sigmat[is.na(sigmat)]<-0
common<-intersect(rownames(sigmat),colnames(netmat))
sigmat<-sigmat[common,]
netmat<-netmat[,common]
# Then we slightly reduce the contribution of targets shared by multiple centroids
# But only targets in the signature
netmat<-apply(netmat,2,function(x){x/(sum(x!=0)^0.5)})
# Calculate unnormalized scores
scores<-(netmat%*%sigmat)
# Permuted signatures
nullsigperm2<-function(seed=0,expmat1,netmat){
permat<-expmat1
set.seed(seed)
permat<-permat[sample(nrow(permat)),]
permat<-permat[,sample(ncol(permat))]
nullsigmat<-t(apply(permat,1,function(x){
y<-(x-mean(x))/sd(x)
return(y)
}))
nullsigmat<-nullsigmat[colnames(netmat),]
nullscores<-(netmat%*%nullsigmat)
return(nullscores)
}
# The pbapply snippet
cl<-parallel::makeCluster(nthreads)
#invisible(parallel::clusterExport(cl=cl,varlist=c("nthreads")))
#invisible(parallel::clusterEvalQ(cl= cl,library(dplyr)))
nullscores<-pblapply(cl=cl,
X=1:nperm,
FUN=nullsigperm2,
expmat1=expmat1,netmat=netmat
)
parallel::stopCluster(cl)
nullscores<-do.call(cbind,nullscores)
# Compare scores with nullscores
# Fit gaussians
nes<-t(apply(cbind(scores,nullscores),1,function(x){
myscores<-x[1:ncol(scores)]
morescores<-x[(ncol(scores)+1):length(x)]
mu<-mean(morescores)
sigma<-sd(morescores)
ps<-pnorm(abs(myscores),mean=mu,sd=sigma,lower.tail=FALSE)*2
ps[ps==0]<-.Machine$double.xmin
mynes<-p2z(ps)*sign(myscores)
mynes[myscores==0]<-0
return(mynes)
}))
return(nes)
# pnb<-nes["MYCN",names(hashtags)[hashtags=="pnb"]]
# ctr<-nes["MYCN",names(hashtags)[hashtags=="ctr"]]
# plot(density(pnb),col="red",lwd=3,main="MYCN Activity")
# lines(density(ctr),col="blue",lwd=3)
# legend("topright",col=c("red","blue"),legend=c("Panobinostat","Control"),lwd=3)
# abline(v=0,lty=2)
}
}
#' Plot a master regulator analysis
#'
#' Plotting function for master regulator analysis performed by the _mra_ function
#' @param mraobj The input object, output of the function mra
#' @param mrs Either a numeric value indicating how many MRs to show, sorted by
#' significance, or a character vector specifying which TFs to show. Default is 5
#' @param title Title of the plot (optional, default is "corto - Master Regulator Analysis")
#' @param pthr The p-value at which the MR is considered significant. Default is 0.01
#' @return A plot is generated
#' @export
mraplot<-function(mraobj,mrs=5,title="corto - Master Regulator Analysis",pthr=0.01){
# Checks ----
if(is.numeric(mrs)){
mrs<-names(sort(abs(mraobj$nes),decreasing=TRUE))[1:mrs]
} else if(is.character(mrs)){
mrs<-mrs
} else {
stop("Provide mrs as a number to show top mrs, or a vector of mrs")
}
# Define layout ----
ltitle<-c(1,1,1,2,2)
lblocks<-matrix(NA,nrow=0,ncol=length(ltitle))
for(i in 1:length(mrs)){
base<-3+(6*(i-1))
lblock<-rbind(
c(base,base+1,base+2,base+3,base+3),
c(base+4,base+4,base+4,base+3,base+3),
c(base+5,base+5,base+5,base+3,base+3)
)
lblocks<-rbind(lblocks,lblock)
}
lmatrix<-rbind(
ltitle,
lblocks
)
# Start plotting ----
layout(lmatrix,heights=c(1,rep(2,nrow(lmatrix)-1)))
# layout.show(n=max(lmatrix))
# Function for plotting text only ----
titplot<-function(title,box=TRUE,cex=2,bgcol="white",bold=FALSE){
if(bold){font<-2}else{font<-1}
opar<-par()$mar
par(mar = c(0.1,0.1,0.1,0.1))
plot(c(0,1),c(0,1),ann=F,bty='n',type='n',xaxt='n',yaxt='n')
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col=bgcol)
text(x=0.5,y=0.5,title, cex = cex, col = "black",font=font)
# if(bottom){
# axis(1,lwd=10,tick=FALSE,labels=FALSE)
# }
if(box){
box()
}
par(mar=opar)
}
# Panels 1 and 2 are titles ----
titplot(title)
titplot("Top targets")
# Prefetch signature ----
ranksig<-rank(mraobj$sig)
ranksiginv<-rank(-mraobj$sig)
ranksigabs<-rank(abs(mraobj$sig))
# Fill the plot with MR blocks ----
for(mr in mrs){
# Name of the MR
mycex<-16*1/nchar(mr)
titplot(mr,cex=mycex)
### NES ----
bgcol<-"white"
if(mraobj$nes[mr]>0&mraobj$pvalue[mr]<=pthr){
bgcol<-"salmon"
} else if(mraobj$nes[mr]<0&mraobj$pvalue[mr]<=pthr){
bgcol<-"cornflowerblue"
}
titplot(paste0("NES=",round(mraobj$nes[mr],2)),bgcol=bgcol)
### p-value ----
bold<-FALSE
if(mraobj$pvalue[mr]<=pthr){bold<-TRUE}
titplot(paste0("p=",signif(mraobj$pvalue[mr],3)),bold=bold)
### Network ----
# We show maybe the top 12 targets
# titplot("Network goes here")
opar<-par()$mar
par(mar=c(0,0,0,0),xaxs="i")
plot(0,xlim=c(-1.5,1.5),ylim=c(-1.2,1.2),type="n",ann=F,bty='n',xaxt='n',yaxt='n')
# Which targets to show
targets<-names(mraobj$regulon[[mr]]$tfmode)
targets<-intersect(targets,names(ranksig))
toshow<-names(sort(ranksigabs[targets],decreasing=TRUE)[1:12])
# Colors
col<-rep("white",12)
col[mraobj$sig[toshow]<0]<-"#6495ED66" # cornflowerblue
col[mraobj$sig[toshow]>0]<-"#FF8C6966" # salmon
# Type of regulation (mode of action)
moa<-rep("unknown",12)
tfmodehere<-mraobj$regulon[[mr]]$tfmode[toshow]
moa[tfmodehere>0]<-"activation"
moa[tfmodehere<0]<-"repression"
angle<-moa
angle[moa=="activation"]<-30
angle[moa=="repression"]<-90
angle[moa=="unknown"]<-0
# Circles Clock
draw.circle(0,1,0.1,border="gray90",col=col[1])
arrows(0,0.1,0,0.9,lwd=2,col="gray50",angle=angle[1])
text(0,1,font=2,cex=1.5,labels=toshow[1])
draw.circle(sin(pi/6),cos(pi/6),0.1,border="gray90",col=col[2])
arrows(0,0.1,sin(pi/6)-0.1,cos(pi/6)-0.1,lwd=2,col="gray50",angle=angle[2])
text(sin(pi/6),cos(pi/6),font=2,cex=1.5,labels=toshow[2])
draw.circle(sin(pi/3),cos(pi/3),0.1,border="gray90",col=col[3])
arrows(0,0.1,sin(pi/3)-0.2,cos(pi/3)-0.1,lwd=2,col="gray50",angle=angle[3])
text(sin(pi/3),cos(pi/3),font=2,cex=1.5,labels=toshow[3])
draw.circle(1,0,0.1,border="gray90",col=col[4])
arrows(0.1,0,0.7,0,lwd=2,col="gray50",angle=angle[4])
text(1,0,font=2,cex=1.5,labels=toshow[4])
draw.circle(sin(pi/3),-cos(pi/3),0.1,border="gray90",col=col[5])
arrows(0,-0.1,sin(pi/3)-0.2,-cos(pi/3)+0.1,lwd=2,col="gray50",angle=angle[5])
text(sin(pi/3),-cos(pi/3),font=2,cex=1.5,labels=toshow[5])
draw.circle(sin(pi/6),-cos(pi/6),0.1,border="gray90",col=col[6])
arrows(0,-0.1,sin(pi/6)-0.1,-cos(pi/6)+0.1,lwd=2,col="gray50",angle=angle[6])
text(sin(pi/6),-cos(pi/6),font=2,cex=1.5,labels=toshow[6])
draw.circle(0,-1,0.1,border="gray90",col=col[7])
arrows(0,-0.1,0,-0.9,lwd=2,col="gray50",angle=angle[7])
text(0,-1,font=2,cex=1.5,labels=toshow[7])
draw.circle(-sin(pi/6),-cos(pi/6),0.1,border="gray90",col=col[8])
arrows(0,-0.1,-sin(pi/6)+0.1,-cos(pi/6)+0.1,lwd=2,col="gray50",angle=angle[8])
text(-sin(pi/6),-cos(pi/6),font=2,cex=1.5,labels=toshow[8])
draw.circle(-sin(pi/3),-cos(pi/3),0.1,border="gray90",col=col[9])
arrows(0,-0.1,-sin(pi/3)+0.2,-cos(pi/3)+0.1,lwd=2,col="gray50",angle=angle[9])
text(-sin(pi/3),-cos(pi/3),font=2,cex=1.5,labels=toshow[9])
draw.circle(-1,0,0.1,border="gray90",col=col[10])
arrows(-0.1,0,-0.7,0,lwd=2,col="gray50",angle=angle[10])
text(-1,0,font=2,cex=1.5,labels=toshow[10])
draw.circle(-sin(pi/3),cos(pi/3),0.1,border="gray90",col=col[11])
arrows(0,0.1,-sin(pi/3)+0.1,cos(pi/3)-0.1,lwd=2,col="gray50",angle=angle[11])
text(-sin(pi/3),cos(pi/3),font=2,cex=1.5,labels=toshow[11])
draw.circle(-sin(pi/6),cos(pi/6),0.1,border="gray90",col=col[12])
arrows(0,0.1,-sin(pi/6)+0.2,cos(pi/6)-0.1,lwd=2,col="gray50",angle=angle[12])
text(-sin(pi/6),cos(pi/6),font=2,cex=1.5,labels=toshow[12])
# Plot Centroid
draw.circle(0,0,0.1,border="gray90",col="#00000011")
text(0,0,labels=mr,cex=2,font=2)
box()
par(mar=opar)
### Signature ----
# Define Transparency for barcode plot coloring
if(mraobj$nes[mr]>0&mraobj$pvalue[mr]<=pthr){
transp<-255*(ranksig^3)/(length(ranksig)^3)
transp<-as.hexmode(round(transp))
transp<-format(transp,width=2)
names(transp)<-names(ranksig)
transpinv<-255*(ranksiginv^3)/(length(ranksiginv)^3)
transpinv<-as.hexmode(round(transpinv))
transpinv<-format(transpinv,width=2)
names(transpinv)<-names(ranksig)
} else if(mraobj$nes[mr]<0&mraobj$pvalue[mr]<=pthr){
transp<-255*(ranksiginv^3)/(length(ranksiginv)^3)
transp<-as.hexmode(round(transp))
transp<-format(transp,width=2)
names(transp)<-names(ranksig)
transpinv<-255*(ranksig^3)/(length(ranksig)^3)
transpinv<-as.hexmode(round(transpinv))
transpinv<-format(transpinv,width=2)
names(transpinv)<-names(ranksig)
} else {
transp<-255*(ranksigabs^3)/(length(ranksigabs)^3)
transp<-as.hexmode(round(transp))
transp<-format(transp,width=2)
names(transp)<-names(ranksig)
transpinv<-transp
}
# Start plotting
opar<-par()$mar
par(mar=c(0,0,0,0),xaxs="i")
plot(0,xlim=c(1,length(ranksig)),ylim=c(0,1),type="n",xaxt="n")
targets<-mraobj$regulon[[mr]]$tfmode
# Positive targets
postargets<-names(targets[targets>0])
postargets<-intersect(names(ranksig),postargets)
pos<-ranksig[postargets]
if(length(pos)>0){
segments(pos,0.4,pos,1,col=paste0("#CD0000",transp[postargets]),lwd=3)
}
# Negative targets
negtargets<-names(targets[targets<0])
negtargets<-intersect(names(ranksig),negtargets)
neg<-ranksig[negtargets]
if(length(neg)>0){
segments(neg,0,neg,0.6,col=paste0("#000080",transpinv[negtargets]),lwd=3)
}
# text(0,0.5,labels="-",pos=4,cex=12,font=1)
# text(length(ranksig),0.5,labels="+",pos=2,cex=10,font=1)
uparrow<-intToUtf8(8593)
dnarrow<-intToUtf8(8595)
text(length(ranksig)/2,0.5,labels=paste0(dnarrow," ",uparrow),pos=NULL,cex=5,font=2)
par(mar=opar)
### Keep blank to separate from the next MR
titplot("")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.