# #############################################################################
# Calculates gene enrichment: i-CisTarget version
# Arguments should be kept in the same order as .calcEnr_Aprox
.calcEnr_iCisTarget <- function(gsRankings, maxRank,
signifRankingNames, plotCurve, nCores, nMean)
{
# nMean: ignored
# Calculate RCC for all motifs
rccs <- .calcRCC(gsRankings, maxRank, nCores)
# Estimate mean and mean+2sd at each rank
rccMean <- apply(rccs, 1, mean)
rccM2sd <- rccMean + (2*apply(rccs, 1, sd))
# TO DO: Save/return somehow?
# Max enrichment for the selected rankings[,srName]
maxEnr <- sapply(signifRankingNames, function(sr) {
x <- min(which.max(rccs[,sr]-rccM2sd))
c(x=x, y=unname(rccs[x,sr]))
})
colnames(maxEnr) <- signifRankingNames
# Plot
if(plotCurve)
{
na <- sapply(colnames(maxEnr), function(srName) {
.plotRCC(rccMean, rccM2sd, srName, maxEnr[,srName], rccs[,srName])
})
}
return(maxEnr)
}
# 50-110 secs
##############################################################################
# Calculates gene enrichment: Aproximated/faster version
# Arguments should be kept in the same order as .calcEnr_iCisTarget
.calcEnr_Aprox <- function(gsRankings, maxRank,
signifRankingNames, plotCurve, nCores, nMean)
{
# Calculate aproximated-RCC across all motifs at each rank position
# maxRank <- min(c(maxRank, nrow(gsRankings))) # Correct??
maxRankExtra <- maxRank+nMean
gsRankings.asMat <- as.matrix(gsRankings) # Much faster!
gsRankings.asMat[gsRankings.asMat>maxRankExtra] <- NA
globalMat <- matrix(0, nrow=max(nrow(gsRankings),max(gsRankings.asMat, na.rm=T)), ncol=maxRankExtra) # nrow=nrow(gsRankings): can be out of range, add extra rows and subset at the end
# x <- x[seq_len(min(length(x), nrow(globalMat)))] # or: # if(nrow(globalMat) < length(x)) x <- x[seq_len(nrow(globalMat))] # cannot be in the loop... too slow!
for(i in 1:nrow(gsRankings)) # (TO DO: Paralellize?)
{
x <- sort(gsRankings.asMat[i,])
if(length(x) > 0){
coords <- cbind(y=seq_along(x), x)
globalMat[coords] <- globalMat[coords]+1
# for(y in seq_along(x)) globalMat[y,x[y]] <- globalMat[y,x[y]]+1
}
}
globalMat <- globalMat[1:nrow(gsRankings), 1:maxRankExtra] #
# Estimate mean and mean+2sd at each rank
rccStatsRaw <- apply(globalMat, 2, function(x){
tmp <- x
if(sum(x)>0) tmp <- rep(seq_along(x), x)
rccMean <- mean(tmp)
rccSd <- sd(tmp)
c(mean=rccMean, sd=rccSd)
})
# Remove NAs (assign left value)
nas <- which(is.na(rccStatsRaw), arr.ind=TRUE)
if(any(nas[,2] == 1)) { # First value cannot be assigned to left
rccStatsRaw[nas[which(nas[,2]==1), ]] <- 0
nas <- nas[which(nas[,2]!=1), , drop=FALSE]
}
if(nrow(nas)>0){
for(i in seq_len(nrow(nas)))
{
x <- nas[i,]
rccStatsRaw[x[1], x[2]] <- rccStatsRaw[x[1], x[2]-1]
}
}
# Reduce noise in the stats with the rolling mean
rccStats <- t(apply(rccStatsRaw, 1,
function(x)
c(x[1:5], # Correct??
zoo::rollmean(x, nMean, align="center",
fill="extend"))))[,1:(maxRank-1)]
rccM2sd <- rccStats["mean",] + (2*rccStats["sd",])
rccMean <- rccStats["mean",]
rm(rccStats); rm(globalMat)
# Calculate real RCC & max enrichment for selected rankings
rccs <- .calcRCC(gsRankings[signifRankingNames,,drop=FALSE], maxRank, nCores)
maxEnr <- sapply(signifRankingNames, function(sr) {
x <- min(which.max(rccs[,sr]-rccM2sd))
c(x=x, y=unname(rccs[x,sr]))
})
# Plot
if(plotCurve)
{
# Global estimation plot
plot(rccStatsRaw["mean",]+2*rccStatsRaw["sd",],
type="l", col="lightgreen",
xlab="Rank", ylab="#genes recovered",
main="Global mean and SD estimation",
xlim=c(0,maxRank))
lines(rccStatsRaw["mean",],type="l", col="pink")
lines(rccMean, col="red")
lines(rccM2sd, col="darkgreen")
# RCC for each significant ranking
na <- sapply(colnames(maxEnr), function(srName) {
.plotRCC(rccMean, rccM2sd, srName, maxEnr[,srName], rccs[,srName])
})
}
return(maxEnr)
}
###############################################################################
# Aux functions
# Calculates RCC (of the gene-set) ONE RANKING
.calcRCC.oneRanking <- function(x, maxRank)
{
x <- unlist(x)
x <- sort(x[x<maxRank])
curranking <- c(x, maxRank)
unlist(mapply(rep, seq_along(curranking)-1,
c(curranking[1], diff(curranking))))[-1]
}
# Apply .calcRCC.oneRanking on all rankings (= each column)
.calcRCC <- function(gsRankings, maxRank, nCores)
{
if(nCores==1)
{
rccs <- apply(gsRankings, 1, .calcRCC.oneRanking, maxRank)
}else
{
# Split rankings into 10 groups and run in parallel
doParallel::registerDoParallel()
options(cores=nCores)
rowsNam <- rownames(gsRankings)
suppressWarnings(
rowNamsGroups <- split(rowsNam, (seq_along(rowsNam)) %% nCores))
# Expected warning: Not multiple
# rccs <- foreach(colsGr=colsNamsGroups, .combine="cbind") %do%
rowsGr <- NULL
rccs <- foreach::"%do%"(foreach::foreach(rowsGr=rowNamsGroups,
.combine="cbind"),
{
apply(gsRankings[rowsGr,, drop=FALSE],1, .calcRCC.oneRanking, maxRank)
})
}
return(rccs)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.