# R/power.R In PROPER: PROspective Power Evaluation for RNAseq

#### Documented in comparePowerpower.seqDepthsummaryPower

######################################################
## A list of functions for power calculation
######################################################

## With simulation results, this function compute stratified power, etc.
comparePower <- function(simOutput, alpha.type=c("fdr","pval"), alpha.nominal=0.1,
stratify.by=c("expr", "dispersion"), strata,
filter.by=c("none", "expr"), strata.filtered=1,
target.by=c("lfc", "effectsize"), delta=0.5) {

alpha.type = match.arg(alpha.type)
stratify.by = match.arg(stratify.by)
filter.by = match.arg(filter.by)
target.by = match.arg(target.by)

## set strata if not given
if(missing(strata)) {
if(stratify.by == "expr")
strata = c(0,10,2^(1:7)*10,Inf)
else
strata = c(0, c(0.001, 0.01, 0.05, 0.1, 0.2, 0.5, 0.8, 1), Inf)
}

## some general parameters
Nreps1 = simOutput\$Nreps1
Nreps2 = simOutput\$Nreps2
ngenes = simOutput\$sim.opts\$ngenes
sim.opts = simOutput\$sim.opts
DEids = simOutput\$DEid
lfcs = simOutput\$lfcs
nsims = dim(simOutput\$pvalue)[3]

## initialize results
## determine dimension of results, for filtering
nr = length(strata) - 1
if(filter.by == "expr") {
nr = nr - length(strata.filtered)
}
TD = FD = FDR = alpha = power = array(NA,dim=c(nr,length(Nreps1), nsims))
power.marginal = alpha.marginal = FDR.marginal = matrix(NA,length(Nreps1), nsims)

## loop over simulation and replicates
for(i in 1:nsims) {
for(j in seq(along=Nreps1)) {
nn1 = Nreps1[j]
nn2 = Nreps2[j]
## get DE flags.
DEid = simOutput\$DEid[[i]]
lfc = simOutput\$lfcs[[i]]
Zg = Zg2 = rep(0, ngenes)
Zg[DEid] = 1
## find target (interesting) genes
if(target.by == "lfc") {
lfc = simOutput\$lfcs[[i]]
ix = abs(lfc) > delta
} else if (target.by == "effectsize") {
effectsize = simOutput\$lfcs[[i]] / sqrt(exp(simOutput\$sim.opts\$lOD[DEid]))
ix = abs(effectsize) > delta
}
Zg2[DEid[ix]] = 1

## stratification and filtering
X.bar1 = simOutput\$xbar[,j,i]
ix.keep = which(X.bar1>0)
if(stratify.by == "expr") { ## stratify by expression
xgr = cut(X.bar1[ix.keep], strata)
## filtering. This only applies when stratifying by expression
if(filter.by == "expr") { ## filter by expression levels (average counts)
lev = levels(xgr)
ix.keep = ix.keep[!(xgr %in% lev[strata.filtered])]
## recut
xgr = cut(X.bar1[ix.keep], strata[-strata.filtered])
}
}
else if(stratify.by == "dispersion") { ## stratify by dispersion
xgr=cut(simOutput\$sim.opts\$lOD[ix.keep], log(strata))
if(filter.by != "none") { ## filter
stop("Filtering only applies when stratifying by expression level.")
}
}

## get type I error
if(alpha.type == "pval")
x = simOutput\$pvalue[ix.keep,j,i]
else { ## use FDR.
x = simOutput\$fdr[ix.keep,j,i]
if(filter.by != "none") { ## Need to recompute FDR after filtering. I'll just use BH
pval = simOutput\$pvalue[ix.keep,j,i]
}
}

## update Zg flags after filtering
Zg = Zg[ix.keep]
Zg2 = Zg2[ix.keep]

## calculate stratified power-related quantities
power00 = POWER1(x, p.crit=alpha.nominal, Zg, Zg2, xgr=xgr)
TD[,j,i] = power00\$TD
FD[,j,i] = power00\$FD
alpha[,j,i] = power00\$alpha
alpha.marginal[j,i] = power00\$alpha.marginal
,j,i] = power00\$power
power.marginal[j,i] = power00\$power.marginal
FDR[,j,i] = power00\$FDR
FDR.marginal[j,i] = power00\$FDR.marginal
}
}

output <- list(TD=TD, FD=FD, FDR=FDR, alpha=alpha, power=power,
alpha.marginal=alpha.marginal, power.marginal=power.marginal, FDR.marginal=FDR.marginal,
## below are input parameters
alpha.type=alpha.type, alpha.nominal=alpha.nominal,
stratify.by=stratify.by, strata=strata,
target.by=target.by, Nreps1=simOutput\$Nreps1, Nreps2=simOutput\$Nreps2,
delta=delta)

output
}

########################################################
## Power function, compute the stratified power-realted quantities
##
## Return values:
## TD: number of True Discoveries in each stratum
## FD: number of False Discoveries in each stratum
## power: within strata, proportion of TD out of total DE
## alpha.nomial: cutoff of alpha on raw p-values
## alpha: empirical p-value in each stratum
## alpha.marginal: overall empirical p-value
########################################################

POWER1 <- function(p, p.crit, Zg, Zg2, xgr){
## p is input nominal p-value or FDR.
## alpha is cutoff of p
## !Zg is indicator for TN,  Zg2 is indicators for TP,
## xgr is grouping in covariate

ix.D = p <= p.crit
N = sum(ix.D) ## this is the total discovery
N.stratified = tapply(ix.D, xgr, sum)

##  TD
id.TP = Zg2==1
TD = tapply(p[id.TP] <= p.crit, xgr[id.TP], sum)
TD[is.na(TD)] = 0

##  FD
id.TN = Zg==0
FD = tapply(p[id.TN] <= p.crit, xgr[id.TN], sum)
FD[is.na(FD)] = 0

## type I error
alpha = as.vector(FD/table(xgr[id.TN]))
alpha.marginal = sum(FD)/sum(id.TN)

## power
power=as.vector(TD/table(xgr[id.TP]))
power[is.nan(power)] = 0
power.marginal=sum(TD,na.rm=TRUE)/sum(id.TP)

## FDR
FDR = FD / N.stratified
FDR.marginal = sum(FD, na.rm=TRUE) / N

list(TD=TD,FD=FD,alpha.nominal=p.crit,
alpha=alpha, alpha.marginal=alpha.marginal,
power=power, power.marginal=power.marginal,
FDR=FDR, FDR.marginal=FDR.marginal)
}

###########################################################################
## summary the power calculation result.
## The result is an object from comparePower function.
###########################################################################
summaryPower <- function(powerOutput) {
nn1 <- powerOutput\$Nreps1
nn2 <- powerOutput\$Nreps2

alpha.type <- powerOutput\$alpha.type
if(alpha.type == "pval") {
alpha.nam <- "type I error"
alpha.mar <- rowMeans(powerOutput\$alpha.marginal)
}  else {
alpha.nam <- "FDR"
alpha.mar <- rowMeans(powerOutput\$FDR.marginal)
}

TD.avg <- colSums(apply(powerOutput\$TD,c(1,2), mean, na.rm=TRUE))
FD.avg <- colSums(apply(powerOutput\$FD,c(1,2), mean, na.rm=TRUE))

res <- cbind(nn1, nn2, powerOutput\$alpha.nominal, alpha.mar, rowMeans(powerOutput\$power.marginal),
TD.avg, FD.avg, FD.avg/TD.avg)
colnames(res) <- c("SS1", "SS2",  paste(c("Nominal", "Actual"), alpha.nam),
"Marginal power", "Avg # of TD", "Avg # of FD", "FDC")

print(signif(res,2))
return(invisible(res))

}

#######################################################
## recompute power based on sequencing depth
#######################################################
power.seqDepth <- function(simResult, powerOutput,
depth.factor=c(0.2, 0.5, 1, 2, 5, 10)) {
## first get stratified power
power.expr <- apply(powerOutput\$power,c(1,2),mean, na.rm=TRUE)

dd = dim(powerOutput\$TD)
nsims = dd[3]
nSS = dd[2]

res = matrix(0, nrow=length(depth.factor), ncol=nSS)
rownames(res) = depth.factor
target.by = powerOutput\$target.by
delta = powerOutput\$delta
for(idepth in 1:length(depth.factor)) {
for(isim in 1:nsims) {
## find biologically interesting DE genes.
DEid = simResult\$DEid[[isim]]
lfc = simResult\$lfcs[[isim]]
if(target.by == "lfc") {
ix = abs(lfc) > delta
} else if (target.by == "effectsize") {
effectsize = lfc / sqrt(exp(simResult\$sim.opts\$lOD[DEid]))
ix = abs(effectsize) > delta
}
DEid2 = DEid[ix]

for(iSS in 1:nSS) {
X.bar1 = simResult\$xbar[DEid2, iSS, isim] * depth.factor[idepth]
K=which(X.bar1>0)
xgr = cut(X.bar1[K], powerOutput\$strata)
tt = table(xgr)
tt = tt/sum(tt)
power.expr = powerOutput\$,iSS,isim]
}
}