#Subsample analysis of pathways-based methods
ssapbm <- function(data=NULL, pathways=NULL, ref=NULL,target=NULL, minp=2, maxp=9999,method= c("sumoftsq","ht2", "GSEA", "GAGE","GSA"), perm=T, sampling=c("sample.labels","gene.labels"),B=100, sample.size=NULL, steps=10, fdr=NULL,thr=.05,dc=T,rep=F,cr=NULL){
if(is.null(target)){
cls2 <- c(1:ncol(data))[-ref]
}
else{
cls2 <- target
}
d <- .processdata(data, pathways)
data <- d[[1]]
pathways <- d[[2]]
cls1 <- ref
TP <- pvl <- fdr.tp <- NULL
if(is.null(fdr)){
fdr <- TRUE
fdr.method <- "BH"
}
else{
fdr.method <- fdr
fdr <- TRUE
}
print(fdr.method)
if(method[1]=="sumoftsq"){
TP <- p.squared.t.test(data, cls1, cls2,steps=B, sampling=sampling[1], pathways=pathways)
if(!is.null(sample.size)){
pvl <- subsampanalysis(data, cls1, cls2, pathways, B=100, steps, sample.size=sample.size, method="sumoftsq", sampling=sampling[1],rep=rep, rn=rownames(TP),dc=dc,thr=thr)
#pvtmp <- c()
#for(m in 1:length(pvl)){
# pvtmp <- cbind(pvtmp, pvl[[m]])
#}
#pvl <- pvtmp
#rm(pvtmp)
}
if(fdr){
fdr.tp <- p.adjust(TP[,"pval"], method=fdr.method)
}
}
if(method[1]=="ht2"){
TP <- pval.htsq(data, cls1, cls2,pathways=pathways, perm=perm, sampling=sampling[1], steps=B)
TP <- cbind(TP[[1]],TP[[2]])
colnames(TP) <- c("ht2","pval")
if(!is.null(sample.size)){
pvl <- subsampanalysis(data, cls1, cls2, pathways, B=100, steps, sample.size=sample.size, method="sumoftsq", sampling=sampling[1],rep=rep, rn=rownames(TP), dc=dc,thr=thr)
#pvtmp <- c()
#for(m in 1:length(pvl)){
# pvtmp <- cbind(pvtmp, pvl[[m]])
#}
#pvl <- pvtmp
#rm(pvtmp)
#pvl <- as.matrix(as.data.frame(pvl))
}
if(fdr){
fdr.tp <- p.adjust(TP[,2], method=fdr.method)
}
}
if(method[1]=="GSEA"){
clsn <- rep(0, length(cls1)+length(cls2))
clsn[cls2] <- 1
lp<- sapply(pathways, length)
lp <- c(min(lp), max(lp))
TP <- GSEAx(data, ref=clsn, pathways, reshuffling.type=sampling[1], nperm=B, gs.size.threshold.min = lp[1], gs.size.threshold.max=lp[2])
TP <- as.matrix(rbind(TP[[1]][,c("ES","NOM p-val")], TP[[2]][,c("ES","NOM p-val")]))
print(TP)
tmp1 <- cbind(as.numeric(as.vector(TP[,1])), as.numeric(as.vector(TP[,2])))
rownames(tmp1) <- rownames(TP)
colnames(tmp1) <- colnames(TP)
TP <- tmp1
TP <- TP[names(pathways),]
colnames(TP) <- c("GS", "pval")
if(!is.null(sample.size)){
pvl <- subsampanalysis(data, cls1, cls2, pathways, B=100, steps, sample.size=sample.size, method="GSEA", sampling=sampling[1],rep=rep, rn=rownames(TP),dc=dc,thr=thr)
}
if(fdr){
fdr.tp <- p.adjust(TP[,"pval"], method=fdr.method)
}
}
if(method[1]=="GAGE"){
lp<- sapply(pathways, length)
lp <- c(min(lp), max(lp))
print(head(data))
pvl1 <- gage(data, gsets = pathways, ref = cls1, samp = cls2, compare = "unpaired", set.size=c(lp[1],lp[2]), same.dir=FALSE)
#pvl1 <- lapply(pvl1[1:2], function(x)x[,1:5])
#pvx <- lapply(pvl1[1:2], function(x)x[,"p.val"])
#pvx <- cbind(pvx[[1]], pvx[[2]][names(pvx[[1]])], names(pvx[[1]]))
#tx <- apply(pvx,1,which.min)
#tx <- t(apply(pvx,1,function(x)c(which.min(as.numeric(x[1:2])),x[3])))
#TP <- t(apply(tx,1, function(x)pvl1[[as.numeric(x[1])]][x[2],]))
TP <- pvl1[[1]][,2:4]
colnames(TP) <- c("stat.mean", "pval", "fdr.tp")
if(fdr){
fdr.tp <- p.adjust(TP[,"pval"], method=fdr.method)
}
if(!is.null(sample.size)){
pvl <- subsampanalysis(data, cls1, cls2, pathways, B=100, steps, sample.size=sample.size, method="GAGE",dc=dc,thr=thr)
}
}
if(method[1]=="GSA"){
clsn <- rep(1, length(cls1)+length(cls2))
clsn[cls2] <- 2
lp<- sapply(pathways, length)
lp <- c(min(lp), max(lp))
TP<-GSA(data, clsn, genenames=rownames(data), genesets=pathways, resp.type="Two class unpaired", nperms=B, minsize=lp[1], maxsize=lp[2])
TP <- cbind(TP$GSA.scores, TP$pvalues.lo, TP$pvalues.hi)
rownames(TP) <- names(pathways)
tx <- (apply(TP,1,function(x)c(which.min(as.numeric(x[2:3])))))
TP <- cbind(TP,tx)
print(TP)
TP <- t(apply(TP,1, function(x)x[c(1,(x[4]+1))]))
colnames(TP) <- c("GSA_score", "pval")
if(fdr){
fdr.tp <- p.adjust(TP[,"pval"], method=fdr.method)
}
if(!is.null(sample.size)){
pvl <- subsampanalysis(data, cls1, cls2, pathways, B=100, steps, sample.size=sample.size, method="GSA",dc=dc,thr=thr)
}
}
dcoverall <- corpaths <- NULL
if(!is.null(cr)){
crall <- avgcorpath(data, ref=c(cls1,cls2), pathways, method=cr)
crctrl <- avgcorpath(data, ref=c(cls1), pathways, method=cr)
crtreat <- avgcorpath(data, ref=c(cls2), pathways, method=cr)
corpaths <- cbind(crall, crctrl, crtreat)
}
if(dc){
clsn <- rep(1, length(cls1)+length(cls2))
clsn[cls2] <- 2
dcoverall <- pcdetectioncall(data, ref=clsn, pathways=pathways, fdr=fdr.method, thr=thr)
}
if(fdr){
qval<- fdr.tp
sigpath <- names(which(qval<=thr))
list(cbind(TP, qval), pvl, dcoverall=dcoverall, corpaths=corpaths, thr=thr, pathways=names(pathways),sigpath=sigpath,fdr=fdr.method)
}
#else{
#}
}
subsampanalysis <- function(data, cls1, cls2, pathways, B=100, steps, sample.size=NULL, method=NULL, rep=F, sampling=NULL, rn=NULL, fdr="BH", dc=T, thr=0.05){
plist <- list()
dcall <- list()
for(i in 1:length(sample.size)){
pvec <- c()
dc_p <- c()
for(j in 1:steps){
print("*****")
print(cls1)
smp1 <- sample(cls1, sample.size[i], rep=rep)
smp2 <- sample(cls2, sample.size[i], rep=rep)
if(method=="sumoftsq"){
tmp <- p.squared.t.test(data[,c(smp1,smp2)], c(1:length(smp1)), c((length(smp1)+1):(length(smp2)+length(smp1))), steps=B, sampling=sampling[1], pathways=pathways)
pvec <- cbind(pvec, tmp[names(pathways),"pval"])
}
if(method=="ht2"){
tmp <- pval.htsq(data[,c(smp1,smp2)], c(1:length(smp1)), c((length(smp1)+1):(length(smp2)+length(smp1))), steps=B, sampling=sampling, perm=perm)
pvec <- cbind(pvec, tmp[[2]])
}
if(method=="GSEA"){
clsn <- rep(0, length(smp1)+length(smp2))
clsn <- c(rep(0, length(smp1)), rep(1, length(smp2)))
lp<- sapply(pathways, length)
lp <- c(min(lp), max(lp))
print("xxxxxxsdfsdfsdf")
print(smp1)
tmp <- GSEAx(data[,c(smp1, smp2)], ref=clsn, pathways, reshuffling.type=sampling[1], nperm=B, gs.size.threshold.min = lp[1], gs.size.threshold.max=lp[2])
ptmp1 <- as.numeric(as.vector(tmp[[1]][,"NOM p-val"]))
ptmp2 <- as.numeric(as.vector(tmp[[2]][,"NOM p-val"]))
tmp1 <- c(ptmp1,ptmp2)
tmp <- tmp1[rn]
pvec <- cbind(pvec, tmp)
}
if(method=="GAGE"){
print(c(1:length(smp1)))
print(head(data[,c(smp1,smp2)]))
pvl <- gage(data[,c(smp1,smp2)], gsets = pathways, ref = c(1:length(smp1)), samp = c((length(smp1)+1):(length(smp1)+length(smp2))), compare = "unpaired", set.size=c(2, 9999),same.dir=FALSE)
pvn <- pvl[[1]][,"p.val"]
#pvl <- lapply(pvl[1:2], function(x)x[,1:5])
#pvx <- lapply(pvl[1:2], function(x)x[,"p.val"])
#pvx <- cbind(pvx[[1]], pvx[[2]][names(pvx[[1]])], names(pvx[[1]]))
#tx <- t(apply(pvx,1,function(x)c(which.min(as.numeric(x[1:2])),x[3])))
#pvn <- t(apply(tx,1, function(x)pvl[[as.numeric(x[1])]][x[2],]))
pvec <- cbind(pvec, pvn[names(pathways)])
}
if(method=="GSA"){
clsn <- rep(1, length(smp1)+length(smp2))
clsn <- c(rep(1, length(smp1)), rep(2, length(smp2)))
TPx<-GSA(data[,c(smp1, smp2)], clsn, genenames=rownames(data), genesets=pathways, resp.type="Two class unpaired", nperms=B,minsize=2, maxsize=9999)
TPx <- cbind(TPx$GSA.scores, TPx$pvalues.lo, TPx$pvalues.hi)
rownames(TPx) <- names(pathways)
tx <- (apply(TPx,1,function(x)c(which.min(as.numeric(x[2:3])))))
TPx <- cbind(TPx,tx)
TPx <- (apply(TPx,1, function(x)x[c((x[4]+1))]))
pvec <- cbind(pvec, TPx[names(pathways)])
}
if(dc){
dctmp <- pcdetectioncall(data[,c(smp1, smp2)], ref=c(rep(1,length(smp1)), rep(2,length(smp2))), pathways=pathways, fdr=fdr, thr=thr)
dc_p <- cbind(dc_p, dctmp[names(pathways)])
}
}
plist[[i]] <- pvec
if(dc){
dcall[[i]] <- dc_p
}
}
names(plist) <- sample.size
if(dc){
names(dcall) <- sample.size
}
plist$dcall <- dcall
invisible(plist)
}
powerpath <- function(x, type="pval"){
tp <- x[[1]]
thr <- x$thr
fdr <- x$fdr
k <- x$sigpath
if(length(k)==0){
stop("no p-value is below threshold")
}
resampval <- length(x[[2]][-length(x[[2]])])
rml <- list()
tprx <- fprx <- fdrx <- list()
tpx <- k
tnx <- setdiff(rownames(tp), k)
#if(type=="qval"){
#bh <- lapply(ff, function(x)apply(x,2,function(y)p.adjust(y,method=xx$fdr)))
#}
for(i in 1:resampval){
p <- x[[2]][[i]]
#p <- apply(p, 2)
p <- apply(p,2,function(x)p.adjust(x,method=fdr))
ptmp <- apply(p,2,function(x){m<- rep(0, length(x));m[which(x<=thr)]<-1;m})
rownames(ptmp) <- rownames(p)
rml[[i]] <- (rowMeans(ptmp[k,]))
fdrtmp <- fprtmp <- tprtmp <- c()
for(j in 1:ncol(ptmp)){
n1 <- rownames(ptmp[which(ptmp[,j]==1),])
tprtmp <- c(tprtmp, length(intersect(tpx, n1))/length(tpx))
fprtmp <- c(fprtmp, length(intersect(tnx, n1))/length(tnx))
fdrtmp <- c(fdrtmp, length(intersect(tnx, n1))/length(n1))
}
tprx[[i]] <- tprtmp
fprx[[i]] <- fprtmp
fdrx[[i]] <- fdrtmp
}
names(tprx) <- names(fprx) <- names(fdrx) <- names(rml) <- names(xx[[2]][1:resampval])
list(power=rml, tpr=tprx, fpr=fprx, fdr=fdrx, sigpath=k)
}
pcdetectioncall <- function(data,pvl=NULL, ref=NULL, pathways=NULL, fdr="BH", thr=.05){
if(is.null(pvl)){
tb <-unique(ref)
k1 <- which(ref==tb[1])
k2 <- which(ref==tb[2])
print(k1)
print(k2)
pvl <- apply(data,1, function(x){t.test(x[k1],x[k2])$p.value})
}
cnt <- which(pvl<=thr)
nm <- numeric(length(pvl))
names(nm) <- names(pvl)
fdr.method <- fdr
fdr <- TRUE
if(fdr){
pvl <- p.adjust(pvl, method="BH")
}
nm[cnt] <- 1
diff <- sapply(pathways, function(x){count <- length(which(nm[x]==1));100*(count/length(x))})
names(diff) <- names(pathways)
diff
}
avgcorpath <- function(data, ref=NULL, pathways, method="pearson"){
if(is.null(ref)){ref <- 1:nrow(data)}
crx <- sapply(pathways, function(x){cr <- t(cor(m[x,ref], method=method)); mean(cr[upper.tri(cr)])})
crx
}
plot.ssapbm <- function(xx, data, type=c("power","fdr","fpr","tpr","dc","cor"), val="pval", pathways=NULL,nc=1,nr=1,pos="bottom"){
print(type)
pth <- NULL
if(length(pathways)==0){
pth <- xx$pathways
}
else{pth <- pathways}
type <- type[1]
typex=c("power","fdr","fpr","tpr","dc","cor")
nm <- c("Power", "FDR", "FPR", "TPR", "Detection call", "Correlation")
names(nm) <- typex
if((type!="dc")&&(type!="cor")){
pwr <- powerpath(xx, type=val)
dat <- pwr[[type[1]]]
dat <- getdf(dat)
colnames(dat) <- c("sample", "stat")
}
dftrl <- list()
if(type=="dc"){
dcv <- xx[[2]][[length(xx[[2]])]]
for(i in 1:(length(dcv))){
#if(i==(length(dcv)+1)){
#dctmp <- dcv[[i-1]][pth,]
#}
#else{
dctmp <- dcv[[i]][pth,]
#}
dftr <- data.frame(rep(rownames(dctmp), ncol(dctmp)), as.vector(dctmp) )
colnames(dftr) <- c("Pathways", "DC")
#if(i!=(length(dcv)+1)){
xtst <- cbind(xx$dcoverall[pth],pth)
colnames(xtst) <- c("dcall", "Pathways")
xtst <- data.frame(xtst, type=factor(rep(1,length(pth))))
xtst$dcall <- as.numeric(as.vector(xtst$dcall))
print(xtst)
gdc <- ggplot(dftr, aes(x=Pathways, y=DC,fill=Pathways)) + geom_boxplot()+xlab(paste("Pathways","(sample: ",names(dcv)[i],")",sep=""))+ylab("Detection Call")+theme(legend.position="none") + theme(axis.text.x=element_blank())
gdc <- gdc + scale_x_discrete(limits=pth)
gdc <- gdc +geom_point(data=xtst,aes(x=Pathways, y=dcall, colour = factor(type,labels="Overall detection call")),shape=5, size=2, fill="red")+ ylim(min(c(xtst$dcall,dftr$DC)), max(c(xtst$dcall,dftr$DC)))+theme(legend.title=element_blank())
#}
#else{
#gdc <- ggplot(dftr, aes(x=Pathways, y=DC,fill=Pathways)) + geom_boxplot()
#gdc <- g_legend(gdc)
#}
dftrl[[i]] <- gdc
}
}
if(type=="cor"){
sig <- rep(0, length(xx$pathways))
crdat <- data.frame(xx$corpaths,sig)
crdat[xx$sigpath, "sig"] <- 1
crdat$sig <- as.factor(crdat$sig)
p1 <- ggplot(crdat, aes(x=rank(crall), y=crall, color=factor(sig, labels=c("Differentially expressed", "Not Differentially expressed")) ))+geom_point()+theme(legend.position="none")+ labs(color = "",x="Ordered pathways", y="Average correlation of individual pathways")
p2 <- ggplot(crdat, aes(x=crctrl, y=crtreat, color=factor(sig)))+geom_point()+theme(legend.position="none")+ geom_line(data=crdat,color=rgb(0,0,1,alpha=.4),size=1.5, aes(x=crctrl, y=crctrl))+ labs(color = "",x="Average correlation:\n Individual pathways (control data)", y="Average correlation: \n Individual pathways (treatment data)")
crd <- getdf(list(ctrl=crdat[xx$sigpath,2], crtreat=crdat[xx$sigpath,3]))
print(crd)
p3 <- ggplot(crd, aes(x=sample, y=stat, fill=sample))+
geom_boxplot()+
scale_x_discrete(limits=c("ctrl","crtreat"),breaks=c("ctrl", "crtreat"),labels=c("Control", "Treatment")) +
theme(legend.position="none")
grid_arrange_shared_legend(list(p1,p2,p3),nrow=nr, ncol=nc,position=pos)
}
if(type=="dc"){
#multiplot(dftrl, cols=length(dftrl))
print(length(dftrl))
grid_arrange_shared_legend(dftrl,nrow=nr, ncol=nc,position=pos)
}
if((type!="dc")&&(type!="cor")){
ggplot(dat, aes(y=stat, x=factor(sort(as.numeric(sample))),fill=sample)) + geom_boxplot()+xlab("Sample-size")+ylab(nm[type[1]])
}
}
getdf <- function(lst=NULL){
if(!is.null(lst)){
nm <- names(lst)
len <- sapply(lst, length)
lab <- rep(nm, len)
dfx <- data.frame(sample=lab, stat=unlist(lst),stringsAsFactors=FALSE )
}
}
library(ggplot2)
library(gridExtra)
library(grid)
grid_arrange_shared_legend <- function(plots, ncol = 1, nrow = 1, position = c("bottom", "right")) {
if(is.null(ncol)||((ncol*nrow)<length(plots))){
ncol <- length(plots)
nrow <- 1
}
if(is.null(ncol)||((ncol*nrow)<length(plots))){
nrow <- length(plots)
ncol <- 1
}
position <- match.arg(position)
g <- ggplotGrob(plots[[1]] + theme(legend.position = position))$grobs
legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
lheight <- sum(legend$height)
lwidth <- sum(legend$width)
gl <- lapply(plots, function(x) x + theme(legend.position="none"))
gl <- c(gl, ncol = ncol, nrow = nrow)
combined <- switch(position,
"bottom" = arrangeGrob(do.call(arrangeGrob, gl),
legend,
ncol = 1,
heights = unit.c(unit(1, "npc") - lheight, lheight)),
"right" = arrangeGrob(do.call(arrangeGrob, gl),
legend,
ncol = 2,
widths = unit.c(unit(1, "npc") - lwidth, lwidth)))
grid.newpage()
grid.draw(combined)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.