#' @title Main function
#' @description Main function
#' @details Place holder
#' @param datasetNameFile File with names of datasets for processing, one item per line, e.g. GSE1145
#' @param methodNameFile File with names of methods for each dataset, one item per line, e.g. spiapcc
#' @param alpha Alpha value for any method that considers alpha
#' @param perm Number of permutations for any method that does bootstrap
#' @return Returns nothing. Writes all results to files under working directory
#' @export
run <- function(datasetNameFile=NULL,methodNameFile=NULL,alpha=0.05, perm=2000){
if(is.null(datasetNameFile)){
stop("ERROR in run: data not specified.")
}
datasets <- .readFileAsList(datasetNameFile)
if(is.null(methodNameFile)){
stop("ERROR in run: method not specified.")
}
methods <- .readFileAsList(methodNameFile)
# prepare pathway gene list and network files (mostly for EB)
pwy.files <- list.files(file.path(system.file("extdata",package="spiapcc.demo"),"keggxml"),pattern="*.xml",full.names=TRUE)
pwys <- sapply(pwy.files, parseKGML)
# Push gs,grn into global env.
# Not a good practise
# Necessary to bypass memory issue since R doesn't pass by pointer
.GlobalEnv$gs <- .extractKeggGS(pwys)
.GlobalEnv$grn <- .compileGRNFromKEGG(pwys)
require(KEGGdzPathwaysGEO)
require(KEGGandMetacoreDzPathwaysGEO)
# loop over data sets
for(i in 1:length(datasets)){
# load dataset
data(list=datasets[[i]])
.GlobalEnv$dataset <- get(datasets[[i]])
# name of dataset, e.g. "GSE1145"
name <- dataset@experimentData@name
cat("\n======================================\n")
cat(paste("Starting dataset:", name, sep=" "))
cat("\n======================================\n")
# create folder
if(!dir.exists(name)){
dir.create(name)
}
# get local working directory path
wd <- paste(getwd(),"/",name,sep="")
# target pathway, e.g. "05410"
target <- dataset@experimentData@other$targetGeneSets
sum <- list(name=name,target=target)
.GlobalEnv$EBSE <- .preprocessEB(dataset=dataset)
# serial routine
for(j in 1:length(methods)){
method=methods[[j]]
cat("\n======================================\n")
cat(paste("Starting method:", method, sep=" "))
cat("\n======================================\n")
res <- NULL
if(method=="spiapcc"){
res <- .call_spiapcc(alpha=alpha, perm=perm, wd=wd)
}
else if(method=="spia"){
res <- .call_spia(alpha=alpha,perm=perm,wd=wd)
}
else if(method=="gwspia"){
res <- .call_gwspia(alpha=alpha,perm=perm,wd=wd)
}
else if(method=="fopa"){
res <- .call_fopa(alpha=alpha,perm=perm,wd=wd)
}
else{
res <- .call_EB(method=method, alpha=alpha, perm=perm, wd=wd)
}
cat("\n======================================\n")
cat(paste("Finished method:", method, sep=" "))
cat("\n======================================\n")
sum <- c(sum,res)
}
# combine result and write to file
sum <- data.frame(sum)
write.csv(sum, paste(wd,"/",name,".summary.csv",sep=""))
cat("\n======================================\n")
cat(paste("Finished dataset:",name,sep=" "))
cat("\n======================================\n")
if(exists(datasets[[i]])){
rm(list=datasets[[i]],envir=.GlobalEnv)
}
if(exists("dataset")){
rm(list="dataset",envir=.GlobalEnv)
}
if(exists("EBSE")){
rm(list="EBSE",envir=.GlobalEnv)
}
}
if(exists("gs")){
rm(list="gs",envir=.GlobalEnv)
}
if(exists("grn")){
rm(list="grn",envir=.GlobalEnv)
}
results <- .combineSummaryFiles()
write.csv(results,paste(getwd(),"Summary.csv",sep="/"))
return(results)
}
.call_spiapcc <- function(alpha=0.05, perm=2000, wd=NULL){
dataset <- get("dataset")
# name of dataset, e.g. "GSE1145"
name <- dataset@experimentData@name
# target pathway, e.g. "05410"
target <- dataset@experimentData@other$targetGeneSets
# make required SPIA data file if one doesn't exist
if(!file.exists(system.file("extdata/hsaSPIA.RData",package="spiapcc"))){
SPIA::makeSPIAdata(kgml.path=system.file("extdata/keggxml",package="spiapcc.demo"),organism="hsa",out.path=system.file("extdata",package="spiapcc"))
}
# calling preprocess to generated needed parameters
pp <- spiapcc::preprocess(dataset=dataset, alpha=alpha, plots=TRUE, plots.dir=wd)
res_tn <- spiapcc::spiapcc(de=pp$de, all=pp$all, gse_madat2=pp$gse_madat2, normal=pp$normal, tumor=pp$tumor, organism="hsa", data.dir=paste(system.file("extdata",package="spiapcc"),"/",sep=""), pathids=NULL, nB=perm, plots=FALSE, verbose=TRUE, beta=NULL, combine="fisher",flag="tn")
res_nt <- spiapcc::spiapcc(de=pp$de, all=pp$all, gse_madat2=pp$gse_madat2, normal=pp$normal, tumor=pp$tumor, organism="hsa", data.dir=paste(system.file("extdata",package="spiapcc"),"/",sep=""), pathids=NULL, nB=perm, plots=FALSE, verbose=TRUE, beta=NULL, combine="fisher",flag="nt")
res_abs <- spiapcc::spiapcc(de=pp$de, all=pp$all, gse_madat2=pp$gse_madat2, normal=pp$normal, tumor=pp$tumor, organism="hsa", data.dir=paste(system.file("extdata",package="spiapcc"),"/",sep=""), pathids=NULL, nB=perm, plots=FALSE, verbose=TRUE, beta=NULL, combine="fisher",flag="abs")
# spit out results in .csv format to directory specified with wd
write.csv(res_tn, file=paste(wd,"/",name,".spiapcc.tn.csv",sep=""))
write.csv(res_nt, file=paste(wd,"/",name,".spiapcc.nt.csv",sep=""))
write.csv(res_abs, file=paste(wd,"/",name,".spiapcc.abs.csv",sep=""))
# get summary
sum_tn <- .summarize(res_tn, target, "TN")
sum_nt <- .summarize(res_nt, target, "NT")
sum_abs <- .summarize(res_abs, target, "ABS")
#return(sum_tn)
return(c(sum_tn, sum_nt, sum_abs))
}
.call_spia <- function(alpha=0.05, perm=2000, wd=NULL){
dataset <- get("dataset")
# name of dataset, e.g. "GSE1145"
name <- dataset@experimentData@name
# target pathway, e.g. "05410"
target <- dataset@experimentData@other$targetGeneSets
# make required SPIA data file if one doesn't exist
if(!file.exists(system.file("extdata/hsaSPIA.RData",package="SPIA"))){
SPIA::makeSPIAdata(kgml.path=system.file("extdata/keggxml",package="spiapcc.demo"),organism="hsa",out.path=system.file("extdata",package="SPIA"))
}
# preprocess data to generated needed parameters
se <- EnrichmentBrowser::probe2gene(dataset)
se <- EnrichmentBrowser::normalize(se)
colData(se)$GROUP <- ifelse(colData(se)$Group == "d",1,0)
se <- EnrichmentBrowser::deAna(se, padj.method="BH")
all_de <- rowData(se, use.names=TRUE)
tg <- all_de[all_de$ADJ.PVAL < alpha,]
tg <- data.frame(tg)
de <- tg$FC
names(de) <- as.vector(rownames(tg))
all <- rownames(all_de)
# call gwspia
res <- SPIA::spia(de=de, all=all, organism="hsa", data.dir=paste(system.file("extdata",package="SPIA"),"/",sep=""), pathids=NULL, nB=perm, plots=FALSE, verbose=TRUE, beta=NULL, combine="fisher")
colnames(res)[which(names(res)=="pG")] <- "PVAL"
colnames(res)[which(names(res)=="pGFdr")] <- "FDR.BH"
res$FDR.BY <- p.adjust(res$PVAL,"BY")
# spit out results in .csv format to directory specified with wd
write.csv(res, file=paste(wd,"/",name,".spia.csv",sep=""))
# get summary
sum <- .summarize(res, target, "spia")
return(sum)
}
.call_gwspia <- function(alpha=0.05, perm=2000, wd=NULL){
dataset <- get("dataset")
# name of dataset, e.g. "GSE1145"
name <- dataset@experimentData@name
# target pathway, e.g. "05410"
target <- dataset@experimentData@other$targetGeneSets
# make required SPIA data file if one doesn't exist
if(!file.exists(system.file("extdata/hsaSPIA.RData",package="GWSPIA"))){
SPIA::makeSPIAdata(kgml.path=system.file("extdata/keggxml",package="spiapcc.demo"),organism="hsa",out.path=system.file("extdata",package="GWSPIA"))
}
# preprocess data to generated needed parameters
kgml.path=system.file("extdata/keggxml",package="GWSPIA")
mdir=kgml.path
paths <- dir(mdir,pattern=".xml")
se <- EnrichmentBrowser::probe2gene(dataset)
se <- EnrichmentBrowser::normalize(se)
colData(se)$GROUP <- ifelse(colData(se)$Group == "d",1,0)
se <- EnrichmentBrowser::deAna(se, padj.method="BH")
all_de <- rowData(se, use.names=TRUE)
tg <- all_de[all_de$ADJ.PVAL < alpha,]
tg <- data.frame(tg)
de <- tg$FC
names(de) <- as.vector(rownames(tg))
all <- rownames(all_de)
allt <- all_de$limma.STAT
names(allt) <- rownames(all_de)
BC1 <- GWSPIA::BC(mdir,paths)
SP1 <- GWSPIA::SP(mdir,paths)
IF1 <- GWSPIA::IF(mdir,paths,de,all)
wi1 <- GWSPIA::wi(SP1,IF1)
# call gwspia
res <- GWSPIA::gwspia(de=de, all=all, allt=allt, betweennesslist=BC1, DEdegree=wi1, organism="hsa", data.dir=paste(system.file("extdata",package="GWSPIA"),"/",sep=""), pathids=NULL, nB=perm, plots=FALSE, verbose=TRUE, beta=NULL, combine="fisher")
colnames(res)[which(names(res)=="pG")] <- "PVAL"
colnames(res)[which(names(res)=="pGFdr")] <- "FDR.BH"
res$FDR.BY <- p.adjust(res$PVAL,"BY")
# spit out results in .csv format to directory specified with wd
write.csv(res, file=paste(wd,"/",name,".gwspia.csv",sep=""))
# get summary
sum <- .summarize(res, target, "gwspia")
return(sum)
}
.call_EB <- function(method=NULL, alpha=0.05, perm=2000, wd=NULL){
dataset <- get("dataset")
EBSE <- get("EBSE")
gs <- get("gs")
grn <- get("grn")
# name of dataset, e.g. "GSE1145"
name <- dataset@experimentData@name
# target pathway, e.g. "05410"
target <- dataset@experimentData@other$targetGeneSets
sum <- tryCatch({
res <- NULL
if(method %in% nbeaMethods()){
if(is.null(grn)){
stop("Invalid argument to EB: grn is NULL")
}
res <- nbea(method=method, se=EBSE, gs=gs, grn=grn, alpha=alpha, perm=perm)$res.tbl
}else if(method %in% sbeaMethods()){
res <- sbea(method=method, se=EBSE, gs=gs, alpha=alpha, perm=perm)$res.tbl
}else{
stop(paste("Method",method,"in EB not found.",sep=" "))
}
res$FDR.BH <- p.adjust(res$PVAL,"BH")
res$FDR.BY <- p.adjust(res$PVAL,"BY")
res <- data.frame(res)
write.csv(res, file=paste(wd,"/",name,".",method,".csv",sep=""))
sum <- .summarize(res, target, method)
}, error = function(e){
write.csv(toString(e), file=paste(wd,"/",name,".",method,".csv",sep=""))
sum <- list(R="ERR", N="ERR", P="ERR", FDR.BH="ERR", FDR.BY="ERR")
names(sum) <- c(paste(method,"R",sep="."),paste(method,"N",sep="."),paste(method,"P",sep="."),paste(method,"FDR.BH",sep="."),paste(method,"FDR.BY",sep="."))
return(sum)
})
return(sum)
}
.call_fopa <- function(alpha=0.05, perm=2000, wd=NULL){
dataset <- get("dataset")
# name of dataset, e.g. "GSE1145"
name <- dataset@experimentData@name
# target pathway, e.g. "05410"
target <- dataset@experimentData@other$targetGeneSets
# Change wd to launch FoPA in its root dir
old_wd <- getwd()
setwd("~/github/FoPA")
# Generate DEG and gene list via FoPA's included R script
DEG <- paste("./data/input_genes",paste(name,"DEG.txt",sep="_"),sep="/")
ALL <- paste("./data/input_genes",paste(name,"ALL.txt",sep="_"),sep="/")
cmd <- paste("./data/input_genes/generate_input_files.R",name,alpha,DEG,ALL,sep=" ")
cat(paste("DEBUG: executing",cmd," \n",sep=" "))
# Execute via shell
system(cmd)
# Calling FoPA via shell
DEG <- paste(name,"DEG.txt",sep="_")
ALL <- paste(name,"ALL.txt",sep="_")
OUT <- paste(wd,"/",name,".FoPA_raw.txt",sep="")
cmd <- paste("python","~/github/FoPA/src/FoPA.py","-n",DEG,ALL,OUT)
cat(paste("DEBUG: executing",cmd," \n",sep=" "))
system(cmd)
# restore wd
setwd(old_wd)
# Read in FoPA result as table since it's not structured
res <- read.table(OUT,header=FALSE,sep="\t",quote="")
# Add column names
colnames(res) <- c("ID","Pathway","Score","PVAL")
# From table to dataframe
res <- data.frame(res)
# prepend '0' to ID since it's only 4 digits
res$ID <- paste("0",res$ID,sep="")
# Append FDR stats
res$FDR.BH <- p.adjust(res$PVAL,"BH")
res$FDR.BY <- p.adjust(res$PVAL,"BY")
# spit out results in .csv format to directory specified with wd
write.csv(res, file=paste(wd,"/",name,".FoPA.csv",sep=""))
# get summary
sum <- .summarize(res, target, "fopa")
return(sum)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.