Materials

Equipment

Hardware requirements:

Software requirements:

Create EnrichmentMap - Automaticallly from R using cyRest

Load in required libraries

#install required R and bioconductor packages
tryCatch(expr = { library("RCurl")}, 
         error = function(e) {  install.packages("RCurl")}, 
         finally = library("RCurl"))

#use library
tryCatch(expr = { library("limma")}, 
         error = function(e) { source("https://bioconductor.org/biocLite.R")
           biocLite("limma")}, 
         finally = library("limma"))
tryCatch(expr = { library("Biobase")}, 
         error = function(e) { source("https://bioconductor.org/biocLite.R")
           biocLite("Biobase")}, 
         finally = library("Biobase"))
tryCatch(expr = { library("ggplot2")}, 
         error = function(e) { install.packages("ggplot2")}, 
         finally = library("ggplot2"))

#For creating json and communicating with cytoscape
tryCatch(expr = { library("httr")}, 
         error = function(e) { install.packages("httr")}, 
         finally = library("httr"))
tryCatch(expr = { library("RJSONIO")}, 
         error = function(e) { install.packages("RJSONIO")}, 
         finally = library("RJSONIO"))

Configurable Parameters

In order to run GSEA automatically through the notebook you will need to download the gsea jar from here. Specify the exact path to the gsea jar in the parameters in order to automatically compute enrichments using GSEA. Run this script in a working folder containing files with a list of genes and according metric (check whether this has to be sorted or not - for now supply sorted).

#path to GSEA jar 
# In order to run GSEA automatically you need to speciry the path to the gsea jar file.
gsea_jar <- params$gsea_jar

#Gsea takes a long time to run.  If you have already run GSEA manually or previously there is no need to re-run GSEA.  Make sure the 
# gsea results are in the current directory and the notebook will be able to find them and use them.
run_gsea = params$run_gsea

#navigate to the directory where you put the downloaded protocol files.
working_dir <- params$working_dir

# leave blank if you want the notebook to discover the gsea directory for itself
#gsea_directory = paste(working_dir,"Mesen_vs_Immuno.GseaPreranked.1497635459262",sep="/") 
gsea_directory = params$gsea_directory
dir.create(gsea_directory)
if(length(list.files(gsea_directory)) > 0 & params$run_gsea == "true") stop("You are about to overwrite old results! Either define a new directory or delete all old results.")

Download the latest pathway definition file

Only Human, Mouse and Rat gene set files are currently available on the baderlab downloads site. Check here to see all available species.


Run GSEA

(GSEA)[http://software.broadinstitute.org/gsea/index.jsp] is a stand alone java program with many customizable options. It can be easily run through its integrated user interface. To make this a seemless pipeline we can run GSEA from the command line with a set of options. Any of the supplied options can be customized and there are many additional options that can be specified. For more details see (here)[http://software.broadinstitute.org/gsea/doc/GSEAUserGuideTEXT.htm#_Running_GSEA_from]

In the below command the following options have been specified:

if(run_gsea){
dir <- params$working_dir
files <- list.files(pattern = params$rnkPattern)
rnkFiles <- paste(dir, files, sep='/')
for(i in rnkFiles){
  contrast <- gsub("\\..*", "", basename(i))
  command <- paste("java  -Xmx1G -cp",params$gsea_jar,  "xtools.gsea.GseaPreranked -gmx", params$gmt_file , "-rnk" ,i, "-collapse false -nperm 1000 -permute gene_set -scoring_scheme weighted -rpt_label ",contrast,"  -num 100 -plot_top_x 20 -rnd_seed 12345 -set_max", params$gseaMax, "-set_min" , params$gseaMin, "-zip_report false -out" , params$gsea_directory, "-gui false > gsea_output.txt",sep=" ")
  system(command)
}
}

Get the name of the GSEA output directory

Although GSEA allows you to specify the name of the output directory and the destination folder it add additional words and numbers to the folder name. Some are predictable and some are automatically generated. Get all the GSEA results directories found in the current directory. If there are multiple GSEA results folders each will be used to create an enrichment map.

  gsea_output_dir <- gsea_directory

Launch Cytoscape

Create EM through Cyrest interface - make sure you open cytoscape with a -R 1234 (to enable rest functionality) and allow R to talk directly to cytoscape.

Launch Cytoscape (by default cytoscape will automatically enable rest so as long as cytoscape 3.3 or higher is open R should be able to communicate with it)

Set up connection from R to cytoscape

# Basic settings
port.number = 1234
base.url = paste("http://localhost:", toString(port.number), "/v1", sep="")

#print(base.url)

version.url = paste(base.url, "version", sep="/")
cytoscape.open = TRUE

tryCatch(expr = { GET(version.url)}, 
         error = function(e) { return (cytoscape.open = FALSE)}, finally =function(r){ return(cytoscape.open = TRUE)})

if(!cytoscape.open){
  #try and launch cytoscape
 print("Cytoscape is not open.  Please launch cytoscape.")
} else{
  cytoscape.version =  GET(version.url)
  cy.version = fromJSON(rawToChar(cytoscape.version$content))
  print(cy.version)

}

Create an Enrichment map

#use easy cyRest library to communicate with cytoscape.
tryCatch(expr = { library("RCy3")}, 
         error = function(e) { source("https://bioconductor.org/biocLite.R")
           biocLite("RCy3")}, finally = library("RCy3"))

#defined threshold for GSEA enrichments (need to be strings for cyrest call)
pvalue_gsea_threshold <- params$pval_thresh
qvalue_gsea_threshold <- params$fdr_thresh

similarity_threshold <- params$similarity_threshold
similarity_metric <-  params$similarity_metric

gsea_directories <- list.files(path = params$gsea_directory, pattern = "\\.GseaPreranked")
for(res in gsea_directories){
gsea_output <- paste(params$gsea_directory, res, sep = "/")  
gsea_results_path <- paste(gsea_output,"edb",sep="/")
gsea_results_filename <- paste(gsea_results_path,"results.edb",sep="/")
cur_model_name <- gsub("\\..*", "", res)
#although there is a gmt file in the gsea edb results directory it have been filtered to 
#contain only genes represented in the expression set.  If you use this fltered file you 
#will get different pathway connectivity depending on the dataset being used.  We recommend 
#using the original gmt file used for the gsea analysis and not the filtered one in the results directory.
gmt_gsea_file <- params$gmt_file
gsea_ranks_file <- paste(gsea_results_path,list.files(gsea_results_path,pattern=".rnk"),sep="/")

#######################################
#create EM
current_network_name <- paste(cur_model_name,pvalue_gsea_threshold,qvalue_gsea_threshold, params$gseaMax, params$descr, sep="_")

em_command = paste('enrichmentmap build analysisType="gsea" gmtFile=',gmt_gsea_file,
                   'pvalue=',pvalue_gsea_threshold, 'qvalue=',qvalue_gsea_threshold,
                   'similaritycutoff=',similarity_threshold,
                   'coefficients=',similarity_metric,'ranksDataset1=', 
                   gsea_ranks_file,'enrichmentsDataset1=',gsea_results_filename, 
                   'filterByExpressions=false', sep=" ")
# Don't make network for aalyses where the q value does not reach the threshold - error in command stops the loop
reports <- list.files(pattern = "gsea_report_for_na.*xls", path=gsea_output, full.names = TRUE)
reports <- rbind(read.csv(reports[1], sep = "\t"),read.csv(reports[2], sep = "\t"))
minQval <- min(reports$FDR.q.val)

#enrichment map command will return the suid of newly created network.
if(minQval<qvalue_gsea_threshold){
response <- commandsGET(em_command)

current_network_suid <- 0
#enrichment map command will return the suid of newly created network unless it Failed.  
#If it failed it will contain the word failed
if(grepl(pattern="Failed", response)){
  paste(response)
} else {
  current_network_suid <- response
}
response <- renameNetwork(title = current_network_name, 
                          network = as.numeric(current_network_suid),base.url)

### Auto-Annotate the Enrichment Map
if(params$annotate) { 
aa_command = paste("autoannotate annotate-clusterBoosted clusterAlgorithm=MCL maxWords=3 network=", current_network_name, sep=" ")
commandsGET(aa_command)
}
}
}


vincent-van-hoef/myFunctions documentation built on May 14, 2019, 10:36 p.m.