#' AMARETTO_HTMLreport
#'
#' Retrieve an interactive html report, including gene set enrichment analysis if asked for.
#'
#' @param AMARETTOinit AMARETTO initialize output
#' @param AMARETTOresults AMARETTO results output
#' @param ProcessedData List of processed input data
#' @param SAMPLE_annotation SAMPLE annotation will be added to heatmap
#' @param ID ID column of the SAMPLE annotation data frame
#' @param hyper_geo_test_bool Boolean if a hyper geometric test needs to be performed. If TRUE provide a GMT file in the hyper_geo_reference parameter.
#' @param hyper_geo_reference GMT file with gene sets to compare with.
#' @param output_address Output directory for the html files.
#' @param show_row_names if True, sample names will appear in the heatmap
#' @param driverGSEA if TRUE, module drivers will also be included in the hypergeometric test.
#' @param phenotype_association_table a Data Frame, containing all modules phenotype association data. Optional.
#' @param MSIGDB TRUE if gene sets were retrieved from MSIGDB. Links will be created in the report.
#'
#' @import dplyr
#' @importFrom doParallel registerDoParallel
#' @importFrom DT datatable formatRound formatSignif formatStyle styleColorBar styleInterval
#' @importFrom reshape2 melt
#' @importFrom dplyr arrange group_by left_join mutate select summarise rename filter
#' @importFrom foreach foreach %dopar% %do%
#' @importFrom parallel makeCluster stopCluster
#' @importFrom knitr knit_meta
#' @importFrom utils write.table
#' @importFrom tibble rownames_to_column
#' @importFrom stats p.adjust phyper
#' @importFrom rmarkdown render
#' @return result
#' @export
#' @examples
#'\dontrun{
#' data('ProcessedDataLIHC')
#' AMARETTOinit <- AMARETTO_Initialize(ProcessedData = ProcessedDataLIHC,
#' NrModules = 2, VarPercentage = 50)
#'
#' AMARETTOresults <- AMARETTO_Run(AMARETTOinit)
#'
#' AMARETTO_HTMLreport(AMARETTOinit= AMARETTOinit,AMARETTOresults= AMARETTOresults,
#' ProcessedData = ProcessedDataLIHC,
#' hyper_geo_test_bool=FALSE,
#' output_address='./')
#'}
AMARETTO_HTMLreport <- function(AMARETTOinit,
AMARETTOresults,
ProcessedData,
show_row_names = FALSE,
SAMPLE_annotation = NULL,
ID = NULL,
hyper_geo_test_bool = FALSE,
hyper_geo_reference = NULL,
output_address = './',
MSIGDB = TRUE,
driverGSEA = TRUE,
phenotype_association_table=NULL){
`%dopar%` <- foreach::`%dopar%`
`%do%` <- foreach::`%do%`
CNV_matrix <- ProcessedData[[2]]
MET_matrix <- ProcessedData[[3]]
NrModules<-AMARETTOresults$NrModules
VarPercentage<-AMARETTOinit$Parameters$VarPercentage
NrCores<-AMARETTOinit$NrCores
if (!dir.exists(output_address)){
stop("Output directory is not existing.")
}
if (hyper_geo_test_bool==TRUE){
if (!file.exists(hyper_geo_reference)){
stop("GMT for hyper geometric test is not existing.\n")
}
}
report_address <- file.path(output_address)
dir.create(paste0(report_address,"/AMARETTOhtmls/modules"),recursive = TRUE,showWarnings = FALSE)
cat("The output folder structure is created.\n")
if (hyper_geo_test_bool){
GmtFromModules(AMARETTOinit,AMARETTOresults,driverGSEA)
output_hgt<-HyperGTestGeneEnrichment(hyper_geo_reference, "./Modules_genes.gmt",NrCores)
GeneSetDescriptions<-GeneSetDescription(hyper_geo_reference,MSIGDB)
}
cat("The hyper geometric test results are calculated.\n")
cluster <- parallel::makeCluster(c(rep("localhost", NrCores)), type = "SOCK")
doParallel::registerDoParallel(cluster,cores=NrCores)
full_path<-normalizePath(report_address)
ModuleOverviewTable<-NULL
buttons_list = list(list(extend ='csv'), list(extend ='excel'), list(extend = 'pdf', pageSize = 'A4', orientation = 'landscape'),list(extend ='print'), list(extend ='colvis'))
ModuleOverviewTable<-foreach (ModuleNr = 1:NrModules, .packages = c('AMARETTO','tidyverse','DT','rmarkdown')) %dopar% {
#for(ModuleNr in 1:NrModules){
print(paste0("ModuleNr = ",ModuleNr))
heatmap_module<-AMARETTO_VisualizeModule(AMARETTOinit, AMARETTOresults, ProcessedData, show_row_names = show_row_names, SAMPLE_annotation=SAMPLE_annotation, ID=ID, ModuleNr=ModuleNr)
print("visualization is done")
ModuleRegulators <- AMARETTOresults$RegulatoryPrograms[ModuleNr,which(AMARETTOresults$RegulatoryPrograms[ModuleNr,] != 0)]
print("ModuleRegulators is done")
dt_regulators<-DT::datatable(tibble::rownames_to_column(as.data.frame(ModuleRegulators),"RegulatorIDs") %>% dplyr::rename(Weights="ModuleRegulators")%>%mutate(Weights=signif(Weights, digits = 3)) %>% dplyr::mutate(RegulatorIDs=paste0('<a href="https://www.genecards.org/cgi-bin/carddisp.pl?gene=',RegulatorIDs,'">',RegulatorIDs,'</a>'))%>%dplyr::arrange(Weights),
class = 'display',filter = 'top', extensions = c('Buttons','KeyTable'), rownames = FALSE, options = list(
columnDefs = list(list(width = '200px',className = 'dt-head-center', targets = "_all"),list(className = 'text-left', targets = "_all")),pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',
buttons = buttons_list),colnames = c("Driver Gene", "Weight"),escape = 'Weight') %>% DT::formatStyle('Weights',color = DT::styleInterval(0, c('darkblue', 'darkred')))
print("dt_regulators is done")
dt_targets<-DT::datatable(as.data.frame(AMARETTOresults$ModuleMembership) %>% tibble::rownames_to_column("TargetIDs")%>% dplyr::arrange(TargetIDs) %>% dplyr::rename(moduleNr=ModuleNr) %>% dplyr::filter(moduleNr==ModuleNr) %>% dplyr::select(-moduleNr) %>% dplyr::mutate(TargetIDs=paste0('<a href="https://www.genecards.org/cgi-bin/carddisp.pl?gene=',TargetIDs,'">',TargetIDs,'</a>')),
class = 'display', filter = 'top', extensions = c('Buttons','KeyTable'), rownames = FALSE, options = list(
columnDefs = list(list(width = '200px',className = 'dt-head-center', targets = "_all"),list(className = 'text-left', targets = "_all")),pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',
buttons = buttons_list),colnames = c("Target Gene"),escape = FALSE)
print("dt_targets is done")
if (hyper_geo_test_bool){
output_hgt_filter<-output_hgt %>% dplyr::filter(Testset==paste0("Module_",as.character(ModuleNr))) %>% dplyr::arrange(padj)
output_hgt_filter<-dplyr::left_join(output_hgt_filter,GeneSetDescriptions,by=c("Geneset"="GeneSet")) %>% dplyr::mutate(overlap_perc=n_Overlapping/NumberGenes)%>%mutate(overlap_perc=signif(overlap_perc, digits = 3)) %>% dplyr::select(Geneset,Description,Geneset_length,n_Overlapping,Overlapping_genes,overlap_perc,p_value,padj)%>%arrange(padj)%>%mutate(Geneset_length=as.integer(Geneset_length),n_Overlapping=as.integer(n_Overlapping))
if (MSIGDB==TRUE){
dt_genesets<-DT::datatable(output_hgt_filter %>% dplyr::mutate(Geneset=paste0('<a href="http://software.broadinstitute.org/gsea/msigdb/cards/',Geneset,'">',gsub("_"," ",Geneset),'</a>')),class = 'display', filter = 'top', extensions = c('Buttons','KeyTable'), rownames = FALSE,
options = list(pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list,columnDefs = list(list(className = 'dt-head-center', targets = "_all"),list(className = 'text-left', targets = "_all"))),
colnames=c("Gene Set Name","Gene Set Description","# Genes in Gene Set","# Genes in Overlap","Genes in Overlap","% Genes in overlap","P-value","FDR Q-value"),escape = FALSE) %>%
DT::formatSignif(c('p_value','padj','overlap_perc'),2) %>% DT::formatStyle('overlap_perc',background = DT::styleColorBar(c(0,1), 'lightblue'),backgroundSize = '98% 88%',backgroundRepeat = 'no-repeat', backgroundPosition = 'center')%>%DT::formatStyle(columns = c(5), fontSize = '60%')
}
else{
dt_genesets<-DT::datatable(output_hgt_filter,class = 'display', filter = 'top', extensions = c('Buttons','KeyTable'), rownames = FALSE,options = list(
columnDefs = list(list(className = 'dt-head-center', targets = "_all"),list(className = 'text-left', targets = "_all")),pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',
buttons = buttons_list)) %>% DT::formatSignif(c('p_value','padj','overlap_perc'),2)%>%DT::formatStyle(columns = c(6), fontSize = '60%')
}
ngenesets<-nrow(output_hgt_filter %>% dplyr::filter(padj<0.05))
} else {
dt_genesets<-"Genesets were not analysed as they were not provided."
ngenesets<-"NA"
}
print("hypergeotest is done")
if (!is.null(phenotype_association_table)){
moduleNumber<-ModuleNr
module_phenotype_association_datatable<-datatable(phenotype_association_table%>%mutate(p.value=signif(p.value, digits = 3),q.value=signif(q.value, digits = 3))%>%dplyr::filter(ModuleNr==paste0("Module ",moduleNumber))%>%arrange(q.value)%>%
dplyr::select(-ModuleNr),class='display',filter = 'top', extensions = c('Buttons','KeyTable'),rownames = FALSE,options = list(
pageLength = 10,lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list),colnames=c("Phenotype","Statistics Test","P-value","FDR Q-value","Descriptive Statistics"),escape = FALSE)%>%DT::formatSignif(c('p.value','q.value'),2)
}
else{
module_phenotype_association_datatable<-"Phenotype association resuls were not provided."
}
print("phenotype is done!")
modulemd<-paste0(full_path,"/AMARETTOhtmls/modules/module",ModuleNr,".rmd")
file.copy(system.file("templates/TemplateReportModule.Rmd",package="AMARETTO"),modulemd)
print("file.copy is done!")
knitr::knit_meta(class=NULL, clean = TRUE)
rmarkdown::render(modulemd,output_file = paste0("module",ModuleNr,".html"), params = list(
report_address = report_address,
ModuleNr = ModuleNr,
heatmap_module = heatmap_module,
dt_regulators = dt_regulators,
dt_targets = dt_targets,
module_phenotype_association_datatable=module_phenotype_association_datatable,
dt_genesets = dt_genesets),quiet = TRUE)
print("rmarkdown is done and module html is created :)")
file.remove(modulemd)
#file.remove(paste0(full_path,"/AMARETTOhtmls/modules/module",ModuleNr,"_files"))
print("file removed successfully :) Done!")
#ModuleOverviewTable<-rbind(ModuleOverviewTable,c(ModuleNr,length(which(AMARETTOresults$ModuleMembership==ModuleNr)),length(ModuleRegulators),ngenesets))
return(c(ModuleNr,length(which(AMARETTOresults$ModuleMembership==ModuleNr)),length(ModuleRegulators),ngenesets))
dev.off()
# },error=function(e){message(paste("an error occured for Module", ModuleNr))})
}
file_remove<-suppressWarnings(suppressMessages(file.remove(paste0(full_path,"/AMARETTOhtmls/modules/module",c(1:NrModules),"_files"))))
parallel::stopCluster(cluster)
cat("The module htmls are finished.\n")
ModuleOverviewTable<-data.frame(matrix(unlist(ModuleOverviewTable),byrow=TRUE,ncol=4),stringsAsFactors=FALSE)
colnames(ModuleOverviewTable)<-c("ModuleNr","NrTarGenes","NrRegGenes","SignGS")
if (!is.null(CNV_matrix)){
nCNV = ncol(CNV_matrix)
} else {nCNV = NA}
if (!is.null(MET_matrix)){
nMET = ncol(MET_matrix)
} else {nMET = NA}
nExp = ncol(AMARETTOresults$RegulatoryProgramData)
nGenes = length(AMARETTOresults$AllGenes)
nMod = AMARETTOresults$NrModules
options('DT.warn.size'=FALSE) # avoid showing datatable size-related warnings.
dt_overview<-DT::datatable(ModuleOverviewTable %>% dplyr::mutate(ModuleNr=paste0('<a href="./modules/module',ModuleNr,'.html">Module ',ModuleNr,'</a>')),class = 'display',filter = 'top', extensions = c('Buttons','KeyTable'), rownames = FALSE,colnames =c("Module","# Target Genes", "# Driver Genes", "# Gene Sets"),
options = list(pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list,columnDefs = list(list(className = 'dt-head-center', targets = "_all"),list(className = 'text-left', targets = "_all"))),escape = FALSE)
all_targets<-tibble::rownames_to_column(data.frame(AMARETTOresults$ModuleMembership),"Genes") %>% dplyr::rename(Module="ModuleNr") %>%dplyr::mutate(value=0)%>% dplyr::mutate(Type="Target")%>%select(Genes,Module,value,Type)
all_regulators<-reshape2::melt(tibble::rownames_to_column(as.data.frame(AMARETTOresults$RegulatoryPrograms),"Module"),id.vars = "Module") %>% dplyr::filter(value!=0) %>% dplyr::mutate(Module=sub("Module_","",Module),Type="Driver") %>% dplyr::rename(Genes='variable')%>%select(Genes,Module,value,Type)
all_genes<-rbind(all_targets,all_regulators) %>% dplyr::arrange(Genes) %>% dplyr::mutate(Genes=paste0('<a href="https://www.genecards.org/cgi-bin/carddisp.pl?gene=',Genes,'">',Genes,'</a>')) %>% dplyr::mutate(Module=paste0('<a href="./modules/module',Module,'.html">Module ',Module,'</a>'))
all_genes<-all_genes%>%dplyr::mutate(Color=sapply(as.numeric(value), function(x){
if(is.na(x)){
return("")
}
else if(x>0){
return("darkred")
}
else if(x<0){
return("darkblue")
}
else {
return("darkgreen")
}
}))%>%dplyr::mutate(Type=paste0('<font color=',Color,'>',Type,'</font>'))%>%select(-Color,-value)
all_genes<-as.matrix(all_genes)
dt_genes<-DT::datatable(all_genes, class = 'display',filter = 'top',extensions = c('Buttons','KeyTable'), rownames = FALSE,colnames =c("Gene","Module","Gene Type"),
options = list(deferRender=TRUE,columnDefs = list(list(className = 'dt-head-center', targets = "_all"),list(className = 'text-left', targets = "_all")), pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list),escape = FALSE)
if (hyper_geo_test_bool){
genesetsall<-output_hgt %>% dplyr::left_join(GeneSetDescriptions,by=c("Geneset"="GeneSet")) %>% dplyr::mutate(Testset=paste0('<a href="./modules/module',sub("Module_","",Testset),'.html">',paste0(Testset,paste0(rep(" ",14),collapse = "")),'</a>')) %>% dplyr::mutate(Modules=gsub("_"," ",Testset))%>%dplyr::mutate(overlap_perc=n_Overlapping/NumberGenes)%>%mutate(overlap_perc=signif(overlap_perc, digits = 3))
genesetsall<-genesetsall%>%select(Modules,Geneset,Description,Geneset_length,n_Overlapping,Overlapping_genes,overlap_perc,p_value,padj)%>%arrange(padj)%>%filter(n_Overlapping>2)%>%mutate(Geneset_length=as.integer(Geneset_length),n_Overlapping=as.integer(n_Overlapping))
#genesetsall<-dplyr::left_join(output_hgt %>% dplyr::group_by(Geneset) %>% dplyr::mutate(Testset=paste0('<a href="./modules/module',sub("Module_","",Testset),'.html">',Testset,'</a>')) %>% dplyr::summarise(Modules=paste(Testset,collapse=", ")),GeneSetDescriptions,by=c("Geneset"="GeneSet")) %>% dplyr::mutate(Modules=gsub("_"," ",Modules))
if (MSIGDB==TRUE){
genesetsall<-dplyr::mutate(genesetsall,Geneset=paste0('<a href="http://software.broadinstitute.org/gsea/msigdb/cards/',Geneset,'">',gsub("_"," ",Geneset),'</a>'))
}
genesetsall<-as.matrix(genesetsall)
dt_genesetsall<-DT::datatable(genesetsall[1:10,],class = 'display',filter = 'top', extensions = c('Buttons'), rownames = FALSE,
options = list(data=genesetsall,deferRender=TRUE,paging =TRUE, pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list,columnDefs = list(list(className = 'dt-head-center', targets = "_all"),list(className = 'text-left', targets = "_all"))),
colnames=c("Module","Gene Set Name","Gene Set Description","# Genes in Gene Set","# Genes in Overlap","Genes in Overlap","% Genes in overlap","P-value","FDR Q-value"),escape = FALSE)%>%
DT::formatSignif(c('p_value','padj','overlap_perc'),2) %>% DT::formatStyle('overlap_perc',background = DT::styleColorBar(c(0,1), 'lightblue'),backgroundSize = '98% 88%',backgroundRepeat = 'no-repeat', backgroundPosition = 'center')%>%DT::formatStyle(columns = c(6), fontSize = '60%')
}else{
dt_genesetsall<-"Genesets were not analysed as they were not provided."
}
if (!is.null(phenotype_association_table)){
phenotype_association_datatable<-DT::datatable(phenotype_association_table%>%mutate(p.value=signif(p.value, digits = 3),q.value=signif(q.value, digits = 3))%>%mutate(ModuleNr=paste0('<a href="./modules/module',gsub("Module ","",ModuleNr),'.html">',ModuleNr,'</a>'))%>%arrange(q.value),class='display',filter = 'top', extensions = c('Buttons','KeyTable'),rownames = FALSE,
options = list(pageLength = 10, lengthMenu = c(5, 10, 20, 50, 100), keys = TRUE, dom = 'Blfrtip',buttons = buttons_list,columnDefs = list(list(className = 'dt-head-center', targets = "_all"),list(className = 'text-left', targets = "_all"))),colnames=c("Module","Phenotype","Statistics Test","P-value","FDR Q-value","Descriptive Statistics"),escape = FALSE)%>%formatSignif(c('p.value','q.value'),2)
}
else{
phenotype_association_datatable<-"Phenotype association resuls were not provided."
}
rmarkdown::render(system.file("templates/TemplateIndexPage.Rmd",package="AMARETTO"), output_dir=paste0(full_path,"/AMARETTOhtmls/"),output_file= "index.html", params = list(
nExp = nExp,
nCNV = nCNV,
nMET = nMET,
nGenes = nGenes,
VarPercentage = VarPercentage,
nMod = nMod,
dt_overview = dt_overview,
dt_genes=dt_genes,
phenotype_association_datatable=phenotype_association_datatable,
dt_genesetsall = dt_gensesetsall),quiet = TRUE)
cat("The report is ready to use\n")
}
#' Hyper Geometric Geneset Enrichement Test
#'
#' Calculates the p-values for unranked gene set enrichment based on two gmt files as input and the hyper geometric test.
#' @return result
#' @param gmtfile The gmt file with reference gene set.
#' @param testgmtfile The gmt file with gene sets to test. In our case, the gmt file of the modules.
#' @param NrCores Number of cores used for parallelization.
#' @param ref.numb.genes The total number of genes teste, standard equal to 45 956 (MSIGDB standard).
#' @importFrom foreach foreach
#' @importFrom parallel makeCluster stopCluster
#' @importFrom doParallel registerDoParallel
#' @keywords internal
HyperGTestGeneEnrichment<-function(gmtfile,testgmtfile,NrCores,ref.numb.genes=45956){
`%dopar%` <- foreach::`%dopar%`
`%do%` <- foreach::`%do%`
test.gmt<-readGMT(testgmtfile) # our gmt_file_output_from Amaretto
gmt.path<-readGMT(gmtfile) # the hallmarks_and_co2...
########################### Parallelizing :
cluster <- parallel::makeCluster(c(rep("localhost", NrCores)), type = "SOCK")
doParallel::registerDoParallel(cluster,cores=NrCores)
#resultloop<-c()
resultloop<-foreach(j=1:length(test.gmt), .combine='rbind') %do% {
#print(j)
foreach(i=1:length(gmt.path),.combine='rbind') %dopar% {
#print(i)
# for(j in 1:length(test.gmt)){
# print(paste0("test_gmt = ",j))
# for(i in 1:length(gmt.path)){
l<-length(gmt.path[[i]])
k<-sum(gmt.path[[i]] %in% test.gmt[[j]])
m<-ref.numb.genes
n<-length(test.gmt[[j]])
p1<-stats::phyper(k-1,l,m-l,n,lower.tail=FALSE)
if (k>0){
overlapping.genes<-gmt.path[[i]][gmt.path[[i]] %in% test.gmt[[j]]]
overlapping.genes<-paste(overlapping.genes,collapse = ', ')
# resultloop<-rbind(resultloop,c(Geneset=names(gmt.path[i]),Testset=names(test.gmt[j]),p_value=p1,n_Overlapping=k,Overlapping_genes=overlapping.genes))
c(Geneset=names(gmt.path[i]),Testset=names(test.gmt[j]),Geneset_length=l,p_value=p1,n_Overlapping=k,Overlapping_genes=overlapping.genes)
}
}
}
parallel::stopCluster(cluster)
resultloop<-as.data.frame(resultloop,stringsAsFactors=FALSE)
resultloop$p_value<-as.numeric(resultloop$p_value)
resultloop$n_Overlapping<-as.numeric((resultloop$n_Overlapping))
resultloop$Geneset_length<-as.numeric(resultloop$Geneset_length)
resultloop[,"padj"]<-stats::p.adjust(resultloop[,"p_value"],method='BH')
return(resultloop)
}
#' GmtFromModules
#' @return result
#'
#' @param AMARETTOinit List output from AMARETTO_Initialize().
#' @param driverGSEA if TRUE , module driver genes will also be added to module target genes for GSEA.
#' @param AMARETTOresults List output from AMARETTO_Run().
#'
#' @importFrom tibble rownames_to_column
#' @importFrom reshape2 melt
#' @importFrom dplyr arrange mutate select rename filter
#' @importFrom utils write.table
#' @keywords internal
GmtFromModules <- function(AMARETTOinit,AMARETTOresults,driverGSEA){
ModuleMembership<-tibble::rownames_to_column(as.data.frame(AMARETTOresults$ModuleMembership),"GeneNames")
if(driverGSEA){
all_regulators <-reshape2::melt(tibble::rownames_to_column(as.data.frame(AMARETTOresults$RegulatoryPrograms),"Module"), id.vars = "Module") %>%
dplyr::filter(value > 0) %>% dplyr::select(variable, Module) %>% dplyr::mutate(Module = sub("Module_", "", Module)) %>% dplyr::rename(GeneNames = "variable")%>% dplyr::rename(ModuleNr = "Module")
ModuleMembership<-rbind(ModuleMembership,all_regulators)
}
NrModules<-AMARETTOresults$NrModules
ModuleMembership<-ModuleMembership %>% dplyr::arrange(GeneNames)
ModuleMembers_list<-split(ModuleMembership$GeneNames,ModuleMembership$ModuleNr)
names(ModuleMembers_list)<-paste0("Module_",names(ModuleMembers_list))
gmt_file="./Modules_genes.gmt"
utils::write.table(sapply(names(ModuleMembers_list),function(x) paste(x,paste(ModuleMembers_list[[x]],collapse="\t"),sep="\t")),gmt_file,quote = FALSE,row.names = TRUE,col.names = FALSE,sep='\t')
}
#' GeneSetDescription
#'
#' @param filename The name of the gmt file.
#' @param MSIGDB If True, the gene set description column will be provided from MSIGDB.
#'
#' @importFrom utils data
#' @return result
#' @keywords internal
GeneSetDescription<-function(filename,MSIGDB){
utils::data(MsigdbMapping)
gmtLines<-strsplit(readLines(filename),"\t")
gmtLines_description <- lapply(gmtLines, function(x) {
c(x[[1]],x[[2]],length(x)-2)
})
gmtLines_description<-data.frame(matrix(unlist(gmtLines_description),byrow=TRUE,ncol=3),stringsAsFactors=FALSE)
rownames(gmtLines_description)<-NULL
colnames(gmtLines_description)<-c("GeneSet","Description","NumberGenes")
gmtLines_description$NumberGenes<-as.numeric(gmtLines_description$NumberGenes)
if(MSIGDB){
gmtLines_description$Description<-sapply(gmtLines_description$GeneSet, function(x) {
index<-which(MsigdbMapping$geneset==x)
ifelse(length(index)!=0, MsigdbMapping$description[index],gmtLines_description$Description[which(gmtLines_description$GeneSet==x)])
})}
return(gmtLines_description)
}
#' readGMT
#'
#' @param filename
#'
#' @return result
#' @keywords internal
readGMT<-function(filename){
gmtLines<-strsplit(readLines(filename),"\t")
gmtLines_genes <- lapply(gmtLines, tail, -2)
names(gmtLines_genes) <- sapply(gmtLines, head, 1)
return(gmtLines_genes)
}
#' Title plot_run_history
#'
#' @param AMARETTOinit AMARETTO initialize output
#' @param AMARETTOresults AMARETTO results output
#'
#' @import ggplot2
#' @importFrom gridExtra grid.arrange
#' @importFrom stats sd
#' @return plot
#' @export
#'
#' @examples
#' data('ProcessedDataLIHC')
#' AMARETTOinit <- AMARETTO_Initialize(ProcessedData = ProcessedDataLIHC,
#' NrModules = 2, VarPercentage = 50)
#'
#' AMARETTOresults <- AMARETTO_Run(AMARETTOinit)
#'
#' plot_run_history(AMARETTOinit,AMARETTOresults)
plot_run_history<-function(AMARETTOinit,AMARETTOresults){
means<-unlist(lapply(AMARETTOresults$run_history$error_history, mean))
stds<-unlist(lapply(AMARETTOresults$run_history$error_history, sd))
iterationNr<-c(1:length(means))
NrReassignGenes<-AMARETTOresults$run_history$NrReassignGenes_history[-1]
threshold<-AMARETTOinit$Parameters$convergence_cutoff*nrow(AMARETTOinit$MA_matrix_Var)
TotGenesNr<-nrow(AMARETTOinit$MA_matrix_Var)
df<-data.frame(iterationNr = iterationNr,
means = means,
stds = stds,
NrReassignGenes = NrReassignGenes,
threshold = threshold,
TotGenesNr = TotGenesNr,
stringsAsFactors = FALSE)
p1<-ggplot2::qplot(x = iterationNr, y = means, data = df) + ggplot2::geom_errorbar(ggplot2::aes(x=iterationNr, ymin=means-stds, ymax=means+stds),data=df,width=0.25) + ggplot2::xlab("Iteration Number") + ggplot2::ylab("Mean Square Error") + ggplot2::geom_line() + ggplot2::geom_point()
p2<-ggplot2::qplot(x = iterationNr, y = NrReassignGenes) +ggplot2::geom_hline(yintercept = TotGenesNr, linetype="dashed", color = "blue")+ggplot2::geom_hline(yintercept = threshold, linetype="dashed", color = "red") + ggplot2::xlab("Iteration Number") + ggplot2::ylab("Target Gene Reassignments Number") + ggplot2::geom_line() + ggplot2::geom_point() + ggplot2::scale_y_continuous(trans='log2')
gridExtra::grid.arrange(p1, p2, nrow = 2)
return(TRUE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.