#' Preprocess enrichment-analysis result files
#'
#' @description
#' ggenrichData reads one or more enrichment-analysis result files and
#' reshapes them into the data set expected by the plotting functions.
#'
#' @param fnames Input file or files. For format = "R_ClusterProfiler" with
#'   type = "pathways" exactly one file is read; every other combination
#'   reads each file and combines them.
#' @param format The way the enrichment results were produced. One of
#'   "JAVA_GSEA", "R_ClusterProfiler" or "DAVID".
#' @param type Analysis type. One of "pathways", "groups_pathway", "cluster"
#'   or "groups_cluster"; default "pathways". Supported combinations:
#'   JAVA_GSEA: "pathways"; R_ClusterProfiler: all four types;
#'   DAVID: "cluster" and "groups_cluster". Any other combination is an
#'   error.
#' @param GENE_MAX The maximum number of genes included in a GO term
#'   (exclusive), default 300. Only used for format = "R_ClusterProfiler"
#'   with type "cluster" or "groups_cluster".
#' @param GENE_MIN The minimum number of genes included in a GO term
#'   (exclusive), default 100. Only used for format = "R_ClusterProfiler"
#'   with type "cluster" or "groups_cluster".
#' @return A data set that is used as input to the ggenrichplot,
#'   enrichplot_MultiGroup, term_clustering and treeplot functions:
#'   \itemize{
#'     \item JAVA_GSEA / pathways: all files stacked, with the file name in
#'       column .id and the file name without its trailing ".xls" in column
#'       pathway.
#'     \item R_ClusterProfiler / pathways: the CSV file as a data frame.
#'     \item R_ClusterProfiler / groups_pathway: all files stacked, with
#'       the file name in column .id.
#'     \item R_ClusterProfiler / cluster: the unique terms (columns 2, 3
#'       and 5 of the input files) with p.adjust < 0.001 and a BgRatio
#'       numerator strictly between GENE_MIN and GENE_MAX, plus that
#'       numerator in column Bg.
#'     \item R_ClusterProfiler / groups_cluster: one row per retained term
#'       (row names are the term descriptions) and one column G1, G2, ...
#'       per input file holding that file's p.adjust.
#'     \item DAVID / cluster: all files stacked, with the file name in
#'       column .id and the file name without its trailing ".csv" in column
#'       group.
#'     \item DAVID / groups_cluster: the files merged on
#'       Annotation_information (used as row names), keeping the columns
#'       whose name contains "adj_pval".
#'   }
#' @note For DAVID format, the first column of the input file submitted by
#'   the user is the annotation information (term), the second column is the
#'   gene ID (or ENTREZ ID, gene symbol), and the third column is the
#'   adj_pval. For type = "groups_cluster" the columns must be named
#'   Annotation_information and adj_pval.
#'
#'   For format = "R_ClusterProfiler" with type "cluster" or
#'   "groups_cluster", columns 2, 3, 5 and 7 of every file are used; columns
#'   5 and 7 must be named BgRatio and p.adjust, and "groups_cluster"
#'   additionally needs columns 2 and 3 to be named ID and Description.
#' @examples
#' \dontrun{
#' extdata <- function(files) {
#'   system.file("extdata", files, package = "ggenrich", mustWork = TRUE)
#' }
#' result <- ggenrichData(
#'   extdata(c("HALLMARK_COAGULATION.xls", "HALLMARK_HYPOXIA.xls",
#'             "HALLMARK_PI3K_AKT_MTOR_SIGNALING.xls")),
#'   format = "JAVA_GSEA"
#' )
#' result <- ggenrichData(extdata("easy_input.csv"),
#'                        format = "DAVID", type = "cluster")
#' result <- ggenrichData(
#'   extdata(c("enrichGO1.csv", "enrichGO2.csv", "enrichGO3.csv")),
#'   format = "R_ClusterProfiler", type = "groups_cluster",
#'   GENE_MAX = 400, GENE_MIN = 200
#' )
#' result <- ggenrichData(extdata("gsea_new_output.csv"),
#'                        format = "R_ClusterProfiler")
#' result <- ggenrichData(extdata(c("gsea_group1.csv", "gsea_group2.csv")),
#'                        format = "R_ClusterProfiler",
#'                        type = "groups_pathway")
#' result <- ggenrichData(
#'   extdata(c("enrichGO1.csv", "enrichGO2.csv", "enrichGO3.csv")),
#'   format = "R_ClusterProfiler", type = "cluster",
#'   GENE_MAX = 400, GENE_MIN = 200
#' )
#' }
#' @export
ggenrichData <- function(fnames, format, type = "pathways",
                         GENE_MAX = 300, GENE_MIN = 100) {
  # Every format/type combination that has an implementation below.
  supported <- list(
    JAVA_GSEA = "pathways",
    R_ClusterProfiler = c("pathways", "groups_pathway",
                          "cluster", "groups_cluster"),
    DAVID = c("cluster", "groups_cluster")
  )

  # ---- Argument validation ------------------------------------------------
  if (!is.character(format) || length(format) != 1L ||
      !format %in% names(supported)) {
    stop("`format` must be one of: ",
         paste(names(supported), collapse = ", "), ".", call. = FALSE)
  }
  if (!is.character(type) || length(type) != 1L ||
      !type %in% supported[[format]]) {
    # Previously an unsupported combination silently returned NULL.
    stop("`type` is not supported for format = \"", format,
         "\"; supported types: ",
         paste(supported[[format]], collapse = ", "), ".", call. = FALSE)
  }
  if (length(fnames) == 0L) {
    stop("`fnames` must contain at least one file.", call. = FALSE)
  }

  # ---- Local helpers ------------------------------------------------------
  # Stop with a clear message instead of failing later on an undefined
  # object when an optional package is not installed.
  need_pkg <- function(pkg) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package '", pkg, "' is required for format = \"", format,
           "\" with type = \"", type, "\".", call. = FALSE)
    }
  }

  # Stop when a column that is accessed by name is absent.
  need_cols <- function(tbl, cols) {
    absent <- setdiff(cols, names(tbl))
    if (length(absent) > 0L) {
      stop("Input is missing required column(s): ",
           paste(absent, collapse = ", "), ".", call. = FALSE)
    }
  }

  # Read every file with `reader`; the list is named by file name.
  read_all <- function(reader) {
    tables <- lapply(fnames, reader)
    names(tables) <- fnames
    tables
  }

  # Stack a named list of tables; plyr stores the list names in `.id`.
  stack_tables <- function(tables) {
    plyr::ldply(tables, data.frame)
  }

  # Remove a trailing ".<ext>" only. The pattern is anchored and the dot is
  # escaped, so the result always has exactly one element per input path.
  strip_ext <- function(paths, ext) {
    sub(paste0("\\.", ext, "$"), "", as.character(paths))
  }

  # clusterProfiler tables: keep columns 2, 3, 5 and 7 only.
  read_ego <- function(path) {
    utils::read.csv(path)[, c(2, 3, 5, 7)]
  }

  # Unique significant terms whose BgRatio numerator lies strictly between
  # GENE_MIN and GENE_MAX. NA p-values / sizes are dropped rather than
  # turned into all-NA rows.
  filter_terms <- function(ego_all) {
    need_cols(ego_all, c("BgRatio", "p.adjust"))
    significant <- !is.na(ego_all$p.adjust) & ego_all$p.adjust < 0.001
    terms <- unique(ego_all[significant, 2:4])
    terms$Bg <- as.numeric(
      stringr::str_split_fixed(terms$BgRatio, "/", 2)[, 1]
    )
    in_range <- !is.na(terms$Bg) &
      terms$Bg < GENE_MAX & terms$Bg > GENE_MIN
    terms[in_range, ]
  }

  # ---- JAVA_GSEA ----------------------------------------------------------
  if (format == "JAVA_GSEA") {
    need_pkg("plyr")
    result <- stack_tables(read_all(utils::read.delim))
    result$pathway <- strip_ext(result$.id, "xls")
    return(result)
  }

  # ---- DAVID --------------------------------------------------------------
  if (format == "DAVID") {
    if (type == "cluster") {
      need_pkg("plyr")
      ego_all <- stack_tables(read_all(utils::read.csv))
      ego_all$group <- strip_ext(ego_all$.id, "csv")
      return(ego_all)
    }

    # type == "groups_cluster"
    need_pkg("dplyr")
    tables <- read_all(utils::read.csv)
    # NOTE(review): merge() only disambiguates clashing columns with
    # ".x"/".y", so four or more files presumably yield duplicated column
    # names here -- confirm before relying on more than three inputs.
    merged <- Reduce(
      function(x, y) merge(x, y, by = "Annotation_information", all = TRUE),
      tables
    )
    merged <- unique(
      dplyr::select(merged, Annotation_information,
                    dplyr::contains("adj_pval"))
    )
    rownames(merged) <- merged$Annotation_information
    merged$Annotation_information <- NULL
    return(merged)
  }

  # ---- R_ClusterProfiler --------------------------------------------------
  if (type == "pathways") {
    if (length(fnames) != 1L) {
      stop("type = \"pathways\" with format = \"R_ClusterProfiler\" ",
           "expects exactly one file.", call. = FALSE)
    }
    return(utils::read.csv(fnames, header = TRUE, sep = ","))
  }

  if (type == "groups_pathway") {
    need_pkg("plyr")
    return(stack_tables(read_all(utils::read.csv)))
  }

  # Both cluster types share the term filtering step.
  need_pkg("plyr")
  need_pkg("stringr")
  tables <- read_all(read_ego)
  terms <- filter_terms(stack_tables(tables))

  if (type == "cluster") {
    return(terms)
  }

  # type == "groups_cluster": one p.adjust column per input file.
  need_cols(terms, c("ID", "Description"))
  # Reduce each file to ID + its own uniquely named p.adjust column before
  # merging, so the result never depends on merge()'s ".x"/".y" suffixes
  # and stays well-formed for any number of files.
  per_group <- lapply(seq_along(tables), function(i) {
    need_cols(tables[[i]], c("ID", "p.adjust"))
    one_group <- tables[[i]][, c("ID", "p.adjust")]
    names(one_group) <- c("ID", paste0("G", i))
    one_group
  })
  merged <- Reduce(
    function(x, y) merge(x, y, by = "ID", all = TRUE),
    per_group
  )
  merged <- merge(terms[, c("ID", "Description")], merged,
                  by = "ID", all.x = TRUE)
  rownames(merged) <- merged$Description
  merged$ID <- NULL
  merged$Description <- NULL
  merged
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.