R/Module_annotation.R

Defines functions GO_Module_NameR GSEAGO_Builder Query_Prep

Documented in GO_Module_NameR GSEAGO_Builder Query_Prep

    #' Query Prep
    #' @param Auto_WGCNA_OUTPUT R object generated by Auto_WGCNA function.
    #' @param numGenes integer indicating the number of random genes to test 
    #' for hub gene detection.
    #' Default is 500.
    #' @param Find_hubs logical value. If TRUE, module hub genes will be returned. 
    #' If FALSE (default),
    #' intramodularConnectivity will be returned without hub gene identification.
    #' @param calculate_intramodularConnectivity a logical value. If TRUE 
    #' (default), the intramodularConnectivity
    #' will be caluculated using the intramodularConnectivity function from WGCNA. 
    #' If FALSE, a table of modules
    #' and genes will be returned without intramodularConnectivity values.
    #' @return a data.frame detailing the gene symbols for each module. Gene
    #' intramodularConnectivity may also be returned. If detected, hub genes are 
    #' annotated. 
    #' @inheritParams WGCNA::blockwiseModules
    #' @export
    #' @examples 
    #' GMIC_Builder_dir<-system.file("extdata", "GMIC_Builder.Rdata", 
    #' package = "GmicR", mustWork = TRUE)
    #' load(GMIC_Builder_dir)
    #' GMIC_Builder<-Query_Prep(GMIC_Builder,  Find_hubs = TRUE)
    #' head(GMIC_Builder$Query)
    
    
    Query_Prep<-function(Auto_WGCNA_OUTPUT, numGenes=500, Find_hubs=FALSE,
    nThreads=NULL,
    calculate_intramodularConnectivity=TRUE){
    
    datExpr<-Auto_WGCNA_OUTPUT$Network_Output$datExpr
    softPower<-Auto_WGCNA_OUTPUT$Input_Parameters$softPower
    modules<-Auto_WGCNA_OUTPUT$Network_Output$modules
    ref_genes<- Auto_WGCNA_OUTPUT$Network_Output$ref_genes
    corFnc<- Auto_WGCNA_OUTPUT$Input_Parameters$corFnc
    networkType<- Auto_WGCNA_OUTPUT$Input_Parameters$networkType
    
    if(calculate_intramodularConnectivity== TRUE){
    allowWGCNAThreads(nThreads = nThreads)
    adj_mat<-adjacency(datExpr, type = networkType, power = softPower, 
    corFnc = corFnc)
    disableWGCNAThreads()
    ConnectivityMeasures<-intramodularConnectivity(adj_mat, colors=modules, 
    scaleByMax = TRUE)
    
    if(Find_hubs==TRUE){
    possibleError <-tryCatch(Module_hubs<-data.frame(
    "hub"=chooseOneHubInEachModule(datExpr = datExpr,
    colorh = modules,
    power = softPower,
    numGenes = numGenes,
    type = networkType, corFnc = corFnc), 
    stringsAsFactors = FALSE),error = function(e) e)
    
    if(inherits(possibleError, "error")|inherits(possibleError, "simpleError")){
    message("unable to detect hubs")
    Hub_counts_mod<-data.frame(table(modules))
    Hub_counts_mod$hub<-c("None")
    rownames(Hub_counts_mod)<-Hub_counts_mod$modules
    } else if(!is.null(Module_hubs)){
    module_hub_summary<-as.data.frame(cbind(Module_hubs))
    module_hub_summary$modules<-rownames(module_hub_summary)
    module_gene_count<-data.frame(table(modules))
    Hub_counts_mod<-merge(module_hub_summary, module_gene_count, 
    by.x ="modules",
    by.y = "modules", all = TRUE)
    rownames(Hub_counts_mod)<-Hub_counts_mod$modules
    }
    
    } else if(Find_hubs==FALSE){
    Hub_counts_mod<-data.frame(table(modules))
    Hub_counts_mod$hub<-c("None")
    rownames(Hub_counts_mod)<-Hub_counts_mod$modules
    }
    
    # genes and module list ---------------------------------------------------
    
    Node_annotations<-as.data.frame(cbind(ref_genes, modules))
    rownames(Node_annotations)<-Node_annotations$ref_genes
    Merg_Node_anno_hubs<-merge(Node_annotations, 
    Hub_counts_mod, by.x ="modules",
    by.y = "modules", all = TRUE)
    
    rownames(Merg_Node_anno_hubs)<-Merg_Node_anno_hubs$ref_genes
    # creating Node annotation frame ------------------------------------------
    
    dat_Node_anno<-merge(Merg_Node_anno_hubs, 
    ConnectivityMeasures, 
    by = "row.names", all = TRUE)
    rownames(dat_Node_anno)<-dat_Node_anno$Row.names
    dat_Node_anno$Row.names<-NULL
    dat_Node_anno<-dat_Node_anno[c("ref_genes",
    "modules", "hub", "Freq", "kTotal", "kWithin")]
    
    dat_Node_anno$modules<-as.character(dat_Node_anno$modules)
    dat_Node_anno$modules<-as.numeric(dat_Node_anno$modules)
    dat_Node_anno<-dat_Node_anno[order(dat_Node_anno$modules,
    -dat_Node_anno$kWithin),]
    
    Auto_WGCNA_OUTPUT$Query<-dat_Node_anno
    }  else if(calculate_intramodularConnectivity ==FALSE){
    Node_annotations<-as.data.frame(cbind(ref_genes, modules))
    rownames(Node_annotations)<-Node_annotations$ref_genes
    
    Hub_counts_mod<-data.frame(table(modules))
    Hub_counts_mod$hub<-c("None")
    rownames(Hub_counts_mod)<-Hub_counts_mod$modules
    
    Merg_Node_anno_hubs<-merge(Node_annotations, 
    Hub_counts_mod, by.x ="modules",
    by.y = "modules", all = TRUE)
    
    rownames(Merg_Node_anno_hubs)<-Merg_Node_anno_hubs$ref_genes
    Auto_WGCNA_OUTPUT$Query<-Merg_Node_anno_hubs
    }
    
    return(Auto_WGCNA_OUTPUT)
    }
    
    
    #' GO enrichment for module names
    #' @import org.Hs.eg.db
    #' @import org.Mm.eg.db
    #' @importFrom GSEABase GeneSetCollection GOCollection
    #' @importFrom AnnotationDbi toTable GOAllFrame GOFrame
    #' @importFrom Category GSEAGOHyperGParams
    #' @import GOstats
    #' @import foreach
    #' @param no_cores Number of cores to use. Default = 4.
    #' @param colname_correct a logical value. If TRUE (default), "." in gene 
    #' names will be replaced
    #' with "-". This corrects a name change that is induced by R when creating a 
    #' data.frame. If FALSE,
    #' no changes will be made.
    #' @param species either 'Homo sapiens' (default) or 'Mus musculus'.
    #' @param Auto_WGCNA_OUTPUT output from Auto_WGCNA function.
    #' @param ontology string either 'BP'(Biological Process; default), 
    #' 'CC'(Cellular Component),
    #' or 'MF' (Molecular Function).
    #' @param GO_conditional A logical indicating whether the calculation 
    #' should condition on the GO structure. 
    #' will not be carried out. If TRUE, 
    #' @return Lists with gene ontology 
    #' enrichment analysis, performed using GOstats,
    #' for each module. 
    #' @note
    #' gene names must be official gene symbol
    #' @export
    #' @examples 
    #' 
    #' GMIC_Builder_dir<-system.file("extdata", "GMIC_Builder.Rdata", 
    #'                               package = "GmicR", mustWork = TRUE)
    #' load(GMIC_Builder_dir)
    #' GMIC_Builder$GSEAGO_Builder_Output<-NULL
    #' Test_GMIC_Builder<-GSEAGO_Builder(GMIC_Builder, no_cores = 1)
    #' summary(Test_GMIC_Builder$GSEAGO_Builder_Output)
    
    GSEAGO_Builder<-function(Auto_WGCNA_OUTPUT, species='Homo sapiens', 
    no_cores = 4, ontology = 'BP',
    GO_conditional = FALSE,colname_correct = TRUE){
    
    datExpr<-Auto_WGCNA_OUTPUT$Network_Output$datExpr
    modules<-as.vector(Auto_WGCNA_OUTPUT$Network_Output$modules)
    # replacing . with - in columnn names
    if(isTRUE(colname_correct)){
    colnames(datExpr)<-gsub(".", "-", colnames(datExpr), fixed = TRUE)
    }
    
    # getting ref_genes from column names
    ref_genes<-colnames(datExpr)
    
    # GO for humans
    if(species=='Homo sapiens'){
    # loading human gene set
    GO = AnnotationDbi::toTable(org.Hs.egGO); 
    SYMBOL = AnnotationDbi::toTable(org.Hs.egSYMBOL)
    } else if(species=='Mus musculus'){
    # GO for mouse
    GO = AnnotationDbi::toTable(org.Mm.egGO); 
    SYMBOL = AnnotationDbi::toTable(org.Mm.egSYMBOL)
    } 
    
    
    GOdf = data.frame(GO$go_id, GO$Evidence,
    SYMBOL$symbol[match(GO$gene_id,SYMBOL$gene_id)])
    GOframe = AnnotationDbi::GOAllFrame(AnnotationDbi::GOFrame(GOdf, 
    organism = species))
    gsc <- GSEABase::GeneSetCollection(GOframe, 
    setType = GSEABase::GOCollection())
    
    iterations <- length(unique(modules))
    GSEAGO = vector('list',iterations)
    GSEAGO<-setNames(GSEAGO, 0:(iterations-1))
    
    # cores
    cl <- parallel::makeCluster(no_cores)
    doParallel::registerDoParallel(cl)
    
    GSEAGOL<-foreach(ex = names(GSEAGO)) %dopar%{
    hyperGTest(
    Category::GSEAGOHyperGParams(name = paste(species, "GO", sep = " "),
    geneSetCollection = gsc, geneIds = colnames(datExpr)[modules==ex],
    universeGeneIds = ref_genes, ontology = ontology, pvalueCutoff = 0.05,
    conditional = GO_conditional, testDirection = 'over'))
    } 
    
    parallel::stopCluster(cl)

    #   
    GSEAGO<-lapply(GSEAGOL, summary)
    
    df<-geneIds(gsc)
    
    for(ew in 1:length(GSEAGO) ){
    geneIds_me = colnames(datExpr)[modules==(ew-1)]
    meFrame<-GSEAGO[[ew]]
    meFrame$Genes<-c(NA)
    for(q in meFrame$GOBPID){
    db_genes<-df[[q]]
    common_ids<-db_genes[db_genes %in% geneIds_me]
    meFrame[meFrame$GOBPID == q,]$Genes<-paste(common_ids, collapse="/")
    }
    GSEAGO[[ew]]<-meFrame
    }
    
    
    
    
    # preparing lists for export ----------------------------------------------
    GSEAGO_Builder_Output<-list(GSEAGO=GSEAGO, modules=modules)
    Auto_WGCNA_OUTPUT$GSEAGO_Builder_Output<-GSEAGO_Builder_Output
    
    return(Auto_WGCNA_OUTPUT)
    }
    
    
    #' GO enrichment for module names
    #' @param Auto_WGCNA_OUTPUT object returned by GSEAGO_Builder function
    #' @param cutoff_size integer indication the GO size cut off. If NULL 
    #' (default) the term with the smallest Pvalue will be returned.
    #' @export
    #' @return a data.frame listing the GO name for each module. Additionally, 
    #' a table similar to the output of Query_Prep function is returned, appended 
    #' with module GO names.
    #' @examples
    #' GMIC_Builder_dir<-system.file("extdata", "GMIC_Builder.Rdata", 
    #' package = "GmicR", mustWork = TRUE)
    #' load(GMIC_Builder_dir)
    #' GMIC_Builder<-GO_Module_NameR(GMIC_Builder, cutoff_size = 100)
    
    
    GO_Module_NameR<-function(Auto_WGCNA_OUTPUT, cutoff_size=NULL){
    
    GSEAGO<-Auto_WGCNA_OUTPUT$GSEAGO_Builder_Output$GSEAGO
    modules<-Auto_WGCNA_OUTPUT$GSEAGO_Builder_Output$modules
    Query_object<-Auto_WGCNA_OUTPUT$Query
    
    GO_module_name = rep(NA,length(unique(modules)))
    for (i in seq(1,length(unique(modules)))){
    if(is.null(cutoff_size)){
    
    GO_module_name[i] = GSEAGO[[i]][which.min(GSEAGO[[i]]$Pvalue),7]
    } else if(!is.null(cutoff_size)){
    GO_module_name[i] =
    GSEAGO[[i]][GSEAGO[[i]]$Size<cutoff_size,
    ][which.max(GSEAGO[[i]][GSEAGO[[i]]$Size<cutoff_size,
    ]$Count==max(GSEAGO[[i]][GSEAGO[[i]]$Size<cutoff_size,]$Count)),7]
    }
    }
    
    GO_module_name[1] = 'module 0'
    
    
    # output
    summary_mods<-as.data.frame(cbind(GO_module_name,
    row.names = c(paste("ME", 0:(length(unique(modules))-1), sep = ""))))
    
    rownames(summary_mods)<-summary_mods$row.names
    summary_mods$row.names<-NULL
    
    if(is.null(Query_object)){
    print(summary_mods)
    Auto_WGCNA_OUTPUT$GO_table<-GO_module_name
    return(Auto_WGCNA_OUTPUT)
    } else if(!is.null(Query_object)){
    summary_mods<-data.frame(cbind(GO_module_name,
    modules = c(0:(length(GO_module_name)-1))),
    stringsAsFactors = FALSE)
    
    Final_Summary<-merge(Query_object, summary_mods, by = "modules")
    rownames(Final_Summary)<-Final_Summary$ref_genes
    
    Final_Summary$modules<-as.character(Final_Summary$modules)
    Final_Summary$modules<-as.numeric(Final_Summary$modules)
    
    if(!is.null(Final_Summary$kWithin)){
    Final_Summary<-Final_Summary[order(Final_Summary$modules,
    -Final_Summary$kWithin),]
    }
    
    if(!is.null(Final_Summary$hub)){
    temp_summary<-unique(Final_Summary[c("GO_module_name",
    "modules", "hub", "Freq")])
    temp_summary<-temp_summary[order(temp_summary$modules),]
    rownames(temp_summary)<-c(paste("ME", 0:(length(unique(modules))-1), 
    sep = ""))
    } else if(is.null(Final_Summary$hub)){
    temp_summary<-unique(Final_Summary[c("GO_module_name",
    "modules", "Freq")])
    temp_summary<-temp_summary[order(temp_summary$modules),]
    rownames(temp_summary)<-c(paste("ME", 0:(length(unique(modules))-1),
    sep = ""))
    }
    
    Auto_WGCNA_OUTPUT$GO_Query<-Final_Summary
    Auto_WGCNA_OUTPUT$GO_table<-temp_summary
    return(Auto_WGCNA_OUTPUT)
    }
    
    }
    

Try the GmicR package in your browser

Any scripts or data that you put into this service are public.

GmicR documentation built on Nov. 8, 2020, 7:07 p.m.