R/reconstruct_network.R

Defines functions network_Rcis sort_TFs_degree output_grn enrich_module merge_Module_Regulations get_partial_regulations get_regulation_of_TFs_to_modules get_Enriched_TFs edge_handle network_analysis filter_regulation_Rcis

Documented in enrich_module filter_regulation_Rcis network_analysis output_grn sort_TFs_degree

#' Filtering regulatory relationship
#' @description Refine regulatory relationships through RcisTarget. Before run
#' this function, you should download Gene-motif rankings database from
#' https://resources.aertslab.org/cistarget, and set set the Rankingspath1 as
#' the path of downloaded Gene-motif rankings database.
#' @param Kmeans_result Kmeans result data.frame, the first column should be 'Symbol'
#' @param regulatory_relationships regulatory relationship, generated by \code{\link{get_cor}} function
#' @param Species character, indicating species of your data, and used to choose
#' annotation file in RcisTarget. Because of the limitaion of RcisTarget, this
#' function only support three species: 'Mm', 'Hs' and 'Fly'
#' @param Rankingsdir path of Gene-motif rankings file, which can be downloaded
#' in https://resources.aertslab.org/cistarget. For more details about motif ranking
#' file, you can read https://bioconductor.riken.jp/packages/3.9/bioc/vignettes/RcisTarget/inst/doc/RcisTarget.html#gene-motif_rankings
#' @param nesThreshold Numeric. NES threshold to calculate the motif significant
#'  (3.0 by default). The NES is calculated -for each motif- based on the AUC
#'  distribution of all the motifs for the gene-set [(x-mean)/sd]. The motifs
#'  are considered significantly enriched if they pass the the Normalized
#'  Enrichment Score (NES) threshold.
#' @importFrom RcisTarget importRankings
#' @importFrom RcisTarget cisTarget
#' @importFrom utils data
#' @return return filtered regulatory relationship
#' @export
#'
#' @examples Rankingspath1 <- 'hg19-500bp-upstream-7species.mc9nr.feather'
#'  # filtered_regulatory_relationships_Rcis <- filter_regulation(Kmeans_result,regulatory_relationships, 'Hs', Rankingspath1)
filter_regulation_Rcis<-function(Kmeans_result,regulatory_relationships,Species,
                                 Rankingsdir,nesThreshold=3){
  group1 <- levels(as.factor(Kmeans_result$KmeansGroup))
  genelist <- c()
  for (i in group1) {
    genelist[[i]] <- Kmeans_result[Kmeans_result$KmeansGroup==i,1]
  }
  if (Species == 'Hs') {
    utils::data('motifAnnotations_hgnc')
    geneannotate <- motifAnnotations_hgnc
  }else if (Species == 'Mm') {
    utils::data('motifAnnotations_mgi')
    geneannotate <- motifAnnotations_mgi
  }else if (Species == 'Fly') {
    utils::data('motifAnnotations_dmel')
    geneannotate <- motifAnnotations_dmel
  }
  motifRankings <- RcisTarget::importRankings(Rankingsdir)
  motifEnrichmentTable_wGenes <- RcisTarget::cisTarget(genelist, motifRankings,
                                                       motifAnnot=geneannotate,
                                                       nesThreshold=nesThreshold)
  regulation1 <- c()
  group1 <- levels(as.factor(motifEnrichmentTable_wGenes$geneSet))
  for (i in group1) {
    groupmotif <- motifEnrichmentTable_wGenes[motifEnrichmentTable_wGenes$geneSet==i,]
    signifMotifNames <- groupmotif$motif[1:nrow(groupmotif)]
    incidenceMatrix <- getSignificantGenes(genelist[[i]],
                                           motifRankings,
                                           signifRankingNames=signifMotifNames,
                                           plotCurve=FALSE, maxRank=5000-20,
                                           genesFormat="incidMatrix",
                                           method="aprox")$incidMatrix
    edges <- reshape2::melt(incidenceMatrix)
    edges <- edges[which(edges[,3]==1),1:2]
    colnames(edges) <- c("from","to")
    edges$TF <- motifEnrichmentTable_wGenes[
      match(edges$from,motifEnrichmentTable_wGenes$motif),]$TF_highConf
    edges$TF1 <- motifEnrichmentTable_wGenes[
      match(edges$from,motifEnrichmentTable_wGenes$motif),]$TF_lowConf
    regulation11 <- apply(edges[,2:4], 1,edge_handle)
    regulation11 <- unlist(regulation11)
    regulation1 <- c(regulation1,regulation11)
  }
  regulation2 <- paste(regulatory_relationships$TFSymbol,
                       regulatory_relationships$TargetSymbol)
  regulatory_relationships2 <- regulatory_relationships[regulation2 %in% regulation1,]
  return(regulatory_relationships2)
}

#' Function to make regulatory network analysis
#' @description This function calculates the significant transcription factors
#' and significant regulatory relationships in each module through hypergeometric
#' tests to construct regulatory networks
#' @param regulatory_relationships Refined or unrefined regulatory relationships.
#' Unrefined regulatory relationships are generated by \code{\link{get_cor}},
#' and you can use \code{\link{filter_regulation_fimo}} or consequence of
#' \code{\link{Footprints_FOS}} to refine it
#' @param Kmeans_result Kmeans result data.frame, row names should be ENSEMBEL ID,
#' and the first column should be gene Symbol ID, the second column should be KmeansGroup
#' @param TFFDR1 numeric, indicating the cutoff (FDR) to identify enriched transcription factors
#' @param TFFDR2 numeric, indicating the cutoff (FDR) to identify enriched
#' transcription factors that regulate each module to make regulatory network
#' for enriched transcription factors of each module
#' @param ModuleFDR numeric, indicating the cutoff (FDR) to identify enriched
#' modular regulatory relationships to make intramodular regulatory network
#' @importFrom stats phyper
#' @importFrom stats p.adjust
#' @importFrom utils write.table
#' @return This function return a list which contain 10 element
#'
#' Cor_TFs: data.frame of expressed TFs in the gene networks
#'
#' Cor_EnTFs: data.frame of TFs which significantly regulate gene modules (or enriched TFs)
#'
#' FOSF_RegMTF_Cor_EnTFs: regulatory pairs in which the source gene is enriched TF
#'
#' FOSF_RegMTF_Cor_EnTFsTarg: regulatory pairs in which both source gene and target gene are enriched TFs
#'
#' FOSF_RegMTF_Cor_EnTFsTargM: regulatory pairs only including regulations within each module but not those between modules
#'
#' TF_list: enriched TFs which significantly regulate gene modules
#'
#' TF_module_regulation: details of enriched TFs which significantly regulate gene modules
#'
#' TF_network: regulatory network for enriched transcription factors of each module
#'
#' intramodular_network: intramodular regulatory network
#'
#' @export
#'
#' @examples load(system.file("extdata", "test_clustering.rda", package = "IReNA"))
#' Kmeans_cluster_Ens <- add_ENSID(test_clustering,Spec1='Hs')
#' motif1 <- Tranfac201803_Hs_MotifTFsF
#' regulatory_relationships <- get_cor(Kmeans_cluster_Ens,motif1,0.7)
#' TFs_list <- network_analysis(regulatory_relationships,Kmeans_cluster_Ens)
network_analysis <- function(regulatory_relationships, Kmeans_result, TFFDR1 = 10
                             ,TFFDR2 =10, ModuleFDR = 0.05){
  validInput(regulatory_relationships,'regulatory_relationships','df')
  validInput(Kmeans_result,'Kmeans_result','df')
  validInput(TFFDR1,'TFFDR1','numeric')
  validInput(TFFDR2,'TFFDR2','numeric')
  validInput(ModuleFDR,'ModuleFDR','numeric')
  if (!'Correlation' %in% colnames(regulatory_relationships)) {
    stop('regulatory_relationships should contain Correlation column')
  }
  if (!'TF' %in% colnames(regulatory_relationships)) {
    stop('regulatory_relationships should contain TF column')
  }
  if (!'Target' %in% colnames(regulatory_relationships)) {
    stop('regulatory_relationships should contain Target column')
  }
  if (!'KmeansGroup' %in% colnames(Kmeans_result)) {
    stop('Kmeans_result should contain KmeansGroup column')
  }
  if (length(regulatory_relationships$TF[!regulatory_relationships$TF %in% rownames(Kmeans_result)])>0) {
    stop('rownames of Kmeans_result should contain all TF in regulatory_relationships')
  }
  if (length(regulatory_relationships$Target[!regulatory_relationships$Target %in% rownames(Kmeans_result)])>0) {
    stop('rownames of Kmeans_result should contain all Target in regulatory_relationships')
  }

  TFs_list <- get_Enriched_TFs(regulatory_relationships, Kmeans_result,
                               TFFdrThr1 = TFFDR1)
  TFs_list <- get_regulation_of_TFs_to_modules(TFs_list, TFFDR2)
  TFs_list <- get_partial_regulations(TFs_list)
  TFs_list <- merge_Module_Regulations(TFs_list, Kmeans_result, ModuleThr1 =
                                         ModuleFDR)
  return(TFs_list)
}

edge_handle <- function(edge){
  tfl <- strsplit(as.character(edge[3]),' ')[[1]]
  tfl <- stringr::str_replace_all(tfl,';','')
  tfh <- strsplit(as.character(edge[2]),' ')[[1]]
  tfh <- stringr::str_replace_all(tfh,';','')
  gene <- as.character(edge[1])
  edge_line <- c(paste(tfl,gene),paste(tfh,gene))
  return(edge_line)
}

get_Enriched_TFs <- function(GeneCor1, Kmeans_result, TFFdrThr1 = 2) {
  GeneCor1$TF <- factor(GeneCor1$TF)
  GeneCorP1 <- GeneCor1[GeneCor1$Correlation > 0, ]
  GeneCorN1 <- GeneCor1[GeneCor1$Correlation < 0, ]
  Module1 <- Kmeans_result
  TF0 <- table(GeneCor1$TF)
  TF01 <- rep(0, length(TF0))
  names(TF01) <- names(TF0)
  TFP01 <- table(GeneCorP1$TF)
  TFN01 <- table(GeneCorN1$TF)
  TFP1 <- TF01
  TFP1[match(names(TFP01), names(TF01))] <- TFP01
  TFN1 <- TF01
  TFN1[match(names(TFN01), names(TF01))] <- TFN01

  uGroup1 <- sort(unique(Module1$KmeansGroup))
  pTF4 <- c()
  for (i in 1:length(uGroup1)) {
    Module2 <- rownames(Module1)[Module1$KmeansGroup == uGroup1[i]]
    for (j in 1:2) {
      if (j == 1) {
        GeneCorPN <- GeneCorP1
        TF1 <- TFP1
        GeneCor2 <- GeneCorP1[is.element(GeneCorP1$Target, Module2), ]
      } else {
        GeneCorPN <- GeneCorN1
        TF1 <- TFN1
        GeneCor2 <- GeneCorN1[is.element(GeneCorN1$Target, Module2), ]
      }
      TF02 <- table(GeneCor2$TF)
      TF2 <- TF01
      TF2[match(names(TF02), names(TF01))] <- TF02
      TF3 <- cbind(TF2, TF1, nrow(GeneCorPN) - TF1, nrow(GeneCor2))

      ### perform hypergeometric test
      pTF3 <- apply(TF3, 1, function(X1) {
        X1 <- as.numeric(X1)
        if (X1[1] == 0 | X1[1] < 5 & X1[1] < 0.02 * X1[4]) {
          P1 <- 1
        } else {
          P1 <- phyper(X1[1], X1[2], X1[3], X1[4], lower.tail = FALSE)
        }
      })
      pTF31 <- p.adjust(pTF3, method = "fdr")
      pTF32 <- cbind(apply(TF3, 1, function(x1) {
        paste(x1, collapse = ";")
      }), pTF3, pTF31)
      pTF32 <- as.data.frame(pTF32)
      pTF32[,2] <- as.numeric(pTF32[,2])
      pTF32[,3] <- as.numeric(pTF32[,3])
      if (i == 1 & j == 1) {
        pTF4 <- pTF32
      } else {
        pTF4 <- cbind(pTF4, pTF32)
      }
    }
    colnames(pTF4)[(ncol(pTF4) - 5):ncol(pTF4)] <- paste0(c("Pnum", "Pp", "Pfdr"
                                                            , "Nnum", "Np",
                                                            "Nfdr"), uGroup1[i])
  }
  pTF4Mod <- Module1[match(rownames(pTF4), rownames(Module1)), ]
  Ind1 <- 1:length(uGroup1)
  pTF4Min <- t(apply(pTF4[, grep("fdr", colnames(pTF4))], 1, function(x1) {
    x12 <- as.numeric(as.character(x1))
    x2 <- -log10(x12)
    x21 <- x2[seq(1, length(x2), 2)]
    x22 <- x2[seq(2, length(x2), 2)]
    x212 <- paste(Ind1[x21 > TFFdrThr1], collapse = ";")
    x222 <- paste(Ind1[x22 > TFFdrThr1], collapse = ";")
    if (x212 == "") {
      x212 <- "NA"
    }
    if (x222 == "") {
      x222 <- "NA"
    }

    x3 <- max(x2)
    ix2 <- which.max(x2)
    if (ix2 %% 2 == 1) {
      ix3 <- paste0("P", floor((ix2 + 1) / 2))
    } else {
      ix3 <- paste0("N", floor((ix2 + 1) / 2))
    }

    return(c(x3, ix3, x212, x222))
  }))
  colnames(pTF4Min) <- c("TFMinNlogfdr", "TFMinGroup", "SigActModules", "SigRepModules")
  pTF4Min <- as.data.frame(pTF4Min)
  pTF4Min[,1] <- as.numeric(pTF4Min[,1])
  pTF42 <- cbind(pTF4Mod, pTF4Min, pTF4)

  #### Enriched TFs
  Ind42 <- sort(unique(c(1:nrow(pTF42))[as.numeric(as.character(pTF42[, "TFMinNlogfdr"])) > TFFdrThr1]))
  EnrichTF1 <- pTF42[Ind42, ]
  print(paste("Total TFs:", nrow(pTF42)))
  print(paste("Enriched TFs:", nrow(EnrichTF1)))

  GeneCorTFfdr <- pTF4Min[match(as.character(GeneCor1$TF), rownames(pTF4Min)), ]
  Regulation <- apply(GeneCor1, 1, function(x1) {
    x2 <- "Positive"
    if (as.numeric(x1["Correlation"]) < 0) {
      x2 <- "Negative"
    }
    return(x2)
  })

  GeneCor3 <- cbind(GeneCor1[, grep("TF", colnames(GeneCor1))], GeneCorTFfdr,
                    GeneCor1[, c(grep("Target", colnames(GeneCor1))[1]:ncol(GeneCor1))],
                    Regulation)
  #### Edges' regulator belongs to enriched TFs
  EnTFReg1 <- GeneCor3[is.element(GeneCor3$TF, rownames(EnrichTF1)), ]
  #### Edges' regulator and target belong to enriched TFs
  EnTFTarg1 <- GeneCor3[is.element(GeneCor3$TF, rownames(EnrichTF1)) & is.element(GeneCor3$Target,
                                                                                  rownames(EnrichTF1)), ]

  #### Get edges within each module
  EnTFGroup1 <- sort(unique(c(unique(EnTFTarg1$TFGroup), unique(EnTFTarg1$TargetGroup))))
  EnTFTarg2 <- c()
  for (i in 1:length(EnTFGroup1)) {
    EnTFTarg2 <- rbind(EnTFTarg2, EnTFTarg1[EnTFTarg1$TFGroup == EnTFGroup1[i] &
                                              EnTFTarg1$TargetGroup == EnTFGroup1[i], ])
  }
  list1 <- list(pTF42, EnrichTF1, EnTFReg1, EnTFTarg1, EnTFTarg2)
  names(list1) <- c("Cor_TFs", "Cor_EnTFs", "FOSF_RegMTF_Cor_EnTFs",
                    "FOSF_RegMTF_Cor_EnTFsTarg", "FOSF_RegMTF_Cor_EnTFsTargM")
  return(list1)
}


get_regulation_of_TFs_to_modules <- function(TFs_list, Thr = 2) {
  con1 <- TFs_list[['Cor_EnTFs']]
  Thr1 <- Thr
  name1 <- colnames(con1)[grep("^[PN]fdr\\d+$", colnames(con1))]
  ind1 <- grep("^[PN]fdr\\d+$", colnames(con1))
  col1 <- c()
  col1 <- c(paste("TF", "TFSymbol", "TFGroup", "TargetModule", "TargetGroup",
                  "Regulation", "Nlogfdr", sep = "\t"))
  TF <- c()
  for (i in 1:nrow(con1)) {
    for (j in ind1) {
      if (con1[i, ][j] == 0) {
        fdr1 <- Inf
      }
      else {
        fdr1 <- -log(con1[i, ][j]) / log(10)
      }
      if (fdr1 > Thr1) {
        name2 <- colnames(con1)[j]
        var1 <- paste(con1[i, ][1:2], collapse = "\t")
        if (grepl("^P", name2) == TRUE) {
          name3 <- "Positive"
        } else if (grepl("^N", name2) == TRUE) {
          name3 <- "Negative"
        }
        TG <- stringr::str_extract(name2, "\\d")
        var2 <- paste(rownames(con1)[i], var1, paste0("Group", TG), TG,
                      name3, fdr1, sep = "\t")
        col1 <- c(col1, var2)
        TF <- c(TF, con1[i, ][1]$Symbol)
      }
    }
  }
  col2 <- as.data.frame(col1)
  TF_list <- TF[!duplicated(TF)]
  TF_module_regulation <- strsplit(col2[1,], '\t')[[1]]
  TF_module_regulation <- matrix(TF_module_regulation, ncol =
                                   length(TF_module_regulation))
  for (i in 2:nrow(col2)) {
    out2 <- strsplit(col2[i,], '\t')[[1]]
    TF_module_regulation <- rbind(TF_module_regulation,out2)
  }
  TF_module_regulation <- as.data.frame(TF_module_regulation)
  colnames(TF_module_regulation) <- TF_module_regulation[1,]
  TF_module_regulation <- TF_module_regulation[-1,]
  TF_module_regulation[,7] <- as.numeric(TF_module_regulation[,7])
  TFs_list[['TF_list']] <- TF_list
  TFs_list[['TF_module_regulation']] <- TF_module_regulation
  return(TFs_list)
}


get_partial_regulations <- function(TFs_list) {
  con1 <- TFs_list[['FOSF_RegMTF_Cor_EnTFs']]
  hash2 <- TFs_list[['TF_list']]
  con1$TFSymbol <- as.character(con1$TFSymbol)
  con1$TargetSymbol <- as.character(con1$TargetSymbol)
  rowcount <- c()
  for (i in 1:nrow(con1)) {
    if (con1[i, ][2] %in% hash2 & con1[i, ][9] %in% hash2 == TRUE) {
      rowcount <- c(rowcount, i)
    }
  }
  col1 <- con1[rowcount, ]
  TFs_list[['TF_network']] <- col1
  return(TFs_list)
}


merge_Module_Regulations <- function(TFs_list, Kmeans_result, ModuleThr1 = 0.05) {
  TF1 <- Kmeans_result
  Regulation1 <- TFs_list[['FOSF_RegMTF_Cor_EnTFsTarg']]
  Regulation1$Correlation <- as.numeric(Regulation1$Correlation)
  Module1 <- sort(unique(TF1$KmeansGroup))

  RegulationNum1 <- c()
  RegulationP <- Regulation1[Regulation1$Regulation == "Positive", ]
  RegulationN <- Regulation1[Regulation1$Regulation == "Negative", ]
  for (i in 1:length(Module1)) {
    Regulation1A <- Regulation1[Regulation1$TFGroup == Module1[i], ]
    Regulation1AP <- Regulation1A[Regulation1A$Regulation == "Positive", ]
    Regulation1AN <- Regulation1A[Regulation1A$Regulation == "Negative", ]
    Regulation2P <- RegulationP[RegulationP$TargetGroup == Module1[i], ]
    Regulation2N <- RegulationN[RegulationN$TargetGroup == Module1[i], ]
    for (j in i:length(Module1)) {
      Regulation2A <- Regulation1[Regulation1$TFGroup == Module1[j], ]
      Regulation2AP <- Regulation2A[Regulation2A$Regulation == "Positive", ]
      Regulation2AN <- Regulation2A[Regulation2A$Regulation == "Negative", ]
      Regulation1P <- RegulationP[RegulationP$TargetGroup == Module1[j], ]
      Regulation1N <- RegulationN[RegulationN$TargetGroup == Module1[j], ]

      Regulation12P <- Regulation1AP[Regulation1AP$TargetGroup == Module1[j], ]
      Regulation12N <- Regulation1AN[Regulation1AN$TargetGroup == Module1[j], ]
      Regulation21P <- Regulation2AP[Regulation2AP$TargetGroup == Module1[i], ]
      Regulation21N <- Regulation2AN[Regulation2AN$TargetGroup == Module1[i], ]

      Regulation12Pnum <- c(Module1[i], Module1[j], "Positive",
                            mean(Regulation12P$Correlation), nrow(Regulation12P),
                            nrow(Regulation1P), nrow(RegulationP) - nrow(Regulation1P), nrow(Regulation1AP))
      Regulation12Nnum <- c(Module1[i], Module1[j], "Negative",
                            mean(Regulation12N$Correlation), nrow(Regulation12N),
                            nrow(Regulation1N), nrow(RegulationN) - nrow(Regulation1N), nrow(Regulation1AN))
      Regulation21Pnum <- c(Module1[j], Module1[i], "Positive",
                            mean(Regulation21P$Correlation), nrow(Regulation21P),
                            nrow(Regulation2P), nrow(RegulationP) - nrow(Regulation2P), nrow(Regulation2AP))
      Regulation21Nnum <- c(Module1[j], Module1[i], "Negative",
                            mean(Regulation21N$Correlation), nrow(Regulation21N),
                            nrow(Regulation2N), nrow(RegulationN) - nrow(Regulation2N), nrow(Regulation2AN))
      if (i == j) {
        RegulationNum1 <- rbind(RegulationNum1, Regulation12Pnum, Regulation12Nnum)
      } else {
        RegulationNum1 <- rbind(RegulationNum1, Regulation12Pnum, Regulation12Nnum,
                                Regulation21Pnum, Regulation21Nnum)
      }
    }
  }

  ### perform hypergeometric test
  RegulationNum1 <- as.data.frame(RegulationNum1)
  RegulationNum1 <- RegulationNum1[RegulationNum1$V4 != "NaN", ]
  RegulationP1 <- apply(RegulationNum1[, 5:ncol(RegulationNum1)], 1, function(X1) {
    X1 <- as.numeric(X1)
    if (X1[1] < 4) {
      P1 <- 1
    } else {
      P1 <- phyper(X1[1], X1[2], X1[3], X1[4], lower.tail = FALSE)
    }
  })
  RegulationP2 <- -log10(p.adjust(RegulationP1, method = "fdr"))
  RegulationP3 <- cbind(RegulationNum1[, 1:4], apply(RegulationNum1[, 5:ncol(RegulationNum1)], 1, function(x1) {
    paste(x1, collapse = ";")
  }), RegulationP1, RegulationP2)
  colnames(RegulationP3)[1:7] <- c("TFGroup", "TargetGroup", "Regulation",
                                   "Correlation", "NumberRegulation", "Pvalue", "NlogFdr")

  RegulationP4 <- RegulationP3[as.numeric(as.character(RegulationP3[, "NlogFdr"])) >
                                 -log10(ModuleThr1), ]
  RegulationP4 <- RegulationP4[order(RegulationP4[, "TargetGroup"]), ]
  RegulationP4 <- RegulationP4[order(RegulationP4[, "TFGroup"]), ]
  print(paste("Significant regulations:", nrow(RegulationP4)))
  TFs_list[['intramodular_network']] <- RegulationP4
  return(TFs_list)
}

#' Enrichment analysis of each module
#' @description This function integrate clusterprofile make enrichment analysis
#' for each moudle. Before run this function, you need to download the org.db for
#' your species through BiocManager.
#' @param Kmeans_result Kmeans result data.frame, row names should be ENSEMBEL ID,
#' and the first column should be gene Symbol ID, the second column should be KmeansGroup
#' @param org.db org.db of your species.
#' @param enrich.db 'GO' for GO enricment analysis, 'KEGG' for KEGG enrichment analysis
#' @param fun_num numeric, indicating the number of output functions per module
#' @param pvalueCutoff adjusted pvalue cutoff on enrichment tests to report
#' @param use_internal_data logical, use KEGG.db or latest online KEGG data
#' @param organism character, used for KEGG enrichment analysis.
#' supported organism listed in 'http://www.genome.jp/kegg/catalog/org_list.html'
#' @importFrom clusterProfiler bitr
#' @importFrom clusterProfiler enrichKEGG
#' @importFrom clusterProfiler enrichGO
#' @return data.frame contains enrichment functions for each module
#' @export
#'
#' @examples \dontrun{
#' library(org.Hs.eg.db)
#' Kmeans_cluster_Ens <- add_ENSID(clustering,Spec1='Hs')
#' enrichment <- enrich_module(Kmeans_cluster_Ens, org.Hs.eg.db, 'KEGG')
#' enrichment <- enrich_module(Kmeans_cluster_Ens, org.Hs.eg.db, 'GO')
#' }
enrich_module <- function(Kmeans_result, org.db, enrich.db ,fun_num = 5,
                          pvalueCutoff = 0.05, use_internal_data = TRUE, organism = NULL) {
  all_gene <- Kmeans_result
  all_gene<-all_gene[order(all_gene$KmeansGroup),]
  le<-levels(as.factor(all_gene$KmeansGroup))
  for (i in le) {
    acc11<-rownames(all_gene[all_gene$KmeansGroup == i,])
    gene1 <- clusterProfiler::bitr(acc11, fromType = "ENSEMBL",
                  toType = c("SYMBOL", "ENTREZID"),
                  OrgDb = org.db)
    if (enrich.db =='KEGG') {
      k1 <- clusterProfiler::enrichKEGG(gene = gene1$ENTREZID,
                                        pvalueCutoff = pvalueCutoff
                                        ,organism = organism
                                        ,use_internal_data = use_internal_data)
    }else if(enrich.db =='GO'){
      k1 = clusterProfiler::enrichGO(gene = gene1$ENTREZID,
                    OrgDb = org.db,
                    keyType = "ENTREZID",
                    ont = "BP",
                    pvalueCutoff = pvalueCutoff)
    }
    acc2 <- k1@result
    acc2$'-log10(q-value)' <- -log10(acc2$qvalue)
    acc2 <- acc2[order(-acc2$`-log10(q-value)`),]
    if (i=='1' | i == 1) {
      acc21 <- acc2[1:fun_num,]
      acc21$module <- rep(i,fun_num)
    }else{
      acc22 <- acc2[1:fun_num,]
      acc22$module<-rep(i,fun_num)
      acc21 <- rbind(acc21,acc22)
    }
  }
  acc21 <- acc21[,c(1,2,11,10,3:9)]
  return(acc21)
}

#' Generate grn format regulatory relationships
#'
#' @param regulatory_relationships regulatory relationships , columns should
#' contain 'TF', 'TFSymbol', 'TFGroup', 'Target', 'TargetSymbol', 'TargetGroup'
#' and 'Correlation'
#'
#' @return grn format regulatory relationships
#' @export
#'
#' @examples load(system.file("extdata", "test_clustering.rda", package = "IReNA"))
#' Kmeans_cluster_Ens <- add_ENSID(test_clustering,Spec1='Hs')
#' motif1 <- Tranfac201803_Hs_MotifTFsF
#' regulatory_relationships <- get_cor(Kmeans_cluster_Ens,motif1,0.9)
#' grn <- output_grn(regulatory_relationships)
output_grn <- function(regulatory_relationships){
  ID <- paste(regulatory_relationships$TFSymbol,regulatory_relationships$TargetSymbol,sep = '_')
  Attribution <- paste0('TFSymbol:',regulatory_relationships$TFSymbol,';','TargetSymbol:',regulatory_relationships$TargetSymbol)
  final <- regulatory_relationships[,c('TF','TFGroup','Target','TargetGroup','Correlation')]
  final$ID <- ID
  final$Attribution <- Attribution
  final <- final[,c(6,1:5,7)]
  return(final)
}


#' Sort Transcription factor based on degree.
#' @description Sort Transcription factor based on degree.
#' @param regulatory_relationships regulatory relationships , columns should
#' contain 'TF', 'TFSymbol', 'TFGroup', 'Target', 'TargetSymbol', 'TargetGroup'
#' and 'Correlation'
#'
#' @return Matrix contains sorted transcription factors
#' @export
#'
#' @examples load(system.file("extdata", "test_clustering.rda", package = "IReNA"))
#' test_clustering=add_ENSID(test_clustering,Spec1 = 'Hs')
#' correlation <- get_cor(test_clustering, Tranfac201803_Hs_MotifTFsF, 0.7, start_column=3)
#' TFs_degree <- sort_TFs_degree(correlation)
sort_TFs_degree <- function(regulatory_relationships){
  TFs <- table(regulatory_relationships$TFSymbol)
  Target <- table(regulatory_relationships$TargetSymbol)
  Target <- Target[names(Target) %in% names(TFs)]
  for (i in 1:length(Target)) {
    name1 <- names(Target[i])
    TFs[name1] <- TFs[name1] + Target[i]
  }
  result <- as.matrix(sort(TFs,decreasing = T))
  colnames(result)<-'degree'
  return(result)
}

network_Rcis <- function(Rcis){
  tfs <- Rcis$TFSymbol[!duplicated(Rcis$TFSymbol)]
  print(tfs)
  network <- Rcis[Rcis$TargetSymbol %in% tfs,]
  Regulation <- c()
  for (i in 1:nrow(network)) {
    if (network[i,7]>0) {
      Regulation <- c(Regulation,'Positive')
    }else{Regulation <- c(Regulation,'Negative')}
  }
  network$Regulation <- Regulation
  return(network)
}
jiang-junyao/IReNA documentation built on May 17, 2024, 12:29 a.m.