R/DEm-5-ge.R

Defines functions DEm_ge_analysis

#file: DE-method/DEm-4-assemble-fn-v1.R


# 6. gene enrichment analysis ---------------------------------------------


# gene enrichment analysis function ---------------------------------------

#added in description
#library(dplyr) #for summarise() function used to get genes in each FFL

#function
#' @title DEm_ge_analysis
#' @description gene enrichment analysis (FDR) for each predicted ffl
#'
#' @param arm1 DEm_arm1_miRNA_gene output
#' @param arm2 DEm_arm2_tf_gene output
#' @param ffls DEm_assemble_ffl output (either ffl1 or ffl2)
#'
#' @return data.frame with gene enrichment analysis for each loop

DEm_ge_analysis <- function(arm1, arm2, ffls){
  #####among predicted FFLs, find all possible miRNA-TF pairings
  #create df with all possible pairings
  mirna_tf <- expand.grid(unique(ffls$TF), unique(ffls$miRNA))
  colnames(mirna_tf) <- c("TF", "miRNA")
  #convert columns from factor to character
  mirna_tf[1:2] <- apply(mirna_tf[1:2], 2, as.character)

  #####annotate mirna_tf df: is this miRNA-TF pair included among predicted FFLs?
  #get predicted miRNA-TF pairs from FFLs & corresponding gene counts
  gene_counts <- summarise(group_by(ffls, TF, miRNA), gene_count = n())
  #merge gene_counts with mirna_tf to create "gene_counts" col. in mirna_tf
  mirna_tf <- merge(x = mirna_tf, y = gene_counts, by = c("TF", "miRNA"), all.x = TRUE)
  #only keep rows of mirna_tf whose gene_count > 0 (i.e. gene_count does not equal NA, aka. miRNA-TF pairs that are predicted to be in ffls)
  mirna_tf <- mirna_tf[!is.na(mirna_tf$gene_count), ]

  #####gene enrichment analysis: for loop, iterate through each row in mirna_tf
  #get DE genes
  DEgenes <- row.names(arm1$mrna_DE)
  DEgenes_up <- row.names(arm1$mrna_DE[arm1$mrna_DE$`log-ratio(gene)` > 0, ])
  DEgenes_down <- row.names(arm1$mrna_DE[arm1$mrna_DE$`log-ratio(gene)` < 0, ])

  #get DE genes columns to mirna_tf
  mirna_tf$DEgenes <- length(DEgenes) #!@# DE_genes -> DEgenes
  mirna_tf$DEgenes_up <- length(DEgenes_up)
  mirna_tf$DEgenes_down <- length(DEgenes_down)

  #add empty columns to mirna_tf
  mirna_tf$tftargets_among_DEgenes <- -111 #-111 is a filler, true numbers should be non-negative
  mirna_tf$non_tftargets_among_DEgenes <- -111 #-111 is a filler, true numbers should be non-negative
  mirna_tf$percent_DEgenes_tftargets <- -111 #-111 is a filler, true numbers should be non-negative
  mirna_tf$percent_upordowngenes_tftargets <- -111

  mirna_tf$mirtargets_among_DEgenes <- -111 #-111 is a filler, true numbers should be non-negative
  mirna_tf$percent_DEgenes_mirtargets <- -111 #-111 is a filler, true numbers should be non-negative
  mirna_tf$percent_upordowngenes_mirtargets <- -111

  mirna_tf$intersect <- -111 #-111 is a filler, true numbers should be non-negative
  mirna_tf$pval <- -111 #-111 is a filler, true numbers should be non-negative

  #for loop
  for (row_num in 1:nrow(mirna_tf)){
    #extract mirna & TF names
    mir <- mirna_tf[row_num, "miRNA"]
    tf <- mirna_tf[row_num, "TF"]

    #DE genes that are TF targets
    DEgenes_tftargets_among_DEgenes <- arm2$db_hits[arm2$db_hits$TF == tf, "gene"]
    mirna_tf[row_num, "tftargets_among_DEgenes"] <- length(DEgenes_tftargets_among_DEgenes)
    mirna_tf[row_num, "non_tftargets_among_DEgenes"] <- length(DEgenes) - length(DEgenes_tftargets_among_DEgenes)
    mirna_tf[row_num, "percent_DEgenes_tftargets"] <- length(DEgenes_tftargets_among_DEgenes)/length(DEgenes)*100
    #if TF upreg., then targets are also upreg. --> percent_updowngenes_tftargets = #targets/#upreg. genes
    if (arm2$db_hits[arm2$db_hits$TF == tf, "log-ratio(TF)"][1] > 0){
      mirna_tf[row_num, "percent_upordowngenes_tftargets"] <- length(DEgenes_tftargets_among_DEgenes)/length(DEgenes_up)*100}
    #if TF downreg., then targets are also downreg. --> percent_updowngenes_tftargets = #targets/#downreg. genes
    if (arm2$db_hits[arm2$db_hits$TF == tf, "log-ratio(TF)"][1] < 0){
      mirna_tf[row_num, "percent_upordowngenes_tftargets"] <- length(DEgenes_tftargets_among_DEgenes)/length(DEgenes_down)*100}

    #DE genes that are miRNA targets
    DEgenes_mirtargets_among_DEgenes <- arm1$db_hits[arm1$db_hits$miRNA == mir, "gene"]
    mirna_tf[row_num, "mirtargets_among_DEgenes"] <- length(DEgenes_mirtargets_among_DEgenes)
    mirna_tf[row_num, "percent_DEgenes_mirtargets"] <- length(DEgenes_mirtargets_among_DEgenes)/length(DEgenes)*100
    #if miRNA upreg., then targets are downreg. --> percent_updowngenes_tftargets = #targets/#downreg. genes
    if (arm1$db_hits[arm1$db_hits$miRNA == mir, "log-ratio(miRNA)"][1] > 0){
      mirna_tf[row_num, "percent_upordowngenes_mirtargets"] <- length(DEgenes_mirtargets_among_DEgenes)/length(DEgenes_down)*100}
    #if miRNA downreg., then targets are upreg. --> percent_updowngenes_tftargets = #targets/#upreg. genes
    if (arm1$db_hits[arm1$db_hits$miRNA == mir, "log-ratio(miRNA)"][1] < 0){
      mirna_tf[row_num, "percent_upordowngenes_mirtargets"] <- length(DEgenes_mirtargets_among_DEgenes)/length(DEgenes_up)*100}

    #DE genes that are in this particular ffl
    DEgenes_ffl <- intersect(DEgenes_tftargets_among_DEgenes, DEgenes_mirtargets_among_DEgenes)
    mirna_tf[row_num, "intersect"] <- length(DEgenes_ffl)
    #calculate pval
    mirna_tf[row_num, "pval"] <- phyper(q = length(DEgenes_ffl) - 1,
                                        m = length(DEgenes_tftargets_among_DEgenes),
                                        n = length(DEgenes) - length(DEgenes_tftargets_among_DEgenes),
                                        k = length(DEgenes_mirtargets_among_DEgenes),
                                        lower.tail = FALSE)
  }

  #calculate FDR
  mirna_tf$fdr <- p.adjust(mirna_tf$pval, method = "fdr")

  return(mirna_tf)
}
th789/ffl documentation built on Nov. 5, 2019, 10:04 a.m.