# R/DEm-4-assemble.R

# Defines functions DEm_assemble_ffl

#file: DE-method/DEm-4-assemble-fn-v1.R

# assemble FFL ------------------------------------------------------------


# assemble ffl function ---------------------------------------------------

#write function
#' @title DEm_assemble_ffl
#' @description Takes in differentially-expressed and database-confirmed
#' arms (miRNA-gene, TF-gene, and miRNA-TF) of the FFL and assembles possible FFLs
#' (type 1 or type 2 FFLs). Candidate miRNA-gene-TF triplets are built by joining
#' the miRNA-gene and TF-gene pairs on \code{gene}; a triplet is kept only when its
#' miRNA-TF pair is also present in \code{miRNA_tf_pairs} (the loop "closes").
#'
#' @param miRNA_gene_pairs db_hits from DEm_arm1_miRNA_gene output. Must contain the
#' columns \code{miRNA}, \code{gene}, \code{Ensembl_ID_gene}, \code{GeneID_gene},
#' \code{sum_db_hits_miRNAgene} and the \code{log-ratio}, \code{P-Value},
#' \code{P-adjust}, \code{mean_case}, \code{mean_control} columns for both
#' \code{(miRNA)} and \code{(gene)}.
#' @param tf_gene_pairs db_hits from DEm_arm2_tf_gene output. Must contain the
#' columns \code{TF}, \code{gene}, \code{sum_db_hits_TFgene} and the
#' \code{log-ratio}, \code{P-Value}, \code{P-adjust}, \code{mean_case},
#' \code{mean_control} columns for \code{(TF)}.
#' @param miRNA_tf_pairs output from DEm_arm3a_miRNA_tf or DEm_arm3b_tf_miRNA. Must
#' contain \code{miRNA} and \code{TF}, plus \code{Ensembl_ID_TF}, \code{GeneID_TF},
#' \code{sum_db_hits_miRNATF} for "ffl1" or \code{sum_db_hits_TFmiRNA} for "ffl2".
#' @param ffl_type "ffl1" (where miRNA regulates TF) or "ffl2" (where TF regulates miRNA).
#' Defaults to "ffl1" when not supplied; any other value is an error.
#'
#' @return data.frame of ffls: one row per closed miRNA-TF-gene loop, with the
#' annotation columns of the three arms attached. The number of individual FFLs
#' and the number of distinct miRNA-TF pairs among them are printed as a side effect.
DEm_assemble_ffl <- function(miRNA_gene_pairs, tf_gene_pairs, miRNA_tf_pairs, ffl_type = c("ffl1", "ffl2")){

  #resolve ffl_type to a single valid string; without this the default
  #c("ffl1", "ffl2") reaches the comparisons below as a length-2 vector
  ffl_type <- match.arg(ffl_type)

  #####find all possible triplet combinations (miRNA-gene-TF; not all loops will "close")
  #extract pairs
  arm1 <- miRNA_gene_pairs[ , c("miRNA", "gene")]
  arm2 <- tf_gene_pairs[ , c("TF", "gene")]
  arm3 <- miRNA_tf_pairs[ , c("miRNA", "TF")]
  #create the df with all possible triplet combinations
  triplets <- merge(arm1, arm2, by = "gene")

  #####find the predicted loops among all the possible triplet combinations
  #loop "closes" if the triplet's miRNA & TF appear together in at least one arm3 row.
  #keys are compared as strings so the test is vectorised, works for zero triplets,
  #and is not defeated by a pair that is listed more than once in arm3
  arm3_known <- !is.na(arm3$miRNA) & !is.na(arm3$TF)
  arm3_keys <- paste(arm3$miRNA[arm3_known], arm3$TF[arm3_known], sep = "\r")
  triplet_keys <- paste(triplets$miRNA, triplets$TF, sep = "\r")
  #NA identifiers never count as a match (paste() would otherwise turn them into "NA")
  closes <- !is.na(triplets$miRNA) & !is.na(triplets$TF) & triplet_keys %in% arm3_keys
  #ffl: df with actual FFLs
  ffl <- triplets[closes, ]

  #####add other columns to ffl df
  #from arm1: add miRNA-gene data to ffl df
  miRNA_gene_cols <- c("miRNA", "gene", "Ensembl_ID_gene", "GeneID_gene", "sum_db_hits_miRNAgene",
                       "log-ratio(miRNA)", "P-Value(miRNA)", "P-adjust(miRNA)", "mean_case(miRNA)", "mean_control(miRNA)",
                       "log-ratio(gene)", "P-Value(gene)", "P-adjust(gene)", "mean_case(gene)", "mean_control(gene)")
  ffl <- merge(x = ffl, y = miRNA_gene_pairs[ , miRNA_gene_cols], by = c("miRNA", "gene"), all.x = TRUE)
  #from arm2: add TF-gene data to ffl df
  tf_gene_cols <- c("TF", "gene", "sum_db_hits_TFgene",
                    "log-ratio(TF)", "P-Value(TF)", "P-adjust(TF)", "mean_case(TF)", "mean_control(TF)")
  ffl <- merge(x = ffl, y = tf_gene_pairs[ , tf_gene_cols], by = c("TF", "gene"), all.x = TRUE)

  #from arm3: add miRNA-TF data to ffl df
  #ffl1 scenario: arm3a (miRNA-TF); ffl2 scenario: arm3b (TF-miRNA)
  arm3_cols <- switch(ffl_type,
                      ffl1 = c("miRNA", "TF", "Ensembl_ID_TF", "GeneID_TF", "sum_db_hits_miRNATF"),
                      ffl2 = c("TF", "miRNA", "sum_db_hits_TFmiRNA"))
  #unique(): an arm3 row repeated verbatim must not multiply the FFL rows
  ffl <- merge(x = ffl, y = unique(miRNA_tf_pairs[ , arm3_cols]), by = c("miRNA", "TF"), all.x = TRUE)

  #####print #ffls total & #unique ffls
  ffl_label <- paste0(ffl_type, "s")
  #total ffls
  print(paste(nrow(ffl), "individual", ffl_label, sep = " "))
  #unique ffls (counted as distinct TF-miRNA pairs)
  unique_pairs <- unique(ffl[ , c("TF", "miRNA")])
  print(paste(nrow(unique_pairs), "unique", ffl_label, sep = " "))

  ffl
}
# th789/ffl documentation built on Nov. 5, 2019, 10:04 a.m.