R/DEm-3b-arm3b.R

Defines functions DEm_arm3b_tf_miRNA

#file: DE-method/DEm-3b-arm3b-fn-v1.R

# arm3b: TF-miRNA ---------------------------------------------------------



# arm3b prep --------------------------------------------------------------

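#!!! database loading is commented out for now; the objects `transmir_basic` and `arm3b_tf_list`, which DEm_arm3b_tf_miRNA below reads, must be defined elsewhere before that function is called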
# ###########load TF-miRNA database
# ###transmir evidence
# #load
# transmir <- read.table(file = "/Users/than/Dropbox/research/miRNA/ffl/new/databases/TF-miRNA/transmir_hsa_evidence.tsv",
#                        sep = '\t', header = FALSE)
# #add column names
# colnames(transmir) <- c("TF", "miRNA_short", "TSS", "TF_binding_site", "action_type", "SRAID/PMID", "evidence", "tissue", "species")
# #evidence types
# #Based on reliability of the transcriptional regulatory region (promoter region) annotation
# #used, we classified the TF-miRNA regulations derived from ChIP-seq into level 1 and level 2.
# #For level 1, we choose 5'-end of the pre-miRNA or that of the first member in the miRNA
# #cluster as the transcription start site. Next, a window from the 5kb upstream to the
# #1kb downstream of the miRNA TSS was identified as the putative transcriptional regulatory
# #region. Apparently, this definition could cover most of the miRNAs, but suffers from
# #substantial inaccuracy. Therefore for level 2, the miRNA TSS was supported by high-throughput
# #experiments from literature. And the 300bp upstream and 100bp downstream of each miRNA TSS
# #was identified as the putative transcriptional regulatory region. The level 2 TF-miRNA
# #regulations are much more stringent than level 1 TF-miRNA regulations, but cover less miRNAs.
#
# #literature: literature-curated TF-miRNA regulations
#
# # ###keep only blood tissue rows
# # transmir_blood <- transmir[transmir$tissue == "Blood", ]
# # #keep only first two columns (TF & miRNA_short)
# # transmir_blood_basic <- transmir_blood[ , c("TF", "miRNA_short")]
# # transmir_blood_basic <- transmir_blood_basic[!duplicated(transmir_blood_basic), ]
# # #add TransmiR column: 1 for all rows (flags that the TF-miRNA pair is in the TransmiR database; used in the step that checks whether positively correlated TF-miRNA pairs are in the database)
# # transmir_blood_basic$TransmiR <- 1
#
# #search all tissues
# transmir_basic <- transmir[ , c("TF", "miRNA_short")]
# transmir_basic <- transmir_basic[!duplicated(transmir_basic), ]
# #add TransmiR column: 1 for all rows (flags that the TF-miRNA pair is in the TransmiR database; used in the step that checks whether positively correlated TF-miRNA pairs are in the database)
# transmir_basic$TransmiR <- 1
# ###########fin
#
#
# ###########create list of TFs
# # arm3b_tf_list <- as.character(unique(transmir_blood_basic$TF))
#
# arm3b_tf_list <- as.character(unique(transmir_basic$TF))
# ###########fin



# arm3b function ----------------------------------------------------------

#' @title DEm_arm3b_tf_miRNA
#' @description Identifies valid TF-miRNA (TF regulates miRNA) pairs for FFL.
#'   Every DE TF is paired with every DE miRNA that changes in the same
#'   direction (both up or both down); only the pairs that are found in the
#'   TF-miRNA database are returned.
#'
#' @param mrna_DE dataframe with DE mRNAs; row names are matched against
#'   `tf_list` and the column `log-ratio` (case - control) gives the direction
#' @param mirna_DE dataframe with DE miRNAs; row names are miRNA names and the
#'   column `log-ratio` (case - control) gives the direction. Names are
#'   expected to carry a 4-character prefix before "miR" (e.g.
#'   "hsa-miR-21-5p"), since characters 5-7 are what gets lower-cased
#' @param tf_list vector of TF names used to pick the TFs out of `mrna_DE`;
#'   defaults to the object `arm3b_tf_list`, which must then be visible to
#'   this function
#' @param transmir_db dataframe with the columns `TF`, `miRNA_short` and
#'   `TransmiR` (1 for every listed pair); defaults to the object
#'   `transmir_basic`, which must then be visible to this function
#'
#' @return list of length 3:
#' 1) tf_DE: dataframe of DE TFs
#' 2) mirna_DE: dataframe of DE miRNAs (the input, unchanged)
#' 3) db_hits: TF-miRNA pairs that were found in database

DEm_arm3b_tf_miRNA <- function(mrna_DE, mirna_DE,
                               tf_list = arm3b_tf_list,
                               transmir_db = transmir_basic) {
  # fail early with a clear message instead of silently producing no pairs
  if (!"log-ratio" %in% colnames(mrna_DE)) {
    stop("mrna_DE must contain a `log-ratio` column", call. = FALSE)
  }
  if (!"log-ratio" %in% colnames(mirna_DE)) {
    stop("mirna_DE must contain a `log-ratio` column", call. = FALSE)
  }

  ### get DE TFs & DE miRNAs
  # drop = FALSE keeps a data frame even when mrna_DE has a single column
  tf_DE <- mrna_DE[row.names(mrna_DE) %in% tf_list, , drop = FALSE]
  print("1/3 -- created DE TF & DE miRNA dataframes")

  ### find all possible TF-miRNA pairs
  # logFC: case - control (positive = upregulated, negative = downregulated)
  tf_names <- row.names(tf_DE)
  tf_lfc <- tf_DE[["log-ratio"]]
  mirna_names <- row.names(mirna_DE)
  mirna_lfc <- mirna_DE[["log-ratio"]]

  # all combinations of the given TFs and miRNAs, as character columns
  # (a zero-row data frame when either side is empty)
  all_pairs <- function(tfs, mirnas) {
    expand.grid(TF = tfs, miRNA = mirnas,
                KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE)
  }

  # which() drops NA log-ratios; a bare logical subset would turn each NA
  # into a phantom row named "NA" that ends up in the candidate pairs
  tf_mirna_pairs <- rbind(
    # pairs: TF up, miRNA up
    all_pairs(tf_names[which(tf_lfc > 0)], mirna_names[which(mirna_lfc > 0)]),
    # pairs: TF down, miRNA down
    all_pairs(tf_names[which(tf_lfc < 0)], mirna_names[which(mirna_lfc < 0)])
  )
  print("2/3 -- finished gathering potential TF-miRNA pairs")
  print(paste(nrow(tf_mirna_pairs), "TF-miRNA potential pairs", sep = " "))

  ### look up TF-miRNA pairs in database
  # database miRNA names use "mir" where the DE names use "miR": lower-case
  # characters 5-7 when they are "miR" (names such as "hsa-let-..." are left
  # untouched)
  mirna_short <- sub("^(.{4})miR", "\\1mir", tf_mirna_pairs$miRNA)
  # database names carry no arm suffix: strip a trailing -3p / -5p only
  mirna_short <- sub("-[35]p$", "", mirna_short)
  tf_mirna_pairs$miRNA_short <- mirna_short

  # left join: every candidate pair is kept, TransmiR is NA when not in db
  db_hits <- merge(
    x = tf_mirna_pairs,
    y = transmir_db[, c("TF", "miRNA_short", "TransmiR")],
    by = c("TF", "miRNA_short"),
    all.x = TRUE
  )
  # pair not in the database -> 0
  db_hits$TransmiR[is.na(db_hits$TransmiR)] <- 0
  # number of databases that list the pair (only one database for now)
  db_hits$sum_db_hits_TFmiRNA <- db_hits$TransmiR
  # keep the pairs found in at least one database
  db_hits_filtered <- db_hits[db_hits$sum_db_hits_TFmiRNA >= 1, , drop = FALSE]
  print(paste(nrow(db_hits_filtered), "/", nrow(tf_mirna_pairs), "TF-miRNA pairs found in at least one database", sep = " "))
  print("3/3 -- finished TF-miRNA database search")

  list(tf_DE = tf_DE, mirna_DE = mirna_DE, db_hits = db_hits_filtered)
}
th789/ffl documentation built on Nov. 5, 2019, 10:04 a.m.