#file: DE-method/DEm-3b-arm3b-fn-v1.R
# arm3b: TF-miRNA ---------------------------------------------------------
# arm3b prep --------------------------------------------------------------
#!!! comment out database for now
# ###########load TF-miRNA database
# ###transmir evidence
# #load
# transmir <- read.table(file = "/Users/than/Dropbox/research/miRNA/ffl/new/databases/TF-miRNA/transmir_hsa_evidence.tsv",
# sep = '\t', header = FALSE)
# #add column names
# colnames(transmir) <- c("TF", "miRNA_short", "TSS", "TF_binding_site", "action_type", "SRAID/PMID", "evidence", "tissue", "species")
# #evidence types
# #Based on reliability of the transcriptional regulatory region (promoter region) annotation
# #used, we classified the TF-miRNA regulations derived from ChIP-seq into level 1 and level 2.
# #For level 1, we choose 5'-end of the pre-miRNA or that of the first member in the miRNA
# #cluster as the transcription start site. Next, a window from the 5kb upstream to the
# #1kb downstream of the miRNA TSS was identified as the putative transcriptional regulatory
# #region. Apparently, this definition could cover most of the miRNAs, but suffers from
# #substantial inaccuracy. Therefore for level 2, the miRNA TSS was supported by high-throughput
# #experiments from literature. And the 300bp upstream and 100bp downstream of each miRNA TSS
# #was identified as the putative transcriptional regulatory region. The level 2 TF-miRNA
# #regulations are much more stringent than level 1 TF-miRNA regulations, but cover less miRNAs.
#
# #literature: literature-curated TF-miRNA regulations
#
# # ###keep only blood tissue rows
# # transmir_blood <- transmir[transmir$tissue == "Blood", ]
# # #keep only first two columns (TF & miRNA_short)
# # transmir_blood_basic <- transmir_blood[ , c("TF", "miRNA_short")]
# # transmir_blood_basic <- transmir_blood_basic[!duplicated(transmir_blood_basic), ]
# # #add TransmiR column: 1 for all rows (flags that the TF-miRNA pair is in the transmir database; used in the step that checks whether pos. corr. pairs are in the database)
# # transmir_blood_basic$TransmiR <- 1
#
# #search all tissues
# transmir_basic <- transmir[ , c("TF", "miRNA_short")]
# transmir_basic <- transmir_basic[!duplicated(transmir_basic), ]
# #add TransmiR column: 1 for all rows (flags that the TF-miRNA pair is in the transmir database; used in the step that checks whether pos. corr. pairs are in the database)
# transmir_basic$TransmiR <- 1
# ###########fin
#
#
# ###########create list of TFs
# # arm3b_tf_list <- as.character(unique(transmir_blood_basic$TF))
#
# arm3b_tf_list <- as.character(unique(transmir_basic$TF))
# ###########fin
# arm3b function ----------------------------------------------------------
#' @title DEm_arm3b_tf_miRNA
#' @description identifies valid TF-miRNA (TF regulates miRNA) pairs for FFL.
#'   Candidate pairs are every DE TF combined with every DE miRNA whose
#'   `log-ratio` has the same sign (both > 0 or both < 0). A candidate is kept
#'   only if its (TF, shortened miRNA name) pair is present in `transmir_db`.
#'
#' @param mrna_DE dataframe with DE mRNAs; must contain a `log-ratio` column.
#'   Row names are matched against `tf_list` to pick out the TFs.
#' @param mirna_DE dataframe with DE miRNAs; must contain a `log-ratio` column.
#'   Row names are used as miRNA names (the name shortening assumes the
#'   "miR" token sits at characters 5-7, e.g. "hsa-miR-..." -- confirm against
#'   the naming used by the caller).
#' @param tf_list character vector of TF names. Defaults to the
#'   `arm3b_tf_list` object visible from where this function is defined.
#' @param transmir_db dataframe with columns `TF`, `miRNA_short` and
#'   `TransmiR`. Defaults to the `transmir_basic` object visible from where
#'   this function is defined.
#'
#' @return list of length 3:
#' 1) tf_DE: dataframe of DE TFs
#' 2) mirna_DE: dataframe of DE miRNAs (returned unchanged)
#' 3) db_hits: TF-miRNA pairs that were found in database
DEm_arm3b_tf_miRNA <- function(mrna_DE, mirna_DE,
                               tf_list = arm3b_tf_list,
                               transmir_db = transmir_basic) {
  lfc_col <- "log-ratio"
  if (!lfc_col %in% colnames(mrna_DE) || !lfc_col %in% colnames(mirna_DE)) {
    stop("mrna_DE and mirna_DE must both contain a 'log-ratio' column",
         call. = FALSE)
  }

  # Row names of `de` whose logFC has the requested sign. which() discards NA
  # comparisons, so rows with a missing logFC never become candidate names.
  names_by_sign <- function(de, up) {
    lfc <- de[[lfc_col]]
    keep <- if (up) lfc > 0 else lfc < 0
    row.names(de)[which(keep)]
  }

  ### get DE TFs & DE miRNAs
  # drop = FALSE keeps a data.frame even when mrna_DE has a single column
  tf_DE <- mrna_DE[row.names(mrna_DE) %in% tf_list, , drop = FALSE]
  # DE miRNAs are used as supplied (mirna_DE)
  print("1/3 -- created DE TF & DE miRNA dataframes")

  ### find all possible TF-miRNA pairs
  # logFC: case - control
  # logFC pos: upregulated in t(11;14)
  # logFC neg: downregulated in t(11;14)
  # stringsAsFactors = FALSE yields character columns directly, so no
  # factor -> character conversion is needed after rbind()
  # pairs: TF up, miRNA up
  tf_up_mirna_up_pairs <- expand.grid(
    TF = names_by_sign(tf_DE, up = TRUE),
    miRNA = names_by_sign(mirna_DE, up = TRUE),
    KEEP.OUT.ATTRS = FALSE,
    stringsAsFactors = FALSE
  )
  # pairs: TF down, miRNA down
  tf_down_mirna_down_pairs <- expand.grid(
    TF = names_by_sign(tf_DE, up = FALSE),
    miRNA = names_by_sign(mirna_DE, up = FALSE),
    KEEP.OUT.ATTRS = FALSE,
    stringsAsFactors = FALSE
  )
  # put all pairs together
  tf_mirna_pairs <- rbind(tf_up_mirna_up_pairs, tf_down_mirna_down_pairs)
  # print info
  print("2/3 -- finished gathering potential TF-miRNA pairs")
  print(paste(nrow(tf_mirna_pairs), "TF-miRNA potential pairs", sep = " "))

  ### look up TF-miRNA pairs in database
  # "miRNA_short" starts as a copy of "miRNA" and is rewritten to the
  # database naming style
  mirna_short <- tf_mirna_pairs$miRNA
  # database miRNA names use "mir" where these names use "miR"; names with
  # something else at characters 5-7 (e.g. "let") are left untouched
  is_mir <- substr(mirna_short, 5, 7) == "miR"
  substr(mirna_short[is_mir], 5, 7) <- "mir"
  # database names carry no arm suffix: strip a trailing -3p / -5p only
  mirna_short <- sub("-[35]p$", "", mirna_short)
  tf_mirna_pairs$miRNA_short <- mirna_short

  # left join on (TF, miRNA_short); unique() prevents duplicated database
  # rows from multiplying candidate pairs
  db_keys <- unique(transmir_db[, c("TF", "miRNA_short", "TransmiR")])
  db_hits <- merge(
    x = tf_mirna_pairs,
    y = db_keys,
    by = c("TF", "miRNA_short"),
    all.x = TRUE
  )
  # if TransmiR column is NA (i.e. TF-miRNA pair not in transmir database),
  # change NA value to 0
  db_hits$TransmiR[is.na(db_hits$TransmiR)] <- 0
  # sum_db_hits_TFmiRNA counts the databases that contain the pair (only one
  # database is consulted here, so it equals TransmiR)
  db_hits$sum_db_hits_TFmiRNA <- db_hits$TransmiR
  # keep pairs found in at least one database
  db_hits_filtered <- db_hits[db_hits$sum_db_hits_TFmiRNA >= 1, ]
  print(paste(nrow(db_hits_filtered), "/", nrow(tf_mirna_pairs), "TF-miRNA pairs found in at least one database", sep = " "))
  print("3/3 -- finished TF-miRNA database search")

  return(list(tf_DE = tf_DE, mirna_DE = mirna_DE, db_hits = db_hits_filtered))
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.