#file: DE-method/DEm-2-arm2-fn-v1.R
# arm2: TF-gene -----------------------------------------------------------
# arm2 prep ---------------------------------------------------------------
# #!!! comment out TF-gene databases for now
# ###########load TF-gene databases
# ###database 1: TRRUST
# #load
# trrust <- read.table(file = "/Users/than/Dropbox/research/miRNA/ffl/new/databases/TF-gene/trrust_rawdata.human.tsv", sep = '\t', header = FALSE)
# #rename columns
# colnames(trrust) <- c("TF", "gene", "TRRUST_Regulation", "TRRUST_References_PMID")
# #check class of df columns
# table(sapply(trrust, class))
# trrust[ , 1:4] <- apply(trrust[ , 1:4], 2, as.character) #convert all columns from factor to character
# table(sapply(trrust, class))
# #add TRRUST column: 1 for all rows (represents whether TF-gene pair is in the TRRUST database; used when in step when searching whether pos. corr. pairs are in database)
# trrust$TRRUST <- 1
#
# ###database 2: ENCODE
# #load
# encode <- read.table(file = "/Users/than/Dropbox/research/miRNA/ffl/new/databases/TF-gene/ENCODE_gene_attribute_edges.txt", sep = '\t', header = TRUE)
# #remove first row ("GeneSym, NA, GeneID...")
# encode <- encode[2:nrow(encode), ]
# #check class of df columns
# table(sapply(encode, class))
# encode[ , 1:6] <- apply(encode[ , 1:6], 2, as.character)
# encode$weight <- as.numeric(encode$weight)
# table(sapply(encode, class))
# #keep only 4 columns (other columns are na)
# #1. target: TFs (there are 181 unique "target" values and 181 TFs in the dataset, see website)
# #2. target_id
# #3. source: genes
# #4. source_id
# encode <- encode[ , c("target", "source", "target_id", "source_id")]
# colnames(encode) <- c("TF", "gene", "ENCODE_TF_ID", "ENCODE_Gene_ID")
# #add ENCODE column: 1 for all rows (represents whether TF-gene pair is in the ENCODE database; used when in step when searching whether pos. corr. pairs are in database)
# encode$ENCODE <- 1
# ###########fin
#
#
# ###########create list of TFs
# ###get a list of TFs from TRRUST db
# tf_trrust <- trrust$TF
# tf_trrust <- unique(tf_trrust)
# #788/795 unique TFs are in unique(isoformAnnot$GeneSymbol)
# sum(tf_trrust %in% unique(isoformAnnot$GeneSymbol))
#
# ###get a list of TFs from ENCODE db
# tf_encode <- encode$TF
# tf_encode <- unique(tf_encode)
# #181/181 unique TFs are in unique(isoformAnnot$GeneSymbol)
# sum(tf_encode %in% unique(isoformAnnot$GeneSymbol))
#
# ###130/181 ENCODE TFs are in TRRUST TFs
# length(intersect(tf_trrust, tf_encode))
#
# ###all TFs
# arm2_tf_list <- unique(c(tf_trrust, tf_encode))
# ###########fin
#
# arm2 function -----------------------------------------------------------
###########arm2_tf_gene function
#' @title DEm_arm2_tf_gene
#' @description identifies valid TF-gene pairs for FFL
#' and that are found in databases
#'
#' @param mrna_DE dataframe of DE mRNAs
#'
#' @return list of length 3:
#' 1) tf_DE: dataframe of DE TFs
#' 2) mRNA_DE: dataframe of DE genes
#' 3) db_hits: dataframe of TF-miRNA pairs that were found in databases
DEm_arm2_tf_gene <- function(mrna_DE){
#####DE TFs
tf_DE <- mrna_DE[row.names(mrna_DE) %in% arm2_tf_list, ]
#for the DE columns, change "(gene)" to "(TF)"
names(tf_DE)[names(tf_DE) == "log-ratio(gene)"] <- "log-ratio(TF)"
names(tf_DE)[names(tf_DE) == "P-Value(gene)"] <- "P-Value(TF)"
names(tf_DE)[names(tf_DE) == "P-adjust(gene)"] <- "P-adjust(TF)"
names(tf_DE)[names(tf_DE) == "mean_case(gene)"] <- "mean_case(TF)"
names(tf_DE)[names(tf_DE) == "mean_control(gene)"] <- "mean_control(TF)"
#####DE mRNAs
#use mrna_DE itself
print("1/3 -- created DE TF & DE gene dataframes")
#####find all possible TF-gene pairs
#logFC: case - control
#logFC pos: upregulated in case group
#logFC neg: downregulated in case group
#pairs: TF up, gene up
tf_up <- row.names(tf_DE[tf_DE$`log-ratio` > 0, ]) #upregulated TFs
mrna_up <- row.names(mrna_DE[mrna_DE$`log-ratio` > 0, ]) #upregulated genes
tf_up_mrna_up_pairs <- expand.grid(tf_up, mrna_up) #find all possible pairs
colnames(tf_up_mrna_up_pairs) <- c("TF", "gene")
#pairs: TF down, gene down
tf_down <- row.names(tf_DE[tf_DE$`log-ratio` < 0, ]) #downregulated TFs
mrna_down <- row.names(mrna_DE[mrna_DE$`log-ratio` < 0, ]) #downregulated genes
tf_down_mrna_down_pairs <- expand.grid(tf_down, mrna_down) #find all possible pairs
colnames(tf_down_mrna_down_pairs) <- c("TF", "gene")
#put all pairs together
tf_gene_pairs <- rbind(tf_up_mrna_up_pairs, tf_down_mrna_down_pairs)
tf_gene_pairs[ , 1:2] <- apply(tf_gene_pairs[ , 1:2], 2, as.character)
#print info
print("2/3 -- finished gathering potential TF-gene pairs")
print(paste(dim(tf_gene_pairs)[1], "potential TF-gene pairs", sep = " "))
#####search TF-gene pairs in database
#1. search TRRUST database
db_hits <- merge(tf_gene_pairs, trrust[ , c("TF", "gene", "TRRUST")], by = c("TF", "gene"), all.x = TRUE)
#if TRRUST column is NA (i.e. TF-gene pair not in TRRUST database), change NA value to 0 (integers are used to calculate sum_db_hits_TFgene later)
db_hits$TRRUST[is.na(db_hits$TRRUST)] <- 0
#2. search ENCODE database
db_hits <- merge(db_hits, encode[, c("TF", "gene", "ENCODE")], by = c("TF", "gene"), all.x = TRUE)
#if ENCODE column is NA (i.e. TF-gene pair not in ENCODE database), change NA value to 0 (integers are used to calculate sum_db_hits_TFgene later)
db_hits$ENCODE[is.na(db_hits$ENCODE)] <- 0
#3. create sum_db_hits_TFgene column (indicates: TF-gene pair found in X databases)
db_hits$sum_db_hits_TFgene <- db_hits$TRRUST + db_hits$ENCODE
#filter by sum_db_hits_TFgene column
db_hits_filtered <- db_hits[db_hits$sum_db_hits_TFgene >= 1, ]
print(paste(dim(db_hits_filtered)[1], "/", dim(tf_gene_pairs)[1], "TF-gene pairs found in at least one database", sep = " "))
print("3/3 -- finished database searches")
#####add DE columns
#add TF DE columns
tf_DE_copy <- tf_DE
tf_DE_copy$TF <- row.names(tf_DE_copy)
db_hits_filtered <- merge(x = db_hits_filtered,
y = tf_DE_copy[c("TF", "log-ratio(TF)", "P-Value(TF)", "P-adjust(TF)", "mean_case(TF)", "mean_control(TF)")],
by = "TF",
all.x = TRUE)
#add gene DE columns
mrna_DE_copy <- mrna_DE
mrna_DE_copy$gene <- row.names(mrna_DE_copy)
db_hits_filtered <- merge(x = db_hits_filtered,
y = mrna_DE_copy[c("gene", "log-ratio(gene)", "P-Value(gene)", "P-adjust(gene)", "mean_case(gene)", "mean_control(gene)")],
by = "gene",
all.x = TRUE)
return(list(tf_DE = tf_DE, mrna_DE = mrna_DE, db_hits = db_hits_filtered))
}
###########fin
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.