data-raw/data_trrust_encode.R

# #!!! the TF-gene database code below is commented out for now
# ###########load TF-gene databases
# ###database 1: TRRUST
# #load
# trrust <- read.table(file = "/Users/than/Dropbox/research/miRNA/ffl/new/databases/TF-gene/trrust_rawdata.human.tsv", sep = '\t', header = FALSE)
# #rename columns
# colnames(trrust) <- c("TF", "gene", "TRRUST_Regulation", "TRRUST_References_PMID")
# #check class of df columns
# table(sapply(trrust, class))
# trrust[ , 1:4] <- apply(trrust[ , 1:4], 2, as.character) #convert all columns from factor to character
# table(sapply(trrust, class))
# #add TRRUST column: 1 for all rows (flags that the TF-gene pair is in the TRRUST database; used in the step that checks whether pos. corr. pairs are in the database)
# trrust$TRRUST <- 1
#
# ###database 2: ENCODE
# #load
# encode <- read.table(file = "/Users/than/Dropbox/research/miRNA/ffl/new/databases/TF-gene/ENCODE_gene_attribute_edges.txt", sep = '\t', header = TRUE)
# #remove first row ("GeneSym, NA, GeneID...")
# encode <- encode[2:nrow(encode), ]
# #check class of df columns
# table(sapply(encode, class))
# encode[ , 1:6] <- apply(encode[ , 1:6], 2, as.character)
# encode$weight <- as.numeric(encode$weight)
# table(sapply(encode, class))
# #keep only 4 columns (the other columns are NA)
# #1. target: TFs (there are 181 unique "target" values and 181 TFs in the dataset, see website)
# #2. target_id
# #3. source: genes
# #4. source_id
# encode <- encode[ , c("target", "source", "target_id", "source_id")]
# colnames(encode) <- c("TF", "gene", "ENCODE_TF_ID", "ENCODE_Gene_ID")
# #add ENCODE column: 1 for all rows (flags that the TF-gene pair is in the ENCODE database; used in the step that checks whether pos. corr. pairs are in the database)
# encode$ENCODE <- 1
# ###########fin
#
#
# ###########create list of TFs
# ###get a list of TFs from TRRUST db
# tf_trrust <- trrust$TF
# tf_trrust <- unique(tf_trrust)
# #788/795 unique TFs are in unique(isoformAnnot$GeneSymbol)
# sum(tf_trrust %in% unique(isoformAnnot$GeneSymbol))
#
# ###get a list of TFs from ENCODE db
# tf_encode <- encode$TF
# tf_encode <- unique(tf_encode)
# #181/181 unique TFs are in unique(isoformAnnot$GeneSymbol)
# sum(tf_encode %in% unique(isoformAnnot$GeneSymbol))
#
# ###130/181 ENCODE TFs are in TRRUST TFs
# length(intersect(tf_trrust, tf_encode))
#
# ###all TFs
# arm2_tf_list <- unique(c(tf_trrust, tf_encode))
# ###########fin
#
# usethis::use_data(trrust, compress = "xz")
# usethis::use_data(encode, compress = "xz")
th789/ffl documentation built on Nov. 5, 2019, 10:04 a.m.