R/TF_motif.R

# This script analyses the TF binding motifs of DE genes
#BiocManager::install ('RcisTarget')
# load the database from https://resources.aertslab.org/cistarget/
#featherURL <- "https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg19/refseq_r45/mc9nr/gene_based/hg19-tss-centered-10kb-7species.mc9nr.feather" 
#download.file(featherURL, destfile=paste (merge_dir, basename(featherURL),sep='/')) 
#----------------------------------------

#' Generate gene list for each pair of comparison
#'
#' @param markers data.frame generated by `find_DE_genes`
#' @param compare_list a list, each element has a 2-element vector
#' @importFrom magrittr %>%
DE_gene_pairwise_list <- function (markers, compare_list, logFC_col='logFC',
                                   gene_col='feature', logFC_thres=0.25,
                                   group1='group', group2='compare_group'){
        com1 <- lapply (compare_list, function(x){x[1:2]})
        com2 <- lapply (com1, function (x){ x[c(2,1)] })
        all_com <- c (com1, com2)
        all_lists <- all_com %>% lapply (function (x){
                markers %>% dplyr::filter (!!as.symbol (group1)==x [1] & 
                                           !!as.symbol (group2)== x[2] & 
                                           !!as.symbol (logFC_col)> 0.25) %>%
                        dplyr::pull (!!as.symbol (gene_col) ) }) 
        names (all_lists) <- lapply (all_com, function(x){paste (x, collapse='_')}) %>% unlist ()
        all_lists <- all_lists [!duplicated (names (all_lists))]
        print (paste (length(all_lists), 'pair of comparisons in total'))
        return (all_lists)
}

#' Compare TF motif enrichment between cell types
#'
#' @description Please ensure 'RcisTarget' is installed
#' @param ... refer to `DE_gene_pairwise_list`
#' @param motif_dir where the TF motifs are stored
#' @param save_dir save the results of the analysis which is computationally
#' intensive
#' @importFrom magrittr %>%
#' @export
TF_enrichment_pairwise <- function (markers, compare_list, motif_dir, save_path, ...){
        if (file.exists (save_path)){
                data.table::fread (save_path)
        }else{
                all_genes <- DE_gene_pairwise_list (markers, compare_list, ...)
                data(motifAnnotations_hgnc, package='RcisTarget')
                paste (motif_dir) %>% RcisTarget::importRankings() -> motifRankings 
                RcisTarget::cisTarget(all_genes, motifRankings,
                                      motifAnnot=motifAnnotations_hgnc)-> rcis
                data.table::fwrite (rcis, save_path)
                return (rcis)
        }
}

#' @param rcis a dataframe created by `RcisTarget::cisTarget`
#' @importFrom magrittr %>%
clean_TF_enrich <- function (rcis){
        # remove the motifs that do not possess a binding TF
        rcis %>% dplyr::filter (TF_highConf != '') -> rcis
        print (paste (nrow (rcis), 'number of entries'))
        # remove the brackets
        rcis$enrichTF <- gsub ('\\(.*\\)\\.$', ';', trimws (rcis$TF_highConf)) %>% trimws () 

        # add '__' to make regular expression easier
        rcis$ID <- paste ('TF', 1:nrow (rcis), '__', sep='')
        rcis %>% dplyr::select (ID, enrichTF) %>% tibble::deframe () -> TF_vec
        TF_vec %>% as.list () %>% lapply (function (x){strsplit (x, ';')[[1]] }) %>% unlist () -> TF_vec
        names (TF_vec) <- gsub ('__[0-9]+$', '__', names (TF_vec))

        expanded_cis <- rcis [match (names (TF_vec), rcis$ID),]
        expanded_cis$enrichTF <- gsub (';', '', TF_vec) %>% trimws ()
        print (paste (nrow (expanded_cis), 'number of entries'))
        return (expanded_cis)
}

#' @importFrom magrittr %>%
append_TF_enrich <- function (markers, rcis, group1_col='group',
                              group2_col='compare_group', gene_col='feature'){
        markers$ID <- paste (markers [, group1_col], markers [, group2_col], markers [, gene_col], sep='_')
        ercis <- clean_TF_enrich (rcis)
        ercis %>% dplyr::group_by (geneSet, enrichTF) %>% 
                dplyr::summarise_if (is.numeric, mean) -> sum_cis
        sum_cis$IDtf <- paste (sum_cis$geneSet, sum_cis$enrichTF, sep='_')
        arrange_mark <- markers [match (sum_cis$IDtf, markers$ID),]
        return (cbind (data.frame (sum_cis), arrange_mark) %>% tidyr::drop_na (ID))
}
Yutong441/TBdev documentation built on Dec. 18, 2021, 8:22 p.m.