R/motif.TF.Plots.R

Defines functions TF.rank.plot motif.enrichment.plot

Documented in motif.enrichment.plot TF.rank.plot

#' motif.enrichment.plot to plot bar plots showing motif enrichment ORs and  95\% confidence interval for ORs
#' @description 
#' motif.enrichment.plot to plot bar plots showing motif enrichment ORs and 
#' 95\% confidence interval for ORs. Option motif.enrichment can be a data frame 
#' generated by \code{\link{get.enriched.motif}} or a path of XX.csv saved by the 
#' same function.
#' @param motif.enrichment A data frame or a file path of get.enriched.motif output
#'motif.enrichment.csv file.
#' @param significant A list to select subset of motif. Default is NULL.
#' @param dir.out A path specify the directory to which the figures will be saved. 
#'Current directory is default.
#' @param save A logic. If true (default), figure will be saved to dir.out.
#' @param label A character. Labels the outputs figure.
#' @param title Plot title. Default: no title
#' @param width Plot width
#' @param height Plot height. If NULL a default value will be calculated
#' @param summary Create a summary table along with the plot, it is necessary 
#'to add two new columns to object (NumOfProbes and PercentageOfProbes)
#' @return A figure shows the enrichment level for selected motifs.
#' @details motif.enrichment If input data.frame object, it should contain "motif",
#' "OR", "lowerOR", "upperOR" columns. motif specifies name of motif; 
#' OR specifies Odds Ratio, lowerOR specifies lower boundary of OR (95%) ; 
#' upperOR specifies upper boundary of OR(95%).
#' @details significant A list used to select subset of motif.enrichment by the 
#'cutoff of OR, lowerOR, upperOR. significant=list(OR=1). More than one cutoff 
#'can be specified such as significant = list(OR=1, lowerOR=1,upperOR=4) 
#' @importFrom ggplot2 aes ggplot geom_point geom_errorbar coord_flip geom_abline
#' @usage 
#' motif.enrichment.plot(motif.enrichment, 
#'                       significant = NULL, 
#'                       dir.out ="./", 
#'                       save = TRUE,
#'                       label = NULL,
#'                       title = NULL, 
#'                       width = 10, 
#'                       height = NULL,
#'                       summary = FALSE)
#' @author 
#' Lijing Yao (creator: lijingya@usc.edu) 
#' @references 
#' Yao, Lijing, et al. "Inferring regulatory element landscapes and transcription 
#' factor networks from cancer methylomes." Genome biology 16.1 (2015): 1.
#' @export
#' @importFrom grid gpar 
#' @importFrom gridExtra grid.arrange arrangeGrob
#' @examples
#' motif.enrichment <- data.frame(motif = c("TP53","NR3C1","E2F1","EBF1","RFX5","ZNF143", "CTCF"),
#'                                OR = c(19.33,4.83,1, 4.18, 3.67,3.03,2.49),
#'                                lowerOR = c(10,3,1.09,1.9,1.5,1.9, 0.82),
#'                                upperOR = c(23,5,3,7,6,5,5),
#'                                stringsAsFactors = FALSE)
#' motif.enrichment.plot(motif.enrichment = motif.enrichment,
#'                       significant = list(OR = 3),
#'                       label = "hypo", save = FALSE)
#' motif.enrichment.plot(motif.enrichment = motif.enrichment,
#'                       significant = list(OR = 3),
#'                       label = "hypo", 
#'                       title = "OR for paired probes hypomethylated in Mutant vs WT",
#'                       save = FALSE)
#' motif.enrichment <- data.frame(motif = c("TP53","NR3C1","E2F1","EBF1","RFX5","ZNF143", "CTCF"),
#'                                OR = c(19.33,4.83,1, 4.18, 3.67,3.03,2.49),
#'                                lowerOR = c(10,3,1.09,1.9,1.5,1.5, 0.82),
#'                                upperOR = c(23,5,3,7,6,5,5),
#'                                NumOfProbes = c(23,5,3,7,6,5,5),
#'                                PercentageOfProbes = c(0.23,0.05,0.03,0.07,0.06,0.05,0.05),
#'                                stringsAsFactors=FALSE)
#' motif.enrichment.plot(motif.enrichment = motif.enrichment,
#'                       significant = list(OR = 3),
#'                       label = "hypo", save = FALSE)
#' motif.enrichment.plot(motif.enrichment = motif.enrichment,
#'                       significant = list(OR = 3),
#'                       label = "hypo", 
#'                       summary = TRUE,
#'                       title = "OR for paired probes hypomethylated in Mutant vs WT",
#'                       save = TRUE)
motif.enrichment.plot <- function(motif.enrichment, 
                                  significant = NULL, 
                                  dir.out ="./", 
                                  save = TRUE,
                                  label = NULL,
                                  title = NULL, 
                                  width = 10, 
                                  height = NULL,
                                  summary = FALSE){
  if(missing(motif.enrichment)) stop("motif.enrichment is missing.")
  if(is.character(motif.enrichment)){
    motif.enrichment <- read.csv(motif.enrichment, stringsAsFactors=FALSE)
  }
  if(!is.null(significant)){
    for(i in names(significant)){
      motif.enrichment <- motif.enrichment[motif.enrichment[,i] > significant[[i]],]
    }
  } 
  if(nrow(motif.enrichment) == 0) return(NULL)
  motif.enrichment <- motif.enrichment[order(motif.enrichment$lowerOR,decreasing = TRUE),]
  
  if(summary){
    or.col <- paste0(round(motif.enrichment$OR,digits = 2), 
                     " (", round(motif.enrichment$lowerOR,digits = 2),"-", 
                     round(motif.enrichment$upperOR,digits = 2),")")
    nb.idx <- grep("NumOf",colnames(motif.enrichment))
    if("PercentageOfProbes" %in% colnames(motif.enrichment)){
      probe.col <- paste0(motif.enrichment[,nb.idx,drop=T],
                          " (", round(100 * motif.enrichment$PercentageOfProbes, digits = 2),"%)")
    } else {
      probe.col <- paste0(motif.enrichment[,nb.idx])
    }
    lab <- data.frame(x = factor(c("",as.character(motif.enrichment$motif)), 
                                 levels = rev(c("",as.character(motif.enrichment$motif)))),
                      y = rep(c(1,2,3),each=length(motif.enrichment$motif) + 1),
                      z = c("Motif",gsub("_HUMAN.H11MO.*","",as.character(motif.enrichment$motif)),
                            "Odds ratio \n (95% CI)",
                            or.col, 
                            ifelse("PercentageOfProbes"  %in% colnames(motif.enrichment), "# probes \n(% of paired)"," # regions"),
                            probe.col)
    )
    
    data_table <-  ggplot(lab, aes_string(x = 'y', y = 'x', label = format('z', nsmall = 1))) +
      theme_minimal() + 
      geom_text(size = 3.5, hjust=0, vjust=0.5) +
      geom_hline(aes(yintercept=c(nrow(motif.enrichment) - 0.65))) + 
      labs(x="",y="") +
      coord_cartesian(xlim=c(1,3.8)) +
      theme(panel.grid.major = element_blank(), 
            panel.grid.minor.x  = element_blank(),panel.background =element_blank(), 
            legend.position = "none",
            panel.border = element_blank(), 
            axis.text.x = element_text(colour="white"),#element_blank(),
            axis.text.y = element_blank(), 
            axis.ticks = element_line(colour="white"),#element_blank(),
            plot.margin = unit(c(0,0,0,0), "lines"))
    
    motif.enrichment$motif <- factor(motif.enrichment$motif,
                                     levels=as.character(motif.enrichment$motif[nrow(motif.enrichment):1]))
    limits <- aes_string(ymax = 'upperOR', ymin = 'lowerOR')
    motif.enrichment$probes <- NULL
    motif.enrichment <- rbind(motif.enrichment,NA)
    P <- ggplot(motif.enrichment, aes_string(x='motif', y='OR')) +
      geom_point() +
      geom_errorbar(limits, width=0.3) +
      coord_flip() +
      geom_abline(intercept = 1, slope = 0, linetype = "3313")+
      theme_bw() +
      theme(panel.grid.major = element_blank()) +
      xlab("") + ylab("Odds Ratio") +
      ggtitle(label = NULL, subtitle = NULL) + 
      scale_y_continuous(breaks=c(1,pretty(motif.enrichment$OR, n = 5))) + 
      theme(axis.text.y = element_blank(), 
            axis.ticks = element_line(colour="white"),#element_blank(),
            plot.margin = unit(c(0,0,0,0), "lines"))
    
    suppressWarnings({
      P <-  arrangeGrob(data_table, P, ncol=2,
                        widths = c(2,2), 
                        heights = c(0.95,0.05),
                        top = title) 
      
    })    
  } else {
    
    motif.enrichment$motif <- factor(motif.enrichment$motif,
                                     levels = as.character(motif.enrichment$motif[nrow(motif.enrichment):1]))
    limits <- aes_string(ymax = 'upperOR', ymin = 'lowerOR')
    P <- ggplot(motif.enrichment, aes_string(x = 'motif', y = 'OR')) +
      geom_point() +
      geom_errorbar(limits, width = 0.3) +
      coord_flip() +
      geom_abline(intercept = 1, slope = 0, linetype = "3313")+
      theme_bw() +
      theme(panel.grid.major = element_blank()) +
      xlab("Motifs") + ylab("Odds Ratio") +
      ggtitle(label = title, subtitle = NULL) + 
      scale_y_continuous(breaks = c(1,pretty(motif.enrichment$OR, n = 5)))
  }
  if(save) {
    dir.create(dir.out, recursive = TRUE, showWarnings = FALSE)
    ggsave(filename = sprintf("%s/%s.motif.enrichment.pdf",dir.out,label),
           useDingbats = FALSE, 
           plot = P,
           dpi = 320,
           width = width,
           limitsize = FALSE,
           height = ifelse(is.null(height), 3 * round(nrow(motif.enrichment)/8),height))
    return()
  }
  if(summary) grid.arrange(P)
  return(P)
}



#' TF.rank.plot to plot the scores (-log10(P value)) which assess the correlation between 
#' TF expression and average DNA methylation at motif sites.
#' @description 
#' TF.rank.plot is a function to plot the scores (-log10(P value)) which assess the
#' correlation between TF expression and average DNA methylation at motif sites. The the motif 
#' relevant TF and top3 TFs will be labeled in a different color.
#'@importFrom ggplot2 scale_color_manual geom_vline geom_text position_jitter 
#'@importFrom ggplot2 annotation_custom unit ggplot_gtable ggplot_build
#'@importFrom ggrepel geom_text_repel
#'@param motif.pvalue A matrix or a path specifying location of  "XXX.with.motif.pvalue.rda"
#'which is output of getTF. 
#'@param motif A vector of characters specify the motif to plot
#'@param TF.label A list shows the label for each motif. If TF.label is not specified, 
#'the motif relevant TF and top3 TF will be labeled.
#'@param dir.out A path specify the directory to which the figures will be saved. 
#'Current directory is default.
#'@param save A logic. If true (default), figure will be saved to dir.out
#'@param title Tite title (the motif will still be added to the title)
#'@param cores A interger which defines the number of cores to be used in parallel process. 
#'Default is 1: no parallel process.
#'@return A plot shows the score (-log(P value)) of association between TF
#'expression and DNA methylation at sites of a certain motif.
#'@export
#' @author Lijing Yao (maintainer: lijingya@usc.edu)
#' @importFrom graphics plot
#' @importFrom ggplot2 ggsave
#' @importFrom plyr alply
#'@examples
#' library(ELMER)
#' data <- tryCatch(ELMER:::getdata("elmer.data.example"), error = function(e) {
#'   message(e)
#'   data(elmer.data.example, envir = environment())
#' })
#' enriched.motif <- list("P53_HUMAN.H11MO.0.A"= c("cg00329272", "cg10097755", "cg08928189",
#'                                  "cg17153775", "cg21156590", "cg19749688", "cg12590404",
#'                                  "cg24517858", "cg00329272", "cg09010107", "cg15386853",
#'                                  "cg10097755", "cg09247779", "cg09181054"))
#' TF <- get.TFs(data, 
#'               enriched.motif, 
#'               group.col = "definition",
#'               group1 = "Primary solid Tumor", 
#'               group2 = "Solid Tissue Normal",
#'               TFs = data.frame(
#'                      external_gene_name=c("TP53","TP63","TP73"),
#'                      ensembl_gene_id= c("ENSG00000141510",
#'                                         "ENSG00000073282",
#'                                         "ENSG00000078900"),
#'                      stringsAsFactors = FALSE),
#'              label="hypo")
#' TF.meth.cor <- get(load("getTF.hypo.TFs.with.motif.pvalue.rda"))            
#' TF.rank.plot(motif.pvalue=TF.meth.cor,  
#'             motif="P53_HUMAN.H11MO.0.A", 
#'             TF.label=createMotifRelevantTfs("subfamily")["P53_HUMAN.H11MO.0.A"],
#'             save=TRUE)
#' TF.rank.plot(motif.pvalue=TF.meth.cor,  
#'             motif="P53_HUMAN.H11MO.0.A", 
#'             save=TRUE)
#' # Same as above
#' TF.rank.plot(motif.pvalue=TF.meth.cor,  
#'             motif="P53_HUMAN.H11MO.0.A", 
#'             dir.out = "TFplots",
#'             TF.label=createMotifRelevantTfs("family")["P53_HUMAN.H11MO.0.A"],
#'             save=TRUE)
TF.rank.plot <- function(motif.pvalue, 
                         motif, 
                         title = NULL,
                         TF.label = NULL, 
                         dir.out = "./", 
                         save = TRUE,
                         cores = 1){
  if(missing(motif.pvalue)) stop("motif.pvalue should be specified.")
  if(missing(motif)) stop("Please specify which motif you want to plot.")
  if(!all(motif %in% colnames(motif.pvalue))) {
    print(knitr::kable(sort(colnames(motif.pvalue)), col.names = "motifs"))
    stop("One of the motifs does not match. Select from the list above")
  }
  TF.sublabel <- NULL
  if(is.character(motif.pvalue)) {
    motif.pvalue <- get(load(motif.pvalue)) # The data is in the one and only variable
  }
  if(is.null(TF.label)){
    motif.relavent.TFs <- createMotifRelevantTfs()
    TF.label <- motif.relavent.TFs[motif]
    motif.relavent.TFs <- createMotifRelevantTfs("subfamily")
    TF.sublabel <- motif.relavent.TFs[motif]
    specify <- "No"
  } else {
    specify <- "Yes"
  }
  significant <- floor(0.05 * nrow(motif.pvalue))
  motif.pvalue <- -log10(motif.pvalue)
  
  parallel <- FALSE
  if (cores > 1){
    if (cores > detectCores()) cores <- detectCores()
    registerDoParallel(cores)
    parallel = TRUE
  }
  
  Plots <- alply(motif, 1, function(i){
    df <- data.frame(pvalue = motif.pvalue[,i], 
                     Gene = rownames(motif.pvalue), 
                     stringAsFactors = FALSE)
    df <- df[order(df$pvalue, decreasing = TRUE),]
    df$rank <- 1:nrow(df)
    
    df$label <- "None"
    df$label[df$rank %in% 1:3] <- "Top 3"
    
    
    # TF in the family
    if(!is.null(TF.label)){
      TF.family <- TF.label[[i]]
      df$label[df$Gene %in% TF.family] <- "Same family"
    }    
    if(!is.null(TF.sublabel)){
      # TF in the subfamily
      TF.subfamily <- TF.sublabel[[i]]
      df$label[df$Gene %in% TF.subfamily] <- "Same subfamily"
    }
    df.label <- data.frame(pvalue = df$pvalue[df$label %in% c("Same family","Same subfamily","Top 3")], 
                           text = as.character(df$Gene[df$label %in% c("Same family","Same subfamily","Top 3")]), 
                           x = which(df$label %in% c("Same family","Same subfamily","Top 3")), 
                           stringsAsFactors = FALSE)
    
    highlight.top3 <- df[df$label %in% c("Top 3"),]
    highlight.family <- df[df$label %in% c("Same family"),]
    highlight.subfamily <- df[df$label %in% c("Same subfamily"),]
    df$label <- factor(df$label, levels = c("None","Same family","Same subfamily","Top 3"))
    P <- ggplot(df, aes_string(x = 'rank', 
                               y = 'pvalue', 
                               color = 'label')) +
      scale_color_manual(name = "TF classification",values = c("None" = "black",
                                                               "Top 3" = "red",
                                                               "Same family" = "orange",
                                                               "Same subfamily" = "lightblue")) +
      geom_vline(xintercept = significant, linetype = "3313") +
      geom_point() +
      theme_bw() +
      theme(panel.grid.major = element_blank(),
            panel.grid.minor = element_blank())+
      theme(legend.position="top") +
      labs(x = "Rank", 
           y ="-log10(corrected P-value)", 
           title = ifelse(is.null(title),paste0("Motif: ",gsub("_HUMAN.H11MO.*","",i)),
                          paste0(title, " (", gsub("_HUMAN.H11MO.*","",i),")"))) + 
      geom_point(data = highlight.top3, aes_string(x = 'rank', y = 'pvalue')) +
      geom_point(data = highlight.family, aes_string(x = 'rank', y = 'pvalue')) +
      geom_point(data = highlight.subfamily, aes_string(x = 'rank', y = 'pvalue'))
    
    df$Gene <- as.character(df$Gene)
    df$Gene[df$label %in% "None"] <- "" 
    P <- P + geom_text_repel(
      data = df,
      aes_string(label = 'Gene'),
      min.segment.length = unit(0.0, "lines"),
      size = 3,
      segment.alpha = 0.5,
      segment.color = "gray",
      nudge_x = 10,
      show.legend = FALSE,
      fontface = 'bold', color = 'black',
      box.padding = unit(0.8, "lines"),
      point.padding = unit(1.0, "lines")
    ) 
    
    if(save){
      dir.create(dir.out, showWarnings = FALSE,recursive = TRUE)
      file <- sprintf("%s/%s.TFrankPlot.pdf", dir.out, i)
      message("Saving plot as: ", file)
      ggsave(P, filename = file, height = 8, width = 10)
    }
    P
  },.progress = "time",.parallel = parallel)
  
  names(Plots) <- motif
  
  if(!save) return(Plots)
}

Try the ELMER package in your browser

Any scripts or data that you put into this service are public.

ELMER documentation built on Nov. 8, 2020, 4:59 p.m.