R/profiles_normalized_by_control.R

Defines functions profiles_normalized_by_control

Documented in profiles_normalized_by_control

#' profile plots of signal normalized to control
#'
#' Control normalized signal are plotted for all genes or specific list of
#' genes.
#' @author pooja sethiya
#' @param feature_txDb A TxDb object of gene feature file, generated by
#'   \code{GenomicFeatures::makeTxDbFromGFF()}
#' @param bw_test A character vector containing name of the GRanges object of bw
#'   file.GRanges object for bw_files can be generated by: \code{
#'   rtracklayer::import()}
#' @param bw_control A character vector containing name of the GRanges object of bw
#'   file to normalize the bw_test. GRanges object for bw_files can be generated by: \code{
#'   rtracklayer::import()}
#' @param genelist_1 A tibble with two columns \code{1. gene_id and 2.Expression
#'   value} without header, to subset genes from the feature file.
#'   \code{default:NULL}
#' @param genelist_2 A tibble with either one or two columns to be considered as
#'   control genes.  \code{1. gene_id and/or 2.Expression value} without header,
#'   to subset genes from the feature file. \code{default:NULL}
#' @param output_name A character vector containing name of the output
#'   plot.\code{default:Sample}
#' @param top_line Logical, whether to plot profile with average signal as line
#'   plot at the top of plot. \code{default: TRUE}
#' @param ymax Numeric, y-axis max limit for lineplot \code{default:6}
#' @param ymin Numeric, y-axis minimum limit for lineplot \code{default:1}
#'
#' @import magrittr
#' @import png
#' @importFrom rtracklayer import
#' @importFrom GenomicFeatures promoters
#' @importFrom broom tidy
#' @importFrom dplyr mutate
#' @importFrom purrr map
#' @import EnrichedHeatmap
#' @import ComplexHeatmap
#' @importFrom  grid unit
#' @importFrom grid gpar
#' @importFrom stringr str_detect
#' @importFrom tidyr tibble
#' @importFrom purrr map2
#'
#' @return An image of two heatmaps normalized to control bw. \itemize{
#'   \item\code{Heatmap_list1:} profile of genelist_1 ordered by expression
#'   values (high to low). \item\code{Heatmap_list1:} profile of genelist_2. }
#' @export
#'
#' @examples
#' \dontrun{
#'
#'  # Load feature file from system data, to run example data
#'  feature_txDb <- AnnotationDbi::loadDb(system.file("extdata/sqllite/an_feature_file_s10_m04_r07.sqlite" , package = "FungalSporeAnalysis"))
#'
#'  # Or load feature file as TxDb
#'  feature_txDb <- GenomicFeatures::makeTxDbFromGFF("an_feature_file_s10_m04_r07.gff")
#'
#'  # Load bw files as GRanges object
#'  H3AC_veA_wt_spore <- rtracklayer::import.bw("H3AC_veA_wt_spore_mix22_CACAGTTGGT_normalized_repeat.bw")
#'  H3_an_spore <- rtracklayer::import.bw("H3_an_spore_spore_mix22_CACAGTTGGT_normalized_repeat.bw")
#'
#'  # Load GRanges object from system data, to run example data
#'  data_files <- system.file("extdata/sysdata.rda" , package = "FungalSporeAnalysis")
#'  load(data_files)
#'
#'  # Load gene_lists for specific genes
#'  genelist_1 <- readr::read_delim(system.file("extdata/genesets/an_spore_pol2.txt" , package = "FungalSporeAnalysis"), delim="\t", col_names = FALSE)
#'  genelist_2 <- readr::read_delim(system.file("extdata/genesets/an_spore_pol2_controlgenes.txt", package = "FungalSporeAnalysis"), delim="\t", col_names = FALSE)
#'
#'  #################################
#'  profiles_normalized_by_control(feature_txDb = feature_txDb,bw_test = "H3AC_veA_wt_spore", bw_control = "H3_an_spore",genelist_1 = genelist_1,genelist_2 = genelist_2, ymax = 3.8,ymin=0.5, output_name = "H3Ac_veA_wt_spore")
#'
#' }
profiles_normalized_by_control <- function(feature_txDb,bw_test,bw_control,genelist_1=NULL,genelist_2=NULL,ymax=6,ymin=1, output_name="Sample", top_line=TRUE){

          feature_gr <- GenomicFeatures::genes(feature_txDb)

          #--- provide the genelist data with expression value for the list one

          if(is.null(genelist_1)==FALSE){

                    genelist_1 <- genelist_1 %>% dplyr::arrange(desc(X2))

                    # order by genelist
                    sub_feature_gr_1 <-  subset(feature_gr, feature_gr$gene_id %in% genelist_1$X1)
                    sub_feature_gr_1 <- sub_feature_gr_1[match(genelist_1$X1,sub_feature_gr_1$gene_id),]
                    cluster_rows <- FALSE

                    if(is.null(genelist_2)==FALSE){#--- get second list co-ordinates

                              sub_feature_gr_2 <-  subset(feature_gr, feature_gr$gene_id %in% genelist_2$X1)
                              sub_feature_gr <- list(sub_feature_gr_1, sub_feature_gr_2)
                    }
                    else{
                              sub_feature_gr <- list(sub_feature_gr_1)
                    }
          }
          else{
                    if(is.null(genelist_2)==TRUE){
                              sub_feature_gr <- list(feature_gr)
                              cluster_rows <- TRUE
                    }
                    else{
                            print("gene_list_1 is NULL, so gene_list_2 should also be NULL!!")
                            exit()
                    }
          }


          if(stringr::str_detect(string = bw_test,paste(c("_", "-"),collapse = '|'))==TRUE){
                    names(sub_feature_gr) <- paste(gsub(pattern = "-.*|_.*", replacement="",x=bw_test),"_list",seq(1:length(sub_feature_gr)), sep="" )

          }
          else{
                    names(sub_feature_gr) <- paste(bw_test,"_list",seq(1:length(sub_feature_gr)), sep="")
          }

          gene_lists <- tidyr::tibble(name=names(sub_feature_gr), data=sub_feature_gr)
          print(gene_lists)

          #--- Load bw objects
          bw_file_test <- get(bw_test)
          bw_file_control <- get(bw_control)

          #--- generate normalised matrix in tidy way
          dd <-  gene_lists %>%
                    dplyr::mutate(mat_test=purrr::map(data, function(i){
                              nn <- EnrichedHeatmap::normalizeToMatrix(bw_file_test, i, value_column = "score",background = 0,
                                                                       smooth = TRUE,extend = c(1000))
                              nn[nn<0]=0
                              return(nn)
                    })) %>%

                    dplyr::mutate(mat_control=purrr::map(data, function(i){
                              nn <- EnrichedHeatmap::normalizeToMatrix(bw_file_control, i, value_column = "score",background = 0,
                                                                       smooth = TRUE,extend = c(1000))
                              nn[nn<0]=0
                              return(nn)
                    }))

          #--- normalise by h3
          message("Normalizing to control...")
          dd2 <- dd  %>% dplyr::mutate(norm_mat = purrr::map2(mat_test,mat_control, function(x,y){
                              mm = (x+0.01) / (y+0.01)
                              return(mm)
                              } ))

          print(dd2)

          if(top_line==TRUE){
                    top_annotation = ComplexHeatmap::HeatmapAnnotation(
                              lines = EnrichedHeatmap::anno_enriched(gp = grid::gpar(fontsize=12),
                                                                     ylim=c(ymin, ymax),
                                                                     # yaxis_side = "right",
                                                                     # yaxis_facing = "outside",
                                                                     # yaxis_gp = grid::gpar(fontsize = 10, lwd=1.5)
                                                                     axis_param = list(gp=grid::gpar(fontsize = 10, lwd=1.5),
                                                                                       side="right",
                                                                                       facing="outside")))
          }
          else{
                    top_annotation = NULL
          }


          get_enrichment_heatmap_list <- function(x, names, titles, ...) {

                    ll <- length(x)

                    ## first heatmap
                    ehml <- EnrichedHeatmap::EnrichedHeatmap(mat = x[[1]], name = names[[1]], column_title = titles[[1]], show_heatmap_legend = T,
                                            col=circlize::colorRamp2(quantile(x[[1]], c(0.1,0.5,0.6,0.9,0.99)),
                                                           col=c("#feebe2","#fcc5c0","#fa9fb5","#c51b8a","#7a0177")),
                                            use_raster = TRUE, ...)

                    ## several other heatmaps if length of x > 1.
                    if (ll > 1) {
                              for (i in 2:ll) {
                                        print(i)
                                        ehml <- ehml +
                                                  EnrichedHeatmap::EnrichedHeatmap(
                                                                      mat = x[[i]],
                                                                      col=circlize::colorRamp2(quantile(x[[1]], c(0.1,0.5,0.6,0.9,0.99)),
                                                                                     col=c("#feebe2","#fcc5c0","#fa9fb5","#c51b8a","#7a0177")),
                                                                      name = ifelse(length(names) >= i, names[i], "NA"),
                                                                      use_raster = TRUE,
                                                                      column_title = ifelse(length(titles) >= i, titles[i], "NA"),
                                                                      show_heatmap_legend = ifelse(length(names) >= i, TRUE, FALSE), ...
                                                            ) ## legend will be shown only if the name is given for a heatmap.
                              }
                    }

                    return(ehml)
          }

          ehm_list <- get_enrichment_heatmap_list(x = dd2$norm_mat,names = dd2$name,
                                                  titles = dd2$name,
                                                  cluster_rows = cluster_rows,
                                                  row_order=NULL,
                                                  show_row_names = FALSE,
                                                  axis_name_rot = 90,
                                                  heatmap_legend_param = list(color_bar = "continuous",legend_direction="horizontal", legend_width = grid::unit(3, "cm"),
                                                                              title_position = "topcenter",labels_gp = grid::gpar(fontsize=10)),
                                                  axis_name = c("-1kb","TSS","TES", "+1kb"),
                                                  axis_name_gp = grid::gpar(fontsize=12),
                                                  top_annotation = top_annotation
                                                  )

          # row_order_list = row_order(ehm_list)
          # list_1$H3K4me3 <- list_1[row_order_list,]$X1


          message("plotting...")
          png(file=paste(output_name, lengths(gene_lists[1]),"normalized_hm.png", sep="_"),width=nrow(gene_lists)*2,height=5,pointsize = 10, res=300,units = "in")
          ComplexHeatmap::draw(ehm_list, heatmap_legend_side = "top", gap = grid::unit(1.5, "mm"))
          dev.off()

}
sethiyap/FungalSporeAnalysis documentation built on July 31, 2020, 10:26 p.m.