R/lineplot_from_bw.R

Defines functions lineplot_for_bw

Documented in lineplot_for_bw

#' lineplot for bw file
#' @description Plot line-plot of each percentile for given gene-list.
#' @author pooja sethiya
#' @param feature_txDb A TxDb object of gene feature file, generated by
#'   \code{GenomicFeatures::makeTxDbFromGFF()}
#' @param genelist A tibble with two columns \code{1. gene_id and 2.Expression
#'   value or Percentile rank} without header, to subset genes from the feature
#'   file.
#' @param bw_files A character vector containing either name or names of GRanges
#'   object of bw or bdg files. GRanges object for bw_files can be generated by: \code{
#'   rtracklayer::import()}
#' @param tss Logical, if TRUE plot 1kb up/downstream of start co-ordinate; if
#'   FALSE plot 1kb up/downstream of gene-body \code{default: TRUE}
#' @param expression_value Logical, if TRUE percentile is calculated based on
#'   the expression value column (ie. second column), if FALSE second column
#'   should define the percentiles. \code{default: TRUE}
#' @param tiles Numeric, when \code{expression_value=TRUE}, specify the number
#'   of percentiles inorder to divide the data.\code{default: 4}
#' @param output_name A character vector containing name of the output
#'   plot.\code{default:Sample}
#' @param median Logical, to display median or mean line of
#'   percentiles.\code{default: TRUE}
#'
#' @return A line-plot of percentiles and a tibble of percentiles appended to
#'   expression data.
#' @export
#' @import magrittr
#' @import ggplot2
#' @importFrom dplyr distinct
#' @importFrom dplyr arrange
#' @importFrom dplyr ntile
#' @importFrom GenomicFeatures promoters
#' @importFrom GenomicFeatures genes
#' @importFrom EnrichedHeatmap normalizeToMatrix
#' @importFrom GenomicFeatures genes
#' @importFrom tidyr as_tibble
#' @importFrom dplyr mutate
#' @importFrom dplyr left_join
#' @importFrom tidyr tibble
#' @importFrom dplyr group_by
#' @importFrom dplyr ungroup
#' @importFrom dplyr summarise
#' @importFrom tidyr gather
#'
#'
#' @examples
#'
#' \dontrun{
#'
#'  # Load feature file from system data, to run example data
#'  feature_txDb <- AnnotationDbi::loadDb(system.file("extdata/sqllite/an_feature_file_s10_m04_r07.sqlite" , package = "FungalSporeAnalysis"))
#'
#'  # Or load feature file as TxDb
#'  feature_txDb <- GenomicFeatures::makeTxDbFromGFF("an_feature_file_s10_m04_r07.gff")
#'
#'  # load list of genes to subset from all the genes
#'  genelist_dat <- system.file("extdata/genesets/an_spore_pol2_for_percentilelineplot.txt" , package = "FungalSporeAnalysis")
#'  genelist <- readr::read_delim(genelist_dat,delim="\t", col_names=FALSE)
#'
#'  # Load bw files as GRanges object
#'  pol2_veA_wt_spore <- rtracklayer::import.bw("pol2_veA_wt_spore_mix22_CACAGTTGGT_normalized_repeat.bw")
#'
#'  # Load GRanges object from system data, to run example data
#'  data_files <- system.file("extdata/sysdata.rda" , package = "FungalSporeAnalysis")
#'  load(data_files)
#'
#'  ###########################################
#'
#'  lineplot_for_bw(feature_txDb, genelist =genelist,bw_file="pol2_veA_wt_spore", output_name = "pol2_veA_wt_spore")
#'
#' }
lineplot_for_bw <- function(feature_txDb,genelist,bw_file,output_name="Sample", tss="TRUE", expression_value=TRUE, tiles=4, median=TRUE){

          feature_gr <- GenomicFeatures::genes(feature_txDb)
          #--- subset genes according to genelist
          genelist <- genelist %>% dplyr::distinct(.) %>% dplyr::arrange(desc(X2))
          sub_feature_gr <-  subset(feature_gr, feature_gr$gene_id %in% genelist$X1)
          sub_feature_gr <- sub_feature_gr[match(genelist$X1,sub_feature_gr$gene_id),]

          #--- provide the genelist data with expression value for the list
          if(expression_value==TRUE){
                    message("computing percentile...")
                    genelist <- genelist %>% dplyr::mutate(quantile=dplyr::ntile(X2,tiles))
                    sub_feature_gr$quantile = genelist$quantile[match(sub_feature_gr$gene_id,genelist$X1)]
          }
          else{ #--- second column is considered as percentile column
                    colnames(genelist) <- c("X1", "quantile")
                    sub_feature_gr$quantile = genelist$quantile[match(sub_feature_gr$gene_id,genelist$X1)]
          }

          if(length(table(sub_feature_gr$quantile)) <= 5){
                    values=c("#e31a1c", "#1d91c0", "#41ab5d", "#ec7014", "black")
          }
          else{
                    values=rainbow(length(table(sub_feature_gr$quantile)))
          }

          if(tss=="TRUE"){
                    tss <- GenomicFeatures::promoters(sub_feature_gr, upstream = 0, downstream = 1)
                    breaks=c("u2","u50","d50")
                    labels=c("-1kb","TSS","+1kb")
                    inter=50
          }
          else{
                    tss=sub_feature_gr
                    breaks=c("u2","u50","d2","d50")
                    labels=c("-1kb","TSS","TES","+1kb")
                    inter =c(50,117)
          }


          bw <- get(bw_file)
          mat_1 <- EnrichedHeatmap::normalizeToMatrix(bw, tss, value_column = "score",background = 0,
                                                      smooth = TRUE,extend = c(1000))


          norm_mat <- data.frame(mat_1)

          summarised_mat <- norm_mat %>%
                              tidyr::as_tibble(rownames = "geneName") %>%
                              dplyr::left_join(tidyr::tibble(geneName = sub_feature_gr$gene_id , quantile = sub_feature_gr$quantile)) %>%
                              tidyr::gather(key=column_name, value=value,-quantile,-geneName) %>%
                              dplyr::mutate(column_name=factor(column_name, levels = unique(column_name))) %>%
                              dplyr::group_by(column_name,quantile)

          if(median==TRUE){
                    summarised_mat <- summarised_mat %>%
                                        dplyr::summarise(avg=median(value)) %>%
                                        dplyr::ungroup() %>%
                                        dplyr::arrange(desc(quantile)) %>%
                                        dplyr::mutate(quantile=factor(quantile, levels=unique(quantile)))

                    ylab <- "median signal"
          }
          else{
                    summarised_mat <- summarised_mat %>%
                              dplyr::summarise(avg=mean(value)) %>%
                              dplyr::ungroup() %>%
                              dplyr::arrange(desc(quantile)) %>%
                              dplyr::mutate(quantile=factor(quantile, levels=unique(quantile)))

                    ylab <- "mean signal"
          }

          gg <- ggplot2::ggplot(summarised_mat,ggplot2::aes(column_name,avg, group=quantile, color=quantile))+
                    ggplot2::geom_line(lwd=0.8)+
                    ggplot2::scale_color_manual(values=values)+
                    ggplot2::scale_x_discrete(breaks=breaks,labels=labels,"")+
                    ggplot2::theme_bw()+
                    ggplot2::geom_vline(xintercept = inter, size=0.5,color="black",linetype = "dashed")+
                    ggplot2::ggtitle(label = bw_file)+
                    ggplot2::theme(
                              panel.grid =ggplot2::element_blank(),
                              axis.text.x= ggplot2::element_text(color="black",size=8,angle=0,vjust=0.8),
                              axis.text.y = ggplot2::element_text(color="black",size=8),
                              axis.title.y=ggplot2::element_text(color="black",size=8),
                              axis.title.x=ggplot2::element_blank(),
                              legend.title=ggplot2::element_text(color="black",size=8),
                              legend.key.size = unit(1,"line"),
                              legend.box.spacing=unit(0.5,"mm"),
                              legend.text=ggplot2::element_text(color="black",size=8)) +
                    ggplot2::ylab(ylab)+
                    ggplot2::guides(color=ggplot2::guide_legend(title=""))

          print(gg)

          ggplot2::ggsave(plot = gg, filename = paste(output_name,"_lineplot.png",sep=""),device = "png", path="./", width = 8, height =6,unit="cm", dpi=300)

          return(genelist)
}
sethiyap/FungalSporeAnalysis documentation built on July 31, 2020, 10:26 p.m.