R/summarize_homer.R

Defines functions summarize_homer_annots

Documented in summarize_homer_annots

#' Parse peak annotations generated by homer annotatePeaks.pl
#' @details summarizes peak annotations generated with homer annotatePeaks.pl, also a generates a pie chart of peak distributions.
#'
#' @param anno Raw annotations generated by annotatePeaks.pl.
#' @param plot If TRUE generates a pie chart for peak distribution
#' @param legend_font_size font size for legend. Default 1.
#' @param label_size font size for labels. Default 0.8.
#'
#' @export

summarize_homer_annots = function(anno, sample_names = NULL, plot = TRUE, legend_font_size = 1, label_size = 0.8){

  homer = lapply(anno, function(homer.anno){
                  homer.anno = data.table::fread(homer.anno)
                  colnames(homer.anno)[1] = 'peakid'
                  homer.anno$anno = sapply(strsplit(x = as.character(homer.anno$Annotation), split = ' (', fixed = T), '[[', 1)

                  homer.anno = homer.anno[,.(Chr, Start, End, Strand, anno, `Gene Name`, `Gene Type`,`Distance to TSS`, `Nearest PromoterID`, `Nearest Ensembl`)]
                  homer.anno = homer.anno[order(anno, `Gene Name`)]
                  colnames(homer.anno) = c('Chr', 'Start', 'End', 'Strand', 'Annotation', 'Hugo_Symbol',
                                           'Biotype', 'Distance_to_TSS', 'Nearest_PromoterID', 'Nearest_Ens')
                  homer.anno$Annotation = gsub(pattern = "3' UTR", replacement = '3pUTR', x = homer.anno$Annotation)
                  homer.anno$Annotation = gsub(pattern = "5' UTR", replacement = '5pUTR', x = homer.anno$Annotation)
                  homer.anno
                })

  if(is.null(sample_names)){
    names(homer) = unlist(data.table::tstrsplit(x = basename(anno), split = "\\.", keep = 1))
  }else{
    names(homer) = sample_names
  }

  homer.anno.stats = lapply(homer, function(h){
                            homer.anno.stats = h[,.N,Annotation][,fract := N/sum(N)][order(N, decreasing = TRUE)]
                            homer.anno.stats[,leg := paste0(Annotation, " [", N, "]")]
                            homer.anno.stats
                          })
  npeaks = unlist(lapply(homer, nrow))

  homer.anno.stats = data.table::rbindlist(l = homer.anno.stats, idcol = "Sample")
  homer.anno.stats = data.table::dcast(data = homer.anno.stats, Annotation ~ Sample, value.var = "fract", fill = 0)
  data.table::setDF(x = homer.anno.stats, rownames = homer.anno.stats$Annotation)
  homer.anno.stats =homer.anno.stats[,-1]

  pie.cols = c('3pUTR' = '#E7298A', '5pUTR' = '#D95F02', 'Intergenic' = '#BEBADA',
               'TTS' = '#FB8072', 'exon' = '#80B1D3', 'intron' = '#FDB462',
               'non-coding' = '#FFFFB3',
               'NA' = 'gray70', 'promoter-TSS' = '#1B9E77')

  homer.anno.stats = homer.anno.stats[names(pie.cols)[names(pie.cols) %in% rownames(homer.anno.stats)],,]

  pie.col = pie.cols[rownames(homer.anno.stats)]

  #layout(mat = matrix(data = c(1, 2), nrow = 2), heights = c(4, 1.25))
  par(mar = c(2, 4, 5, 3))
  b = barplot(height = as.matrix(homer.anno.stats), col = pie.col, horiz = TRUE,
          las = 2, axes = FALSE, names.arg = rep(NA, ncol(homer.anno.stats)), border = pie.cols)
  axis(side = 1, at = seq(0, 1, 0.25), font = 2, lwd = 2)
  mtext(text = npeaks[colnames(x = homer.anno.stats)], side = 4, at = b, las = 2, font = 4, adj = -0.2)
  mtext(text = colnames(homer.anno.stats), side = 2, at = b, las = 2, adj = 1, font = 2)

  #plot.new()
  #par(mar = c(1, 0, 1, 1))
  add_legend("topleft", legend = names(pie.col), col = pie.col,  bty = "n", border=NA,
         xpd = TRUE, text.font = 2, pch = 15, xjust = 0, yjust = 0,
         cex = 0.8, y.intersp = 1.5, x.intersp = 1,
         pt.cex = 1.2 * 0.8, ncol = 3)

  #homer.anno
}

# if(plot){
#   homer.anno.stats = homer.anno[,.N,Annotation][,fract := round(N/sum(N), digits = 2)][order(N, decreasing = TRUE)]
#   homer.anno.stats[,leg := paste0(Annotation, " [", N, "]")]
#
#   pie.cols = c('3pUTR' = '#E7298A', '5pUTR' = '#D95F02', 'Intergenic' = '#BEBADA',
#                'TTS' = '#FB8072', 'exon' = '#80B1D3', 'intron' = '#FDB462', 'non-coding' = '#FFFFB3',
#                'NA' = 'gray70', 'promoter-TSS' = '#1B9E77')
#
#   par(bty="n", mgp = c(0.5,0.5,0), las=1, tcl=-.25, font.main=4,xpd=NA, mar=c(0,0,1,3))
#   pie(x = homer.anno.stats$fract, col = pie.cols[homer.anno.stats$Annotation], labels = homer.anno.stats$fract,
#       border = "white", radius = 0.8, init.angle = 0, font = 4, cex = label_size)
#   symbols(0,0,circles=.3, inches=FALSE, col="white", bg="white", lty=0, add=TRUE)
#   add_legend(x = "topright", bty = "n", legend = homer.anno.stats$leg,
#              col = pie.cols[homer.anno.stats$Annotation],
#              cex = legend_font_size, pch = 15, text.font = 4)
# }
PoisonAlien/peakseason documentation built on May 14, 2019, 4:01 a.m.