R/integPatternByChr.R

Defines functions integPatternByChr

Documented in integPatternByChr

#' @title Retroviral integration pattern on chromosomes
#' 
#' @description
#' Count how many integration sites fall on each chromosome (chr1-chr22,
#' chrX, chrY), express the counts as a percentage of all sites in the
#' data set, draw a bar plot and optionally write the tables to an Excel file.
#'          
#' @usage 
#' integPatternByChr(hits, ran_hits = NULL, excelOut = TRUE,
#'                   isExpList = FALSE, isRanList = FALSE,
#'                   outPath = getwd(),
#'                   outFileName = paste0('RIPAT', round(unclass(Sys.time()))))
#' 
#' @param hits a GR object or list. This object is made by the \code{\link{makeInputObj}} function.\cr
#'             Its \code{Decided} element is the one that is counted.
#' @param ran_hits a GR object or list. This object is the output of the \code{\link{ranSetGenerator}} function.\cr
#'                 Default is NULL (no random data is processed).
#' @param excelOut TRUE or FALSE. If the user wants to make an excel file, enter TRUE. Default is TRUE.
#' @param isExpList TRUE or FALSE. If \code{hits} is a list of several experimental data sets,\cr
#'                  enter TRUE. Default is FALSE.\cr
#' @param isRanList TRUE or FALSE. If \code{ran_hits} is a list of several random data sets,\cr
#'                  enter TRUE. Default is FALSE.\cr
#' @param outPath a string vector. Directory in which the PNG bar plots and\cr
#'                the Excel file generated by this function are written.\cr
#'                Trailing slashes are ignored.\cr
#' @param outFileName a character vector. Prefix attached to the result file names.
#' 
#' @return A named list. Element \code{Exp_data} is the table of\cr
#'         experimental integration frequency on chromosomes\cr
#'         (columns Vector, Chr, Count, Percent). When \code{ran_hits} is given,\cr
#'         element \code{Random_data} holds the matching table for the random\cr
#'         data (columns Random, Chr, Count, Percent).\cr
#'         
#' @examples 
#' data(blast_obj)
#' chrCheck = integPatternByChr(hits = blast_obj)
#'  
#' @export
integPatternByChr = function(hits, ran_hits = NULL, excelOut = TRUE, isExpList = FALSE, isRanList = FALSE, outPath = getwd(), outFileName = paste0('RIPAT', round(unclass(Sys.time())))){
  message('----- Integration frequency of each chromosome. (Time : ', date(), ')')
  message('- Validate options')
  # Drop trailing slashes so that paste0(outPath, '/', ...) never doubles them.
  outPath = sub('/+$', '', outPath)
  message('- OK!')

  # Chromosomes that are always reported, in plotting order.
  chr_levels = paste0('chr', c(1:22, 'X', 'Y'))

  # Count sites per chromosome for one table, append the chromosomes that have
  # no site with a count of 0 and add the percentage of all rows in the table.
  count_by_chr = function(tab){
    freq = plyr::count(tab$seqnames)
    absent = setdiff(chr_levels, as.character(freq$x))
    freq = rbind(freq, data.frame(x = absent, freq = rep(0, length(absent))))
    data.frame(freq, freq$freq/nrow(tab)*100, stringsAsFactors = FALSE)
  }

  # Stack the per-data-set frequency tables and prepend a label column
  # (one label per data set) named 'label_col'.
  build_freq_table = function(tabs, labels, label_col){
    if(length(tabs) == 0){
      stop('No data set to count : the input list is empty.', call. = FALSE)
    }
    freq_tab = data.frame(do.call(rbind, lapply(tabs, count_by_chr)), stringsAsFactors = FALSE)
    names(freq_tab) = c('Chr', 'Count', 'Percent')
    out = data.frame(rep(labels, each = length(chr_levels)), freq_tab, stringsAsFactors = FALSE)
    names(out)[1] = label_col
    out[[label_col]] = factor(out[[label_col]], levels = labels)
    out$Chr = factor(out$Chr, levels = chr_levels)
    out
  }

  # Render a plot into a PNG file; on.exit() closes the device even when
  # printing the plot fails, so no graphics device is leaked.
  save_barplot = function(plot_obj, file){
    grDevices::png(file, width = 1200, height = 750)
    on.exit(grDevices::dev.off(), add = TRUE)
    print(plot_obj)
    invisible(file)
  }

  # Theme shared by the experimental and the random bar plot.
  bar_theme = ggplot2::theme(panel.background = ggplot2::element_blank(),
                             panel.grid = ggplot2::element_line('darkgrey', linetype = 'dotted'),
                             axis.text.x = ggplot2::element_text(hjust = 1, angle = 45),
                             axis.text = ggplot2::element_text(size = 15),
                             axis.title = ggplot2::element_text(size = 17))

  message('- Check up frequency (except non-unique hits)')
  # Experimental data : only the 'Decided' element of each input is counted.
  if(isExpList){
    hit_tab = lapply(hits, function(a){data.frame(a$Decided, stringsAsFactors = FALSE)})
  } else {
    hit_tab = list(data.frame(hits$Decided, stringsAsFactors = FALSE))
  }
  vec = paste0('vector', seq_along(hit_tab))
  gg_hit = build_freq_table(hit_tab, vec, 'Vector')
  exp_plot = ggplot2::ggplot(gg_hit, ggplot2::aes(x = Chr, y = Percent, fill = Vector)) +
             ggplot2::geom_bar(stat = "identity", position = "dodge") +
             bar_theme
  save_barplot(exp_plot, paste0(outPath, '/', outFileName, '_exp_dist_barplot.png'))

  # Random data (optional) : the objects themselves are counted.
  if(!is.null(ran_hits)){
    if(isRanList){
      ran_hit_tab = lapply(ran_hits, function(a){data.frame(a, stringsAsFactors = FALSE)})
    } else {
      ran_hit_tab = list(data.frame(ran_hits, stringsAsFactors = FALSE))
    }
    ran_vec = paste0('random', seq_along(ran_hit_tab))
    gg_ran_hit = build_freq_table(ran_hit_tab, ran_vec, 'Random')
    ran_plot = ggplot2::ggplot(gg_ran_hit, ggplot2::aes(x = Chr, y = Percent, fill = Random)) +
      ggplot2::geom_bar(stat = "identity", position = "dodge") +
      bar_theme
    save_barplot(ran_plot, paste0(outPath, '/', outFileName, '_ran_dist_barplot.png'))
  }
  message('- OK!')

  if(excelOut){
    message('- Write a result file.')
    docs = openxlsx::createWorkbook()
    hs = openxlsx::createStyle(fgFill = "forestgreen", halign = "CENTER", textDecoration = "Bold", fontColour = "white")
    cat('+ Add sheet to file : \"Experimental data\"\n'); openxlsx::addWorksheet(wb = docs, sheetName = "Experimental_data")
    openxlsx::writeDataTable(wb = docs, sheet = "Experimental_data", x = gg_hit, headerStyle = hs, rowNames = FALSE, colNames = TRUE, sep = '\t')
    if(!is.null(ran_hits)){
      hs_random = openxlsx::createStyle(fgFill = "mediumpurple", halign = "CENTER", textDecoration = "Bold", fontColour = "white")
      cat('+ Add sheet to file : \"Random data\"\n'); openxlsx::addWorksheet(wb = docs, sheetName = "Random_data")
      openxlsx::writeDataTable(wb = docs, sheet = "Random_data", x = gg_ran_hit, headerStyle = hs_random, rowNames = FALSE, colNames = TRUE, sep = '\t')
    }
    openxlsx::saveWorkbook(wb = docs, file = paste0(outPath, '/', outFileName, '_integ_region_in_chr.xlsx'), overwrite = TRUE)
    message('- OK!')
  }
  message('----- Finish. (Time : ', date(), ')')

  # A plain if is required here : ifelse() returns a result shaped like its
  # (length-one) test and would keep only the first table, without names.
  result = list('Exp_data' = gg_hit)
  if(!is.null(ran_hits)){
    result$Random_data = gg_ran_hit
  }
  return(result)
}
bioinfo16/RIPAT documentation built on Oct. 16, 2020, 1:39 p.m.