R/import_bedGraph.R

Defines functions import_bedGraph

Documented in import_bedGraph

#' Import bedGraph file
#'
#' Given a path to a bedGraph file, imports data into a \code{GRanges} object.
#' \cr\strong{Note:} In the bedGraph format chromosome coordinates are zero-based
#' and half-open (the first chromosome position is 0, and the end coordinate of
#' an interval is not itself part of the interval). Imported data here are
#' always converted to the one-based, closed coordinates used by
#' \code{GRanges}, irrespective of the coordinates actually used in the
#' imported file: with \code{use_rtracklayer=FALSE}, 1 is added to every start
#' coordinate and the end coordinate is kept unchanged.
#' @param path Path to a single bedGraph file (character string). No default.
#' @param keep_zeros Logical indicating whether to keep rows with a score of
#' zero. In bedGraph files generated by our MACS2 FE ChIP-seq data analysis
#' pipeline these correspond to missing values and should not be kept.
#' If \code{FALSE}, only rows with a score of exactly zero are dropped; rows
#' with any other score (including negative or missing scores) are kept.
#' Defaults to \code{FALSE}.
#' @param local_copy Logical indicating whether to create local copy of target
#' file before importing data. If \code{TRUE} a local copy is automatically
#' created and deleted after use (also when the import fails). Use this
#' argument to avoid reading files directly from shared locations
#' (namely \code{LabShare}).
#' Defaults to \code{TRUE}.
#' @param use_rtracklayer Logical indicating whether to use
#' function \code{\link[rtracklayer]{import.bedGraph}} from \code{rtracklayer}.
#' If \code{FALSE}, the file is read into a \code{tibble} using
#' \code{\link[readr]{read_tsv}} and then converted to a \code{GRanges} object;
#' in that case the file must have exactly 4 columns.
#' Defaults to \code{FALSE}.
#' @return \code{GRanges} object with a \code{score} metadata column.
#' @examples
#' \dontrun{
#' x <- import_bedGraph('data.bdg', keep_zeros=TRUE, local_copy=FALSE,
#'                      use_rtracklayer=TRUE)
#'
#' dot1 <- import_bedGraph(paste0(
#'   '/Volumes/LabShare/HTGenomics/HiSeqOutputs/',
#'   'AveReps_SacCer3_MACS2_FE/',
#'   'Red1-dot1D-195-16-Reps-SacCer3-B3W3-MACS2/',
#'   'Red1-dot1D-195-16-Reps-SacCer3-2mis_B3W3_MACS2_FE.bdg.gz'))
#' }
#' @export
import_bedGraph <- function(path, keep_zeros=FALSE, local_copy=TRUE,
                            use_rtracklayer=FALSE){
  t0  <- proc.time()[3]
  
  # IO checks
  # Scalar '&&' short-circuits, so list.files() is only reached once 'path' is
  # known to be a single, non-missing string. list.files() returning nothing
  # means 'path' is not a directory with contents.
  is_file_path <- is.character(path) && length(path) == 1L && !is.na(path) &&
    length(list.files(path)) == 0
  if (!is_file_path) stop("'path' argument must be a path to a bedGraph file")
  check_path(path)
  
  check_package("GenomicRanges")
  if (use_rtracklayer) {
    check_package("rtracklayer")
  } else {
    check_package("readr")
  }
  
  if (local_copy) {
    path <- make_local_copy(path)
    # Safety net: remove the temporary directory even if the import below
    # fails. On success it has already been removed by the explicit cleanup
    # further down, in which case unlink() has nothing left to delete.
    on.exit(unlink('hwglabr2_imports_temp', recursive=TRUE), add=TRUE)
  }
  
  # Import bedGraph data
  message('Loading bedGraph file...')
  if (use_rtracklayer) {
    gr <- rtracklayer::import.bedGraph(path)
  } else {
    df <- readr::read_tsv(path, col_names=FALSE)
    if (ncol(df) != 4) stop('Imported file must have 4 columns.\n',
                            'To import files with a different # of columns try',
                            ' "use_rtracklayer=TRUE"')
    names(df) <- c('chr','start','end', 'score')
    message('Converting to "GRanges" object...')
    # bedGraph starts are zero-based: shift by one to get one-based, closed
    # ranges (the half-open end already equals the one-based closed end).
    gr <- with(df, GenomicRanges::GRanges(chr, IRanges::IRanges(start + 1, end),
                                          score=score))
  }
  
  if (local_copy) {
    message('(deleting local copy...)')
    unlink('hwglabr2_imports_temp', recursive=TRUE)
  }
  
  if (!keep_zeros) {
    message('Dropping zero scores...')
    # Drop exact zeros only: negative scores are real data, and missing scores
    # are kept rather than passed on as NA subscripts.
    scores <- gr$score
    gr <- gr[is.na(scores) | scores != 0]
  }
  
  
  message('---')
  message('Completed in ', hwglabr2::elapsed_time(t0, proc.time()[3]))
  
  return(gr)
}
hochwagenlab/hwglabr2 documentation built on Nov. 12, 2022, 7:27 p.m.