# R/importFiles.R

# Defines functions importBED

# Documented in importBED

# This file contains the function needed to import the user's files for
# annotation purposes
# Author: Daniel Fusca


#' Import BED files
#'
#' Given a filename referring to a BED file, this function returns a dataframe
#' containing the first 6 columns of the BED file. The BED file referred to by
#' the filename MUST have as its first 6 columns: coordinate chromosome,
#' coordinate start position, coordinate end position, coordinate name,
#' coordinate score, and coordinate strand. 0 and . can be used for missing
#' score and strand information, respectively. Resulting dataframes can then be
#' used as peaks or features for annotation.
#'
#' The file is read without a header row, so every line of the file is treated
#' as a coordinate. Any columns after the sixth are discarded. An error is
#' raised if the file has fewer than 6 columns. The remaining 6 columns are
#' then passed to the helper function \code{checkBEDInput} (in checkInput.R),
#' which raises an error if there is an issue with the dataframe.
#'
#' Use of read.delim from the utils package to import files was inspired by
#' RStudio's default method of importing text files. Code to rename column names
#' is based on a StackOverflow post by Joshua Ulrich (see References).
#'
#' @param pathName A filename giving a path to the BED file to be imported
#'
#' @return A dataframe containing all coordinates given in the BED file. This
#'   dataframe contains 6 columns named \code{Chr}, \code{Start}, \code{End},
#'   \code{Name}, \code{Score} and \code{Strand}: coordinate chromosome,
#'   coordinate start position, coordinate end position, coordinate name,
#'   coordinate score, and coordinate strand. Note that the end coordinate will
#'   be one less than given in the BED file since standard BED format has
#'   right-open intervals.
#'
#' @examples
#'   pathToPeaks <- system.file("extdata",
#'      "H3K27me3Peaks.bed", package = "PeakMapper")
#'   pathToGenes <- system.file("extdata",
#'      "WS263Genes.bed", package = "PeakMapper")
#'   H3K27me3Peaks <- importBED(pathToPeaks)
#'   WS263Genes <- importBED(pathToGenes)
#'   H3K27me3Peaks$Name
#'   WS263Genes$Score
#'
#' @references
#' Joshua Ulrich. "How to rename a single column in a data.frame?". 23 September
#' 2011. Accessed 25 September 2019. https://stackoverflow.com/a/7532464
#'
#' @export
#' @import utils
importBED <- function(pathName) {
  # Usage of read.delim inspired by RStudio's default method of importing
  # text files. TRUE/FALSE are written out in full because T and F are ordinary
  # variables that can be reassigned, which would silently change how the file
  # is parsed
  bedFrame <- utils::read.delim(pathName, header = FALSE,
                                stringsAsFactors = FALSE)

  # Check that the file being loaded has at least 6 columns, since PeakMapper
  # requires the first 6 columns to perform peak mapping
  numCols <- ncol(bedFrame)
  if (numCols < 6) {
    stop(paste("File at", pathName, "only has", numCols,
               "column(s). Valid BED files must have at least 6 columns."))
  }

  # Keep only the first 6 columns; list-style indexing on a dataframe always
  # returns a dataframe, so no re-wrapping is needed
  bedFrame <- bedFrame[seq_len(6)]

  # Code to rename columns based on StackOverflow post by Joshua Ulrich
  # (https://stackoverflow.com/a/7532464)
  colnames(bedFrame) <- c("Chr", "Start", "End", "Name", "Score", "Strand")

  # Check that the imported dataframe is a valid BED file, as expected by
  # importBED. This checking is done by a helper function in checkInput.R, and
  # raises an error if there is an issue with the dataframe
  checkBEDInput(bedFrame)

  # In BED format, the base pair in column 3 is not actually included in the
  # coordinate, but for our purposes it's easier if the value in this column is
  # the actual last base of each coordinate
  bedFrame$End <- bedFrame$End - 1

  bedFrame
}


# [END]
# fuscada2/PeakMapper documentation built on Dec. 8, 2019, 12:35 p.m.