# This file contains the function needed to import the user's files for
# annotation purposes
# Author: Daniel Fusca
#' Import BED files
#'
#' Given a filename referring to a BED file, this function returns a dataframe
#' containing the first 6 columns of the BED file. The BED file referred to by
#' the filename MUST have as its first 6 columns: coordinate chromosome,
#' coordinate start position, coordinate end position, coordinate name,
#' coordinate score, and coordinate strand. 0 and . can be used for missing
#' score and strand information, respectively. Resulting dataframes can then be
#' used as peaks or features for annotation.
#'
#' Use of read.delim from the utils package to import files was inspired by
#' RStudio's default method of importing text files. Code to rename column names
#' is based off of a StackOverflow post by Joshua Ulrich (see References).
#'
#' @param pathName A filename giving a path to the BED file to be imported
#'
#' @return A dataframe containing all coordinates given in the BED file. This
#' dataframe contains 6 columns: coordinate chromosome, coordinate start
#' position, coordinate end position, coordinate name, coordinate score, and
#' coordinate strand. Note that the end coordinate will be one less than given
#' in the BED file since standard BED format has right-open intervals.
#'
#' @examples
#' pathToPeaks <- system.file("extdata",
#' "H3K27me3Peaks.bed", package = "PeakMapper")
#' pathToGenes <- system.file("extdata",
#' "WS263Genes.bed", package = "PeakMapper")
#' H3K27me3Peaks <- importBED(pathToPeaks)
#' WS263Genes <- importBED(pathToGenes)
#' H3K27me3Peaks$Name
#' WS263Genes$Score
#'
#' @references
#' Joshua Ulrich. "How to rename a single column in a data.frame?". 23 September
#' 2011. Accessed 25 September 2019. https://stackoverflow.com/a/7532464
#'
#' @export
#' @import utils
importBED <- function(pathName) {
  # Read the file as headerless, tab-delimited text. TRUE/FALSE are spelled
  # out in full because the shorthands T and F are ordinary variables that
  # can be reassigned, which would silently turn the first BED record into a
  # header row.
  # Usage of read.delim inspired by RStudio's default method of importing
  # text files
  bedFrame <- utils::read.delim(pathName, header = FALSE,
                                stringsAsFactors = FALSE)

  # Check that the file being loaded has at least 6 columns, since PeakMapper
  # requires the first 6 columns to perform peak mapping
  numCols <- ncol(bedFrame)
  if (numCols < 6) {
    stop(paste("File at", pathName, "only has", numCols,
               "column(s). Valid BED files must have at least 6 columns."))
  }

  # Keep only the first 6 columns; any additional BED columns are discarded
  bedFrame <- bedFrame[seq_len(6)]

  # Code to rename columns based off of StackOverflow post by Joshua Ulrich
  # (https://stackoverflow.com/a/7532464)
  colnames(bedFrame) <- c("Chr", "Start", "End", "Name", "Score", "Strand")

  # Check that the imported dataframe is a valid BED file, as expected by
  # importBED. This checking is done by a helper function in checkInput.R, and
  # raises an error if there is an issue with the dataframe
  checkBEDInput(bedFrame)

  # In BED format, the base pair in column 3 is not actually included in the
  # coordinate (intervals are right-open), but for our purposes it's easier if
  # the value in this column is the actual last base of each coordinate
  bedFrame$End <- bedFrame$End - 1

  bedFrame
}
# [END]
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.