#' Add feature to the model from a BED file
#'
#' Helper function to add features from BED files. All features will become
#' binary features, 0 indicating no overlap, 1 indicating overlap.
#' If the feature has more than one factor, each factor will become its own
#' binary feature.
#'
#' @param features a data.frame to add the features to.
#' @param ranges a \code{GRanges} object with the ranges to overlap features
#' (must have same length as the number of rows in the features data frame).
#' @param featureName what the feature should be called.
#' @param file path to the BED File with >=3 columns (chr, start, end,
#' feature(s)). If only 3 columns, the name of the file will be used as the
#' feature name. If 4 columns, and the 4th column has multiple factors,
#' uses the factor names of the 4th column as the feature names. Does not
#' support files with features in > the 4th column (please separate into
#' multiple BED files).
#'
#' @return the original features data.frame with the new feature(s) added from
#' the BED file as the last column(s)
#'
#' @importClassesFrom bsseq BSseq
#' @importClassesFrom S4Vectors Hits
#' @importMethodsFrom bsseq pData seqnames sampleNames start width
#'
#' @import bsseq
#' @import GenomicRanges
#' @import S4Vectors
#'
#' @export
addFeatureFromBED <- function(features,
ranges,
featureName = NULL,
file = NULL) {
# checks
if (is.null(file)) {
stop("Must specify the BED file to add using the file param")
}
if (length(ranges) != nrow(features)) {
stop("Ranges are not the same length as the features data frame = bad")
}
dat <- read.table(file, header = F, sep = "\t")
if (ncol(dat) < 3) {
stop(paste("BED file", file, "does not have >= 3 columns."))
}
if (ncol(dat) > 4) {
message(paste("Warning: there are more than 4 columns in", file,
"; ignoring everything after the 4th column."))
}
colnames(dat)[1:3] <- c("chr", "start", "end")
dat$start <- dat$start + 1 # make sure in same coordinates
if (ncol(dat) == 3) { # 3 columns = assume 1 feature, name it after file
grl <- makeGRangesFromDataFrame(dat)
overlaps <- findOverlaps(ranges, grl)
eval(parse(text = paste0("features$`", basename(file), "` <- 0")))
eval(parse(text = paste0("features$`", basename(file),
"`[queryHits(overlaps)] <- 1")))
eval(parse(text = paste0("features$`", basename(file), "` <- ",
"as.factor(features$`", basename(file), "`)")))
} else {
if (is.factor(dat[, 4])) { # multiple factors = make each factor a binary
# feature
grl <- split(makeGRangesFromDataFrame(dat), dat[, 4])
for (j in 1:length(grl)) {
overlaps <- findOverlaps(ranges, grl[j])
eval(parse(text = paste0("features$`", names(grl)[j], "` <- 0")))
eval(parse(text = paste0("features$`", names(grl)[j],
"`[queryHits(overlaps)] <- 1")))
eval(parse(text = paste0("features$`", names(grl)[j], "` <- ",
"as.factor(features$`", names(grl[j]), "`)")))
}
} else if (is.numeric(dat[, 4]) | is.integer(dat[, 4])) {
# numeric or integer = make one feature w/ the numeric/int value
grl <- makeGRangesFromDataFrame(dat)
overlaps <- findOverlaps(ranges, grl)
eval(parse(text = paste0("features$`", basename(file), "` <- 0")))
eval(parse(text = paste0("features$`", basename(file),
"`[queryHits(overlaps)] <- ",
"dat[subjectHits(overlaps), 4]")))
}
}
features
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.