#' Get DNA sequence from peak summit region
#'
#' This function extract the DNA sequence around peak summit region from narrowPeak file
#'
#' @param file peak file generated by macs2
#' @param peakFormat Peak file format. One of \code{c("narrowPeak", "broadPeak", "bed")}
#' Default: narrowPeak
#' @param sampleId sample ID
#' @param genome BSgenome object
#' @param length Length of the region. Default: 200
#' @param column_suffix Whether to modify the column names to include sampleId.
#' If TRUE, the new column names are of format: colName.sampleId
#'
#' @return A dataframe with three columns: name, summitRegion, summitSeq
#' @export
#'
#' @examples
#' summitSeq = get_peak_summit_seq("test.narropeak", "sampleId", BSgenome.Afumigatus.AspGD.Af293, 200)
#'
get_peak_summit_seq = function(file, peakFormat = "narrowPeak", sampleId = NULL, genome,
length = 200, column_suffix = TRUE){
peakFormat <- match.arg(arg = peakFormat, choices = c("narrowPeak", "broadPeak", "bed"))
seqAroundSummit <- round(length / 2)
np <- rtracklayer::import(con = file, format = peakFormat)
if(length(np) == 0){
return(NULL)
}
if(is.null(mcols(np)$peak)){
mcols(np)$peak <- as.integer(width(np) / 2)
}
## get the region around peak summit
np <- GenomicRanges::resize(
x = GenomicRanges::shift(x = np, shift = np$peak - seqAroundSummit),
width = seqAroundSummit*2, fix = "start"
)
## check if there are any out of bound ranges based on seqlengths and trim
suppressWarnings(seqlengths(np) <- seqlengths(genome)[seqlevels(np)])
np <- trim(np)
## get the DNA sequence for region
np$summitSeq <- BSgenome::getSeq(x = genome, names = np, as.character = TRUE)
motifSeq <- as.data.frame(np) %>%
dplyr::mutate(summitSeqRegion = paste(seqnames, ":", start, "-", end, sep = "")) %>%
dplyr::select(peakId = name, summitSeq, summitSeqRegion)
if(!is.null(sampleId) && column_suffix){
peakIdCol <- paste("peakId.", sampleId, sep = "")
regionCol <- paste("summitSeqRegion.", sampleId, sep = "")
seqCol <- paste("summitSeq.", sampleId, sep = "")
motifSeq <- dplyr::rename(
motifSeq,
!! peakIdCol := peakId,
!! regionCol := summitSeqRegion,
!! seqCol := summitSeq
)
}
return(motifSeq)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.