#' Wrapper method for processing Beatson enhancer RNA-Seq and BS-Seq HTS data
#'
#' \code{process_beatson_enh_wrap} is a wrapper method for processing HTS data
#' and returning the methylation enhancer regions and the corresponding gene
#' expression data for those enhancer regions. Note that the format of BS-Seq
#' data should be in the Bismark Cov format and for the RNA-Seq data in
#' Beatson format.
#'
#' @param bs_files Filename (or vector of filenames if there are replicates) of
#' the BS-Seq '.bed' formatted data to read values from.
#' @param rna_files Filename of the RNA-Seq '.bed' formatted data to read values
#' from. Currently, this version does not support pooling RNA-Seq replicates.
#' @param enh_files Filename of the enhancer annotation .bed formatted data.
#' @param chrom_size_file Optional filename containing genome chromosome sizes.
#' @param chr_discarded A vector with chromosome names to be discarded.
#' @param upstream Integer defining the length of bp upstream of TSS for
#' creating the promoter region.
#' @param downstream Integer defining the length of bp downstream of TSS for
#' creating the promoter region.
#' @param min_bs_cov The minimum number of reads mapping to each CpG site. CpGs
#' with less reads will be considered as noise and will be discarded.
#' @param max_bs_cov The maximum number of reads mapping to each CpG site. CpGs
#' with more reads will be considered as noise and will be discarded.
#' @inheritParams create_methyl_region
#'
#' @return A \code{processHTS} object which contains following information:
#' \itemize{ \item{ \code{methyl_region}: A list containing methylation data,
#' where each entry in the list is an \eqn{L_{i} X 3} dimensional matrix,
#' where \eqn{L_{i}} denotes the number of CpGs found in region \code{i}. The
#' columns contain the following information: \enumerate{ \item{ 1st column:
#' Contains the locations of CpGs relative to TSS. Note that the actual
#' locations are scaled to the (fmin, fmax) region. } \item{ 2nd column:
#' Contains the total reads of each CpG in the corresponding location.} \item{
#' 3rd column: Contains the methylated reads each CpG in the corresponding
#' location.} } } \item{ \code{prom_region}: A
#' \code{\link[GenomicRanges]{GRanges}} object containing corresponding
#' annotated promoter regions for each entry of the \code{methyl_region} list.
#' The GRanges object has one additional metadata column named \code{tss},
#' which stores the TSS of each promoter. } \item{ \code{rna_data}: A
#' \code{\link[GenomicRanges]{GRanges}} object containing the corresponding
#' RNA-Seq data for each entry of the \code{methyl_region} list. The GRanges
#' object has three additional metadata columns which are explained in
#' \code{\link{read_rna_beatson}}} \item{ \code{upstream}: Integer
#' defining the length of bp upstream of TSS.} \item{ \code{downstream}:
#' Integer defining the length of bp downstream of TSS.} \item{
#' \code{cpg_density}: Integer defining the minimum number of CpGs that have
#' to be in a methylated region. Regions with less than \code{n} CpGs are
#' discarded.} \item{ \code{sd_thresh}: Numeric defining the minimum standard
#' deviation of the methylation change in a region. This is used to filter
#' regions with no methylation change.} \item{ \code{fmin}: Minimum range
#' value for region location scaling.} \item{ \code{fmax}: Maximum range value
#' for region location scaling.} }
#'
#' @author C.A.Kapourani \email{C.A.Kapourani@@ed.ac.uk}
#'
#' @examples
#' # Get the location of the files
#' bs_file <- system.file("extdata", "bism_rep1.bed", package = "processHTS")
#' rna_file <- system.file("extdata", "rna_beatson2.bed", package = "processHTS")
#' enh_file <- system.file("extdata", "enhancers_beatson.bed", package = "processHTS")
#' #data <- process_beatson_enh_wrap(bs_file, rna_file, enh_file)
#'
#' @export
process_beatson_enh_wrap <- function(bs_files, rna_files, enh_files,
chrom_size_file = NULL,
chr_discarded = NULL, upstream = -15000,
downstream = 15000, min_bs_cov = 0,
max_bs_cov = 1000, cpg_density = 1,
sd_thresh = 0, ignore_strand = FALSE,
fmin = -1, fmax = 1){
# Process BS-Seq file and return data in the required format
bs_data <- preprocess_bs_seq(files = bs_files,
file_format = "bismark_cov",
chr_discarded = chr_discarded,
min_bs_cov = min_bs_cov,
max_bs_cov = max_bs_cov)
# Read the chromosome size file, if it is supplied
if (!is.null(chrom_size_file)){
chrom_size <- read_chrom_size(file = chrom_size_file)
}else{
chrom_size <- NULL
}
# Read enhancer data
obj <- read_enh_beatson(enh_file = enh_files,
gene_file = rna_files,
chr_discarded = chr_discarded,
is_GRanges = TRUE)
# Extract infrmation from object obj
enh_data <- obj$enhancer_data
rna_data <- obj$gene_data
# Create enhancer regions
enh_reg <- create_enh_region(annot_data = enh_data,
chrom_size = chrom_size,
upstream = upstream,
downstream = downstream)
# Create methylation regions data
methyl_reg <- create_methyl_region(bs_data = bs_data,
prom_region = enh_reg,
cpg_density = cpg_density,
sd_thresh = sd_thresh,
ignore_strand = ignore_strand,
fmin = fmin,
fmax = fmax)
# Keep only the corresponding gene expression data
rna_data <- rna_data[methyl_reg$prom_ind]
# Keep only the corresponding enhancer annotation data
enh_reg <- enh_reg[methyl_reg$prom_ind]
# Create object
obj <- structure(list(methyl_region = methyl_reg$meth_data,
enh_region = enh_reg,
rna_data = rna_data,
upstream = upstream,
downstream = downstream,
cpg_density = cpg_density,
sd_thresh = sd_thresh,
fmin = fmin,
fmax = fmax),
class = "processHTS")
return(obj)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.