#' Read and pool replicates from BS-Seq data
#'
#' \code{pool_bs_seq_rep} reads and pools replicate methylation data from BS-Seq
#' experiments that are either in Encode RRBS or Bismark Cov format. Read the
#' Important section below on when to use this function.
#'
#' @inheritParams preprocess_bs_seq
#'
#' @return A \code{\link[GenomicRanges]{GRanges}} object. The GRanges object
#' contains two additional metadata columns: \itemize{ \item
#' \code{total_reads}: total reads mapped to each genomic location. \item
#' \code{meth_reads}: methylated reads mapped to each genomic location. }
#' These columns can be accessed as follows: granges_object$total_reads
#'
#' @section Important: Unless you want to create a different workflow when
#' processing the BS-Seq data, you should NOT call this function, since this
#' is a helper function. Instead you should call the
#' \code{\link{preprocess_bs_seq}} function.
#'
#' Information about the file formats can be found in the following links:
#'
#' Encode RRBS format:
#' \url{http://rohsdb.cmb.usc.edu/GBshape/cgi-bin/hgTables?db=hg19&hgta_group=
#' regulation&hgta_track=wgEncodeHaibMethylRrbs&hgta_table=
#' wgEncodeHaibMethylRrbsBcbreast0203015BiochainSitesRep2&hgta_doSchema=
#' describe+table+schema}
#'
#' Bismark Cov format: \url{http://rnbeads.mpi-inf.mpg.de/data/RnBeads.pdf}
#'
#' @author C.A.Kapourani \email{C.A.Kapourani@@ed.ac.uk}
#'
#' @seealso \code{\link{read_bs_bismark_cov}},
#' \code{\link{read_bs_encode_haib}}, \code{\link{preprocess_bs_seq}}
#'
#' @examples
#' # Obtain the path to the file
#' bs_file1 <- system.file("extdata", "rrbs.bed", package = "BPRMeth")
#' bs_file2 <- system.file("extdata", "rrbs.bed", package = "BPRMeth")
#'
#' # Concatenate the files
#' bs_files <- c(bs_file1, bs_file2)
#' # Pool the replicates
#' pooled_data <- pool_bs_seq_rep(bs_files)
#'
#' @export
pool_bs_seq_rep <- function(files, file_format = "encode_rrbs",
chr_discarded = NULL){
assertthat::assert_that(length(files) > 1)
message("Pooling BS-Seq replicates ...")
# Read first file
if (file_format == "encode_rrbs"){
pooled_bs <- read_bs_encode_haib(file = files[1],
chr_discarded = chr_discarded,
is_GRanges = TRUE)
}else if (file_format == "bismark_cov"){
pooled_bs <- read_bs_bismark_cov(file = files[1],
chr_discarded = chr_discarded,
is_GRanges = TRUE)
}else{
stop("Wrong file format. Please check the available file formats!")
}
for (i in 2:length(files)){
# Read replicate file i
if (file_format == "encode_rrbs"){
bs_data_rep <- read_bs_encode_haib(file = files[i],
chr_discarded = chr_discarded,
is_GRanges = TRUE)
}else if (file_format == "bismark_cov"){
bs_data_rep <- read_bs_bismark_cov(file = files[i],
chr_discarded = chr_discarded,
is_GRanges = TRUE)
}
# Find overlaps between BS-Seq replicates.
# A Hits object containing in the 1st column the query indices and in
# the 2nd column the corresponding subject indices that overlap.
overlaps <- GenomicRanges::findOverlaps(query = pooled_bs,
subject = bs_data_rep)
# Get only the subset of overlapping CpG sites
tmp_bs <- pooled_bs[S4Vectors::queryHits(overlaps)]
# Add the total reads from the two distinct replicates
tmp_bs$total_reads <- tmp_bs$total_reads +
bs_data_rep[S4Vectors::subjectHits(overlaps)]$total_reads
# Add the methylated reads from the two distinct replicates
tmp_bs$meth_reads <- tmp_bs$meth_reads +
bs_data_rep[S4Vectors::subjectHits(overlaps)]$meth_reads
# Add CpG sites that were not overlapping between replicates
tmp_bs <- c(tmp_bs,
pooled_bs[-S4Vectors::queryHits(overlaps)],
bs_data_rep[-S4Vectors::subjectHits(overlaps)])
# Sort the pooled GRanges object
pooled_bs <- sort(tmp_bs, ignore.strand = TRUE)
}
message("Finished pooling BS-Seq replicates!\n")
return(pooled_bs)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.