#' @title Read a methylation report file
#'
#' @description \code{read_methylation_report} read a file containing methylation information using \code{readr}.
#' In general, the metyylation report file was generated by \code{Bismark methylation extractor}.
#' The methylation report file format is the following:
#' <chromosome> <position> <strand> <count methylated> <count unmethylated> <C-context> <trinucleotide context>
#' Note: the 7th column is optional.
#'
#' @param file the path of methylation report file
#'
#' @param min_coverage filter out the sites which total reads < min_coverage. the default is 4.
#'
#' @param type specify the methylation context type : "CG", "CHG" or "CHH",the default is "CG".
#'
#' @return A \code{GRanges} object.
#' The GRanges object contains three additional metadata columns:
#' \itemize{ \item{ \code{methylated_reads}: the number if methylated reads for each C sites.}
#' \item{ \code{total_reads}: Total number of reads for each C sites.}
#' \item{ \code{type}:the C-context type: CG,CHG, or CHH}
#' }
#'
#' @author Hongen Kang \email{geneprophet@163.com}
#'
#' @export
#'
#' @examples
#' # Obtain the path to files
#' \dontrun{
#' #' file <- system.file("extdata", "example_human_chr22_CpG_report.txt", package = "BSDMR")
#' human_met <- read_methylation_report(file,min_coverage=4)
#' file <- system.file("extdata", "example_mouse_chr15_CpG_report.txt", package = "BSDMR")
#' mouse_met <- read_methylation_report(file,min_coverage=4)
#' }
#'
read_methylation_report <- function(file,
min_coverage = 4,
type = "CG"){
message("reading methylation report file ...")
met_report <- readr::read_delim(file, "\t",
escape_double = FALSE, col_names = FALSE,
col_types = cols(X1 = col_character(), X4 = col_integer(), X5 = col_integer()),
trim_ws = TRUE)
#filter sites which coverages under min_coverage and non-methylation
met_report <- met_report %>% dplyr::filter(X4+X5>=min_coverage & X4>0 & X6 == type)
CpG_gr <- GenomicRanges::GRanges(
seqnames = paste0("chr",met_report$X1),
ranges = IRanges::IRanges(start = met_report$X2, end = met_report$X2),
strand = met_report$X3,
methylated_reads = met_report$X4,
total_reads = met_report$X4+met_report$X5,
type = met_report$X6
)
return(CpG_gr)
}
#' @title Read a annotation file
#'
#' @description \code{read_annotation} reads a file containing annotation data, which
#' will be used to select genomic regions for downstream analysis.
#' The annotation file format is the following: "chromosome", "EnsemblID", "name","start", "end",
#' "strand".
#'
#' @param file the path of annotation file
#'
#' @return A \code{GRanges} object.
#' The GRanges object contains three additional metadata columns:
#' \itemize{ \item{ \code{id}: EnsemblID}
#' \item{ \code{center}: the center of the region,(strat+end)/2}
#' \item{ \code{annotation}:the annotation of the region, it can be promoter,downstream,gene,UTR...} }
#'
#'
#'
#' @author Hongen Kang \email{geneprophet@163.com}
#' @export
#'
#' @examples
#' # Obtain the path to files
#' \dontrun{
#' #' file <- system.file("extdata", "human_chr22_annotation.txt", package = "BSDMR")
#' human_anno <- read_annotation(file)
#' file <- system.file("extdata", "mouse_chr15_annotation.txt", package = "BSDMR")
#' mouse_anno <- read_annotation(file)
#' }
#'
read_annotation <- function(file){
message("reading annotation file...")
anno <- readr::read_delim(file,
"\t",
escape_double = FALSE,
col_names = FALSE,
col_types = cols(X1 = col_character()),
trim_ws = TRUE)
anno_gr <- GenomicRanges::GRanges(
seqnames = paste0("chr",anno$X1),
ranges = IRanges::IRanges(start = anno$X4,end = anno$X5),
strand = anno$X6,
id = anno$X2,
center = round((anno$X4+anno$X5)/2),
annotation = anno$X3
)
return(anno_gr)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.