# R/data.R

#' Example ChIP-seq peak file
#'
#' Human H3K27ac peak file generated with ChIP-seq using the K562 cell line.
#' Human genome build hg19 was used.
#' The peak file (.BED) was obtained from the ENCODE project
#' (\url{https://www.encodeproject.org/files/ENCFF044JNJ/}).
#' The BED file was then imported as a GRanges object.
#' Only peaks located on chromosome 1 were kept to reduce the dataset size.
#'
#' @source
#' The code to prepare the .Rda file from the raw peak file is:
#'
#' \code{# dataset was directly downloaded from} \cr
#' \code{# https://www.encodeproject.org/files/ENCFF044JNJ/} \cr
#' \code{encode_H3K27ac <- ChIPseeker::readPeakFile("path", as = "GRanges")} \cr
#' \code{encode_H3K27ac <-
#' encode_H3K27ac[seqnames(encode_H3K27ac) == "chr1"]} \cr
#' \code{my_label <-
#'  c("name","score","strand","signalValue","pValue","qValue","peak")} \cr
#' \code{colnames(GenomicRanges::mcols(encode_H3K27ac)) <- my_label} \cr
#' \code{usethis::use_data(encode_H3K27ac, overwrite = TRUE)} \cr
#'
#' @usage data("encode_H3K27ac")
"encode_H3K27ac"

#' Example CUT&Tag peak file
#'
#' Human H3K27ac peak file generated with CUT&Tag using the K562 cell line
#' from Kaya-Okur et al., (2019). Human genome build hg19 was used.
#' Raw sequencing data were obtained from GEO
#' (\url{https://trace.ncbi.nlm.nih.gov/Traces/sra/?run=SRR8383507}).
#' Peak calling was performed by Leyla Abbasova using MACS2.
#' The resulting peak file (.BED) was then imported as a GRanges object.
#' Only peaks located on chromosome 1 were kept to reduce the dataset size.
#'
#' @source
#' The code to prepare the .Rda file from the raw peak file is:
#'
#' \code{# sequences were directly downloaded from
#' https://trace.ncbi.nlm.nih.gov/Traces/sra/?run=SRR8383507} \cr
#' \code{# and peaks (BED file) were generated by Leyla Abbasova
#' (Neurogenomics Lab, Imperial College London)} \cr
#' \code{CnT_H3K27ac <- ChIPseeker::readPeakFile("path", as = "GRanges")} \cr
#' \code{CnT_H3K27ac <- CnT_H3K27ac[seqnames(CnT_H3K27ac)== "chr1"]} \cr
#' \code{my_label <-
#' c("name","score","strand","signalValue","pValue","qValue","peak")} \cr
#' \code{colnames(GenomicRanges::mcols(CnT_H3K27ac)) <- my_label} \cr
#' \code{usethis::use_data(CnT_H3K27ac)} \cr
#'
#' @usage data("CnT_H3K27ac")
"CnT_H3K27ac"

#' Example CUT&Run peak file
#'
#' Human H3K27ac peak file generated with CUT&Run using the K562 cell line
#' from Meers et al., (2019). Human genome build hg19 was used.
#' Raw sequencing data were obtained from GEO
#' (\url{https://trace.ncbi.nlm.nih.gov/Traces/sra/?run=SRR8581604}).
#' Peak calling was performed by Leyla Abbasova using MACS2.
#' The resulting peak file (.BED) was then imported as a GRanges object.
#' Only peaks located on chromosome 1 were kept to reduce the dataset size.
#'
#' @source
#' The code to prepare the .Rda file from the raw peak file is:
#'
#' \code{# sequences were directly downloaded from
#'  https://trace.ncbi.nlm.nih.gov/Traces/sra/?run=SRR8581604} \cr
#' \code{# and peaks (BED file) were generated by Leyla Abbasova
#' (Neurogenomics Lab, Imperial College London)} \cr
#' \code{CnR_H3K27ac <- ChIPseeker::readPeakFile("path", as = "GRanges")} \cr
#' \code{CnR_H3K27ac <- CnR_H3K27ac[seqnames(CnR_H3K27ac)== "chr1"]} \cr
#' \code{my_label <-
#' c("name","score","strand","signalValue","pValue","qValue","peak")} \cr
#' \code{colnames(GenomicRanges::mcols(CnR_H3K27ac)) <- my_label} \cr
#' \code{usethis::use_data(CnR_H3K27ac, overwrite = TRUE)} \cr
#'
#' @usage data("CnR_H3K27ac")
"CnR_H3K27ac"

# NOTE(review): SRR8581604 is the accession this file gives for the CUT&Run
# dataset (CnR_H3K27ac); the CUT&Tag dataset (CnT_H3K27ac) cites SRR8383507.
# Confirm which accession this CUT&Tag Picard output actually belongs to.
#' Example Picard duplication metrics file 1
#'
#' Duplication metrics output of CUT&Tag H3K27ac file
#' (sample accession: SRR8581604).
#' Raw sequences were aligned to the hg19 genome, after which Picard
#' was run by Leyla Abbasova.
#' The duplication summary output generated by Picard was reduced to its
#' first row to limit the size of the data.
#'
#' @source
#' The code to prepare the .Rda file is:
#'
#' \code{picard <- read.table("path/to/picard/duplication/output",
#' header = TRUE, fill = TRUE)} \cr
#' \code{CnT_H3K27ac_picard <- picard[1,]} \cr
#' \code{usethis::use_data(CnT_H3K27ac_picard, overwrite = TRUE)} \cr
#'
#' @usage data("CnT_H3K27ac_picard")
"CnT_H3K27ac_picard"

#' Example Picard duplication metrics file 2
#'
#' Duplication metrics output of CUT&Run H3K27ac file
#' (sample accession: SRR8581604).
#' Raw sequences were aligned to the hg19 genome, after which Picard
#' was run by Leyla Abbasova.
#' The duplication summary output generated by Picard was reduced to its
#' first row to limit the size of the data.
#'
#' @source
#' The code to prepare the .Rda file is:
#'
#' \code{picard <- read.table("path/to/picard/duplication/output",
#' header = TRUE, fill = TRUE)} \cr
#' \code{CnR_H3K27ac_picard <- picard[1,]} \cr
#' \code{usethis::use_data(CnR_H3K27ac_picard, overwrite = TRUE)} \cr
#'
#' @usage data("CnR_H3K27ac_picard")
"CnR_H3K27ac_picard"

#' Human genome hg19 blacklisted regions
#'
#' Obtained from \url{https://www.encodeproject.org/files/ENCFF001TDO/}.
#' The ENCODE blacklist includes regions of the hg19 genome that have anomalous
#' and/or unstructured signals independent of the cell line or experiment.
#' Removing ENCODE blacklisted regions is recommended as a quality-control
#' measure.
#'
#' @source
#' The code to prepare the .Rda file is:
#'
#' \code{# blacklisted regions were directly downloaded} \cr
#' \code{# from https://www.encodeproject.org/files/ENCFF001TDO/} \cr
#' \code{hg19_blacklist <-
#' ChIPseeker::readPeakFile(file.path(path), as = "GRanges")} \cr
#' \code{usethis::use_data(hg19_blacklist, overwrite = TRUE)} \cr
#'
#' @usage data("hg19_blacklist")
"hg19_blacklist"

#' Human genome hg38 blacklisted regions
#'
#' Obtained from \url{https://www.encodeproject.org/files/ENCFF356LFX/}.
#' The ENCODE blacklist includes regions of the hg38 genome that have anomalous
#' and/or unstructured signals independent of the cell line or experiment.
#' Removing ENCODE blacklisted regions is recommended as a quality-control
#' measure.
#'
#' @source
#' The code to prepare the .Rda file is:
#'
#' \code{## blacklisted regions were directly downloaded} \cr
#' \code{## from https://www.encodeproject.org/files/ENCFF356LFX/} \cr
#' \code{hg38_blacklist <-
#' ChIPseeker::readPeakFile(file.path(path), as = "GRanges")} \cr
#' \code{usethis::use_data(hg38_blacklist, overwrite = TRUE)} \cr
#'
#' @usage data("hg38_blacklist")
"hg38_blacklist"

#' Mouse genome mm10 blacklisted regions
#'
#' Obtained from \url{https://www.encodeproject.org/files/ENCFF547MET/}.
#' The ENCODE blacklist includes regions of the mm10 genome that have anomalous
#' and/or unstructured signals independent of the cell line or experiment.
#' Removing ENCODE blacklisted regions is recommended as a quality-control
#' measure.
#'
#' @source
#' The code to prepare the .Rda file is:
#'
#' \code{## blacklisted regions were directly downloaded} \cr
#' \code{## from https://www.encodeproject.org/files/ENCFF547MET/} \cr
#' \code{mm10_blacklist <-
#' ChIPseeker::readPeakFile(file.path(path), as = "GRanges")} \cr
#' \code{usethis::use_data(mm10_blacklist, overwrite = TRUE)} \cr
#'
#' @usage data("mm10_blacklist")
"mm10_blacklist"


#' Mouse genome mm9 blacklisted regions
#'
#' Blacklisted regions of the mm9 genome build,
#' obtained by lifting over the \code{mm10_blacklist}.
#'
#' @source
#' The code to prepare the .Rda file is:
#'
#' \code{
#' tmp <- base::get("mm10_blacklist", asNamespace("EpiCompare"))
#' mm9_blacklist <- liftover_grlist(grlist = tmp,
#'                                  input_build = "mm10",
#'                                  output_build = "mm9",
#'                                  keep_chr = NULL)
#' usethis::use_data(mm9_blacklist, overwrite = TRUE)
#' }
#'
#' @usage data("mm9_blacklist")
"mm9_blacklist"
# neurogenomics/EpiCompare documentation built on April 30, 2024, 3:58 p.m.