knitr::opts_chunk$set(echo = TRUE)
DNAmCrosshyb
: Collection of functions useful in detecting cross-reactive probes on Illumina 450k/EPIC DNA methylation arrays.
The following functions are implemented:
map_probes
: Map 450k/EPIC probes to the reference genome, allowing for mismatches/INDELs.get_nr_matches_per_probe
: Get number of matches for per match length (on output of map_probes
function).find_repeat_overlaps
: Check if matches overlap with repeats (UCSC masks) (on output of map_probes
function).map_probes_sequence
: Map 450k/EPIC probes to a user-specified DNA sequence. get_probe_overlaps
: Check if there are overlapping 3'-subsequences in a set of probes. get_OOB
: Get out-of-band normalized beta-values and/or intensities. locusplot
: Plot a region of p-values. ## Install from repo ## Note: installation may take a little while, since the package includes ## probe sequences and annotation as internal data. devtools::install_github("pjhop/DNAmCrosshyb") ## Install from source install.packages("DNAmCrosshyb.tar.gz", repos=NULL, type="source") # current version packageVersion("DNAmCrosshyb")
library(DNAmCrosshyb) library(ggplot2) library(dplyr)
Map some probes to reference genome (hg19), for each width from 30bp to 50bp (in steps of 5). Bisulfite-converted reference genomes can be generated using the following scripts: https://github.com/pjhop/DNAmCrosshyb/blob/master/data-raw/bisulfite_convert_hg19.R and https://github.com/pjhop/DNAmCrosshyb/blob/master/data-raw/bisulfite_convert_hg38.R
Bisulfite-converted genomes in the R .rds file format are available at: https://zenodo.org/record/4088020
library(DNAmCrosshyb) probes <- c("cg00005164", "cg12344104", "cg25521682", "cg27660038", "cg20554142", "cg03890998", "cg00947801") matches <- map_probes(probes, path = "genome_bs/hg19", chromosomes = "all", min_width = 30, max_width = 50, step_size = 5, allow_mismatch = FALSE, allow_INDEL = FALSE, cores = 1 ) head(matches %>% data.frame())
nr_matches <- get_nr_matches_per_probe(matches) nr_matches
matches <- find_repeat_overlaps(matches, genome_build = "hg19", min_overlap = "any") head(matches %>% data.frame())
Map probes to the C9orf72 hexanucleotide repeat:
# Map probes to the C9orf72 hexanucleotide repeat repeat_sequence <- paste(rep("GGCCCC", 10), collapse="") matches_c9 <- map_probes_sequence(sequence = repeat_sequence, next_base = "G", prev_base = "C", array = "450k", min_width = 10, max_width = 25, allow_indel = FALSE, allow_mismatch = FALSE, step_size = 1, use_Y = FALSE, methylation_status = "methylated") head(matches_c9 %>% data.frame())
# Map probes to the C9orf72 hexanucleotide repeat repeat_sequence <- paste(rep("GGCCCC", 10), collapse="") matches_c9 <- map_probes_sequence(sequence = repeat_sequence, next_base = "G", prev_base = "C", array = "450k", min_width = 10, max_width = 25, allow_indel = FALSE, allow_mismatch = TRUE, min_distance = 6, step_size = 1, use_Y = FALSE, methylation_status = "methylated") head(matches_c9 %>% dplyr::filter(n_mismatch > 0) %>% data.frame())
# Map probes to the C9orf72 hexanucleotide repeat repeat_sequence <- paste(rep("GGCCCC", 10), collapse="") matches_c9_2 <- map_probes_sequence(sequence = repeat_sequence, next_base = "G", prev_base = "C", array = "450k", min_width = 10, max_width = 25, allow_indel = FALSE, allow_mismatch = TRUE, min_distance = 6, step_size = 1, use_Y = FALSE, methylation_status = "methylated", cores = 4) head(matches_c9_2 %>% data.frame())
The get_probe_overlaps
can be used to check for overlapping 3'-subsequences in a set of probes. Here we apply this function to probes that map to the C9 repeat (identified above) and some random probes
matches_c9_20bp <- matches_c9 %>% dplyr::filter(width >= 20) set.seed(10) random_probes <- sample(DNAmCrosshyb:::anno450k$Name, size = 10) # Calculate overlaps overlaps <- get_probe_overlaps(c(unique(matches_c9_20bp$Probe), random_probes)) ## Note this also includes overlap between _unmethylated and _methylated bead types of type I probes overlaps %>% dplyr::arrange(desc(overlaps$overlap)) %>% head(10)
Plot:
ggplot(overlaps %>% filter(Probe_Index != Probe_Target), aes(x = overlap)) + geom_histogram() + theme_classic() + xlab("Pair-wise probe overlap (bp)") + theme(text = element_text(size=13))
Extracting OOB betas from an RGset (minfi).
# For this example, the minfiData package is used. library(minfiData) # Load data baseDir <- system.file("extdata", package="minfiData") samplesheet <- read.metharray.sheet(baseDir) rgset <- read.metharray(samplesheet$Basename, extended=TRUE) # Get normalized OOB betas (only keep beta-values, other options are 'intensity' and 'both') oob_betas_normalized <- get_OOB(rgset, normalized = TRUE, keep = "beta") # Show subset oob_betas_normalized$beta[1:5,1:5]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.