Nothing
## -----------------------------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.align = "center"
)
## -----------------------------------------------------------------------------
library(valr)
library(dplyr)
library(ggplot2)
library(tibble)
## -----------------------------------------------------------------------------
library(valr)
library(dplyr)
snps <- read_bed(valr_example("hg19.snps147.chr22.bed.gz"))
genes <- read_bed(valr_example("genes.hg19.chr22.bed.gz"))
# find snps in intergenic regions
intergenic <- bed_subtract(snps, genes)
# distance from intergenic snps to nearest gene
nearby <- bed_closest(intergenic, genes)
nearby |>
select(starts_with("name"), .overlap, .dist) |>
filter(abs(.dist) < 1000)
## -----------------------------------------------------------------------------
bed_file <- valr_example("3fields.bed.gz")
read_bed(bed_file) # accepts filepaths or URLs
## -----------------------------------------------------------------------------
bed <- tribble(
~chrom, ~start, ~end,
"chr1", 1657492, 2657492,
"chr2", 2501324, 3094650
)
bed
## -----------------------------------------------------------------------------
# a chromosome 100 basepairs in length
chrom <- tribble(
~chrom, ~start, ~end,
"chr1", 0, 100
)
chrom
# single base-pair intervals
bases <- tribble(
~chrom, ~start, ~end,
"chr1", 0, 1, # first base of chromosome
"chr1", 1, 2, # second base of chromosome
"chr1", 99, 100 # last base of chromosome
)
bases
## -----------------------------------------------------------------------------
# # access the `refGene` tbl on the `hg38` assembly.
# if (require(RMariaDB)) {
# ucsc <- db_ucsc("hg38")
# tbl(ucsc, "refGene")
# }
## -----------------------------------------------------------------------------
x <- tribble(
~chrom, ~start, ~end,
"chr1", 25, 50,
"chr1", 100, 125
)
y <- tribble(
~chrom, ~start, ~end,
"chr1", 30, 75
)
bed_glyph(bed_intersect(x, y))
## -----------------------------------------------------------------------------
x <- tribble(
~chrom, ~start, ~end,
"chr1", 1, 50,
"chr1", 10, 75,
"chr1", 100, 120
)
bed_glyph(bed_merge(x))
## -----------------------------------------------------------------------------
x <- tribble(
~chrom, ~start, ~end, ~strand,
"chr1", 1, 100, "+",
"chr1", 50, 150, "+",
"chr2", 100, 200, "-"
)
y <- tribble(
~chrom, ~start, ~end, ~strand,
"chr1", 50, 125, "+",
"chr1", 50, 150, "-",
"chr2", 50, 150, "+"
)
# intersect tbls by strand
x <- group_by(x, strand)
y <- group_by(y, strand)
bed_intersect(x, y)
## -----------------------------------------------------------------------------
x <- group_by(x, strand)
y <- flip_strands(y)
y <- group_by(y, strand)
bed_intersect(x, y)
## -----------------------------------------------------------------------------
# # calculate the mean and variance for a `value` column
# bed_map(a, b, .mean = mean(value), .var = var(value))
#
# # report concatenated and max values for merged intervals
# bed_merge(a, .concat = concat(value), .max = max(value))
## -----------------------------------------------------------------------------
# `valr_example()` identifies the path of example files
bedfile <- valr_example("genes.hg19.chr22.bed.gz")
genomefile <- valr_example("hg19.chrom.sizes.gz")
bgfile <- valr_example("hela.h3k4.chip.bg.gz")
genes <- read_bed(bedfile)
genome <- read_genome(genomefile)
y <- read_bedgraph(bgfile)
## -----------------------------------------------------------------------------
# generate 1 bp TSS intervals, `+` strand only
tss <- genes |>
filter(strand == "+") |>
mutate(end = start + 1)
# 1000 bp up and downstream
region_size <- 1000
# 50 bp windows
win_size <- 50
# add slop to the TSS, break into windows and add a group
x <- tss |>
bed_slop(genome, both = region_size) |>
bed_makewindows(win_size)
x
## -----------------------------------------------------------------------------
# map signals to TSS regions and calculate summary statistics.
res <- bed_map(x, y, win_sum = sum(value, na.rm = TRUE)) |>
group_by(.win_id) |>
summarize(
win_mean = mean(win_sum, na.rm = TRUE),
win_sd = sd(win_sum, na.rm = TRUE)
)
res
## -----------------------------------------------------------------------------
x_labels <- seq(
-region_size,
region_size,
by = win_size * 5
)
x_breaks <- seq(1, 41, by = 5)
sd_limits <- aes(
ymax = win_mean + win_sd,
ymin = win_mean - win_sd
)
ggplot(
res,
aes(
x = .win_id,
y = win_mean
)
) +
geom_point() +
geom_pointrange(sd_limits) +
scale_x_continuous(
labels = x_labels,
breaks = x_breaks
) +
labs(
x = "Position (bp from TSS)",
y = "Signal",
title = "Human H3K4me3 signal near transcription start sites"
) +
theme_classic()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.