# https://github.com/arq5x/bedtools2/blob/master/test/closest/test-closest.sh
test_that("1bp closer, check for off-by-one errors", {
x <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 10, 20
)
y <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 9, 10,
"chr1", 19, 20,
"chr1", 20, 21
)
res <- bed_closest(x, y)
expect_equal(nrow(res), 3)
expect_true(all(c(-1, 0, 1) == res$.dist))
expect_true(all(c(0, 1, 0) == res$.overlap))
})
test_that("reciprocal test of 1bp closer, check for off-by-one errors", {
x <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 10, 20
)
y <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 9, 10,
"chr1", 19, 20,
"chr1", 20, 21
)
res <- bed_closest(y, x)
expect_equal(nrow(res), 3)
expect_true(all(c(1, 0, -1) == res$.dist))
expect_true(all(c(0, 1, 0) == res$.overlap))
})
test_that("0bp apart closer, check for off-by-one errors", {
x <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 10, 20
)
y <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 9, 10,
"chr1", 19, 21,
"chr1", 20, 21
)
res <- bed_closest(x, y)
expect_equal(nrow(res), 3)
expect_true(all(c(-1, 0, 1) == res$.dist))
expect_true(all(c(0, 1, 0) == res$.overlap))
})
test_that("reciprocal of 0bp apart closer, check for off-by-one errors", {
x <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 10, 20
)
y <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 9, 10,
"chr1", 19, 21,
"chr1", 20, 21
)
res <- bed_closest(y, x)
res2 <- bed_closest(x, y)
expect_equal(nrow(res), 3)
expect_equal(nrow(res), 3)
expect_true(all(c(1, 0, -1) == res$.dist))
expect_true(all(c(0, 1, 0) == res$.overlap))
})
test_that("check that first left interval at index 0 is not lost", {
x <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 10, 20
)
y <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 9, 10
)
res <- bed_closest(x, y)
expect_equal(nrow(res), 1)
})
test_that("check that first right interval at index 0 is not lost", {
x <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 10, 20
)
y <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 20, 21
)
res <- bed_closest(x, y)
expect_equal(nrow(res), 1)
})
test_that("check that strand closest works (strand = TRUE)", {
x <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 100, 200, "a", 10, "+"
)
y <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 90, 120, "b", 1, "-"
)
res <- bed_closest(group_by(x, strand), group_by(y, strand))
# report NA
expect_equal(nrow(res), 1)
expect_equal(nrow(na.omit(res)), 0)
})
test_that("check that same strand is reported (strand = TRUE", {
x <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 80, 100, "q1", 1, "+"
)
y <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 5, 15, "d1.1", 1, "+",
"chr1", 20, 60, "d1.2", 2, "-",
"chr1", 200, 220, "d1.3", 3, "-"
)
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.distance,
"chr1", 80, 100, "q1", 1, "+", 5, 15, "d1.1", 1, "+", 0, -66
)
res <- bed_closest(group_by(x, strand), group_by(y, strand))
expect_true(all(pred == res))
})
test_that("check that different strand is reported (strand_opp = TRUE", {
x <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 80, 100, "q1", 1, "+"
)
y <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 5, 15, "d1.1", 1, "+",
"chr1", 20, 60, "d1.2", 2, "-",
"chr1", 200, 220, "d1.3", 3, "-"
)
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist,
"chr1", 80, 100, "q1", 1, "+", 20, 60, "d1.2", 2, "+", 0, -21
)
res <- bed_closest(group_by(x, strand), group_by(flip_strands(y), strand))
expect_true(all(pred == res))
})
test_that("check that reciprocal strand closest works (strand_opp = TRUE) ", {
x <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 100, 200, "a", 10, "+"
)
y <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 80, 90, "b", 1, "-"
)
res <- bed_closest(group_by(x, strand), group_by(flip_strands(y), strand))
expect_equal(nrow(res), 1)
})
test_that("overlapping intervals are removed (overlap = F)", {
x <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 10, 20
)
y <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 9, 10,
"chr1", 19, 21,
"chr1", 20, 21
)
res <- bed_closest(x, y, overlap = FALSE)
expect_true(res[2, "start.y"] != 19)
})
test_that("duplicate intervals are not reported", {
x <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 100, 200
)
y <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 100, 200,
"chr1", 150, 200,
"chr1", 550, 580,
"chr2", 7000, 8500
)
res <- bed_closest(x, y)
expect_false(any(duplicated(res)))
})
test_that("all overlapping features are reported", {
x <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 100, 200
)
y <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 100, 200,
"chr1", 150, 200,
"chr1", 50, 100,
"chr1", 200, 300
)
exp <- tibble::tribble(
~chrom, ~start.x, ~start.y,
"chr1", 100, 200
)
res <- bed_closest(x, y)
expect_true(nrow(res) == 4)
})
test_that("test reporting of first overlapping feature and
overlap = F excludes overlapping intervals", {
x <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 100, 101,
"chr1", 200, 201,
"chr1", 300, 301,
"chr1", 100000, 100010,
"chr1", 100020, 100040,
"chr2", 1, 10,
"chr2", 20, 30
)
y <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 100, 101,
"chr1", 150, 201,
"chr1", 175, 375
)
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~start.y, ~end.y, ~.dist,
"chr1", 100, 101, 150, 201, 50,
"chr1", 200, 201, 100, 101, -100,
"chr1", 300, 301, 150, 201, -100,
"chr1", 100000, 100010, 175, 375, -99626,
"chr1", 100020, 100040, 175, 375, -99646,
"chr2", 1, 10, NA, NA, NA,
"chr2", 20, 30, NA, NA, NA
)
res <- bed_closest(x, y, overlap = F)
expect_equal(res, pred)
})
### test distance reporting conditions ###
### tbls to test
d_q1 <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 80, 100, "d_q1.1", 5, "+"
)
d_q2 <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 80, 100, "d_q2.1", 5, "-"
)
d_d1F <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 40, 60, "d1F.1", 10, "+"
)
d_d1R <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 40, 60, "d1R.1", 10, "-"
)
d_d2F <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 140, 160, "d2F.1", 10, "+"
)
d_d2R <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 140, 160, "d2R.1", 10, "-"
)
test_that("default distance reporting works for forward hit on left, forward query", {
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist,
"chr1", 80, 100, "d_q1.1", 5, "+", 40, 60, "d1F.1", 10, "+", 0, -21
)
res <- bed_closest(d_q1, d_d1F)
expect_true(all(pred == res))
})
test_that("default distance reporting works for reverse hit on left, forward query", {
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist,
"chr1", 80, 100, "d_q1.1", 5, "+", 40, 60, "d1R.1", 10, "-", 0, -21
)
res <- bed_closest(d_q1, d_d1R)
expect_true(all(pred == res))
})
test_that("default distance reporting works for forward hit on left, reverse query", {
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist,
"chr1", 80, 100, "d_q2.1", 5, "-", 40, 60, "d1F.1", 10, "+", 0, -21
)
res <- bed_closest(d_q2, d_d1F)
expect_true(all(pred == res))
})
test_that("default distance reporting works for reverse hit on left, reverse query", {
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist,
"chr1", 80, 100, "d_q2.1", 5, "-", 40, 60, "d1R.1", 10, "-", 0, -21
)
res <- bed_closest(d_q2, d_d1R)
expect_true(all(pred == res))
})
test_that("default distance reporting works for forward hit on right, forward query", {
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist,
"chr1", 80, 100, "d_q1.1", 5, "+", 140, 160, "d2F.1", 10, "+", 0, 41
)
res <- bed_closest(d_q1, d_d2F)
expect_true(all(pred == res))
})
test_that("default distance reporting works for reverse hit on right, forward query", {
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist,
"chr1", 80, 100, "d_q1.1", 5, "+", 140, 160, "d2R.1", 10, "-", 0, 41
)
res <- bed_closest(d_q1, d_d2R)
expect_true(all(pred == res))
})
test_that("default distance reporting works for forward hit on right, reverse query", {
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist,
"chr1", 80, 100, "d_q2.1", 5, "-", 140, 160, "d2F.1", 10, "+", 0, 41
)
res <- bed_closest(d_q2, d_d2F)
expect_true(all(pred == res))
})
test_that("default distance reporting works for reverse hit on right, reverse query", {
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist,
"chr1", 80, 100, "d_q2.1", 5, "-", 140, 160, "d2R.1", 10, "-", 0, 41
)
res <- bed_closest(d_q2, d_d2R)
expect_true(all(pred == res))
})
### additional tbls for tests ###
a2 <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 10, 20, "a1", 1, "-"
)
b2 <- tibble::tribble(
~chrom, ~start, ~end, ~name, ~score, ~strand,
"chr1", 8, 9, "b1", 1, "+",
"chr1", 21, 22, "b2", 1, "-"
)
test_that("Make sure non-overlapping ties are reported ", {
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist,
"chr1", 10, 20, "a1", 1, "-", 21, 22, "b2", 1, "-", 0, 2,
"chr1", 10, 20, "a1", 1, "-", 8, 9, "b1", 1, "+", 0, -2
)
res <- bed_closest(a2, b2)
expect_equal(pred, res)
})
test_that("Make sure non-overlapping ties are reported with strand = T ", {
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist,
"chr1", 10, 20, "a1", 1, "-", 21, 22, "b2", 1, "-", 0, 2
)
res <- bed_closest(group_by(a2, strand), group_by(b2, strand))
expect_true(all(pred == res))
})
test_that("Make sure non-overlapping ties are reported with strand_opp = T ", {
pred <- tibble::tribble(
~chrom, ~start.x, ~end.x, ~name.x, ~score.x, ~strand.x, ~start.y, ~end.y, ~name.y, ~score.y, ~strand.y, ~.overlap, ~.dist,
"chr1", 10, 20, "a1", 1, "-", 8, 9, "b1", 1, "-", 0, -2
)
res <- bed_closest(group_by(a2, strand), group_by(flip_strands(b2), strand))
expect_true(all(pred == res))
})
test_that("Make sure that closest intervals are captured when intervals span multiple interval tree nodes issue #105", {
# when the y tbl has >= 64 intervals two nodes of the interval tree will be generated
snps <- read_bed(valr_example("hg19.snps147.chr22.bed.gz"), n_max = 10)
genes_one_node <- read_bed(valr_example("genes.hg19.chr22.bed.gz"), n_max = 63)
genes_two_nodes <- read_bed(valr_example("genes.hg19.chr22.bed.gz"), n_max = 64)
res_expt_one_node <- bed_closest(snps, genes_one_node)
res_expt_two_nodes <- bed_closest(snps, genes_two_nodes)
# adding one extra interval should not result in doubling the reported intervals
expect_false(nrow(res_expt_two_nodes) >= 2 * nrow(res_expt_one_node))
})
test_that("test that a max of two duplicated x ivls are returned, assuming non-overlapping, and non-duplicate y ivls #105", {
snps <- read_bed(valr_example("hg19.snps147.chr22.bed.gz"), n_max = 10)
genes <- read_bed(valr_example("genes.hg19.chr22.bed.gz"), n_max = 64)
# make sure there are no repeated y ivls (otherwise more than 2 x ivls should be reported)
genes <- group_by(genes, chrom, start, end)
genes <- mutate(genes, ivl_count = n())
genes <- filter(genes, ivl_count == 1)
genes <- select(genes, -ivl_count)
genes <- group_by(genes, chrom)
res <- bed_closest(snps, genes, overlap = FALSE)
res <- group_by(res, chrom, start.x, end.x)
res <- summarize(res, n = n(), .groups = "keep")
# there should not be more than 2 possible closest ivls.
expect_true(all(res$n <= 2))
genome <- tibble::tribble(
~chrom, ~size,
"chr1", 10000000,
"chr2", 50000000,
"chr3", 60000000,
"chrX", 5000000
)
x <- bed_random(genome, n = 1e5, seed = 1)
y <- bed_random(genome, n = 1e5, seed = 2)
x$idx <- seq_len(nrow(x))
y$idx <- seq_len(nrow(y))
res <- bed_closest(x, y, overlap = FALSE)
res <- group_by(res, idx.x)
res_grps <- summarize(res, n = n(), .groups = "keep")
# if more than 1 x ivl reported, then it is due to duplicated y ivls in input
multi_grps <- res_grps[res_grps$n > 1, ]
if (nrow(multi_grps) > 0) {
grps <- res[res$idx.x %in% multi_grps$idx.x, ]
grps <- group_by(grps, idx.x)
res <- summarize(grps,
n_res = n(),
same_abs_dist = length(unique(abs(.dist))) == 1,
idx_y_distinct = length(unique(idx.y)) == n_res
)
expect_true(all(res$same_abs_dist & res$idx_y_distinct))
}
})
test_that("ensure that subtraction is done with respect to input tbls issue#108", {
x <- tibble::tribble(
~chrom, ~start, ~end, ~group,
"chr1", 100, 200, "A",
"chr1", 200, 400, "A",
"chr1", 300, 500, "A",
"chr1", 125, 175, "C",
"chr1", 150, 200, "B"
)
y <- tibble::tribble(
~chrom, ~start, ~end, ~group,
"chr1", 100, 200, "A",
"chr1", 200, 400, "B",
"chr1", 300, 500, "A",
"chr1", 125, 175, "C",
"chr2", 150, 200, "A"
)
x_grouped <- arrange(x, chrom, start) |>
group_by(group, chrom)
y_grouped <- arrange(y, chrom, start) |>
group_by(group, chrom)
res <- bed_closest(x_grouped, y_grouped)
expect_true(all(res$group.x == res$group.y))
})
# from https://github.com/arq5x/bedtools2/blob/master/test/closest/test-closest.sh
test_that("test closest forcing -s yet no matching strands on chrom", {
x <- tibble::tribble(
~chrom, ~start, ~end, ~group, ~score, ~strand,
"chr1", 100, 200, "a", 10, "+"
)
y <- tibble::tribble(
~chrom, ~start, ~end, ~group, ~score, ~strand,
"chr1", 90, 120, "b", 1, "-"
)
res <- bed_closest(group_by(x, strand), group_by(y, strand))
expect_true(nrow(res) == 1)
expect_true(nrow(na.omit(res)) == 0)
})
test_that("test closest forcing -S with only an opp strands on chrom", {
x <- tibble::tribble(
~chrom, ~start, ~end, ~group, ~score, ~strand,
"chr1", 100, 200, "a", 10, "+"
)
y <- tibble::tribble(
~chrom, ~start, ~end, ~group, ~score, ~strand,
"chr1", 90, 120, "b", 1, "-"
)
res <- bed_closest(group_by(x, strand), group_by(flip_strands(y), strand))
expect_true(nrow(res) == 1)
})
test_that("Make sure non-overlapping ties are reported", {
x <- tibble::tribble(
~chrom, ~start, ~end, ~group, ~score, ~strand,
"chr1", 10, 20, "a1", 1, "-"
)
y <- tibble::tribble(
~chrom, ~start, ~end, ~group, ~score, ~strand,
"chr1", 8, 9, "b1", 1, "+",
"chr1", 21, 22, "b2", 1, "-"
)
res <- bed_closest(x, y)
expect_true(nrow(res) == 2)
})
test_that("Make sure non-overlapping ties are reported, with strand option", {
x <- tibble::tribble(
~chrom, ~start, ~end, ~group, ~score, ~strand,
"chr1", 10, 20, "a1", 1, "-"
)
y <- tibble::tribble(
~chrom, ~start, ~end, ~group, ~score, ~strand,
"chr1", 8, 9, "b1", 1, "+",
"chr1", 21, 22, "b2", 1, "-"
)
res <- bed_closest(group_by(x, strand), group_by(y, strand))
expect_true(nrow(res) == 1)
})
test_that("Make sure non-overlapping ties are reported, with strand-oppo option", {
x <- tibble::tribble(
~chrom, ~start, ~end, ~group, ~score, ~strand,
"chr1", 10, 20, "a1", 1, "-"
)
y <- tibble::tribble(
~chrom, ~start, ~end, ~group, ~score, ~strand,
"chr1", 8, 9, "b1", 1, "+",
"chr1", 21, 22, "b2", 1, "-"
)
res <- bed_closest(group_by(x, strand), group_by(flip_strands(y), strand))
expect_true(nrow(res) == 1)
})
test_that("check ties, single db", {
x <- tibble::tribble(
~chrom, ~start, ~end, ~group, ~score, ~strand,
"chr1", 10, 20, "a1", 1, "-"
)
y <- tibble::tribble(
~chrom, ~start, ~end, ~group, ~score, ~strand,
"chr1", 8, 9, "b1", 1, "+",
"chr1", 21, 22, "b2", 1, "-"
)
res <- bed_closest(x, y)
expect_true(nrow(res) == 2)
})
test_that("check reporting of adjacent intervals issue #348", {
x <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 10, 20
)
y <- tibble::tribble(
~chrom, ~start, ~end,
"chr1", 8, 9,
"chr1", 9, 10,
"chr1", 20, 21,
"chr1", 21, 22
)
res <- bed_closest(x, y)
expect_true(nrow(res) == 2)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.