tests/testthat/test_intersect.r

x <- tibble::tribble(
  ~chrom , ~start , ~end ,
  "chr1" ,    100 ,  200 ,
  "chr1" ,    150 ,  250 ,
  "chr1" ,    400 ,  500
)

y <- tibble::tribble(
  ~chrom , ~start , ~end ,
  "chr1" ,    175 ,  200 ,
  "chr1" ,    175 ,  225
)

test_that("simple overlap works", {
  res <- bed_intersect(x, y, min_overlap = 0L)
  expect_equal(nrow(res), 4)
})

test_that("invert param works", {
  res <- bed_intersect(x, y, invert = TRUE, min_overlap = 0L)
  expect_equal(nrow(res), 1)
})

test_that("multiple as", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    100 ,  200 ,
    "chr1" ,    100 ,  200 ,
    "chr1" ,    100 ,  200 ,
    "chr1" ,    100 ,  200 ,
    "chr1" ,    100 ,  200
  )

  y <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    175 ,  200
  )

  res <- bed_intersect(x, y, min_overlap = 0L)
  expect_equal(nrow(res), 5)
})

test_that("multple bs", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    100 ,  200
  )

  y <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    175 ,  200 ,
    "chr1" ,    175 ,  200 ,
    "chr1" ,    175 ,  200 ,
    "chr1" ,    175 ,  200 ,
    "chr1" ,    175 ,  200
  )

  res <- bed_intersect(x, y, min_overlap = 0L)
  expect_equal(nrow(res), 5)
})

test_that("no overlaps returns empty df", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    100 ,  200
  )
  y <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    300 ,  400
  )
  res <- bed_intersect(x, y, min_overlap = 0L)
  expect_true("data.frame" %in% class(res))
  expect_equal(nrow(res), 0)
})

test_that("duplicate intervals are removed (#23)", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    100 ,  500 ,
    "chr1" ,    175 ,  200
  )

  y <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    150 ,  400 ,
    "chr1" ,    151 ,  401
  )

  res <- bed_intersect(x, y, min_overlap = 0L)
  expect_equal(nrow(res), 4)
})

test_that("suffixes disambiguate x/y columns (#28)", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end , ~name , ~score , ~strand ,
    "chr1" ,   1000 , 1500 , "."   , "."    , "-"
  )

  y <- tibble::tribble(
    ~chrom , ~start , ~end , ~name , ~score , ~strand ,
    "chr1" ,   1000 , 1200 , "."   , "."    , "-"
  )

  res <- bed_intersect(x, y, min_overlap = 0L)
  expect_true("start.y" %in% colnames(res))
})

test_that("incorrect `suffix` args throw errors", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end , ~name , ~score ,
    "chr1" ,   1000 , 1500 , "."   , "."
  )

  y <- tibble::tribble(
    ~chrom , ~start , ~end , ~name , ~score ,
    "chr1" ,   1000 , 1200 , "."   , "."
  )

  expect_error(bed_intersect(x, y, suffix = "TESTING", min_overlap = 0L))
})

test_that("intersections from x bed_tbl with more chroms than y are captured", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    100 ,  200 ,
    "chr3" ,    400 ,  500
  )

  y <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr3" ,    425 ,  475
  )

  res <- bed_intersect(x, y, min_overlap = 0L)
  expect_true("chr3" %in% res$chrom)
})

test_that("intersections from y bed_tbl with more chroms are captured", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr3" ,    400 ,  500
  )

  y <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    100 ,  200 ,
    "chr3" ,    425 ,  475
  )

  res <- bed_intersect(x, y, min_overlap = 0L)
  expect_true("chr3" %in% res$chrom)
})

test_that("input x groups are used for comparing intervals issue #108", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end , ~group ,
    "chr1" ,    100 ,  200 , "A"    ,
    "chr1" ,    200 ,  400 , "A"    ,
    "chr1" ,    300 ,  500 , "A"    ,
    "chr1" ,    125 ,  175 , "B"    ,
    "chr1" ,    150 ,  200 , "B"
  )
  x <- arrange(x, chrom, start)
  x <- group_by(x, group)
  res <- bed_intersect(x, x, min_overlap = 0L)
  expect_true(all(res$group.x == res$group.y))
})

test_that("tbls grouped by strand are processed", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end , ~name , ~score , ~strand ,
    "chr1" ,   1000 , 1500 , "."   , "."    , "+"
  )

  y <- tibble::tribble(
    ~chrom , ~start , ~end , ~name , ~score , ~strand ,
    "chr1" ,   1000 , 1200 , "."   , "."    , "-"
  )

  res <- bed_intersect(
    group_by(x, strand),
    group_by(y, strand),
    min_overlap = 0L
  )
  expect_equal(nrow(res), 0)

  # flip strands
  res <- bed_intersect(
    group_by(x, strand),
    group_by(flip_strands(y), strand),
    min_overlap = 0L
  )
  expect_equal(nrow(res), 1)
})

test_that("invert = T, and custom suffixes dont result in failed anti_join()", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr3" ,    500 ,  600
  )

  y <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    100 ,  200 ,
    "chr3" ,    425 ,  475
  )

  res <- bed_intersect(x, y, invert = T, suffix = c("a", "b"), min_overlap = 0L)
  expect_equal(nrow(res), 1)
})

test_that("multiple y tbl_intervals can be passed to bed_intersect (#220)", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    100 ,  500 ,
    "chr2" ,    200 ,  400 ,
    "chr2" ,    300 ,  500 ,
    "chr2" ,    800 ,  900
  )

  y <- tibble::tribble(
    ~chrom , ~start , ~end , ~value ,
    "chr1" ,    150 ,  400 ,    100 ,
    "chr1" ,    500 ,  550 ,    100 ,
    "chr2" ,    230 ,  430 ,    200 ,
    "chr2" ,    350 ,  430 ,    300
  )

  z <- tibble::tribble(
    ~chrom , ~start , ~end , ~value ,
    "chr1" ,    150 ,  400 ,    100 ,
    "chr1" ,    500 ,  550 ,    100 ,
    "chr2" ,    230 ,  430 ,    200 ,
    "chr2" ,    750 ,  900 ,    400
  )

  res <- bed_intersect(x, y, z, min_overlap = 0L)
  expect_true(all(c("y", "z") %in% res$.source))

  # check that named args can be passed also
  res <- bed_intersect(x, first_file = y, second_file = z, min_overlap = 0L)
  expect_true(all(c("first_file", "second_file") %in% res$.source))

  # check that list input is parsed correctly
  res1 <- bed_intersect(x, first_file = y, second_file = z, min_overlap = 0L)
  res2 <- bed_intersect(
    x,
    list(first_file = y, second_file = z),
    min_overlap = 0L
  )
  expect_equal(res1, res2)

  res1 <- bed_intersect(x, y, z, min_overlap = 0L)
  res2 <- bed_intersect(x, list(y, z), min_overlap = 0L)
  expect_equal(res1, res2)
})

test_that("groups are respected when passing multiple y tbl_intervals ", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end , ~name , ~score , ~strand ,
    "chr1" ,   1000 , 1500 , "."   , "."    , "+"
  )

  y <- tibble::tribble(
    ~chrom , ~start , ~end , ~name , ~score , ~strand ,
    "chr1" ,   1000 , 1200 , "."   , "."    , "-"
  )

  z <- tibble::tribble(
    ~chrom , ~start , ~end , ~name , ~score , ~strand ,
    "chr1" ,   1000 , 1200 , "."   , "."    , "+"
  )
  x <- group_by(x, strand)
  y <- group_by(y, strand)
  z <- group_by(z, strand)

  res <- bed_intersect(x, y, z, min_overlap = 0L)
  expect_equal(nrow(res), 1)
})

test_that("same intervals are reported with single and multiple intersection", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    100 ,  500 ,
    "chr2" ,    200 ,  400 ,
    "chr2" ,    300 ,  500 ,
    "chr2" ,    800 ,  900
  )

  y <- tibble::tribble(
    ~chrom , ~start , ~end , ~value ,
    "chr1" ,    150 ,  400 ,    100 ,
    "chr1" ,    500 ,  550 ,    100 ,
    "chr2" ,    230 ,  430 ,    200 ,
    "chr2" ,    350 ,  430 ,    300
  )

  z <- tibble::tribble(
    ~chrom , ~start , ~end , ~value ,
    "chr1" ,    150 ,  400 ,    100 ,
    "chr1" ,    500 ,  550 ,    100 ,
    "chr2" ,    230 ,  430 ,    200 ,
    "chr2" ,    750 ,  900 ,    400
  )
  a <- bed_intersect(x, y, min_overlap = 0L)
  b <- bed_intersect(x, z, min_overlap = 0L)
  orig <- bind_rows(a, b) |>
    arrange(chrom, start.x, start.y)
  new <- bed_intersect(x, y, z, min_overlap = 0L) |>
    arrange(chrom, start.x, start.y) |>
    select(-.source)
  expect_true(all(orig == new))
})

test_that("unmatched groups are included when invert = TRUE", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end , ~group ,
    "chr1" ,    100 ,  500 , "A"    ,
    "chr2" ,    200 ,  400 , "B"    , # unmatched
    "chr2" ,    300 ,  500 , "A"    ,
    "chr2" ,    800 ,  900 , "A"
  ) |>
    group_by(chrom, group)

  y <- tibble::tribble(
    ~chrom , ~start , ~end , ~group ,
    "chr1" ,    150 ,  400 , "A"    ,
    "chr1" ,    500 ,  550 , "A"    ,
    "chr2" ,    230 ,  430 , "A"    ,
    "chr2" ,    350 ,  430 , "A"
  ) |>
    group_by(chrom, group)

  pred <- tibble::tribble(
    ~chrom , ~start , ~end , ~group ,
    "chr2" ,    200 ,  400 , "B"    , # unmatched
    "chr2" ,    800 ,  900 , "A"
  )

  res <- bed_intersect(x, y, invert = TRUE, min_overlap = 0L)
  expect_equal(res, pred, ignore_attr = TRUE)
})

# from https://github.com/arq5x/bedtools2/blob/master/test/intersect/test-intersect.sh
test_that("0 length records", {
  x <- tibble::tribble(
    ~chrom , ~start   , ~end     ,
    "chr7" , 33059403 , 33059403
  )

  y <- tibble::tribble(
    ~chrom , ~start    , ~end      , ~group   , ~type    ,
    "chr7" , 32599076  , 33069221  , "NAq"    , "intron" ,
    "chr7" , 33059336L , 33060883L , "NT5C3A" , "intron"
  )
  res <- bed_intersect(x, y, min_overlap = 0L)
  expect_equal(res$start.x - res$end.x, c(0, 0))
  expect_equal(res$start.y - res$end.y, c(-470145, -1547))
})


test_that("list input is robustly handled #380", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    100 ,  500 ,
    "chr2" ,    200 ,  400 ,
    "chr2" ,    300 ,  500 ,
    "chr2" ,    800 ,  900
  )

  y <- tibble::tribble(
    ~chrom , ~start , ~end , ~value ,
    "chr1" ,    150 ,  400 ,    100 ,
    "chr1" ,    500 ,  550 ,    100 ,
    "chr2" ,    230 ,  430 ,    200 ,
    "chr2" ,    350 ,  430 ,    300
  )

  z <- tibble::tribble(
    ~chrom , ~start , ~end , ~value ,
    "chr1" ,    150 ,  400 ,    100 ,
    "chr1" ,    500 ,  550 ,    100 ,
    "chr2" ,    230 ,  430 ,    200 ,
    "chr2" ,    750 ,  900 ,    400
  )

  lst <- list(y, z)

  expect_equal(nrow(bed_intersect(x, y, z, min_overlap = 0L)), 11)
  expect_equal(nrow(bed_intersect(x, list(y, z), min_overlap = 0L)), 11)
  expect_equal(nrow(bed_intersect(x, lst[1:2], min_overlap = 0L)), 11)

  expect_equal(nrow(bed_intersect(x, lst, min_overlap = 0L)), 11)
  expect_equal(nrow(bed_intersect(x, lst[[1]], lst[[2]], min_overlap = 0L)), 11)
  expect_equal(nrow(bed_intersect(x, lst[1], min_overlap = 0L)), 6)
})

# Tests for min_overlap parameter (bedtools-compatible behavior)
test_that("min_overlap = 1L excludes book-ended intervals", {
  # Book-ended intervals: x ends exactly where y starts
  x <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    100 ,  200
  )
  y <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    200 ,  300
  )

  # With min_overlap = 0L (legacy), book-ended intervals ARE considered overlapping
  res_legacy <- bed_intersect(x, y, min_overlap = 0L)
  expect_equal(nrow(res_legacy), 1)

  # With min_overlap = 1L (bedtools), book-ended intervals are NOT overlapping
  res_strict <- bed_intersect(x, y, min_overlap = 1L)
  expect_equal(nrow(res_strict), 0)
})

test_that("min_overlap = 1L works for actual overlaps", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    100 ,  200
  )
  y <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    150 ,  250
  )

  # Both should find the overlap
  res_legacy <- bed_intersect(x, y, min_overlap = 0L)
  res_strict <- bed_intersect(x, y, min_overlap = 1L)

  expect_equal(nrow(res_legacy), 1)
  expect_equal(nrow(res_strict), 1)
  expect_equal(res_legacy$.overlap, 50)
  expect_equal(res_strict$.overlap, 50)
})

test_that("min_overlap respects larger overlap thresholds", {
  x <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    100 ,  200
  )
  y <- tibble::tribble(
    ~chrom , ~start , ~end ,
    "chr1" ,    150 ,  250 ,
    "chr1" ,    195 ,  250
  )

  # Both have some overlap
  res_1 <- bed_intersect(x, y, min_overlap = 1L)
  expect_equal(nrow(res_1), 2)

  # Only first y interval has >= 10bp overlap
  res_10 <- bed_intersect(x, y, min_overlap = 10L)
  expect_equal(nrow(res_10), 1)
  expect_equal(res_10$.overlap, 50)

  # No y intervals have >= 100bp overlap
  res_100 <- bed_intersect(x, y, min_overlap = 100L)
  expect_equal(nrow(res_100), 0)
})

Try the valr package in your browser

Any scripts or data that you put into this service are public.

valr documentation built on Dec. 10, 2025, 9:08 a.m.