tests/testthat/test_cluster.r

# https://github.com/arq5x/bedtools2/blob/master/test/cluster/test-cluster.sh

x <- tibble::tribble(
  ~chrom, ~start, ~end, ~name, ~id, ~strand,
  "chr1", 72017, 884436, "a", 1, "+",
  "chr1", 72017, 844113, "b", 2, "+",
  "chr1", 939517, 1011278, "c", 3, "+",
  "chr1", 1142976, 1203168, "d", 4, "+",
  "chr1", 1153667, 1298845, "e", 5, "-",
  "chr1", 1153667, 1219633, "f", 6, "+",
  "chr1", 1155173, 1200334, "g", 7, "-",
  "chr1", 1229798, 1500664, "h", 8, "-",
  "chr1", 1297735, 1357056, "i", 9, "+",
  "chr1", 1844181, 1931789, "j", 10, "-"
)

test_that("basic cluster works", {
  res <- bed_cluster(x)
  # test number of groups in output
  expect_equal(length(unique(res$.id)), 4)
  expect_equal(res$.id, c(1, 1, 2, 3, 3, 3, 3, 3, 3, 4))
})

test_that("stranded cluster works", {
  res <- bed_cluster(group_by(x, strand))
  # test number of groups in output
  expect_equal(length(unique(res$.id)), 6)
  expect_equal(res$.id, c(1, 1, 2, 3, 3, 4, 4, 4, 5, 6))
})

x <- tibble::tribble(
  ~chrom, ~start, ~end, ~name, ~id, ~strand,
  "chr1", 72017, 884436, "a", 1, "+",
  "chr1", 72017, 844113, "b", 2, "+",
  "chr1", 939517, 1011278, "c", 3, "+",
  "chr2", 940000, 990000, "d", 4, "-"
)

test_that("cluster ids are not repeated per group issue #171", {
  res <- bed_cluster(x)
  # test that groups have unique ids
  chr1_ids <- filter(res, chrom == "chr1") |>
    select(.id) |>
    unique() |>
    unlist()
  chr2_ids <- filter(res, chrom == "chr2") |>
    select(.id) |>
    unique() |>
    unlist()
  shared_ids <- intersect(chr1_ids, chr2_ids)
  expect_equal(length(shared_ids), 0)
})


test_that("guard against max_dist argument preventing clustering first interval in contig issue #388", {
  x <- tibble::tribble(
    ~chrom, ~start, ~end,
    "a", 1, 10,
    "a", 20, 50,
    "b", 20, 50,
    "c", 100, 100
  )

  res <- bed_cluster(x, max_dist = 0)
  expect_equal(res$.id, 1L:4L)

  res <- bed_cluster(x, max_dist = 100)
  expect_equal(res$.id, c(1, 1, 2, 3))

  res <- bed_cluster(x, max_dist = 10)
  expect_equal(res$.id, c(1, 1, 2, 3))

  res <- bed_cluster(x, max_dist = 9)
  expect_equal(res$.id, 1L:4L)
})

test_that("check for off by one errors, related to issue #401 @kcamnairb ", {
  x <- tibble::tribble(
    ~chrom, ~start, ~end,
    "chr1", 1,      10,
    "chr1", 5,      20,
    "chr1", 30,     40
  )
  res <- bed_cluster(x, max_dist = 10)
  expect_equal(res$.id, c(1L, 1L, 1L))

  x <- tibble::tribble(
    ~chrom, ~start, ~end,
    "chr1", 1,      3,
    "chr1", 2,      4,
    "chr1", 5,      10,
    "chr1", 12,     14
  )
  res <- bed_cluster(x, max_dist = 0)
  expect_equal(res$.id, c(1L, 1L, 2L, 3L))

  res <- bed_cluster(x, max_dist = 1)
  expect_equal(res$.id, c(1L, 1L, 1L, 2L))
})

test_that("check for additional errors, related to issue #401 @kcamnairb ", {
  x <- tibble::tribble(
    ~chrom, ~start, ~end,
    "scaffold_66", 27262, 70396,
    "scaffold_66", 66594, 67647,
    "scaffold_66", 82218, 85280,
    "scaffold_66", 85878, 87553,
    "scaffold_66", 87831, 89885,
    "scaffold_66", 90498, 91996
  )
  res <- bed_cluster(x, max_dist = 20000)
  expect_true(all(res$.id == 1))

  x <- tibble::tribble(
    ~chrom, ~start, ~end,
    "chr1", 1, 10,
    "chr1", 1, 11,
    "chr1", 1, 9,
    "chr1", 1, 9,
    "chr1", 3, 4,
    "chr1", 3, 12,
    "chr1", 10, 14,
    "chr1", 100, 200,
    "chr2", 1, 10,
    "chr2", 1, 11,
    "chr2", 1, 9,
    "chr2", 1, 9,
    "chr2", 3, 4,
    "chr2", 3, 12,
    "chr2", 10, 14,
    "chr2", 100, 200
  )

  res <- bed_cluster(x, max_dist = 0)
  expect_true(max(res$.id) == 4)

  res <- bed_cluster(x, max_dist = 100)
  expect_equal(res$.id, c(rep(1, 8), rep(2, 8)))

  res <- bed_cluster(x, max_dist = -3)
  expect_true(max(res$.id) == 6)
})

Try the valr package in your browser

Any scripts or data that you put into this service are public.

valr documentation built on Sept. 19, 2023, 1:07 a.m.